Skip to content

DO NOT MERGE: Perform alias analysis in C/C++ dataflow #177

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Draft
wants to merge 14 commits into
base: main
Choose a base branch
from
Draft
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
344 changes: 344 additions & 0 deletions cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/AliasedFlow.qll
Original file line number Diff line number Diff line change
@@ -0,0 +1,344 @@
private import cpp as Cpp
private import SsaInternals as Ssa
private import codeql.ssa.Ssa as SsaImplCommon
private import DataFlowPrivate
private import DataFlowUtil as Public
private import DataFlowNodes as Nodes
private import semmle.code.cpp.ir.IR
private import semmle.code.cpp.ir.internal.IRCppLanguage

private module SsaInput implements SsaImplCommon::InputSig<Cpp::Location> {
import SsaInternalsCommon::InputSigCommon

class SourceVariable = Ssa::SourceVariable;

/**
 * Holds if `n` is related by `nodeHasInstruction1` to some
 * `VariableAddressInstruction` at `indirectionIndex`, or if `n` can be
 * reached from such a node by one or more `simpleLocalFlowStep1` steps.
 *
 * The indirection index is carried along unchanged on every step.
 */
private predicate fwd(Node1Impl n, int indirectionIndex) {
  // Recursive case: extend an already-reachable node by one local flow step.
  exists(Node1Impl pred |
    simpleLocalFlowStep1(pred, n, _) and
    fwd(pred, indirectionIndex)
  )
  or
  // Base case: `n` is the node of a variable address.
  nodeHasInstruction1(n, any(VariableAddressInstruction vai), indirectionIndex)
}

/**
 * Holds if `n` satisfies `fwd(n, indirectionIndex)` and either
 * - `n` is the node for the destination address operand of some
 *   `StoreInstruction` at indirection `indirectionIndex - k` (with
 *   `indirectionIndex > k`), or
 * - `n` has a `simpleLocalFlowStep1` step to a node for which `revStore`
 *   holds with the same `indirectionIndex` and `k`.
 *
 * In other words, this prunes `fwd` to the nodes that lie on a path to a
 * store's destination address.
 *
 * NOTE(review): `k` looks like the number of indirections separating the
 * tracked variable address from the address being stored to; it is only
 * constrained through `indirectionIndex > k` and `indirectionIndex - k` in
 * the base case - confirm the intended range and that it is bound there.
 */
private predicate revStore(Node1Impl n, int indirectionIndex, int k) {
fwd(pragma[only_bind_into](n), pragma[only_bind_into](indirectionIndex)) and
(
indirectionIndex > k and
nodeHasOperand1(n, any(StoreInstruction store).getDestinationAddressOperand(),
indirectionIndex - k)
or
exists(Node1Impl n1 |
revStore(n1, pragma[only_bind_into](indirectionIndex), k) and
simpleLocalFlowStep1(n, n1, _)
)
)
}

/**
 * A `(node, indirectionIndex, k)` triple for which `revStore` holds.
 *
 * These triples are the node type of the graph that `flowStoreStep`,
 * `storeSource` and `storeSink` are defined over.
 */
private newtype TStoreNode1Impl =
MkStoreNode1Impl(Node1Impl n, int indirectionIndex, int k) { revStore(n, indirectionIndex, k) }

/**
 * Holds if `node1` and `node2` wrap two `Node1Impl`s that are connected by a
 * single `simpleLocalFlowStep1` step and that share the same
 * `indirectionIndex` and `k` components.
 *
 * Because both endpoints are `TStoreNode1Impl` values, both underlying nodes
 * satisfy `revStore`.
 */
private predicate flowStoreStep(TStoreNode1Impl node1, TStoreNode1Impl node2) {
  exists(Node1Impl pred, Node1Impl succ, int ind, int depth |
    simpleLocalFlowStep1(pred, succ, _) and
    node1 = MkStoreNode1Impl(pred, pragma[only_bind_into](ind), pragma[only_bind_into](depth)) and
    node2 = MkStoreNode1Impl(succ, pragma[only_bind_into](ind), pragma[only_bind_into](depth))
  )
}

/**
 * Holds if `sink` wraps the node for the destination address operand of some
 * `StoreInstruction`, taken at indirection `indirectionIndex - k`.
 */
private predicate storeSink(TStoreNode1Impl sink) {
  exists(Node1Impl dest, int ind, int depth |
    // The operand node is looked up at `ind - depth`, which is the same
    // offset that the base case of `revStore` uses.
    nodeHasOperand1(dest, any(StoreInstruction store).getDestinationAddressOperand(),
      ind - depth) and
    sink = MkStoreNode1Impl(dest, ind, depth)
  )
}

/**
 * Holds if `source` wraps a node that `nodeHasInstruction1` relates to some
 * `VariableAddressInstruction` at the triple's `indirectionIndex`. The `k`
 * component of the triple is not constrained here.
 */
private predicate storeSource(TStoreNode1Impl source) {
  exists(Node1Impl addrNode, int ind |
    nodeHasInstruction1(addrNode, any(VariableAddressInstruction vai), ind) and
    source = MkStoreNode1Impl(addrNode, ind, _)
  )
}

/**
 * The result of applying the `doublyBoundedFastTC` higher-order predicate to
 * `flowStoreStep`, with `storeSource` and `storeSink` as its two bounds.
 *
 * NOTE(review): presumably this relates a source to a sink whenever the sink
 * is reachable in one or more `flowStoreStep` steps; the zero-step case is
 * added separately by `flowStoreStepTCImpl` - confirm.
 */
private predicate flowStorePlusImpl(TStoreNode1Impl node1, TStoreNode1Impl node2) =
doublyBoundedFastTC(flowStoreStep/2, storeSource/1, storeSink/1)(node1, node2)

/**
 * Holds if `node1` is a `storeSource`, `node2` is a `storeSink`, and either
 * `flowStorePlusImpl(node1, node2)` holds or the two are the same node.
 */
private predicate flowStoreStepTCImpl(TStoreNode1Impl node1, TStoreNode1Impl node2) {
  // A source that reaches a sink through `flowStorePlusImpl`.
  storeSource(node1) and
  storeSink(node2) and
  flowStorePlusImpl(node1, node2)
  or
  // A node that is simultaneously a source and a sink.
  storeSource(node1) and
  storeSink(node1) and
  node2 = node1
}

/**
 * Holds if `flowStoreStepTCImpl` relates the triples
 * `(n1, indirectionIndex, k)` and `(n2, indirectionIndex, k)`.
 *
 * This unwraps the `TStoreNode1Impl` values so that callers can work with
 * `Node1Impl`s directly.
 */
private predicate flowStoreStepTC(Node1Impl n1, Node1Impl n2, int indirectionIndex, int k) {
  exists(TStoreNode1Impl src, TStoreNode1Impl snk |
    flowStoreStepTCImpl(src, snk) and
    src =
      MkStoreNode1Impl(n1, pragma[only_bind_into](indirectionIndex), pragma[only_bind_into](k)) and
    snk =
      MkStoreNode1Impl(n2, pragma[only_bind_into](indirectionIndex), pragma[only_bind_into](k))
  )
}

/**
 * Holds if `n` satisfies `fwd(n, indirectionIndex)` and either
 * - `nodeHasOperand1(n, _, indirectionIndex)` holds, i.e. `n` is the node of
 *   some operand at `indirectionIndex`, or
 * - `n` has a `simpleLocalFlowStep1` step to a node for which `revLoad` holds
 *   with the same `indirectionIndex`.
 *
 * In other words, this prunes `fwd` to the nodes that lie on a path to an
 * operand node.
 */
private predicate revLoad(Node1Impl n, int indirectionIndex) {
fwd(pragma[only_bind_into](n), pragma[only_bind_into](indirectionIndex)) and
(
nodeHasOperand1(n, _, indirectionIndex)
or
exists(Node1Impl n1 |
revLoad(n1, pragma[only_bind_into](indirectionIndex)) and
simpleLocalFlowStep1(n, n1, _)
)
)
}

/**
 * A `(node, indirectionIndex)` pair for which `revLoad` holds.
 *
 * These pairs are the node type of the graph that `flowLoadStep`,
 * `loadSource` and `loadSink` are defined over.
 */
private newtype TLoadNode1Impl =
MkLoadNode1Impl(Node1Impl n, int indirectionIndex) { revLoad(n, indirectionIndex) }

/**
 * Holds if `node1` and `node2` wrap two `Node1Impl`s that are connected by a
 * single `simpleLocalFlowStep1` step and that share the same
 * `indirectionIndex` component.
 */
private predicate flowLoadStep(TLoadNode1Impl node1, TLoadNode1Impl node2) {
  exists(Node1Impl pred, Node1Impl succ, int ind |
    simpleLocalFlowStep1(pred, succ, _) and
    node1 = MkLoadNode1Impl(pred, pragma[only_bind_into](ind)) and
    node2 = MkLoadNode1Impl(succ, pragma[only_bind_into](ind))
  )
}

/**
 * Holds if `sink` wraps a node that `nodeHasOperand1` relates to some operand
 * at the pair's `indirectionIndex`.
 */
private predicate loadSink(TLoadNode1Impl sink) {
  exists(Node1Impl operandNode, int ind |
    nodeHasOperand1(operandNode, _, ind) and
    sink = MkLoadNode1Impl(operandNode, ind)
  )
}

/**
 * Holds if `source` wraps a node that `nodeHasInstruction1` relates to some
 * `VariableAddressInstruction` at the pair's `indirectionIndex`.
 */
private predicate loadSource(TLoadNode1Impl source) {
  exists(Node1Impl addrNode, int ind |
    nodeHasInstruction1(addrNode, any(VariableAddressInstruction vai), ind) and
    source = MkLoadNode1Impl(addrNode, ind)
  )
}

/**
 * The result of applying the `doublyBoundedFastTC` higher-order predicate to
 * `flowLoadStep`, with `loadSource` and `loadSink` as its two bounds.
 *
 * NOTE(review): presumably this relates a source to a sink whenever the sink
 * is reachable in one or more `flowLoadStep` steps; the zero-step case is
 * added separately by `flowLoadStepTCImpl` - confirm.
 */
private predicate flowLoadPlusImpl(TLoadNode1Impl node1, TLoadNode1Impl node2) =
doublyBoundedFastTC(flowLoadStep/2, loadSource/1, loadSink/1)(node1, node2)

/**
 * Holds if `node1` is a `loadSource`, `node2` is a `loadSink`, and either
 * `flowLoadPlusImpl(node1, node2)` holds or the two are the same node.
 */
private predicate flowLoadStepTCImpl(TLoadNode1Impl node1, TLoadNode1Impl node2) {
  // A source that reaches a sink through `flowLoadPlusImpl`.
  loadSource(node1) and
  loadSink(node2) and
  flowLoadPlusImpl(node1, node2)
  or
  // A node that is simultaneously a source and a sink.
  loadSource(node1) and
  loadSink(node1) and
  node2 = node1
}

/**
 * Holds if `flowLoadStepTCImpl` relates the pairs `(n1, indirectionIndex)`
 * and `(n2, indirectionIndex)`.
 *
 * This unwraps the `TLoadNode1Impl` values so that callers can work with
 * `Node1Impl`s directly.
 */
private predicate flowLoadStepTC(Node1Impl n1, Node1Impl n2, int indirectionIndex) {
  exists(TLoadNode1Impl src, TLoadNode1Impl snk |
    flowLoadStepTCImpl(src, snk) and
    src = MkLoadNode1Impl(n1, pragma[only_bind_into](indirectionIndex)) and
    snk = MkLoadNode1Impl(n2, pragma[only_bind_into](indirectionIndex))
  )
}

/**
 * Holds if the node `store`, located at index `i` in `bb`, writes to `sv`
 * through an alias.
 *
 * More precisely, this holds when:
 * - `flowStoreStepTC` connects the node of a `VariableAddressInstruction` for
 *   `sv`'s IR variable (at indirection `index`) to the node of the
 *   destination address operand of a `StoreInstruction` (at indirection
 *   `index - k`),
 * - `store` is the node of that `StoreInstruction` at indirection
 *   `index - k`,
 * - `sv.getIndirection()` equals `index` plus the minimum number of
 *   indirections for the result type of the store's destination address, and
 * - there is an `Ssa::DefImpl` whose node is `store` and which is at index
 *   `i` in `bb`.
 *
 * NOTE(review): `certain` is unconditionally `true` here, so every aliased
 * write is reported as overwriting the entire allocation. Confirm whether
 * this should instead follow the certainty of `def`.
 */
additional predicate variableWrite(
BasicBlock bb, int i, SourceVariable sv, boolean certain, Node1Impl store
) {
certain = true and
exists(
Node1Impl vai, VariableAddressInstruction vaiInstr, StoreInstruction storeInstr, int index,
Node1Impl dest, int k, Ssa::DefImpl def, int lower
|
flowStoreStepTC(vai, dest, index, k) and
nodeHasInstruction1(vai, vaiInstr, index) and
nodeHasOperand1(dest, storeInstr.getDestinationAddressOperand(), index - k) and
sv.getIRVariable() = vaiInstr.getIRVariable() and
lower =
pragma[only_bind_out](getMinIndirectionsForType(storeInstr
.getDestinationAddress()
.getResultType())) and
sv.getIndirection() = index + lower and
nodeHasInstruction1(store, storeInstr, index - k) and
def.getNode() = store and
def.hasIndexInBlock(bb, i)
)
}

/**
 * Holds if there is a write to `sv` at index `i` in `bb`, with certainty
 * `certain`.
 *
 * This is the five-argument `variableWrite` with the writing node projected
 * away.
 */
predicate variableWrite(BasicBlock bb, int i, SourceVariable sv, boolean certain) {
variableWrite(bb, i, sv, certain, _)
}

/**
 * Holds if the node `load`, located at index `i` in `bb`, reads `sv` through
 * an alias.
 *
 * More precisely, this holds when `flowLoadStepTC` connects the node of a
 * `VariableAddressInstruction` for `sv`'s IR variable to `load` at
 * indirection `sv.getIndirection()`, and there is an `Ssa::UseImpl` whose
 * node is `load` and which is at index `i` in `bb`.
 *
 * `certain` is always `true`.
 */
additional predicate variableRead(
  BasicBlock bb, int i, SourceVariable sv, boolean certain, Node1Impl load
) {
  exists(Node1Impl addrNode, VariableAddressInstruction addr, int ind, Ssa::UseImpl use |
    nodeHasInstruction1(addrNode, addr, ind) and
    flowLoadStepTC(addrNode, load, ind) and
    sv.getIRVariable() = addr.getIRVariable() and
    sv.getIndirection() = ind and
    use.getNode() = load and
    use.hasIndexInBlock(bb, i)
  ) and
  certain = true
}

/**
 * Holds if there is a read of `sv` at index `i` in `bb`, with certainty
 * `certain`.
 *
 * This is the five-argument `variableRead` with the reading node projected
 * away.
 */
predicate variableRead(BasicBlock bb, int i, SourceVariable sv, boolean certain) {
variableRead(bb, i, sv, certain, _)
}
}

private module AliasedSsa = SsaImplCommon::Make<Cpp::Location, SsaInput>;

/**
 * The nodes of the aliased flow graph:
 * - `TNode1`, which wraps a `Node1Impl`.
 * - `TPhiNode`, which wraps a definition of the `AliasedSsa` construction
 *   that is either a `PhiNode` or a `PhiReadNode`.
 */
private newtype TAliasedNode =
TNode1(Node1Impl n) or
TPhiNode(AliasedSsa::DefinitionExt phi) {
phi instanceof AliasedSsa::PhiNode or
phi instanceof AliasedSsa::PhiReadNode
}

/**
 * A node in the aliased flow graph. The subclasses visible in this file are
 * `Node1` and `PhiNode`.
 */
abstract private class AliasedNode extends TAliasedNode {
/** Gets a textual representation of this node. */
abstract string toString();

/**
 * Gets the instruction corresponding to this node, if any. The default
 * implementation has no result.
 */
Instruction asInstruction() { none() }

/** Gets the function associated with this node. */
abstract Cpp::Function getFunction();

/** Holds if this node is a glvalue. */
abstract predicate isGLValue();

/** Gets the type of this node. */
abstract Cpp::Type getType();

/** Gets the location of this node. */
abstract Cpp::Location getLocation();
}

class AliasedNodeImpl = AliasedNode;

/**
 * An aliased flow node that wraps a `Node1Impl`. Every member predicate
 * delegates to the wrapped node.
 */
private class Node1 extends AliasedNode, TNode1 {
  Node1Impl n;

  Node1() { this = TNode1(n) }

  /** Gets the wrapped `Node1Impl`. */
  Node1Impl getImpl() { result = n }

  final override Cpp::Location getLocation() { result = this.getImpl().getLocation() }

  final override Cpp::Type getType() { result = this.getImpl().getType() }

  final override predicate isGLValue() { this.getImpl().isGLValue() }

  final override Cpp::Function getFunction() { result = this.getImpl().getFunction() }

  final override Instruction asInstruction() { result = this.getImpl().asInstruction() }

  final override string toString() { result = this.getImpl().toString() }
}

/**
 * An aliased flow node that wraps a phi (or phi-read) definition of the
 * `AliasedSsa` construction.
 */
private class PhiNode extends AliasedNode, TPhiNode {
  AliasedSsa::DefinitionExt phi;

  PhiNode() { this = TPhiNode(phi) }

  /** Gets the wrapped SSA definition. */
  AliasedSsa::DefinitionExt getPhi() { result = phi }

  final override Cpp::Location getLocation() { result = this.getPhi().getLocation() }

  /** Gets the type of the source variable that the phi node is for. */
  final override Cpp::Type getType() { result = this.getPhi().getSourceVariable().getType() }

  /** Holds if the source variable that the phi node is for is a glvalue. */
  final override predicate isGLValue() { this.getPhi().getSourceVariable().isGLValue() }

  /** Gets the function enclosing the basic block of the phi node. */
  final override Cpp::Function getFunction() {
    result = this.getPhi().getBasicBlock().getEnclosingFunction()
  }

  final override string toString() { result = this.getPhi().toString() }
}

class AliasedPhiNodeImpl = PhiNode;

/**
 * Holds if `node2` wraps a node that reads `sv` (per
 * `SsaInput::variableRead`) at a position that `AliasedSsa::adjacentDefReadExt`
 * reports as adjacent to the reference to `sv` at index `i1` in `bb1`.
 */
private predicate step(SsaInput::SourceVariable sv, IRBlock bb1, int i1, AliasedNode node2) {
  exists(Node1Impl readNode, IRBlock readBlock, int readIndex |
    node2 = TNode1(readNode) and
    SsaInput::variableRead(readBlock, readIndex, sv, _, readNode) and
    AliasedSsa::adjacentDefReadExt(_, sv, bb1, i1, readBlock, readIndex)
  )
}

/**
 * Holds if `node1` accesses or defines `sv` at index `i` in `bb`, that is:
 * - `node1` is a phi node whose definition `definesAt(sv, bb, i, _)`, or
 * - `node1` wraps a node that reads or writes `sv` at that position, per
 *   `SsaInput::variableRead` or `SsaInput::variableWrite`.
 */
private predicate access(SsaInput::SourceVariable sv, IRBlock bb, int i, AliasedNode node1) {
  node1.(PhiNode).getPhi().definesAt(sv, bb, i, _)
  or
  exists(Node1Impl accessNode |
    node1 = TNode1(accessNode) and
    (
      SsaInput::variableRead(bb, i, sv, _, accessNode)
      or
      SsaInput::variableWrite(bb, i, sv, _, accessNode)
    )
  )
}

/**
 * Holds if `AliasedSsa::lastRefRedefExt` relates the reference to `sv` at
 * index `i` in `bb` to the definition wrapped by the phi node `node`.
 */
private predicate stepToPhi(SsaInput::SourceVariable sv, IRBlock bb, int i, PhiNode node) {
  AliasedSsa::lastRefRedefExt(_, sv, bb, i, node.getPhi())
}

/**
 * Holds if there is aliased flow from the public node `node1`, which must
 * wrap a `Node1Impl`, into the aliased phi node `node2`.
 */
predicate into(Public::Node node1, TPhiNode node2) {
  exists(Node1Impl pred |
    aliasedFlow(TNode1(pred), node2) and
    node1 = Nodes::TNode1(pred)
  )
}

/**
 * Holds if there is aliased flow from `node1` to `node2`, where both public
 * nodes wrap a `Node1Impl`.
 */
predicate step1(Public::Node node1, Public::Node node2) {
  exists(Node1Impl pred, Node1Impl succ |
    aliasedFlow(TNode1(pred), TNode1(succ)) and
    node1 = Nodes::TNode1(pred) and
    node2 = Nodes::TNode1(succ)
  )
}

/** Holds if there is aliased flow from the phi node `node1` to the phi node `node2`. */
predicate step2(TPhiNode node1, TPhiNode node2) { aliasedFlow(node1, node2) }

/**
 * Holds if there is aliased flow out of the aliased phi node `node1` to the
 * public node `node2`, which must wrap a `Node1Impl`.
 */
predicate out(TPhiNode node1, Public::Node node2) {
  exists(Node1Impl succ |
    aliasedFlow(node1, TNode1(succ)) and
    node2 = Nodes::TNode1(succ)
  )
}

/**
 * Holds if there is a single step of aliased flow from `node1` to `node2`.
 *
 * `node1` must access or define some source variable at a position (see
 * `access`), and `node2` must be either a read reached from that position
 * (see `step`) or a phi node reached from it (see `stepToPhi`). Steps from a
 * node to itself are excluded.
 */
private predicate aliasedFlow(AliasedNode node1, AliasedNode node2) {
  exists(IRBlock bb, int i, SsaInput::SourceVariable sv |
    access(sv, bb, i, node1) and
    (
      step(sv, bb, i, node2)
      or
      stepToPhi(sv, bb, i, node2)
    )
  ) and
  node1 != node2
}
38 changes: 38 additions & 0 deletions cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/DataFlowNodes.qll
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
private import cpp
private import DataFlowPrivate
private import semmle.code.cpp.ir.IR
private import DataFlowImplCommon as DataFlowImplCommon
private import SsaInternals as Ssa
private import semmle.code.cpp.dataflow.internal.FlowSummaryImpl as FlowSummaryImpl
private import AliasedFlow

/**
 * The IR dataflow graph consists of the following nodes:
 * - `TNode1`, which wraps a `Node1Impl`. These inject most instructions and
 *   operands directly into the dataflow graph, as well as indirections of
 *   these instructions and operands.
 * - `TGlobalLikeVariableNode`, which is used to model flow through global
 *   variables. There is one node for each indirection index between the
 *   minimum and maximum number of indirections of the variable's unspecified
 *   type.
 * - `TSsaIteratorNode`, which wraps an `IteratorFlow::IteratorFlowNode`.
 * - `TBodyLessParameterNodeImpl`, which represents a parameter (and its
 *   indirections) that is not a catch-block parameter and that no
 *   `InitializeParameterInstruction` refers to.
 * - `TAliasedPhiNode`, which wraps an `AliasedPhiNodeImpl` from
 *   `AliasedFlow`.
 * - `TFlowSummaryNode`, which wraps a
 *   `FlowSummaryImpl::Private::SummaryNode`.
 *
 * NOTE(review): post-update nodes and the phi nodes of the main SSA
 * construction have no branch of their own in this type; presumably they are
 * represented inside `Node1Impl` - confirm against `DataFlowPrivate.qll`.
 */
cached
newtype TIRDataFlowNode =
TNode1(Node1Impl node) { DataFlowImplCommon::forceCachingInSameStage() } or
TGlobalLikeVariableNode(GlobalLikeVariable var, int indirectionIndex) {
indirectionIndex =
[getMinIndirectionsForType(var.getUnspecifiedType()) .. Ssa::getMaxIndirectionsForType(var.getUnspecifiedType())]
} or
TSsaIteratorNode(IteratorFlow::IteratorFlowNode n) or
TBodyLessParameterNodeImpl(Parameter p, int indirectionIndex) {
// Rule out parameters of catch blocks.
not exists(p.getCatchBlock()) and
// We subtract one because `getMaxIndirectionsForType` returns the maximum
// indirection for a glvalue of a given type, and this doesn't apply to
// parameters.
indirectionIndex = [0 .. Ssa::getMaxIndirectionsForType(p.getUnspecifiedType()) - 1] and
not any(InitializeParameterInstruction init).getParameter() = p
} or
TAliasedPhiNode(AliasedPhiNodeImpl n) or
TFlowSummaryNode(FlowSummaryImpl::Private::SummaryNode sn)
910 changes: 871 additions & 39 deletions cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/DataFlowPrivate.qll

Large diffs are not rendered by default.

786 changes: 125 additions & 661 deletions cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/DataFlowUtil.qll

Large diffs are not rendered by default.

70 changes: 47 additions & 23 deletions cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/ModelUtil.qll
Original file line number Diff line number Diff line change
@@ -7,12 +7,18 @@ private import semmle.code.cpp.ir.IR
private import semmle.code.cpp.ir.dataflow.DataFlow
private import DataFlowUtil
private import DataFlowPrivate
private import DataFlowNodes
private import SsaInternals as Ssa

private IndirectReturnOutNode0 getIndirectReturnOutNode0(CallInstruction call, int d) {
result.getCallInstruction() = call and
result.getIndirectionIndex() = d
}

/**
* Gets the instruction that goes into `input` for `call`.
*/
DataFlow::Node callInput(CallInstruction call, FunctionInput input) {
Node1Impl callInput0(CallInstruction call, FunctionInput input) {
// An argument or qualifier
exists(int index |
result.asOperand() = call.getArgumentOperand(index) and
@@ -21,79 +27,97 @@ DataFlow::Node callInput(CallInstruction call, FunctionInput input) {
or
// A value pointed to by an argument or qualifier
exists(int index, int indirectionIndex |
hasOperandAndIndex(result, call.getArgumentOperand(index), indirectionIndex) and
hasOperandAndIndex1(result, call.getArgumentOperand(index), indirectionIndex) and
input.isParameterDerefOrQualifierObject(index, indirectionIndex)
)
or
exists(int ind |
result = getIndirectReturnOutNode(call, ind) and
result = getIndirectReturnOutNode0(call, ind) and
input.isReturnValueDeref(ind)
)
}

DataFlow::Node callInput(CallInstruction call, FunctionInput input) {
result = TNode1(callInput0(call, input))
}

/**
* Gets the node that represents the output of `call` with kind `output` at
* indirection index `indirectionIndex`.
*/
private Node callOutputWithIndirectionIndex(
private Node1Impl callOutputWithIndirectionIndex0(
CallInstruction call, FunctionOutput output, int indirectionIndex
) {
// The return value
simpleOutNode(result, call) and
simpleOutNode1(result, call) and
output.isReturnValue() and
indirectionIndex = 0
or
// The side effect of a call on the value pointed to by an argument or qualifier
exists(int index |
result.(IndirectArgumentOutNode).getArgumentIndex() = index and
result.(IndirectArgumentOutNode).getIndirectionIndex() = indirectionIndex - 1 and
result.(IndirectArgumentOutNode).getCallInstruction() = call and
result.(IndirectArgumentOutNode0).getArgumentIndex() = index and
result.(IndirectArgumentOutNode0).getIndirectionIndex() = indirectionIndex - 1 and
result.(IndirectArgumentOutNode0).getCallInstruction() = call and
output.isParameterDerefOrQualifierObject(index, indirectionIndex - 1)
)
or
result = getIndirectReturnOutNode(call, indirectionIndex) and
result = getIndirectReturnOutNode0(call, indirectionIndex) and
output.isReturnValueDeref(indirectionIndex)
}

/**
* Gets the instruction that holds the `output` for `call`.
*/
Node1Impl callOutput0(CallInstruction call, FunctionOutput output) {
result = callOutputWithIndirectionIndex0(call, output, _)
}

/**
* Gets the instruction that holds the `output` for `call`.
*/
Node callOutput(CallInstruction call, FunctionOutput output) {
result = callOutputWithIndirectionIndex(call, output, _)
result = TNode1(callOutput0(call, output))
}

DataFlow::Node callInput(CallInstruction call, FunctionInput input, int d) {
exists(DataFlow::Node n | n = callInput(call, input) and d > 0 |
Node1Impl callInput0(CallInstruction call, FunctionInput input, int d) {
exists(Node1Impl n | n = callInput0(call, input) and d > 0 |
// An argument or qualifier
hasOperandAndIndex(result, n.asOperand(), d)
hasOperandAndIndex1(result, n.asOperand(), d)
or
exists(Operand operand, int indirectionIndex |
// A value pointed to by an argument or qualifier
hasOperandAndIndex(n, operand, indirectionIndex) and
hasOperandAndIndex(result, operand, indirectionIndex + d)
hasOperandAndIndex1(n, operand, indirectionIndex) and
hasOperandAndIndex1(result, operand, indirectionIndex + d)
)
)
}

private IndirectReturnOutNode getIndirectReturnOutNode(CallInstruction call, int d) {
result.getCallInstruction() = call and
result.getIndirectionIndex() = d
DataFlow::Node callInput(CallInstruction call, FunctionInput input, int d) {
result = TNode1(callInput0(call, input, d))
}

/**
* Gets the instruction that holds the `output` for `call`.
*/
bindingset[d]
Node callOutput(CallInstruction call, FunctionOutput output, int d) {
exists(DataFlow::Node n, int indirectionIndex |
n = callOutputWithIndirectionIndex(call, output, indirectionIndex) and d > 0
Node1Impl callOutput0(CallInstruction call, FunctionOutput output, int d) {
exists(Node1Impl n, int indirectionIndex |
n = callOutputWithIndirectionIndex0(call, output, indirectionIndex) and d > 0
|
// The return value
result = callOutputWithIndirectionIndex(call, output, indirectionIndex + d)
result = callOutputWithIndirectionIndex0(call, output, indirectionIndex + d)
or
// If there isn't an indirect out node for the call with indirection `d` then
// we conflate this with the underlying `CallInstruction`.
not exists(getIndirectReturnOutNode(call, indirectionIndex + d)) and
not exists(getIndirectReturnOutNode0(call, indirectionIndex + d)) and
n = result
)
}

/**
* Gets the instruction that holds the `output` for `call`.
*/
bindingset[d]
Node callOutput(CallInstruction call, FunctionOutput output, int d) {
result = TNode1(callOutput0(call, output, d))
}
23 changes: 23 additions & 0 deletions cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/Node0ToString.qll
Original file line number Diff line number Diff line change
@@ -11,8 +11,31 @@
private import semmle.code.cpp.ir.IR
private import codeql.util.Unit
private import DataFlowUtil
private import DataFlowPrivate
private import DataFlowNodes
import NormalNode0ToString // Change this import to control which version should be used.

private int getNumberOfIndirections(Node n) {
exists(Node1Impl n1 |
n = TNode1(n1) and
result = getNumberOfIndirections0(n1)
)
or
result = n.(VariableNode).getIndirectionIndex()
or
result = n.(PostUpdateNodeImpl).getIndirectionIndex()
or
result = n.(FinalParameterNode).getIndirectionIndex()
or
result = n.(BodyLessParameterNodeImpl).getIndirectionIndex()
}

/**
* Gets the number of stars (i.e., `*`s) needed to produce the `toString`
* output for `n`.
*/
string stars(Node n) { result = repeatStars(getNumberOfIndirections(n)) }

/** An abstract class to control the behavior of `Node.toString`. */
abstract class Node0ToString extends Unit {
/**
148 changes: 89 additions & 59 deletions cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/SsaInternals.qll
Original file line number Diff line number Diff line change
@@ -11,6 +11,7 @@ private import semmle.code.cpp.ir.internal.IRCppLanguage
private import semmle.code.cpp.ir.dataflow.internal.ModelUtil
private import semmle.code.cpp.ir.implementation.raw.internal.TranslatedInitialization
private import DataFlowPrivate
private import DataFlowNodes
import SsaInternalsCommon

private module SourceVariables {
@@ -60,6 +61,18 @@ private module SourceVariables {

/** Gets the location of this variable. */
Location getLocation() { result = this.getBaseVariable().getLocation() }

/**
* Gets the SSA variable that represents `k` indirections of this variable.
* Note that this is the identity for `k = 0`.
*/
SourceVariable getIndirectVariable(int k) {
k >= 0 and
exists(BaseSourceVariable bv, int indirection |
sourceVariableHasBaseAndIndex(this, bv, indirection) and
sourceVariableHasBaseAndIndex(result, bv, indirection + k)
)
}
}
}

@@ -180,6 +193,8 @@ abstract class DefImpl extends TDefImpl {
/** Gets a textual representation of this element. */
abstract string toString();

abstract Node1Impl getNode();

/** Gets the block of this definition or use. */
final IRBlock getBlock() { this.hasIndexInBlock(result, _) }

@@ -245,7 +260,7 @@ abstract class UseImpl extends TUseImpl {
UseImpl() { any() }

/** Gets the node associated with this use. */
abstract Node getNode();
abstract Node1Impl getNode();

/** Gets a textual representation of this element. */
abstract string toString();
@@ -372,6 +387,8 @@ private class DefVariableAddressImpl extends DefAddressImpl {
index = 0
)
}

final override Node1Impl getNode() { none() }
}

private class DefCallAddressImpl extends DefAddressImpl {
@@ -380,6 +397,8 @@ private class DefCallAddressImpl extends DefAddressImpl {
final override predicate hasIndexInBlock(IRBlock block, int index) {
block.getInstruction(index) = v.getCallInstruction()
}

final override Node1Impl getNode() { none() }
}

private class DirectDef extends DefImpl, TDirectDefImpl {
@@ -410,6 +429,12 @@ private class DirectDef extends DefImpl, TDirectDefImpl {
override Node0Impl getValue() { isDef(_, result, address, _, _, _) }

override predicate isCertain() { isDef(true, _, address, _, _, indirectionIndex) }

final override Node1Impl getNode() {
nodeHasOperand1(result, this.getValue().asOperand(), this.getIndirectionIndex())
or
nodeHasInstruction1(result, this.getValue().asInstruction(), this.getIndirectionIndex())
}
}

private class DirectUseImpl extends UseImpl, TDirectUseImpl {
@@ -453,15 +478,7 @@ private class DirectUseImpl extends UseImpl, TDirectUseImpl {

override predicate isCertain() { isUse(true, operand, _, _, indirectionIndex) }

override Node getNode() { nodeHasOperand(result, operand, indirectionIndex) }
}

pragma[nomagic]
private predicate finalParameterNodeHasParameterAndIndex(
FinalParameterNode n, Parameter p, int indirectionIndex
) {
n.getParameter() = p and
n.getIndirectionIndex() = indirectionIndex
override Node1Impl getNode() { nodeHasOperand1(result, operand, indirectionIndex) }
}

class FinalParameterUse extends UseImpl, TFinalParameterUse {
@@ -475,7 +492,7 @@ class FinalParameterUse extends UseImpl, TFinalParameterUse {

int getArgumentIndex() { result = p.getIndex() }

override Node getNode() { finalParameterNodeHasParameterAndIndex(result, p, indirectionIndex) }
override FinalParameterNode0 getNode() { result.getUse() = this }

override int getIndirection() { result = indirectionIndex + 1 }

@@ -510,6 +527,8 @@ class FinalParameterUse extends UseImpl, TFinalParameterUse {
result instanceof UnknownDefaultLocation
}

Type getType() { result = getTypeImpl(p.getUnderlyingType(), indirectionIndex) }

override BaseIRVariable getBaseSourceVariable() { result.getIRVariable().getAst() = p }
}

@@ -572,7 +591,7 @@ class GlobalUse extends UseImpl, TGlobalUse {

override string toString() { result = "Use of " + global }

override FinalGlobalValue getNode() { result.getGlobalUse() = this }
override FinalGlobalValue0 getNode() { result.getGlobalUse() = this }

override int getIndirection() { isGlobalUse(global, f, result, indirectionIndex) }

@@ -667,6 +686,8 @@ class GlobalDefImpl extends DefImpl, TGlobalDefImpl {
override string toString() { result = "Def of " + this.getSourceVariable() }

override Location getLocation() { result = f.getLocation() }

final override InitialGlobalValue0 getNode() { getDefImpl(result.getDef()) = this }
}

/**
@@ -677,37 +698,27 @@ predicate adjacentDefRead(IRBlock bb1, int i1, SourceVariable sv, IRBlock bb2, i
adjacentDefReadExt(_, sv, bb1, i1, bb2, i2)
}

predicate useToNode(IRBlock bb, int i, SourceVariable sv, Node nodeTo) {
predicate useToNode(IRBlock bb, int i, SourceVariable sv, Node1Impl nodeTo) {
useToNode0(bb, i, sv, nodeTo)
}

predicate useToNode0(IRBlock bb, int i, SourceVariable sv, Node1Impl nodeTo) {
exists(UseImpl use |
use.hasIndexInBlock(bb, i, sv) and
nodeTo = use.getNode()
)
}

pragma[noinline]
predicate outNodeHasAddressAndIndex(
IndirectArgumentOutNode out, Operand address, int indirectionIndex
) {
out.getAddressOperand() = address and
out.getIndirectionIndex() = indirectionIndex
}

/**
* INTERNAL: Do not use.
*
* Holds if `node` is the node that corresponds to the definition of `def`.
*/
predicate defToNode(
Node node, DefinitionExt def, SourceVariable sv, IRBlock bb, int i, boolean uncertain
Node1Impl node, DefinitionExt def, SourceVariable sv, IRBlock bb, int i, boolean uncertain
) {
def.definesAt(sv, bb, i, _) and
(
nodeHasOperand(node, def.getValue().asOperand(), def.getIndirectionIndex())
or
nodeHasInstruction(node, def.getValue().asInstruction(), def.getIndirectionIndex())
or
node.(InitialGlobalValue).getGlobalDef() = def
) and
def.hasIndexInBlock(bb, i, sv) and
node = def.getNode() and
if def.isCertain() then uncertain = false else uncertain = true
}

@@ -719,7 +730,7 @@ predicate defToNode(
*
* `uncertain` is `true` if this is an uncertain definition.
*/
predicate nodeToDefOrUse(Node node, SourceVariable sv, IRBlock bb, int i, boolean uncertain) {
predicate nodeToDefOrUse(Node1Impl node, SourceVariable sv, IRBlock bb, int i, boolean uncertain) {
defToNode(node, _, sv, bb, i, uncertain)
or
// Node -> Use
@@ -731,14 +742,14 @@ predicate nodeToDefOrUse(Node node, SourceVariable sv, IRBlock bb, int i, boolea
* Perform a single conversion-like step from `nFrom` to `nTo`. This relation
* only holds when there is no use-use relation out of `nTo`.
*/
private predicate indirectConversionFlowStep(Node nFrom, Node nTo) {
private predicate indirectConversionFlowStep(Node1Impl nFrom, Node1Impl nTo) {
not exists(SourceVariable sv, IRBlock bb2, int i2 |
useToNode(bb2, i2, sv, nTo) and
adjacentDefRead(bb2, i2, sv, _, _)
) and
exists(Operand op1, Operand op2, int indirectionIndex, Instruction instr |
hasOperandAndIndex(nFrom, op1, pragma[only_bind_into](indirectionIndex)) and
hasOperandAndIndex(nTo, op2, pragma[only_bind_into](indirectionIndex)) and
hasOperandAndIndex1(nFrom, op1, pragma[only_bind_into](indirectionIndex)) and
hasOperandAndIndex1(nTo, op2, pragma[only_bind_into](indirectionIndex)) and
instr = op2.getDef() and
conversionFlow(op1, instr, _, _)
)
@@ -748,7 +759,15 @@ private predicate indirectConversionFlowStep(Node nFrom, Node nTo) {
* Holds if `node` is a phi input node that should receive flow from the
* definition to (or use of) `sv` at `(bb1, i1)`.
*/
private predicate phiToNode(SsaPhiInputNode node, SourceVariable sv, IRBlock bb1, int i1) {
private predicate phiToNode(SsaPhiInputNode0 node, SourceVariable sv, IRBlock bb1, int i1) {
phiToNode0(node, sv, bb1, i1)
}

/**
* Holds if `node` is a phi input node that should receive flow from the
* definition to (or use of) `sv` at `(bb1, i1)`.
*/
private predicate phiToNode0(SsaPhiInputNode0 node, SourceVariable sv, IRBlock bb1, int i1) {
exists(PhiNode phi, IRBlock input |
phi.hasInputFromBlock(_, sv, bb1, i1, input) and
node.getPhiNode() = phi and
@@ -765,7 +784,7 @@ private predicate phiToNode(SsaPhiInputNode node, SourceVariable sv, IRBlock bb1
* is _not_ guaranteed to overwrite the entire allocation.
*/
private predicate ssaFlowImpl(
IRBlock bb1, int i1, SourceVariable sv, Node nodeFrom, Node nodeTo, boolean uncertain
IRBlock bb1, int i1, SourceVariable sv, Node1Impl nodeFrom, Node1Impl nodeTo, boolean uncertain
) {
nodeToDefOrUse(nodeFrom, sv, bb1, i1, uncertain) and
(
@@ -780,7 +799,7 @@ private predicate ssaFlowImpl(
}

/** Gets a node that represents the prior definition of `node`. */
private Node getAPriorDefinition(DefinitionExt next) {
private Node1Impl getAPriorDefinition(DefinitionExt next) {
exists(IRBlock bb, int i, SourceVariable sv |
lastRefRedefExt(_, pragma[only_bind_into](sv), pragma[only_bind_into](bb),
pragma[only_bind_into](i), _, next) and
@@ -810,14 +829,14 @@ private predicate inOut(FIO::FunctionInput input, FIO::FunctionOutput output) {
* first argument of `strcpy`).
* - a conversion that flows to such an input.
*/
private predicate modeledFlowBarrier(Node n) {
private predicate modeledFlowBarrier(Node1Impl n) {
exists(
FIO::FunctionInput input, FIO::FunctionOutput output, CallInstruction call,
PartialFlow::PartialFlowFunction partialFlowFunc
|
n = callInput(call, input) and
n = callInput0(call, input) and
inOut(input, output) and
exists(callOutput(call, output)) and
exists(callOutput0(call, output)) and
partialFlowFunc = call.getStaticCallTarget() and
not partialFlowFunc.isPartialWrite(output)
|
@@ -826,17 +845,17 @@ private predicate modeledFlowBarrier(Node n) {
call.getStaticCallTarget().(Taint::TaintFunction).hasTaintFlow(_, output)
)
or
exists(Operand operand, Instruction instr, Node n0, int indirectionIndex |
exists(Operand operand, Instruction instr, Node1Impl n0, int indirectionIndex |
modeledFlowBarrier(n0) and
nodeHasInstruction(n0, instr, indirectionIndex) and
nodeHasInstruction1(n0, instr, indirectionIndex) and
conversionFlow(operand, instr, false, _) and
nodeHasOperand(n, operand, indirectionIndex)
nodeHasOperand1(n, operand, indirectionIndex)
)
}

/** Holds if there is def-use or use-use flow from `nodeFrom` to `nodeTo`. */
predicate ssaFlow(Node nodeFrom, Node nodeTo) {
exists(Node nFrom, boolean uncertain, IRBlock bb, int i, SourceVariable sv |
predicate ssaFlow(Node1Impl nodeFrom, Node1Impl nodeTo) {
exists(Node1Impl nFrom, boolean uncertain, IRBlock bb, int i, SourceVariable sv |
ssaFlowImpl(bb, i, sv, nFrom, nodeTo, uncertain) and
not modeledFlowBarrier(nFrom) and
nodeFrom != nodeTo
@@ -872,24 +891,24 @@ private predicate isArgumentOfCallableOperand(DataFlowCall call, Operand operand
)
}

private predicate isArgumentOfCallable(DataFlowCall call, Node n) {
private predicate isArgumentOfCallable(DataFlowCall call, Node1Impl n) {
isArgumentOfCallableOperand(call, n.asOperand())
or
exists(Operand op |
n.(IndirectOperand).hasOperandAndIndirectionIndex(op, _) and
n.(IndirectOperand1).hasOperandAndIndirectionIndex(op, _) and
isArgumentOfCallableOperand(call, op)
)
or
exists(Instruction instr |
n.(IndirectInstruction).hasInstructionAndIndirectionIndex(instr, _) and
n.(IndirectInstruction1).hasInstructionAndIndirectionIndex(instr, _) and
isArgumentOfCallableInstruction(call, instr)
)
}

/**
* Holds if there is use-use flow from `pun`'s pre-update node to `n`.
*/
private predicate postUpdateNodeToFirstUse(PostUpdateNode pun, Node n) {
private predicate postUpdateNodeToFirstUse(PostUpdateNode0 pun, Node1Impl n) {
// We cannot mark a `PointerArithmeticInstruction` that computes an offset
// based on some SSA
// variable `x` as a use of `x` since this creates taint-flow in the
@@ -905,7 +924,7 @@ private predicate postUpdateNodeToFirstUse(PostUpdateNode pun, Node n) {
// So this predicate recurses back along conversions and `PointerArithmetic`
// instructions to find the first use that provides use-use flow, and
// uses that target as the target of the `nodeFrom`.
exists(Node adjusted, IRBlock bb1, int i1, SourceVariable sv |
exists(Node1Impl adjusted, IRBlock bb1, int i1, SourceVariable sv |
indirectConversionFlowStep*(adjusted, pun.getPreUpdateNode()) and
useToNode(bb1, i1, sv, adjusted)
|
@@ -918,9 +937,9 @@ private predicate postUpdateNodeToFirstUse(PostUpdateNode pun, Node n) {
)
}

private predicate stepUntilNotInCall(DataFlowCall call, Node n1, Node n2) {
private predicate stepUntilNotInCall(DataFlowCall call, Node1Impl n1, Node1Impl n2) {
isArgumentOfCallable(call, n1) and
exists(Node mid | ssaFlowImpl(_, _, _, n1, mid, _) |
exists(Node1Impl mid | ssaFlowImpl(_, _, _, n1, mid, _) |
isArgumentOfCallable(call, mid) and
stepUntilNotInCall(call, mid, n2)
or
@@ -931,7 +950,7 @@ private predicate stepUntilNotInCall(DataFlowCall call, Node n1, Node n2) {

bindingset[n1, n2]
pragma[inline_late]
private predicate isArgumentOfSameCall(DataFlowCall call, Node n1, Node n2) {
private predicate isArgumentOfSameCall(DataFlowCall call, Node1Impl n1, Node1Impl n2) {
isArgumentOfCallable(call, n1) and isArgumentOfCallable(call, n2)
}

@@ -952,8 +971,8 @@ private predicate isArgumentOfSameCall(DataFlowCall call, Node n1, Node n2) {
* similarly we want flow from the second argument of `write_first_argument` to `x`
* on the next line.
*/
predicate postUpdateFlow(PostUpdateNode pun, Node nodeTo) {
exists(Node preUpdate, Node mid |
predicate postUpdateFlow(PostUpdateNode0 pun, Node1Impl nodeTo) {
exists(Node1Impl preUpdate, Node1Impl mid |
preUpdate = pun.getPreUpdateNode() and
postUpdateNodeToFirstUse(pun, mid)
|
@@ -968,7 +987,7 @@ predicate postUpdateFlow(PostUpdateNode pun, Node nodeTo) {
}

/** Holds if `nodeTo` receives flow from the phi node `nodeFrom`. */
predicate fromPhiNode(SsaPhiNode nodeFrom, Node nodeTo) {
predicate fromPhiNode(SsaPhiNode0 nodeFrom, Node1Impl nodeTo) {
exists(PhiNode phi, SourceVariable sv, IRBlock bb1, int i1 |
phi = nodeFrom.getPhiNode() and
phi.definesAt(sv, bb1, i1, _)
@@ -1183,13 +1202,24 @@ class DefinitionExt extends SsaImpl::DefinitionExt {

/** Gets a node that represents a read of this SSA definition. */
pragma[nomagic]
Node getARead() {
Node getARead() { result = TNode1(this.getARead0()) }

pragma[nomagic]
Node1Impl getARead0() {
exists(SourceVariable sv, IRBlock bb, int i | SsaCached::ssaDefReachesReadExt(sv, this, bb, i) |
useToNode(bb, i, sv, result)
useToNode0(bb, i, sv, result)
or
phiToNode(result, sv, bb, i)
phiToNode0(result, sv, bb, i)
)
}

/** INTERNAL: Do not use. */
Node1Impl getNode() { result = getDefImpl(this).getNode() }

/** Holds if this definition is the `index`'th member in `block`. */
predicate hasIndexInBlock(IRBlock block, int index, SourceVariable sv) {
getDefImpl(this).hasIndexInBlock(block, index, sv)
}
}

import SsaCached
Original file line number Diff line number Diff line change
@@ -319,7 +319,7 @@ private module Config implements ProductFlow::StateConfigSig {
// In the above case, this barrier blocks flow from the indirect node
// for `p` to `p[1]`.
exists(Operand operand, PointerAddInstruction add |
node.(IndirectOperand).hasOperandAndIndirectionIndex(operand, _) and
node.(DataFlow::IndirectOperand).hasOperandAndIndirectionIndex(operand, _) and
add.getLeftOperand() = operand and
add.getRight().(ConstantInstruction).getValue() != "0"
)
11 changes: 9 additions & 2 deletions cpp/ql/src/Likely Bugs/Memory Management/AllocaInLoop.ql
Original file line number Diff line number Diff line change
@@ -15,6 +15,7 @@
import cpp
import semmle.code.cpp.rangeanalysis.RangeAnalysisUtils
import semmle.code.cpp.ir.dataflow.DataFlow
import semmle.code.cpp.ir.IR

/** Gets a loop that contains `e`. */
Loop getAnEnclosingLoopOfExpr(Expr e) { result = getAnEnclosingLoopOfStmt(e.getEnclosingStmt()) }
@@ -45,9 +46,15 @@ private Expr getExpr(DataFlow::Node node) {
or
result = node.asOperand().getUse().getAst()
or
result = node.(DataFlow::RawIndirectInstruction).getInstruction().getAst()
exists(Instruction i |
node.(DataFlow::IndirectInstruction).hasInstructionAndIndirectionIndex(i, _) and
result = i.getAst()
)
or
result = node.(DataFlow::RawIndirectOperand).getOperand().getUse().getAst()
exists(Operand op |
node.(DataFlow::IndirectOperand).hasOperandAndIndirectionIndex(op, _) and
result = op.getUse().getAst()
)
}

/**