From 4f94d6e76f98a314d968f7d7c3129afd8cb02d55 Mon Sep 17 00:00:00 2001 From: Asger F Date: Tue, 27 Aug 2024 17:22:53 +0200 Subject: [PATCH 1/7] Data flow: use AP depth in stage 2, instead of boolean --- .../codeql/dataflow/internal/DataFlowImpl.qll | 36 +++++++++++++------ 1 file changed, 25 insertions(+), 11 deletions(-) diff --git a/shared/dataflow/codeql/dataflow/internal/DataFlowImpl.qll b/shared/dataflow/codeql/dataflow/internal/DataFlowImpl.qll index f42da2f2fc00..6a2c86a9a829 100644 --- a/shared/dataflow/codeql/dataflow/internal/DataFlowImpl.qll +++ b/shared/dataflow/codeql/dataflow/internal/DataFlowImpl.qll @@ -3624,10 +3624,14 @@ module MakeImpl Lang> { class Typ = Unit; - class Ap = Boolean; + class Ap instanceof int { + Ap() { this = [0 .. Config::accessPathLimit()] } + + string toString() { result = "depth=" + this } + } class ApNil extends Ap { - ApNil() { this = false } + ApNil() { this = 0 } } bindingset[result, ap] @@ -3637,24 +3641,31 @@ module MakeImpl Lang> { bindingset[c, t, tail] Ap apCons(Content c, Typ t, Ap tail) { - result = true and + result = tail + 1 and exists(c) and - exists(t) and - if tail = true then Config::accessPathLimit() > 1 else any() + exists(t) } class ApHeadContent = Unit; pragma[inline] - ApHeadContent getHeadContent(Ap ap) { exists(result) and ap = true } + ApHeadContent getHeadContent(Ap ap) { exists(result) and ap > 0 } ApHeadContent projectToHeadContent(Content c) { any() } - class ApOption = BooleanOption; + class ApOption instanceof int { + ApOption() { this instanceof Ap or this = -1 } + + string toString() { + this = -1 and result = "" + or + result = this.(Ap).toString() + } + } - ApOption apNone() { result = TBooleanNone() } + ApOption apNone() { result = -1 } - ApOption apSome(Ap ap) { result = TBooleanSome(ap) } + ApOption apSome(Ap ap) { result = ap } import CachedCallContextSensitivity import NoLocalCallContext @@ -3694,7 +3705,7 @@ module MakeImpl Lang> { ( notExpectsContent(node) or - ap = true and + ap > 0 and expectsContentCand(node) ) } @@ -3724,7 +3735,10 @@ module MakeImpl Lang> { class ApNil = ApproxAccessPathFrontNil; - PrevStage::Ap getApprox(Ap ap) { result = ap.toBoolNonEmpty() } + PrevStage::Ap getApprox(Ap ap) { + // TODO: returning multiple APs here is a hack + if ap = TApproxFrontNil() then result = 0 else result = [1 .. Config::accessPathLimit()] + } Typ getTyp(DataFlowType t) { any() } From 76b18fd532d42926ad67d0fca8ed2f4ea54a7118 Mon Sep 17 00:00:00 2001 From: Asger F Date: Thu, 29 Aug 2024 15:14:03 +0200 Subject: [PATCH 2/7] Data flow: Add nilApprox() helper --- shared/dataflow/codeql/dataflow/internal/DataFlowImpl.qll | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/shared/dataflow/codeql/dataflow/internal/DataFlowImpl.qll b/shared/dataflow/codeql/dataflow/internal/DataFlowImpl.qll index 6a2c86a9a829..1edf24e87f27 100644 --- a/shared/dataflow/codeql/dataflow/internal/DataFlowImpl.qll +++ b/shared/dataflow/codeql/dataflow/internal/DataFlowImpl.qll @@ -545,6 +545,8 @@ module MakeImpl Lang> { private module Stage1 implements StageSig { class Ap = Unit; + class ApNil = Unit; + private class Cc = boolean; /* Begin: Stage 1 logic. */ @@ -1297,6 +1299,8 @@ module MakeImpl Lang> { private signature module StageSig { class Ap; + class ApNil extends Ap; + predicate revFlow(NodeEx node); predicate revFlowAp(NodeEx node, Ap ap); @@ -1446,6 +1450,8 @@ module MakeImpl Lang> { PrevStage::revFlow(node) and result = getTyp(node.getDataFlowType()) } + private ApApprox nilApprox() { result instanceof PrevStage::ApNil } + pragma[nomagic] private predicate flowThroughOutOfCall( DataFlowCall call, CcCall ccc, RetNodeEx ret, NodeEx out, boolean allowsFieldFlow, From 9e1fb69dba9b16f16a05fb5f5f4bfb51d641d22b Mon Sep 17 00:00:00 2001 From: Asger F Date: Thu, 29 Aug 2024 15:36:55 +0200 Subject: [PATCH 3/7] Data flow: prune 'apa' values after a store or load --- .../codeql/dataflow/internal/DataFlowImpl.qll | 65 ++++++++++++++----- 1 file changed, 48 insertions(+), 17 deletions(-) diff --git a/shared/dataflow/codeql/dataflow/internal/DataFlowImpl.qll b/shared/dataflow/codeql/dataflow/internal/DataFlowImpl.qll index 1edf24e87f27..cd3677a6471a 100644 --- a/shared/dataflow/codeql/dataflow/internal/DataFlowImpl.qll +++ b/shared/dataflow/codeql/dataflow/internal/DataFlowImpl.qll @@ -1350,8 +1350,11 @@ module MakeImpl Lang> { class ApNil extends Ap; - bindingset[result, ap] - ApApprox getApprox(Ap ap); + bindingset[ap, node] + ApApprox getApproxAtNode(Ap ap, NodeEx node); + + bindingset[ap, approx] + predicate isValidApprox(Ap ap, ApApprox approx); Typ getTyp(DataFlowType t); @@ -1535,7 +1538,7 @@ module MakeImpl Lang> { summaryCtx = TSummaryCtxNone() and t = getNodeTyp(node) and ap instanceof ApNil and - apa = getApprox(ap) + apa = nilApprox() or exists(NodeEx mid, FlowState state0, Typ t0, LocalCc localCc | fwdFlow(mid, state0, cc, summaryCtx, t0, ap, apa) and @@ -1556,14 +1559,14 @@ module MakeImpl Lang> { exists(Content c, Typ t0, Ap ap0 | fwdFlowStore(_, t0, ap0, c, t, node, state, cc, summaryCtx) and ap = apCons(c, t0, ap0) and - apa = getApprox(ap) + apa = getApproxAtNode(ap, node) ) or // read exists(Typ t0, Ap ap0, Content c | fwdFlowRead(t0, ap0, c, _, node, state, cc, summaryCtx) and fwdFlowConsCand(t0, ap0, c, t, ap) and - apa = getApprox(ap) + apa = getApproxAtNode(ap, node) ) or // flow into a callable @@ -2048,7 +2051,7 @@ module MakeImpl Lang> { exists(NodeEx node, FlowState state | sourceNode(node, state) and (if hasSourceCallCtx() then cc = true else cc = false) and - PrevStage::revFlow(node, state, getApprox(any(ApNil nil))) and + PrevStage::revFlow(node, state, nilApprox()) and c = node.getEnclosingCallable() ) or @@ -2084,8 +2087,8 @@ module MakeImpl Lang> { not outBarrier(ret, state) and kind = ret.getKind() and parameterFlowThroughAllowed(p, kind) and - argApa = getApprox(argAp) and - PrevStage::returnMayFlowThrough(ret, pragma[only_bind_into](argApa), apa, kind) + PrevStage::returnMayFlowThrough(ret, pragma[only_bind_into](argApa), apa, kind) and + isValidApprox(argAp, argApa) ) } @@ -2426,7 +2429,7 @@ module MakeImpl Lang> { revFlow(pragma[only_bind_into](p), state, TReturnCtxMaybeFlowThrough(pos), apSome(returnAp), pragma[only_bind_into](ap)) and parameterFlowThroughAllowed(p, pos.getKind()) and - PrevStage::parameterMayFlowThrough(p, getApprox(ap)) + PrevStage::parameterMayFlowThrough(p, getApproxAtNode(ap, p)) } pragma[nomagic] @@ -3640,8 +3643,11 @@ module MakeImpl Lang> { ApNil() { this = 0 } } - bindingset[result, ap] - PrevStage::Ap getApprox(Ap ap) { any() } + bindingset[ap, node] + PrevStage::Ap getApproxAtNode(Ap ap, NodeEx node) { any() } + + bindingset[ap, approx] + predicate isValidApprox(Ap ap, PrevStage::Ap approx) { any() } Typ getTyp(DataFlowType t) { any() } @@ -3741,9 +3747,18 @@ module MakeImpl Lang> { class ApNil = ApproxAccessPathFrontNil; - PrevStage::Ap getApprox(Ap ap) { - // TODO: returning multiple APs here is a hack - if ap = TApproxFrontNil() then result = 0 else result = [1 .. Config::accessPathLimit()] + bindingset[ap, approx] + predicate isValidApprox(Ap ap, PrevStage::Ap approx) { + if ap instanceof ApNil then approx = 0 else approx > 0 + } + + bindingset[ap, node] + PrevStage::Ap getApproxAtNode(Ap ap, NodeEx node) { + if ap = TApproxFrontNil() + then result = 0 + else ( + PrevStage::revFlow(node, _, _, _, result) and result >= 1 + ) } Typ getTyp(DataFlowType t) { any() } @@ -3871,7 +3886,11 @@ module MakeImpl Lang> { class ApNil = AccessPathFrontNil; - PrevStage::Ap getApprox(Ap ap) { result = ap.toApprox() } + bindingset[ap, node] + PrevStage::Ap getApproxAtNode(Ap ap, NodeEx node) { result = ap.toApprox() and exists(node) } + + bindingset[ap, approx] + predicate isValidApprox(Ap ap, PrevStage::Ap approx) { approx = ap.toApprox() } Typ getTyp(DataFlowType t) { result = t } @@ -4160,7 +4179,13 @@ module MakeImpl Lang> { class ApNil = AccessPathApproxNil; pragma[nomagic] - PrevStage::Ap getApprox(Ap ap) { result = ap.getFront() } + private PrevStage::Ap getApprox(Ap ap) { result = ap.getFront() } + + bindingset[ap, node] + PrevStage::Ap getApproxAtNode(Ap ap, NodeEx node) { result = getApprox(ap) and exists(node) } + + bindingset[ap, approx] + predicate isValidApprox(Ap ap, PrevStage::Ap approx) { approx = getApprox(ap) } Typ getTyp(DataFlowType t) { result = t } @@ -4363,7 +4388,13 @@ module MakeImpl Lang> { class ApNil = AccessPathNil; pragma[nomagic] - PrevStage::Ap getApprox(Ap ap) { result = ap.getApprox() } + private PrevStage::Ap getApprox(Ap ap) { result = ap.getApprox() } + + bindingset[ap, node] + PrevStage::Ap getApproxAtNode(Ap ap, NodeEx node) { result = getApprox(ap) and exists(node) } + + bindingset[ap, approx] + predicate isValidApprox(Ap ap, PrevStage::Ap approx) { approx = getApprox(ap) } Typ getTyp(DataFlowType t) { result = t } From ae807ef8923dd030fad35a6fbbfaafa74314fe20 Mon Sep 17 00:00:00 2001 From: Asger F Date: Thu, 29 Aug 2024 10:31:46 +0200 Subject: [PATCH 4/7] Data flow: Duplicate call to 'revFlow' This needs to be joined differently for some of the disjuncts in the exists --- shared/dataflow/codeql/dataflow/internal/DataFlowImpl.qll | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/shared/dataflow/codeql/dataflow/internal/DataFlowImpl.qll b/shared/dataflow/codeql/dataflow/internal/DataFlowImpl.qll index cd3677a6471a..3f8db5d4b5a3 100644 --- a/shared/dataflow/codeql/dataflow/internal/DataFlowImpl.qll +++ b/shared/dataflow/codeql/dataflow/internal/DataFlowImpl.qll @@ -2717,7 +2717,9 @@ module MakeImpl Lang> { or callEdgeReturn(_, _, node, _, next, _, ap) and apNext = ap - or + ) + or + exists(NodeEx next, Ap apNext | revFlow(next, pragma[only_bind_into](state), apNext) | storeStepCand(node, _, _, next, _, _) or readStepCand(node, _, next) From 505ee417fa118d5b6f9ccdbd05851aa04c4340a3 Mon Sep 17 00:00:00 2001 From: Asger F Date: Thu, 29 Aug 2024 10:46:46 +0200 Subject: [PATCH 5/7] Data flow: remove unused 'apNext' --- shared/dataflow/codeql/dataflow/internal/DataFlowImpl.qll | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/shared/dataflow/codeql/dataflow/internal/DataFlowImpl.qll b/shared/dataflow/codeql/dataflow/internal/DataFlowImpl.qll index 3f8db5d4b5a3..be119cc951f4 100644 --- a/shared/dataflow/codeql/dataflow/internal/DataFlowImpl.qll +++ b/shared/dataflow/codeql/dataflow/internal/DataFlowImpl.qll @@ -2719,7 +2719,7 @@ module MakeImpl Lang> { apNext = ap ) or - exists(NodeEx next, Ap apNext | revFlow(next, pragma[only_bind_into](state), apNext) | + exists(NodeEx next | revFlow(next, pragma[only_bind_into](state), _) | storeStepCand(node, _, _, next, _, _) or readStepCand(node, _, next) From b6c52888f9cd9d08cb5da90856cf9968f9f53124 Mon Sep 17 00:00:00 2001 From: Asger F Date: Thu, 29 Aug 2024 10:47:44 +0200 Subject: [PATCH 6/7] Data flow: unify apNext with ap as they're always equal in this clause --- .../codeql/dataflow/internal/DataFlowImpl.qll | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/shared/dataflow/codeql/dataflow/internal/DataFlowImpl.qll b/shared/dataflow/codeql/dataflow/internal/DataFlowImpl.qll index be119cc951f4..49b1dd62b800 100644 --- a/shared/dataflow/codeql/dataflow/internal/DataFlowImpl.qll +++ b/shared/dataflow/codeql/dataflow/internal/DataFlowImpl.qll @@ -2704,19 +2704,15 @@ module MakeImpl Lang> { private predicate localFlowExit(NodeEx node, FlowState state, Ap ap) { revFlow(node, pragma[only_bind_into](state), pragma[only_bind_into](ap)) and ( - exists(NodeEx next, Ap apNext | revFlow(next, pragma[only_bind_into](state), apNext) | - jumpStepEx(node, next) and - apNext = ap + exists(NodeEx next | revFlow(next, pragma[only_bind_into](state), ap) | + jumpStepEx(node, next) or additionalJumpStep(node, next, _) and - apNext = ap and ap instanceof ApNil or - callEdgeArgParam(_, _, node, next, _, ap) and - apNext = ap + callEdgeArgParam(_, _, node, next, _, ap) or - callEdgeReturn(_, _, node, _, next, _, ap) and - apNext = ap + callEdgeReturn(_, _, node, _, next, _, ap) ) or exists(NodeEx next | revFlow(next, pragma[only_bind_into](state), _) | From dbae8d6ba62a9fe41fde8dbae8637772be3bc23b Mon Sep 17 00:00:00 2001 From: Asger F Date: Thu, 29 Aug 2024 10:48:23 +0200 Subject: [PATCH 7/7] Data flow: add only_bind_out to 'next' --- shared/dataflow/codeql/dataflow/internal/DataFlowImpl.qll | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/shared/dataflow/codeql/dataflow/internal/DataFlowImpl.qll b/shared/dataflow/codeql/dataflow/internal/DataFlowImpl.qll index 49b1dd62b800..fcf1a2a75096 100644 --- a/shared/dataflow/codeql/dataflow/internal/DataFlowImpl.qll +++ b/shared/dataflow/codeql/dataflow/internal/DataFlowImpl.qll @@ -2704,7 +2704,9 @@ module MakeImpl Lang> { private predicate localFlowExit(NodeEx node, FlowState state, Ap ap) { revFlow(node, pragma[only_bind_into](state), pragma[only_bind_into](ap)) and ( - exists(NodeEx next | revFlow(next, pragma[only_bind_into](state), ap) | + exists(NodeEx next | + revFlow(pragma[only_bind_out](next), pragma[only_bind_into](state), ap) + | jumpStepEx(node, next) or additionalJumpStep(node, next, _) and @@ -2715,7 +2717,9 @@ module MakeImpl Lang> { callEdgeReturn(_, _, node, _, next, _, ap) ) or - exists(NodeEx next | revFlow(next, pragma[only_bind_into](state), _) | + exists(NodeEx next | + revFlow(pragma[only_bind_out](next), pragma[only_bind_into](state), _) + | storeStepCand(node, _, _, next, _, _) or readStepCand(node, _, next)