Skip to content

Commit d569f86

Browse files
committed
Java: Diff-informed regex queries
An alternative to dynamic dispatch would have been to use parameterised modules. I tried that but abandoned it because it led to cascading changes and noise in too many places. The parameterisation used here has to be propagated through three different library files, and that propagation becomes invisible (for better or worse) when using dynamic dispatch.
1 parent 25e288a commit d569f86

File tree

5 files changed

+60
-0
lines changed

5 files changed

+60
-0
lines changed
+28
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
private import java
2+
private import semmle.code.java.dataflow.DataFlow
3+
private import codeql.util.Unit
4+
5+
/**
6+
* An extension point to allow a query to detect only the regular expressions
7+
* it needs in diff-informed incremental mode. The data-flow analysis that's
8+
* modified by this class has its sources as (certain) string literals and its
9+
* sinks as regular-expression matches.
10+
*/
11+
class RegexDiffInformedConfig instanceof Unit {
12+
/**
13+
* Holds if discovery of regular expressions should be diff-informed, which
14+
* is possible when there cannot be any elements selected by the query in the
15+
* diff range except the regular expressions and (locations derived from) the
16+
* places where they are matched against.
17+
*/
18+
abstract predicate observeDiffInformedIncrementalMode();
19+
20+
/**
21+
* Gets a location of a regex match that will be part of the query results.
22+
* If the query does not select the match locations, this predicate can be
23+
* `none()` for performance.
24+
*/
25+
abstract Location getASelectedSinkLocation(DataFlow::Node sink);
26+
27+
string toString() { result = "RegexDiffInformedConfig" }
28+
}

Diff for: java/ql/lib/semmle/code/java/regex/RegexFlowConfigs.qll

+9
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ import java
66
import semmle.code.java.dataflow.ExternalFlow
77
private import semmle.code.java.dataflow.DataFlow
88
private import semmle.code.java.security.SecurityTests
9+
private import RegexDiffInformed
910

1011
private class ExploitableStringLiteral extends StringLiteral {
1112
ExploitableStringLiteral() { this.getValue().matches(["%+%", "%*%", "%{%}%"]) }
@@ -157,6 +158,14 @@ private module RegexFlowConfig implements DataFlow::ConfigSig {
157158
}
158159

159160
int fieldFlowBranchLimit() { result = 1 }
161+
162+
predicate observeDiffInformedIncrementalMode() {
163+
exists(RegexDiffInformedConfig c | c.observeDiffInformedIncrementalMode())
164+
}
165+
166+
Location getASelectedSinkLocation(DataFlow::Node sink) {
167+
exists(RegexDiffInformedConfig c | result = c.getASelectedSinkLocation(sink))
168+
}
160169
}
161170

162171
private module RegexFlow = DataFlow::Global<RegexFlowConfig>;

Diff for: java/ql/lib/semmle/code/java/security/regexp/PolynomialReDoSQuery.qll

+7
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ private import semmle.code.java.regex.RegexTreeView::RegexTreeView as TreeView
44
import codeql.regex.nfa.SuperlinearBackTracking::Make<TreeView> as SuperlinearBackTracking
55
import semmle.code.java.dataflow.DataFlow
66
import semmle.code.java.regex.RegexFlowConfigs
7+
import semmle.code.java.regex.RegexDiffInformed
78
import semmle.code.java.dataflow.FlowSources
89
private import semmle.code.java.security.Sanitizers
910

@@ -33,6 +34,12 @@ private class LengthRestrictedMethod extends Method {
3334
}
3435
}
3536

37+
class PolynomialRedDosDiffInformed extends RegexDiffInformedConfig {
38+
override predicate observeDiffInformedIncrementalMode() { not PolynomialRedosFlow::hasSourceInDiffRange() }
39+
40+
override Location getASelectedSinkLocation(DataFlow::Node sink) { result = sink.getLocation() }
41+
}
42+
3643
/** A configuration for Polynomial ReDoS queries. */
3744
module PolynomialRedosConfig implements DataFlow::ConfigSig {
3845
predicate isSource(DataFlow::Node src) { src instanceof ActiveThreatModelSource }

Diff for: java/ql/src/Security/CWE/CWE-020/OverlyLargeRange.ql

+8
Original file line numberDiff line numberDiff line change
@@ -12,9 +12,17 @@
1212
* external/cwe/cwe-020
1313
*/
1414

15+
import semmle.code.java.regex.RegexDiffInformed
16+
import semmle.code.java.dataflow.DataFlow
1517
private import semmle.code.java.regex.RegexTreeView::RegexTreeView as TreeView
1618
import codeql.regex.OverlyLargeRangeQuery::Make<TreeView>
1719

20+
class OverlyLargeRangeDiffInformed extends RegexDiffInformedConfig {
21+
override predicate observeDiffInformedIncrementalMode() { any() }
22+
23+
override Location getASelectedSinkLocation(DataFlow::Node sink) { none() }
24+
}
25+
1826
TreeView::RegExpCharacterClass potentialMisparsedCharClass() {
1927
// nested char classes are currently misparsed
2028
result.getAChild().(TreeView::RegExpNormalChar).getValue() = "["

Diff for: java/ql/src/Security/CWE/CWE-730/ReDoS.ql

+8
Original file line numberDiff line numberDiff line change
@@ -14,9 +14,17 @@
1414
* external/cwe/cwe-400
1515
*/
1616

17+
import semmle.code.java.regex.RegexDiffInformed
18+
import semmle.code.java.dataflow.DataFlow
1719
private import semmle.code.java.regex.RegexTreeView::RegexTreeView as TreeView
1820
import codeql.regex.nfa.ExponentialBackTracking::Make<TreeView> as ExponentialBackTracking
1921

22+
class ReDoSDiffInformed extends RegexDiffInformedConfig {
23+
override predicate observeDiffInformedIncrementalMode() { any() }
24+
25+
override Location getASelectedSinkLocation(DataFlow::Node sink) { none() }
26+
}
27+
2028
from TreeView::RegExpTerm t, string pump, ExponentialBackTracking::State s, string prefixMsg
2129
where
2230
ExponentialBackTracking::hasReDoSResult(t, pump, s, prefixMsg) and

0 commit comments

Comments
 (0)