Skip to content

Commit e8533c1

Browse files
costin and alex-spies authored
ESQL: Improve error message for ( and [ (#124177)
Recent grammar changes caused the ( token to no longer be reported by its text but rather by its internal token name. Due to the use of pushMode, the symbol is not treated as a literal but rather as a symbol. To address this, the parser listener looks at the error message and changes the message before returning it to the user. Replace the hacky regex approach with Vocabulary substitution (not as pluggable as it could be, yet much better). Fix #124145 Relates #123085 #121948 Co-authored-by: Alexander Spies <alexander.spies@elastic.co>
1 parent 4a0c935 commit e8533c1

File tree

3 files changed

+69
-4
lines changed

3 files changed

+69
-4
lines changed

docs/changelog/124177.yaml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
pr: 124177
2+
summary: "Improve error message for ( and ["
3+
area: ES|QL
4+
type: bug
5+
issues:
6+
- 124145

x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/parser/EsqlParser.java

Lines changed: 48 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
import org.antlr.v4.runtime.Recognizer;
1515
import org.antlr.v4.runtime.Token;
1616
import org.antlr.v4.runtime.TokenSource;
17+
import org.antlr.v4.runtime.VocabularyImpl;
1718
import org.antlr.v4.runtime.atn.PredictionMode;
1819
import org.elasticsearch.logging.LogManager;
1920
import org.elasticsearch.logging.Logger;
@@ -23,6 +24,7 @@
2324
import org.elasticsearch.xpack.esql.telemetry.PlanTelemetry;
2425

2526
import java.util.BitSet;
27+
import java.util.Map;
2628
import java.util.function.BiFunction;
2729
import java.util.function.Function;
2830
import java.util.regex.Matcher;
@@ -45,6 +47,45 @@ public class EsqlParser {
4547
*/
4648
public static final int MAX_LENGTH = 1_000_000;
4749

50+
private static void replaceSymbolWithLiteral(Map<String, String> symbolReplacements, String[] literalNames, String[] symbolicNames) {
51+
for (int i = 0, replacements = symbolReplacements.size(); i < symbolicNames.length && replacements > 0; i++) {
52+
String symName = symbolicNames[i];
53+
if (symName != null) {
54+
String replacement = symbolReplacements.get(symName);
55+
if (replacement != null && literalNames[i] == null) {
56+
// literals are single quoted
57+
literalNames[i] = "'" + replacement + "'";
58+
replacements--;
59+
}
60+
}
61+
}
62+
}
63+
64+
/**
65+
* Add the literal name to a number of tokens that due to ANTLR internals/ATN
66+
* have their symbolic name returns instead during error reporting.
67+
* When reporting token errors, ANTLR uses the Vocabulary class to get the displayName
68+
* (if set), otherwise falls back to the literal one and eventually uses the symbol name.
69+
* Since the Vocabulary is static and not pluggable, this code modifies the underlying
70+
* arrays by setting the literal string manually based on the token index.
71+
* This is needed since some symbols, especially around setting up the mode, end up losing
72+
* their literal representation.
73+
* NB: this code is highly dependent on the ANTLR internals and thus will likely break
74+
* during upgrades.
75+
* NB: Can't use this for replacing DEV_ since the Vocabular is static while DEV_ replacement occurs per runtime configuration
76+
*/
77+
static {
78+
Map<String, String> symbolReplacements = Map.of("LP", "(", "OPENING_BRACKET", "[");
79+
80+
// the vocabularies have the same content however are different instances
81+
// for extra reliability, perform the replacement for each map
82+
VocabularyImpl parserVocab = (VocabularyImpl) EsqlBaseParser.VOCABULARY;
83+
replaceSymbolWithLiteral(symbolReplacements, parserVocab.getLiteralNames(), parserVocab.getSymbolicNames());
84+
85+
VocabularyImpl lexerVocab = (VocabularyImpl) EsqlBaseLexer.VOCABULARY;
86+
replaceSymbolWithLiteral(symbolReplacements, lexerVocab.getLiteralNames(), lexerVocab.getSymbolicNames());
87+
}
88+
4889
private EsqlConfig config = new EsqlConfig();
4990

5091
public EsqlConfig config() {
@@ -142,11 +183,14 @@ public void syntaxError(
142183
String message,
143184
RecognitionException e
144185
) {
145-
if (recognizer instanceof EsqlBaseParser parser && parser.isDevVersion() == false) {
146-
Matcher m = REPLACE_DEV.matcher(message);
147-
message = m.replaceAll(StringUtils.EMPTY);
148-
}
186+
if (recognizer instanceof EsqlBaseParser parser) {
187+
Matcher m;
149188

189+
if (parser.isDevVersion() == false) {
190+
m = REPLACE_DEV.matcher(message);
191+
message = m.replaceAll(StringUtils.EMPTY);
192+
}
193+
}
150194
throw new ParsingException(message, e, line, charPositionInLine);
151195
}
152196
};

x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/parser/StatementParserTests.java

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3141,6 +3141,21 @@ public void testFieldNamesAsCommands() throws Exception {
31413141
}
31423142
}
31433143

3144+
// [ and ( are used to trigger a double mode causing their symbol name (instead of text) to be used in error reporting
3145+
// this test checks that their are properly replaced in the error message
3146+
public void testPreserveParanthesis() {
3147+
// test for (
3148+
expectError("row a = 1 not in", "line 1:17: mismatched input '<EOF>' expecting '('");
3149+
expectError("row a = 1 | where a not in", "line 1:27: mismatched input '<EOF>' expecting '('");
3150+
expectError("row a = 1 | where a not in (1", "line 1:30: mismatched input '<EOF>' expecting {',', ')'}");
3151+
expectError("row a = 1 | where a not in [1", "line 1:28: missing '(' at '['");
3152+
expectError("row a = 1 | where a not in 123", "line 1:28: missing '(' at '123'");
3153+
// test for [
3154+
expectError("explain", "line 1:8: mismatched input '<EOF>' expecting '['");
3155+
expectError("explain ]", "line 1:9: token recognition error at: ']'");
3156+
expectError("explain [row x = 1", "line 1:19: missing ']' at '<EOF>'");
3157+
}
3158+
31443159
static Alias alias(String name, Expression value) {
31453160
return new Alias(EMPTY, name, value);
31463161
}

0 commit comments

Comments
 (0)