14
14
import org .antlr .v4 .runtime .Recognizer ;
15
15
import org .antlr .v4 .runtime .Token ;
16
16
import org .antlr .v4 .runtime .TokenSource ;
17
+ import org .antlr .v4 .runtime .VocabularyImpl ;
17
18
import org .antlr .v4 .runtime .atn .PredictionMode ;
18
19
import org .elasticsearch .logging .LogManager ;
19
20
import org .elasticsearch .logging .Logger ;
23
24
import org .elasticsearch .xpack .esql .telemetry .PlanTelemetry ;
24
25
25
26
import java .util .BitSet ;
27
+ import java .util .Map ;
26
28
import java .util .function .BiFunction ;
27
29
import java .util .function .Function ;
28
30
import java .util .regex .Matcher ;
@@ -45,6 +47,45 @@ public class EsqlParser {
45
47
*/
46
48
public static final int MAX_LENGTH = 1_000_000 ;
47
49
50
+ private static void replaceSymbolWithLiteral (Map <String , String > symbolReplacements , String [] literalNames , String [] symbolicNames ) {
51
+ for (int i = 0 , replacements = symbolReplacements .size (); i < symbolicNames .length && replacements > 0 ; i ++) {
52
+ String symName = symbolicNames [i ];
53
+ if (symName != null ) {
54
+ String replacement = symbolReplacements .get (symName );
55
+ if (replacement != null && literalNames [i ] == null ) {
56
+ // literals are single quoted
57
+ literalNames [i ] = "'" + replacement + "'" ;
58
+ replacements --;
59
+ }
60
+ }
61
+ }
62
+ }
63
+
64
+ /**
65
+ * Add the literal name to a number of tokens that due to ANTLR internals/ATN
66
+ * have their symbolic name returns instead during error reporting.
67
+ * When reporting token errors, ANTLR uses the Vocabulary class to get the displayName
68
+ * (if set), otherwise falls back to the literal one and eventually uses the symbol name.
69
+ * Since the Vocabulary is static and not pluggable, this code modifies the underlying
70
+ * arrays by setting the literal string manually based on the token index.
71
+ * This is needed since some symbols, especially around setting up the mode, end up losing
72
+ * their literal representation.
73
+ * NB: this code is highly dependent on the ANTLR internals and thus will likely break
74
+ * during upgrades.
75
+ * NB: Can't use this for replacing DEV_ since the Vocabular is static while DEV_ replacement occurs per runtime configuration
76
+ */
77
+ static {
78
+ Map <String , String > symbolReplacements = Map .of ("LP" , "(" , "OPENING_BRACKET" , "[" );
79
+
80
+ // the vocabularies have the same content however are different instances
81
+ // for extra reliability, perform the replacement for each map
82
+ VocabularyImpl parserVocab = (VocabularyImpl ) EsqlBaseParser .VOCABULARY ;
83
+ replaceSymbolWithLiteral (symbolReplacements , parserVocab .getLiteralNames (), parserVocab .getSymbolicNames ());
84
+
85
+ VocabularyImpl lexerVocab = (VocabularyImpl ) EsqlBaseLexer .VOCABULARY ;
86
+ replaceSymbolWithLiteral (symbolReplacements , lexerVocab .getLiteralNames (), lexerVocab .getSymbolicNames ());
87
+ }
88
+
48
89
private EsqlConfig config = new EsqlConfig ();
49
90
50
91
public EsqlConfig config () {
@@ -142,11 +183,14 @@ public void syntaxError(
142
183
String message ,
143
184
RecognitionException e
144
185
) {
145
- if (recognizer instanceof EsqlBaseParser parser && parser .isDevVersion () == false ) {
146
- Matcher m = REPLACE_DEV .matcher (message );
147
- message = m .replaceAll (StringUtils .EMPTY );
148
- }
186
+ if (recognizer instanceof EsqlBaseParser parser ) {
187
+ Matcher m ;
149
188
189
+ if (parser .isDevVersion () == false ) {
190
+ m = REPLACE_DEV .matcher (message );
191
+ message = m .replaceAll (StringUtils .EMPTY );
192
+ }
193
+ }
150
194
throw new ParsingException (message , e , line , charPositionInLine );
151
195
}
152
196
};
0 commit comments