diff --git a/docs/changelog/124958.yaml b/docs/changelog/124958.yaml new file mode 100644 index 0000000000000..be7f646b7dcae --- /dev/null +++ b/docs/changelog/124958.yaml @@ -0,0 +1,6 @@ +pr: 124958 +summary: Catch parsing exception +area: ES|QL +type: bug +issues: + - 119025 \ No newline at end of file diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/parser/EsqlParser.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/parser/EsqlParser.java index 5912f1fe58bcd..a8ee18d8b2777 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/parser/EsqlParser.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/parser/EsqlParser.java @@ -14,6 +14,7 @@ import org.antlr.v4.runtime.Recognizer; import org.antlr.v4.runtime.Token; import org.antlr.v4.runtime.TokenSource; +import org.antlr.v4.runtime.VocabularyImpl; import org.antlr.v4.runtime.atn.PredictionMode; import org.elasticsearch.logging.LogManager; import org.elasticsearch.logging.Logger; @@ -23,12 +24,15 @@ import org.elasticsearch.xpack.esql.telemetry.PlanTelemetry; import java.util.BitSet; +import java.util.EmptyStackException; +import java.util.Map; import java.util.function.BiFunction; import java.util.function.Function; import java.util.regex.Matcher; import java.util.regex.Pattern; import static org.elasticsearch.xpack.esql.core.util.StringUtils.isInteger; +import static org.elasticsearch.xpack.esql.parser.ParserUtils.nameOrPosition; import static org.elasticsearch.xpack.esql.parser.ParserUtils.source; public class EsqlParser { @@ -44,6 +48,45 @@ public class EsqlParser { */ public static final int MAX_LENGTH = 1_000_000; + private static void replaceSymbolWithLiteral(Map symbolReplacements, String[] literalNames, String[] symbolicNames) { + for (int i = 0, replacements = symbolReplacements.size(); i < symbolicNames.length && replacements > 0; i++) { + String symName = symbolicNames[i]; + if (symName != null) { + String replacement = symbolReplacements.get(symName); + if (replacement != null && literalNames[i] == null) { + // literals are single quoted + literalNames[i] = "'" + replacement + "'"; + replacements--; + } + } + } + } + + /** + * Add the literal name to a number of tokens that due to ANTLR internals/ATN + * have their symbolic name returns instead during error reporting. + * When reporting token errors, ANTLR uses the Vocabulary class to get the displayName + * (if set), otherwise falls back to the literal one and eventually uses the symbol name. + * Since the Vocabulary is static and not pluggable, this code modifies the underlying + * arrays by setting the literal string manually based on the token index. + * This is needed since some symbols, especially around setting up the mode, end up losing + * their literal representation. + * NB: this code is highly dependent on the ANTLR internals and thus will likely break + * during upgrades. + * NB: Can't use this for replacing DEV_ since the Vocabular is static while DEV_ replacement occurs per runtime configuration + */ + static { + Map symbolReplacements = Map.of("LP", "(", "OPENING_BRACKET", "["); + + // the vocabularies have the same content however are different instances + // for extra reliability, perform the replacement for each map + VocabularyImpl parserVocab = (VocabularyImpl) EsqlBaseParser.VOCABULARY; + replaceSymbolWithLiteral(symbolReplacements, parserVocab.getLiteralNames(), parserVocab.getSymbolicNames()); + + VocabularyImpl lexerVocab = (VocabularyImpl) EsqlBaseLexer.VOCABULARY; + replaceSymbolWithLiteral(symbolReplacements, lexerVocab.getLiteralNames(), lexerVocab.getSymbolicNames()); + } + private EsqlConfig config = new EsqlConfig(); public EsqlConfig config() { @@ -111,6 +154,9 @@ private T invokeParser( return result.apply(new AstBuilder(new ExpressionBuilder.ParsingContext(params, metrics)), tree); } catch (StackOverflowError e) { throw new ParsingException("ESQL statement is too large, causing stack overflow when generating the parsing tree: [{}]", query); + // likely thrown by an invalid popMode (such as extra closing parenthesis) + } catch (EmptyStackException ese) { + throw new ParsingException("Invalid query [{}]", query); } } @@ -141,11 +187,14 @@ public void syntaxError( String message, RecognitionException e ) { - if (recognizer instanceof EsqlBaseParser parser && parser.isDevVersion() == false) { - Matcher m = REPLACE_DEV.matcher(message); - message = m.replaceAll(StringUtils.EMPTY); - } + if (recognizer instanceof EsqlBaseParser parser) { + Matcher m; + if (parser.isDevVersion() == false) { + m = REPLACE_DEV.matcher(message); + message = m.replaceAll(StringUtils.EMPTY); + } + } throw new ParsingException(message, e, line, charPositionInLine); } }; @@ -172,7 +221,7 @@ private static class ParametrizedTokenSource extends DelegatingTokenSource { @Override public Token nextToken() { Token token = delegate.nextToken(); - if (token.getType() == EsqlBaseLexer.PARAM) { + if (token.getType() == EsqlBaseLexer.PARAM || token.getType() == EsqlBaseLexer.DOUBLE_PARAMS) { checkAnonymousParam(token); if (param > params.size()) { throw new ParsingException(source(token), "Not enough actual parameters {}", params.size()); @@ -181,8 +230,9 @@ public Token nextToken() { param++; } - if (token.getType() == EsqlBaseLexer.NAMED_OR_POSITIONAL_PARAM) { - if (isInteger(token.getText().substring(1))) { + String nameOrPosition = nameOrPosition(token); + if (nameOrPosition.isBlank() == false) { + if (isInteger(nameOrPosition)) { checkPositionalParam(token); } else { checkNamedParam(token); diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/parser/StatementParserTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/parser/StatementParserTests.java index c8ff6ef1b55ae..7f5ffdbe67e99 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/parser/StatementParserTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/parser/StatementParserTests.java @@ -2977,4 +2977,11 @@ public void testInvalidInsistAsterisk() { expectError("FROM text | EVAL x = 4 | INSIST_🐔 *", "INSIST doesn't support wildcards, found [*]"); expectError("FROM text | EVAL x = 4 | INSIST_🐔 foo*", "INSIST doesn't support wildcards, found [foo*]"); } +} + public void testUnclosedParenthesis() { + String[] queries = { "row a = )", "row ]", "from source | eval x = [1,2,3]]" }; + for (String q : queries) { + expectError(q, "Invalid query"); + } + } }