Commit 99b01e5

feat: complete after error syntax (#334)
* refactor: split getMinimumParserInfo to slice input and parser again
* test: complete after error syntax
* feat: complete after error syntax
* feat: use createParser to get parserIns and remove parserWithNewInput
1 parent 07ff5dc commit 99b01e5
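
In practice this means completion keeps working when an earlier statement in the same input contains a syntax error, as long as that statement is terminated by a semicolon. A minimal sketch of the new behavior, mirroring the FlinkSQL test added in this commit (the in-repo import paths from that test are reused here; an import from the published package would differ):

import { FlinkSQL } from 'src/parser/flink';
import { CaretPosition, EntityContextType } from 'src/parser/common/types';

const flink = new FlinkSQL();

// The first statement ("SELECT FROM tb2;") is invalid, but it ends with ';',
// so suggestions at the caret on line 2 are still collected.
const sql = 'SELECT FROM tb2;\nINSERT INTO ';
const caret: CaretPosition = { lineNumber: 2, column: 13 };

const suggestion = flink.getSuggestionAtCaretPosition(sql, caret);
// Per the new test, this yields one syntax suggestion of type EntityContextType.TABLE.
console.log(suggestion?.syntax[0]?.syntaxContextType === EntityContextType.TABLE); // true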

File tree

9 files changed: +632 −53 lines changed

src/parser/common/basicSQL.ts

+173 −51
@@ -9,6 +9,7 @@ import {
     ParseTreeListener,
     PredictionMode,
     ANTLRErrorListener,
+    Parser,
 } from 'antlr4ng';
 import { CandidatesCollection, CodeCompletionCore } from 'antlr4-c3';
 import { SQLParserBase } from '../../lib/SQLParserBase';
@@ -28,6 +29,8 @@ import type { EntityCollector } from './entityCollector';
 import { EntityContext } from './entityCollector';
 import SemanticContextCollector from './semanticContextCollector';
 
+export const SQL_SPLIT_SYMBOL_TEXT = ';';
+
 /**
  * Basic SQL class, every sql needs extends it.
  */
@@ -264,7 +267,6 @@ export abstract class BasicSQL<
             return null;
         }
         const splitListener = this.splitListener;
-
         this.listen(splitListener, this._parseTree);
 
         const res = splitListener.statementsContext
@@ -277,35 +279,102 @@
     }
 
     /**
-     * Get a minimum boundary parser near tokenIndex.
-     * @param input source string.
-     * @param tokenIndex start from which index to minimize the boundary.
-     * @param originParseTree the parse tree need to be minimized, default value is the result of parsing `input`.
-     * @returns minimum parser info
+     * Get the smaller range of input
+     * @param input string
+     * @param allTokens all tokens from input
+     * @param tokenIndexOffset offset of the tokenIndex in the range of input
+     * @param caretTokenIndex tokenIndex of caretPosition
+     * @returns inputSlice: string, caretTokenIndex: number
      */
-    public getMinimumParserInfo(
+    private splitInputBySymbolText(
         input: string,
-        tokenIndex: number,
-        originParseTree?: ParserRuleContext | null
-    ) {
-        if (arguments.length <= 2) {
-            this.parseWithCache(input);
-            originParseTree = this._parseTree;
+        allTokens: Token[],
+        tokenIndexOffset: number,
+        caretTokenIndex: number
+    ): { inputSlice: string; allTokens: Token[]; caretTokenIndex: number } {
+        const tokens = allTokens.slice(tokenIndexOffset);
+        /**
+         * Set startToken
+         */
+        let startToken: Token | null = null;
+        for (let tokenIndex = caretTokenIndex - tokenIndexOffset; tokenIndex >= 0; tokenIndex--) {
+            const token = tokens[tokenIndex];
+            if (token?.text === SQL_SPLIT_SYMBOL_TEXT) {
+                startToken = tokens[tokenIndex + 1];
+                break;
+            }
+        }
+        if (startToken === null) {
+            startToken = tokens[0];
+        }
+
+        /**
+         * Set stopToken
+         */
+        let stopToken: Token | null = null;
+        for (
+            let tokenIndex = caretTokenIndex - tokenIndexOffset;
+            tokenIndex < tokens.length;
+            tokenIndex++
+        ) {
+            const token = tokens[tokenIndex];
+            if (token?.text === SQL_SPLIT_SYMBOL_TEXT) {
+                stopToken = token;
+                break;
+            }
+        }
+        if (stopToken === null) {
+            stopToken = tokens[tokens.length - 1];
         }
 
+        const indexOffset = tokens[0].start;
+        let startIndex = startToken.start - indexOffset;
+        let stopIndex = stopToken.stop + 1 - indexOffset;
+
+        /**
+         * Save offset of the tokenIndex in the range of input
+         * compared to the tokenIndex in the whole input
+         */
+        const _tokenIndexOffset = startToken.tokenIndex;
+        const _caretTokenIndex = caretTokenIndex - _tokenIndexOffset;
+
+        /**
+         * Get the smaller range of _input
+         */
+        const _input = input.slice(startIndex, stopIndex);
+
+        return {
+            inputSlice: _input,
+            allTokens: allTokens.slice(_tokenIndexOffset),
+            caretTokenIndex: _caretTokenIndex,
+        };
+    }
+
+    /**
+     * Get the minimum input string that can be parsed successfully by c3.
+     * @param input source string
+     * @param caretTokenIndex tokenIndex of caretPosition
+     * @param originParseTree origin parseTree
+     * @returns MinimumInputInfo
+     */
+    public getMinimumInputInfo(
+        input: string,
+        caretTokenIndex: number,
+        originParseTree: ParserRuleContext | undefined
+    ): { input: string; tokenIndexOffset: number; statementCount: number } | null {
         if (!originParseTree || !input?.length) return null;
+        let inputSlice = input;
 
-        const splitListener = this.splitListener;
         /**
          * Split sql by statement.
         * Try to collect candidates in as small a range as possible.
         */
+        const splitListener = this.splitListener;
         this.listen(splitListener, originParseTree);
+
         const statementCount = splitListener.statementsContext?.length;
         const statementsContext = splitListener.statementsContext;
         let tokenIndexOffset = 0;
-        let sqlParserIns = this._parser;
-        let parseTree = originParseTree;
 
         // If there are multiple statements.
         if (statementCount > 1) {
@@ -330,14 +399,14 @@ export abstract class BasicSQL<
                 const isNextCtxValid =
                     index === statementCount - 1 || !statementsContext[index + 1]?.exception;
 
-                if (ctx.stop && ctx.stop.tokenIndex < tokenIndex && isPrevCtxValid) {
+                if (ctx.stop && ctx.stop.tokenIndex < caretTokenIndex && isPrevCtxValid) {
                     startStatement = ctx;
                 }
 
                 if (
                     ctx.start &&
                     !stopStatement &&
-                    ctx.start.tokenIndex > tokenIndex &&
+                    ctx.start.tokenIndex > caretTokenIndex &&
                     isNextCtxValid
                 ) {
                     stopStatement = ctx;
@@ -347,41 +416,64 @@ export abstract class BasicSQL<
 
             // A boundary consisting of the index of the input.
             const startIndex = startStatement?.start?.start ?? 0;
-            const stopIndex = stopStatement?.stop?.stop ?? input.length - 1;
+            const stopIndex = stopStatement?.stop?.stop ?? inputSlice.length - 1;
 
             /**
              * Save offset of the tokenIndex in the range of input
              * compared to the tokenIndex in the whole input
             */
             tokenIndexOffset = startStatement?.start?.tokenIndex ?? 0;
-            tokenIndex = tokenIndex - tokenIndexOffset;
+            inputSlice = inputSlice.slice(startIndex, stopIndex);
+        }
 
-            /**
-             * Reparse the input fragment,
-             * and c3 will collect candidates in the newly generated parseTree.
-             */
-            const inputSlice = input.slice(startIndex, stopIndex);
+        return {
+            input: inputSlice,
+            tokenIndexOffset,
+            statementCount,
+        };
+    }
 
-            const lexer = this.createLexer(inputSlice);
-            lexer.removeErrorListeners();
-            const tokenStream = new CommonTokenStream(lexer);
-            tokenStream.fill();
+    /**
+     * Get a minimum boundary parser near caretTokenIndex.
+     * @param input source string.
+     * @param caretTokenIndex start from which index to minimize the boundary.
+     * @param originParseTree the parse tree need to be minimized, default value is the result of parsing `input`.
+     * @returns minimum parser info
+     */
+    public getMinimumParserInfo(
+        input: string,
+        caretTokenIndex: number,
+        originParseTree: ParserRuleContext | undefined
+    ): {
+        parser: Parser;
+        parseTree: ParserRuleContext;
+        tokenIndexOffset: number;
+        newTokenIndex: number;
+    } | null {
+        if (!originParseTree || !input?.length) return null;
 
-            const parser = this.createParserFromTokenStream(tokenStream);
-            parser.interpreter.predictionMode = PredictionMode.SLL;
-            parser.removeErrorListeners();
-            parser.buildParseTrees = true;
-            parser.errorHandler = new ErrorStrategy();
+        const inputInfo = this.getMinimumInputInfo(input, caretTokenIndex, originParseTree);
+        if (!inputInfo) return null;
+        const { input: inputSlice, tokenIndexOffset } = inputInfo;
+        caretTokenIndex = caretTokenIndex - tokenIndexOffset;
 
-            sqlParserIns = parser;
-            parseTree = parser.program();
+        let sqlParserIns = this._parser;
+        let parseTree = originParseTree;
+
+        /**
+         * Reparse the input fragment,
+         * and c3 will collect candidates in the newly generated parseTree when input changed.
+         */
+        if (inputSlice !== input) {
+            sqlParserIns = this.createParser(inputSlice);
+            parseTree = sqlParserIns.program();
         }
 
         return {
             parser: sqlParserIns,
             parseTree,
             tokenIndexOffset,
-            newTokenIndex: tokenIndex,
+            newTokenIndex: caretTokenIndex,
         };
     }
 
@@ -396,33 +488,63 @@ export abstract class BasicSQL<
         caretPosition: CaretPosition
     ): Suggestions | null {
         this.parseWithCache(input);
-
         if (!this._parseTree) return null;
 
-        const allTokens = this.getAllTokens(input);
+        let allTokens = this.getAllTokens(input);
         let caretTokenIndex = findCaretTokenIndex(caretPosition, allTokens);
-
         if (!caretTokenIndex && caretTokenIndex !== 0) return null;
 
-        const minimumParser = this.getMinimumParserInfo(input, caretTokenIndex);
+        const inputInfo = this.getMinimumInputInfo(input, caretTokenIndex, this._parseTree);
+        if (!inputInfo) return null;
+        const { input: _input, tokenIndexOffset, statementCount } = inputInfo;
+        let inputSlice = _input;
+
+        /**
+         * Split the inputSlice by separator to get the smaller range of inputSlice.
+         */
+        if (inputSlice.includes(SQL_SPLIT_SYMBOL_TEXT)) {
+            const {
+                inputSlice: _inputSlice,
+                allTokens: _allTokens,
+                caretTokenIndex: _caretTokenIndex,
+            } = this.splitInputBySymbolText(
+                inputSlice,
+                allTokens,
+                tokenIndexOffset,
+                caretTokenIndex
+            );
+
+            allTokens = _allTokens;
+            caretTokenIndex = _caretTokenIndex;
+            inputSlice = _inputSlice;
+        } else {
+            if (statementCount > 1) {
+                caretTokenIndex = caretTokenIndex - tokenIndexOffset;
+            }
+        }
+
+        let sqlParserIns = this._parser;
+        let parseTree = this._parseTree;
 
-        if (!minimumParser) return null;
+        /**
+         * Reparse the input fragment,
+         * and c3 will collect candidates in the newly generated parseTree when input changed.
+         */
+        if (inputSlice !== input) {
+            sqlParserIns = this.createParser(inputSlice);
+            parseTree = sqlParserIns.program();
+        }
 
-        const {
-            parser: sqlParserIns,
-            tokenIndexOffset,
-            newTokenIndex,
-            parseTree: c3Context,
-        } = minimumParser;
         const core = new CodeCompletionCore(sqlParserIns);
         core.preferredRules = this.preferredRules;
 
-        const candidates = core.collectCandidates(newTokenIndex, c3Context);
+        const candidates = core.collectCandidates(caretTokenIndex, parseTree);
         const originalSuggestions = this.processCandidates(
             candidates,
             allTokens,
-            newTokenIndex,
-            tokenIndexOffset
+            caretTokenIndex,
+            0
+            // tokenIndexOffset
         );
 
         const syntaxSuggestions: SyntaxSuggestion<WordRange>[] = originalSuggestions.syntax.map(
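
Taken together, splitInputBySymbolText and the reparse above hand CodeCompletionCore only the statement that contains the caret. A character-level analogue of that slicing, for illustration only (the real method slices on token boundaries from getAllTokens; the names below are local stand-ins, with SQL_SPLIT_SYMBOL_TEXT redefined here rather than imported):

// Slice the input down to the statement that contains the caret, using ';' as
// the boundary, so a broken statement before it no longer blocks completion.
const SQL_SPLIT_SYMBOL_TEXT = ';';
const input = 'SELECT FROM tb2;\nINSERT INTO ';
const caretOffset = input.length; // caret at the very end of the input

const prevSplit = input.lastIndexOf(SQL_SPLIT_SYMBOL_TEXT, caretOffset - 1);
const nextSplit = input.indexOf(SQL_SPLIT_SYMBOL_TEXT, caretOffset);
const inputSlice = input.slice(
    prevSplit === -1 ? 0 : prevSplit + 1,
    nextSplit === -1 ? input.length : nextSplit + 1
);
// inputSlice === '\nINSERT INTO ': the invalid "SELECT FROM tb2;" is excluded,
// and the caret index is re-based to this slice before collectCandidates runs.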

src/parser/common/semanticContextCollector.ts

+1 −2

@@ -6,8 +6,7 @@ import {
     SemanticContext,
     SqlSplitStrategy,
 } from '../common/types';
-
-export const SQL_SPLIT_SYMBOL_TEXT = ';';
+import { SQL_SPLIT_SYMBOL_TEXT } from './basicSQL';
 
 abstract class SemanticContextCollector {
     constructor(

@@ -0,0 +1,65 @@
+import { FlinkSQL } from 'src/parser/flink';
+import { CaretPosition, EntityContextType } from 'src/parser/common/types';
+
+describe('FlinkSQL Complete After Syntax Error', () => {
+    const flink = new FlinkSQL();
+
+    const sql1 = `SELECT FROM tb2;\nINSERT INTO `;
+    const sql2 = `SELECT FROM tb3;\nCREATE TABLE `;
+    const sql3 = `SELECT FROM t1;\nSL`;
+
+    test('Syntax error but end with semi, should suggest tableName', () => {
+        const pos: CaretPosition = {
+            lineNumber: 2,
+            column: 13,
+        };
+        const suggestion = flink.getSuggestionAtCaretPosition(sql1, pos);
+        expect(suggestion).not.toBeUndefined();
+
+        // syntax
+        const syntaxes = suggestion?.syntax;
+        expect(syntaxes.length).toBe(1);
+        expect(syntaxes[0].syntaxContextType).toBe(EntityContextType.TABLE);
+
+        // keyword
+        const keywords = suggestion?.keywords;
+        expect(keywords.length).toBe(0);
+    });
+
+    test('Syntax error but end with semi, should suggest tableNameCreate', () => {
+        const pos: CaretPosition = {
+            lineNumber: 2,
+            column: 14,
+        };
+        const suggestion = flink.getSuggestionAtCaretPosition(sql2, pos);
+        expect(suggestion).not.toBeUndefined();
+
+        // syntax
+        const syntaxes = suggestion?.syntax;
+        expect(syntaxes.length).toBe(1);
+        expect(syntaxes[0].syntaxContextType).toBe(EntityContextType.TABLE_CREATE);
+
+        // keyword
+        const keywords = suggestion?.keywords;
+        expect(keywords).toMatchUnorderedArray(['IF', 'IF NOT EXISTS']);
+    });
+
+    test('Syntax error but end with semi, should suggest filter token', () => {
+        const pos: CaretPosition = {
+            lineNumber: 2,
+            column: 2,
+        };
+        const suggestion = flink.getSuggestionAtCaretPosition(sql3, pos);
+        expect(suggestion).not.toBeUndefined();
+
+        // syntax
+        const syntaxes = suggestion?.syntax;
+        expect(syntaxes.length).toBe(0);
+
+        // keyword
+        const filterKeywords = suggestion?.keywords?.filter(
+            (item) => item.startsWith('S') && /S(?=.*L)/.test(item)
+        );
+        expect(filterKeywords).toMatchUnorderedArray(['SELECT']);
+    });
+});
