Skip to content

Commit

Permalink
[ES|QL] Improve column parsing (elastic#190612)
Browse files Browse the repository at this point in the history
## Summary

Fixes elastic#189913

- Adds `parts` field to `{ type: 'column' }` AST nodes. The `parts`
field contains a list of correctly parsed-out column fragments.
- The existing `text` and `name` fields work only if a column does not
have any escaped characters. However, I could not change/remove them, as
lots of code currently refers to them.


### Checklist

Delete any items that are not applicable to this PR.

- [x] [Unit or functional
tests](https://www.elastic.co/guide/en/kibana/master/development-tests.html)
were updated or added to match the most common scenarios

### For maintainers

- [x] This was checked for breaking API changes and was [labeled
appropriately](https://www.elastic.co/guide/en/kibana/master/contributing.html#kibana-release-notes-process)

---------

Co-authored-by: Elastic Machine <elasticmachine@users.noreply.github.com>
Co-authored-by: kibanamachine <42973632+kibanamachine@users.noreply.github.com>
  • Loading branch information
3 people authored Aug 20, 2024
1 parent 6f392ee commit 929d407
Show file tree
Hide file tree
Showing 4 changed files with 170 additions and 8 deletions.
92 changes: 92 additions & 0 deletions packages/kbn-esql-ast/src/__tests__/ast_parser.columns.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,92 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0 and the Server Side Public License, v 1; you may not use this file except
* in compliance with, at your election, the Elastic License 2.0 or the Server
* Side Public License, v 1.
*/

import { getAstAndSyntaxErrors as parse } from '../ast_parser';

describe('Column Identifier Expressions', () => {
  // Partial shape of a column node carrying the given parsed parts.
  const column = (...parts: string[]) => ({ type: 'column', parts });

  // Partial shape of a command node carrying the given argument shapes.
  const command = (...args: object[]) => ({ type: 'command', args });

  it('can parse un-quoted identifiers', () => {
    const query = 'ROW a, b.c';
    const result = parse(query);

    expect(result.ast).toMatchObject([command(column('a'), column('b', 'c'))]);
  });

  it('can parse quoted identifiers', () => {
    const query = 'ROW `a`, `b`.c, `d`.`👍`.`123``123`';
    const result = parse(query);

    // A doubled backtick inside a quoted fragment stands for one literal backtick.
    expect(result.ast).toMatchObject([
      command(column('a'), column('b', 'c'), column('d', '👍', '123`123')),
    ]);
  });

  it('can mix quoted and un-quoted identifiers', () => {
    const query = 'ROW part1.part2.`part``3️⃣`';
    const result = parse(query);

    expect(result.ast).toMatchObject([command(column('part1', 'part2', 'part`3️⃣'))]);
  });

  it('in KEEP command', () => {
    const query = 'FROM a | KEEP a.b';
    const result = parse(query);

    // The first command (FROM) is matched loosely; only KEEP is inspected.
    expect(result.ast).toMatchObject([{}, command(column('a', 'b'))]);
  });
});
33 changes: 25 additions & 8 deletions packages/kbn-esql-ast/src/ast_helpers.ts
Original file line number Diff line number Diff line change
Expand Up @@ -11,12 +11,14 @@
*/

import { type Token, type ParserRuleContext, type TerminalNode } from 'antlr4';
import type {
ArithmeticUnaryContext,
DecimalValueContext,
InlineCastContext,
IntegerValueContext,
QualifiedIntegerLiteralContext,
import {
QualifiedNameContext,
type ArithmeticUnaryContext,
type DecimalValueContext,
type InlineCastContext,
type IntegerValueContext,
type QualifiedIntegerLiteralContext,
QualifiedNamePatternContext,
} from './antlr/esql_parser';
import { getPosition } from './ast_position_utils';
import { DOUBLE_TICKS_REGEX, SINGLE_BACKTICK, TICKS_REGEX } from './constants';
Expand All @@ -39,6 +41,7 @@ import type {
FunctionSubtype,
ESQLNumericLiteral,
} from './types';
import { parseIdentifier } from './parser/helpers';

export function nonNullable<T>(v: T): v is NonNullable<T> {
return v != null;
Expand Down Expand Up @@ -360,22 +363,36 @@ export function createSource(
}

/**
 * Builds a column AST node from a single terminal token (presumably the `*`
 * wildcard, judging by the function name).
 *
 * The token text is used verbatim as the node's `name`, `text` and as the only
 * entry of `parts`; no unquoting is applied and the node is never marked as
 * quoted.
 *
 * @param ctx Terminal node whose text and symbol position describe the column.
 * @returns The column node; `incomplete` is `true` when the token text is empty.
 */
export function createColumnStar(ctx: TerminalNode): ESQLColumn {
  // Read the token text once; every text-derived field below reuses it.
  const text = ctx.getText();

  return {
    type: 'column',
    // Each key appears exactly once: duplicate keys in an object literal are a
    // compile error, and only the last duplicate would take effect anyway.
    name: text,
    parts: [text],
    text,
    location: getPosition(ctx.symbol),
    incomplete: text === '',
    quoted: false,
  };
}

export function createColumn(ctx: ParserRuleContext): ESQLColumn {
const parts: string[] = [];
if (ctx instanceof QualifiedNamePatternContext) {
parts.push(
...ctx.identifierPattern_list().map((identifier) => parseIdentifier(identifier.getText()))
);
} else if (ctx instanceof QualifiedNameContext) {
parts.push(...ctx.identifier_list().map((identifier) => parseIdentifier(identifier.getText())));
} else {
parts.push(sanitizeIdentifierString(ctx));
}
const text = sanitizeIdentifierString(ctx);
const hasQuotes = Boolean(getQuotedText(ctx) || isQuoted(ctx.getText()));
return {
type: 'column' as const,
name: text,
parts,
text: ctx.getText(),
location: getPosition(ctx.start, ctx.stop),
incomplete: Boolean(ctx.exception || text === ''),
Expand Down
37 changes: 37 additions & 0 deletions packages/kbn-esql-ast/src/parser/helpers.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0 and the Server Side Public License, v 1; you may not use this file except
* in compliance with, at your election, the Elastic License 2.0 or the Server
* Side Public License, v 1.
*/

/**
 * Tells whether an identifier fragment is wrapped in backticks.
 *
 * @param text Raw text of a single identifier fragment.
 * @returns `true` when `text` both starts and ends with a backtick and is at
 *   least two characters long, `false` otherwise.
 */
export const isQuotedIdentifier = (text: string): boolean => {
  // A lone backtick must not count as quoted: the same character would serve
  // as both the opening and the closing quote, and stripping the "quotes"
  // would then silently turn it into an empty string.
  if (text.length < 2) {
    return false;
  }

  return text.startsWith('`') && text.endsWith('`');
};

/**
 * Converts the raw text of one identifier fragment into its unquoted value.
 *
 * Fragments that `isQuotedIdentifier` does not report as quoted are returned
 * unchanged. Quoted fragments lose their first and last character, and every
 * doubled backtick inside them is replaced by a single backtick.
 *
 * @param text Raw text of a single identifier fragment.
 * @returns The fragment without its quoting.
 */
export const parseIdentifier = (text: string): string => {
  if (isQuotedIdentifier(text)) {
    // Drop the surrounding quotes first, then collapse the escaped pairs.
    const inner = text.slice(1, -1);

    return inner.replace(/``/g, '`');
  }

  return text;
};

/**
 * Case-insensitive pattern for text that `formatIdentifier` returns without
 * backtick quoting: the first character is a letter, `*`, `_` or `@`, and it
 * is followed by zero or more letters, digits, `_` or `*`.
 */
export const regexUnquotedIdentifierPattern = /^([a-z\*_\@]{1})[a-z0-9_\*]*$/i;

/**
 * Renders one identifier fragment as source text, adding quotes when needed.
 *
 * Text that matches `regexUnquotedIdentifierPattern` is returned as is. Any
 * other text is wrapped in backticks, with every backtick inside it doubled.
 *
 * @param text Unquoted value of a single identifier fragment.
 * @returns The fragment as it should be written in a query.
 */
export const formatIdentifier = (text: string): string => {
  const needsQuoting = !regexUnquotedIdentifierPattern.test(text);

  if (needsQuoting) {
    // Double each backtick so it survives inside the quoted form.
    const escaped = text.replace(/`/g, '``');

    return '`' + escaped + '`';
  }

  return text;
};

/**
 * Renders a multi-part identifier as source text.
 *
 * Each part is passed through `formatIdentifier` and the results are joined
 * with a dot.
 *
 * @param parts Unquoted identifier fragments, in order.
 * @returns The dot-separated identifier text.
 */
export const formatIdentifierParts = (parts: string[]): string => {
  const formatted = parts.map((part) => formatIdentifier(part));

  return formatted.join('.');
};
16 changes: 16 additions & 0 deletions packages/kbn-esql-ast/src/types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -179,6 +179,22 @@ export interface ESQLSource extends ESQLAstBaseItem {

/**
 * AST node describing a column identifier.
 */
export interface ESQLColumn extends ESQLAstBaseItem {
/** Literal tag that identifies the node as a column. */
type: 'column';

/**
* An identifier can be composed of multiple parts, e.g: part1.part2.`part``3️⃣`.
* This property contains the parsed unquoted parts of the identifier.
* For example: `['part1', 'part2', 'part`3️⃣']`.
*/
parts: string[];

/**
* @deprecated
*
* An identifier can be composed of multiple parts, e.g: part1.part2.`part3️⃣`
*
* Each part can be quoted or not quoted independently. A single `quoted`
* property is not enough to represent the identifier. Use `parts` instead.
*/
quoted: boolean;
}

Expand Down

0 comments on commit 929d407

Please sign in to comment.