feat(spark): add spark expression column
LuckyFBB committed Oct 16, 2024
1 parent 091d76d commit ca4d102
Showing 11 changed files with 5,456 additions and 5,026 deletions.
4 changes: 2 additions & 2 deletions src/grammar/hive/HiveSqlParser.g4
@@ -4,9 +4,9 @@
ownership. The ASF licenses this file to You under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License. You may obtain a copy of the
License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software distributed under the License
is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
implied. See the License for the specific language governing permissions and limitations under the
36 changes: 24 additions & 12 deletions src/grammar/spark/SparkSqlParser.g4
@@ -455,6 +455,10 @@ columnName
| {this.shouldMatchEmpty()}?
;

columnNamePath
: multipartIdentifier
;

columnNameSeq
: columnName (COMMA columnName)*
;
@@ -469,11 +473,23 @@ identifierReference
;

queryOrganization
: (KW_ORDER KW_BY order+=sortItem (COMMA order+=sortItem)*)? (
KW_CLUSTER KW_BY clusterBy+=expression (COMMA clusterBy+=expression)*
)? (KW_DISTRIBUTE KW_BY distributeBy+=expression (COMMA distributeBy+=expression)*)? (
KW_SORT KW_BY sort+=sortItem (COMMA sort+=sortItem)*
)? windowClause? (KW_LIMIT (KW_ALL | limit=expression))? (KW_OFFSET offset=expression)?
: (KW_ORDER KW_BY orderOrSortByClause)? (KW_CLUSTER KW_BY clusterOrDistributeBy)? (
KW_DISTRIBUTE KW_BY clusterOrDistributeBy
)? (KW_SORT KW_BY orderOrSortByClause)? windowClause? limitClause? (
KW_OFFSET offset=expression
)?
;

limitClause
: KW_LIMIT (KW_ALL | limit=expression)
;

orderOrSortByClause
: sortItem (COMMA sortItem)*
;

clusterOrDistributeBy
: expression (COMMA expression)*
;

multiInsertQueryBody
@@ -825,11 +841,7 @@ tableArgumentPartitioning
| partition+=expression
)
)
) (
(KW_ORDER | KW_SORT) KW_BY (
((LEFT_PAREN sortItem (COMMA sortItem)* RIGHT_PAREN) | sortItem)
)
)?
) ((KW_ORDER | KW_SORT) KW_BY ( ((LEFT_PAREN orderOrSortByClause RIGHT_PAREN) | sortItem)))?
;

functionTableNamedArgumentExpression
@@ -1013,7 +1025,7 @@ primaryExpression
| identifier ARROW expression
| LEFT_PAREN identifier (COMMA identifier)+ RIGHT_PAREN ARROW expression
| value=primaryExpression LEFT_BRACKET index=valueExpression RIGHT_BRACKET
| identifier
| columnNamePath
| base=primaryExpression DOT fieldName=identifier
| LEFT_PAREN expression RIGHT_PAREN
| KW_EXTRACT LEFT_PAREN field=identifier KW_FROM source=valueExpression RIGHT_PAREN
@@ -1286,7 +1298,7 @@ windowSpec
(KW_PARTITION | KW_DISTRIBUTE) KW_BY partition+=expression (
COMMA partition+=expression
)*
)? ((KW_ORDER | KW_SORT) KW_BY sortItem (COMMA sortItem)*)?
)? ((KW_ORDER | KW_SORT) KW_BY orderOrSortByClause)?
) windowFrame? RIGHT_PAREN
;

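Note on the grammar change above: the `queryOrganization` rewrite replaces the inline `sortItem`/`expression` lists with the named rules `limitClause`, `orderOrSortByClause`, and `clusterOrDistributeBy`, so downstream tooling can identify column positions by rule name rather than by anonymous sub-alternatives. A minimal sketch of statements that exercise each extracted rule, assuming the package root exports `SparkSQL` as `src/parser/spark/index.ts` suggests:

```ts
import { SparkSQL } from 'dt-sql-parser';

const spark = new SparkSQL();

// Each statement routes through one of the newly extracted rules.
const statements = [
    'SELECT name FROM person ORDER BY age DESC;', // orderOrSortByClause
    'SELECT name FROM person DISTRIBUTE BY dept SORT BY name;', // clusterOrDistributeBy
    'SELECT name FROM person LIMIT 10;', // limitClause
];

statements.forEach((sql) => {
    // validate() returns a list of parse errors; an empty list means the statement parses.
    console.log(sql, spark.validate(sql).length === 0 ? 'parses' : 'fails');
});
```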
6 changes: 5 additions & 1 deletion src/lib/spark/SparkSqlParser.interp

Large diffs are not rendered by default.

10,173 changes: 5,164 additions & 5,009 deletions src/lib/spark/SparkSqlParser.ts

Large diffs are not rendered by default.

44 changes: 44 additions & 0 deletions src/lib/spark/SparkSqlParserListener.ts
@@ -149,10 +149,14 @@ import { TableNameContext } from "./SparkSqlParser.js";
import { ViewNameCreateContext } from "./SparkSqlParser.js";
import { ViewNameContext } from "./SparkSqlParser.js";
import { ColumnNameContext } from "./SparkSqlParser.js";
import { ColumnNamePathContext } from "./SparkSqlParser.js";
import { ColumnNameSeqContext } from "./SparkSqlParser.js";
import { ColumnNameCreateContext } from "./SparkSqlParser.js";
import { IdentifierReferenceContext } from "./SparkSqlParser.js";
import { QueryOrganizationContext } from "./SparkSqlParser.js";
import { LimitClauseContext } from "./SparkSqlParser.js";
import { OrderOrSortByClauseContext } from "./SparkSqlParser.js";
import { ClusterOrDistributeByContext } from "./SparkSqlParser.js";
import { MultiInsertQueryBodyContext } from "./SparkSqlParser.js";
import { QueryTermContext } from "./SparkSqlParser.js";
import { QueryPrimaryContext } from "./SparkSqlParser.js";
@@ -1913,6 +1917,16 @@ export class SparkSqlParserListener implements ParseTreeListener {
* @param ctx the parse tree
*/
exitColumnName?: (ctx: ColumnNameContext) => void;
/**
* Enter a parse tree produced by `SparkSqlParser.columnNamePath`.
* @param ctx the parse tree
*/
enterColumnNamePath?: (ctx: ColumnNamePathContext) => void;
/**
* Exit a parse tree produced by `SparkSqlParser.columnNamePath`.
* @param ctx the parse tree
*/
exitColumnNamePath?: (ctx: ColumnNamePathContext) => void;
/**
* Enter a parse tree produced by `SparkSqlParser.columnNameSeq`.
* @param ctx the parse tree
@@ -1953,6 +1967,36 @@ export class SparkSqlParserListener implements ParseTreeListener {
* @param ctx the parse tree
*/
exitQueryOrganization?: (ctx: QueryOrganizationContext) => void;
/**
* Enter a parse tree produced by `SparkSqlParser.limitClause`.
* @param ctx the parse tree
*/
enterLimitClause?: (ctx: LimitClauseContext) => void;
/**
* Exit a parse tree produced by `SparkSqlParser.limitClause`.
* @param ctx the parse tree
*/
exitLimitClause?: (ctx: LimitClauseContext) => void;
/**
* Enter a parse tree produced by `SparkSqlParser.orderOrSortByClause`.
* @param ctx the parse tree
*/
enterOrderOrSortByClause?: (ctx: OrderOrSortByClauseContext) => void;
/**
* Exit a parse tree produced by `SparkSqlParser.orderOrSortByClause`.
* @param ctx the parse tree
*/
exitOrderOrSortByClause?: (ctx: OrderOrSortByClauseContext) => void;
/**
* Enter a parse tree produced by `SparkSqlParser.clusterOrDistributeBy`.
* @param ctx the parse tree
*/
enterClusterOrDistributeBy?: (ctx: ClusterOrDistributeByContext) => void;
/**
* Exit a parse tree produced by `SparkSqlParser.clusterOrDistributeBy`.
* @param ctx the parse tree
*/
exitClusterOrDistributeBy?: (ctx: ClusterOrDistributeByContext) => void;
/**
* Enter a parse tree produced by `SparkSqlParser.multiInsertQueryBody`.
* @param ctx the parse tree
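The generated listener gains optional enter/exit hooks for each new rule. A sketch of a listener that collects every column path in a statement — the import paths below are assumptions based on the repository layout, and `parse`/`listen` follow the library's documented `BasicSQL` usage (only `validate` is confirmed by the tests in this commit):

```ts
import { SparkSQL } from 'dt-sql-parser';
// Import paths are assumptions; adjust to the package's published layout.
import { SparkSqlParserListener } from 'dt-sql-parser/dist/lib/spark/SparkSqlParserListener';
import type { ColumnNamePathContext } from 'dt-sql-parser/dist/lib/spark/SparkSqlParser';

class ColumnCollector extends SparkSqlParserListener {
    columns: string[] = [];
    // Hook added by this commit; fires once per columnNamePath node.
    enterColumnNamePath = (ctx: ColumnNamePathContext) => {
        this.columns.push(ctx.getText());
    };
}

const spark = new SparkSQL();
const tree = spark.parse('SELECT id, name FROM person WHERE age > 18;');
const collector = new ColumnCollector();
spark.listen(collector, tree); // walk the parse tree with the collector
console.log(collector.columns); // expected: ['id', 'name', 'age']
```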
28 changes: 28 additions & 0 deletions src/lib/spark/SparkSqlParserVisitor.ts
@@ -149,10 +149,14 @@ import { TableNameContext } from "./SparkSqlParser.js";
import { ViewNameCreateContext } from "./SparkSqlParser.js";
import { ViewNameContext } from "./SparkSqlParser.js";
import { ColumnNameContext } from "./SparkSqlParser.js";
import { ColumnNamePathContext } from "./SparkSqlParser.js";
import { ColumnNameSeqContext } from "./SparkSqlParser.js";
import { ColumnNameCreateContext } from "./SparkSqlParser.js";
import { IdentifierReferenceContext } from "./SparkSqlParser.js";
import { QueryOrganizationContext } from "./SparkSqlParser.js";
import { LimitClauseContext } from "./SparkSqlParser.js";
import { OrderOrSortByClauseContext } from "./SparkSqlParser.js";
import { ClusterOrDistributeByContext } from "./SparkSqlParser.js";
import { MultiInsertQueryBodyContext } from "./SparkSqlParser.js";
import { QueryTermContext } from "./SparkSqlParser.js";
import { QueryPrimaryContext } from "./SparkSqlParser.js";
@@ -1255,6 +1259,12 @@ export class SparkSqlParserVisitor<Result> extends AbstractParseTreeVisitor<Result> {
* @return the visitor result
*/
visitColumnName?: (ctx: ColumnNameContext) => Result;
/**
* Visit a parse tree produced by `SparkSqlParser.columnNamePath`.
* @param ctx the parse tree
* @return the visitor result
*/
visitColumnNamePath?: (ctx: ColumnNamePathContext) => Result;
/**
* Visit a parse tree produced by `SparkSqlParser.columnNameSeq`.
* @param ctx the parse tree
@@ -1279,6 +1289,24 @@
* @return the visitor result
*/
visitQueryOrganization?: (ctx: QueryOrganizationContext) => Result;
/**
* Visit a parse tree produced by `SparkSqlParser.limitClause`.
* @param ctx the parse tree
* @return the visitor result
*/
visitLimitClause?: (ctx: LimitClauseContext) => Result;
/**
* Visit a parse tree produced by `SparkSqlParser.orderOrSortByClause`.
* @param ctx the parse tree
* @return the visitor result
*/
visitOrderOrSortByClause?: (ctx: OrderOrSortByClauseContext) => Result;
/**
* Visit a parse tree produced by `SparkSqlParser.clusterOrDistributeBy`.
* @param ctx the parse tree
* @return the visitor result
*/
visitClusterOrDistributeBy?: (ctx: ClusterOrDistributeByContext) => Result;
/**
* Visit a parse tree produced by `SparkSqlParser.multiInsertQueryBody`.
* @param ctx the parse tree
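The visitor mirrors the listener additions. A sketch that resolves only `columnNamePath` nodes and aggregates their texts, under the same import-path assumptions as above:

```ts
import { SparkSQL } from 'dt-sql-parser';
// Import paths are assumptions; adjust to the package's published layout.
import { SparkSqlParserVisitor } from 'dt-sql-parser/dist/lib/spark/SparkSqlParserVisitor';
import type { ColumnNamePathContext } from 'dt-sql-parser/dist/lib/spark/SparkSqlParser';

class ColumnPathVisitor extends SparkSqlParserVisitor<string[]> {
    protected defaultResult(): string[] {
        return [];
    }
    protected aggregateResult(aggregate: string[], next: string[]): string[] {
        return aggregate.concat(next);
    }
    // Entry added by this commit: return the column path's text as a singleton.
    visitColumnNamePath = (ctx: ColumnNamePathContext): string[] => [ctx.getText()];
}

const spark = new SparkSQL();
const tree = spark.parse('SELECT id FROM person WHERE age > 18;');
console.log(new ColumnPathVisitor().visit(tree)); // expected: ['id', 'age']
```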
18 changes: 18 additions & 0 deletions src/parser/spark/index.ts
@@ -31,6 +31,7 @@ export class SparkSQL extends BasicSQL<SparkSqlLexer, ProgramContext, SparkSqlParser> {
SparkSqlParser.RULE_functionName,
SparkSqlParser.RULE_functionNameCreate,
SparkSqlParser.RULE_columnName,
SparkSqlParser.RULE_columnNamePath,
SparkSqlParser.RULE_columnNameCreate,
]);

@@ -105,6 +106,23 @@ export class SparkSQL extends BasicSQL<SparkSqlLexer, ProgramContext, SparkSqlParser> {
syntaxContextType = EntityContextType.COLUMN_CREATE;
break;
}
case SparkSqlParser.RULE_columnNamePath: {
if (
candidateRule.ruleList.includes(SparkSqlParser.RULE_whenClause) ||
candidateRule.ruleList.includes(SparkSqlParser.RULE_whereClause) ||
candidateRule.ruleList.includes(SparkSqlParser.RULE_joinRelation) ||
candidateRule.ruleList.includes(SparkSqlParser.RULE_orderOrSortByClause) ||
candidateRule.ruleList.includes(SparkSqlParser.RULE_groupByClause) ||
candidateRule.ruleList.includes(SparkSqlParser.RULE_aggregationClause) ||
candidateRule.ruleList.includes(SparkSqlParser.RULE_havingClause) ||
candidateRule.ruleList.includes(SparkSqlParser.RULE_windowClause) ||
candidateRule.ruleList.includes(SparkSqlParser.RULE_selectClause) ||
candidateRule.ruleList.includes(SparkSqlParser.RULE_limitClause) ||
candidateRule.ruleList.includes(SparkSqlParser.RULE_clusterOrDistributeBy)
) {
syntaxContextType = EntityContextType.COLUMN;
}
}
default:
break;
}
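The new `RULE_columnNamePath` branch marks the caret as a column reference whenever the candidate rule stack passes through a WHEN, WHERE, JOIN, ORDER/SORT BY, GROUP BY, aggregation, HAVING, window, select, limit, or CLUSTER/DISTRIBUTE BY subtree. A sketch of how that surfaces through the completion API — `getSuggestionAtCaretPosition` and the `{ lineNumber, column }` caret shape are assumptions based on the library's suggestion tests, not shown in this diff:

```ts
import { SparkSQL, EntityContextType } from 'dt-sql-parser';

const spark = new SparkSQL();
const sql = 'SELECT name, age FROM person ORDER BY length(';

// Place the caret just after "length(" — inside orderOrSortByClause,
// one of the rule lists the new branch checks (1-based column).
const suggestions = spark.getSuggestionAtCaretPosition(sql, {
    lineNumber: 1,
    column: sql.length + 1,
});

// Expect a COLUMN entity among the syntax suggestions.
const columns = suggestions?.syntax.filter(
    (item) => item.syntaxContextType === EntityContextType.COLUMN
);
console.log(columns);
```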
4 changes: 3 additions & 1 deletion src/parser/spark/sparkErrorListener.ts
@@ -17,6 +17,7 @@ export class SparkErrorListener extends ParseErrorListener {
[SparkSqlParser.RULE_functionName, 'function'],
[SparkSqlParser.RULE_functionNameCreate, 'function'],
[SparkSqlParser.RULE_columnName, 'column'],
[SparkSqlParser.RULE_columnNamePath, 'column'],
[SparkSqlParser.RULE_columnNameCreate, 'column'],
]);

@@ -48,7 +49,8 @@ export class SparkErrorListener extends ParseErrorListener {
case SparkSqlParser.RULE_tableName:
case SparkSqlParser.RULE_viewName:
case SparkSqlParser.RULE_functionName:
case SparkSqlParser.RULE_columnName: {
case SparkSqlParser.RULE_columnName:
case SparkSqlParser.RULE_columnNamePath: {
result.push(`{existing}${name}`);
break;
}
17 changes: 17 additions & 0 deletions test/parser/spark/errorListener.test.ts
@@ -4,6 +4,7 @@ const randomText = `dhsdansdnkla ndjnsla ndnalks`;
const sql1 = `ALTER VIEW`;
const sql2 = `SELECT * FROM `;
const sql3 = `DROP SCHEMA aaa aaa`;
const sql4 = `SELECT name, age FROM person ORDER BY length( `;

describe('SparkSQL validate invalid sql and test msg', () => {
const spark = new SparkSQL();
@@ -38,6 +39,14 @@ describe('SparkSQL validate invalid sql and test msg', () => {
);
});

test('validate unComplete sql4', () => {
const errors = spark.validate(sql4);
expect(errors.length).toBe(1);
expect(errors[0].message).toBe(
`Statement is incomplete, expecting an existing function or an existing column or a keyword`
);
});

test('validate random text cn', () => {
spark.locale = 'zh_CN';
const errors = spark.validate(randomText);
@@ -64,4 +73,12 @@
expect(errors.length).toBe(1);
expect(errors[0].message).toBe(`'aaa' 在此位置无效,期望一个存在的namespace或者一个关键字`);
});

test('validate unComplete sql4', () => {
const errors = spark.validate(sql4);
expect(errors.length).toBe(1);
expect(errors[0].message).toBe(
`语句不完整,期望一个存在的function或者一个存在的column或者一个关键字`
);
});
});
12 changes: 11 additions & 1 deletion test/parser/spark/suggestion/fixtures/syntaxSuggestion.sql
@@ -64,4 +64,14 @@ OPTIMIZE db.tb;

OPTIMIZE db.tb ZORDER BY ;

OPTIMIZE db.tb ZORDER BY name, i;
OPTIMIZE db.tb ZORDER BY name, i;

SELECT name, age FROM person ORDER BY length(age) LIMIT length(name);

SELECT id, CASE id WHEN 100 then 'bigger' WHEN id > 300 THEN '300' ELSE 'small' END FROM person;

INSERT OVERWRITE students PARTITION (student_id = 222222) SELECT name, address FROM persons WHERE name = "Dora Williams";

SELECT id, name, employee.deptno, deptname FROM employee FULL JOIN department ON employee.deptno = department.deptno;

SELECT city, sum(quantity) AS sum FROM dealer GROUP BY sum(city) HAVING max(quantity) > 15;