From 6c571e1461321f331fe2db76602bb2794674c8b7 Mon Sep 17 00:00:00 2001 From: heroicNeZha <25311962+heroicNeZha@users.noreply.github.com> Date: Tue, 30 Nov 2021 15:00:46 +0800 Subject: [PATCH 1/5] feat - support chinese --- src/parser/parser.yy | 3 +- src/parser/scanner.lex | 17 ++++++- tests/tck/features/schema/Schema.feature | 58 ++++++++++++++++++++++++ 3 files changed, 76 insertions(+), 2 deletions(-) diff --git a/src/parser/parser.yy b/src/parser/parser.yy index 3a5aab1f84b..4a105285d32 100644 --- a/src/parser/parser.yy +++ b/src/parser/parser.yy @@ -212,7 +212,7 @@ static constexpr size_t kCommentLengthLimit = 256; %token BOOL %token INTEGER %token DOUBLE -%token STRING VARIABLE LABEL IPV4 +%token STRING VARIABLE LABEL IPV4 CHINESE_LABEL %type name_label unreserved_keyword predicate_name %type expression @@ -405,6 +405,7 @@ static constexpr size_t kCommentLengthLimit = 256; name_label : LABEL { $$ = $1; } + | CHINESE_LABEL { $$ = $1; } | unreserved_keyword { $$ = $1; } ; diff --git a/src/parser/scanner.lex b/src/parser/scanner.lex index 87be1d017eb..35aacb8c3bb 100644 --- a/src/parser/scanner.lex +++ b/src/parser/scanner.lex @@ -43,7 +43,11 @@ HEX ([0-9a-fA-F]) OCT ([0-7]) IP_OCTET ([0-9]|[1-9][0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-5]) - +U [\x80-\xbf] +U2 [\xc2-\xdf] +U3 [\xe0-\xef] +U4 [\xf0-\xf4] +CHINESE_LABEL ({U2}{U}|{U3}{U}{U}|{U4}{U}{U}{U})+ %% @@ -466,6 +470,17 @@ IP_OCTET ([0-9]|[1-9][0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-5]) // Must match /* */ throw GraphParser::syntax_error(*yylloc, "unterminated comment"); } +{CHINESE_LABEL} { + yylval->strval = new std::string(yytext, yyleng); + if (yylval->strval->size() > MAX_STRING) { + auto error = "Out of range of the LABEL length, " + "the max length of LABEL is " + + std::to_string(MAX_STRING) + ":"; + delete yylval->strval; + throw GraphParser::syntax_error(*yylloc, error); + } + return TokenType::CHINESE_LABEL; + } . { /** * Any other unmatched byte sequences will get us here, diff --git a/tests/tck/features/schema/Schema.feature b/tests/tck/features/schema/Schema.feature index b203ea87b62..ccdc57251c5 100644 --- a/tests/tck/features/schema/Schema.feature +++ b/tests/tck/features/schema/Schema.feature @@ -1,6 +1,7 @@ # Copyright (c) 2020 vesoft inc. All rights reserved. # # This source code is licensed under Apache 2.0 License. +@li Feature: Insert string vid of vertex and edge Scenario: insert vertex and edge test @@ -769,6 +770,63 @@ Feature: Insert string vid of vertex and edge ALTER EDGE edge_not_null_default1 CHANGE (name FIXED_STRING(10) DEFAULT 10) """ Then a ExecutionError should be raised at runtime: Invalid param! + # chinese tag and chinese prop + When executing query: + """ + CREATE TAG 队伍(名字 string); + """ + Then the execution should be successful + # show chinese tags + When executing query: + """ + SHOW TAGS + """ + Then the result should contain: + | Name | + | "队伍" | + # alter chinese tag + When executing query: + """ + ALTER TAG 队伍 ADD (类别 string); + """ + Then the execution should be successful + # desc chinese tag + When executing query: + """ + DESCRIBE TAG 队伍 + """ + Then the result should be, in any order: + | Field | Type | Null | Default | Comment | + | "名字" | "string" | "YES" | EMPTY | EMPTY | + | "类别" | "string" | "YES" | EMPTY | EMPTY | + # chinese edge and chinese prop + When executing query: + """ + CREATE EDGE 服役(); + """ + Then the execution should be successful + # show chinese edge + When executing query: + """ + SHOW EDGES; + """ + Then the result should contain: + | Name | + | "服役" | + # alter chinese edge + When executing query: + """ + ALTER EDGE 服役 ADD (时间 timestamp); + """ + Then the execution should be successful + # desc chinese edge + When executing query: + """ + DESCRIBE EDGE 服役 + """ + Then the result should be, in any order: + | Field | Type | Null | Default | Comment | + | "时间" | "timestamp" | "YES" | EMPTY | EMPTY | When executing query: """ DROP SPACE issue2009; From 00af298ca8a18d5f965a64fea48f5f9d06bec550 Mon Sep 17 00:00:00 2001 From: heroicNeZha <25311962+heroicNeZha@users.noreply.github.com> Date: Tue, 30 Nov 2021 15:03:22 +0800 Subject: [PATCH 2/5] fix - remove debug anno --- tests/tck/features/schema/Schema.feature | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/tck/features/schema/Schema.feature b/tests/tck/features/schema/Schema.feature index ccdc57251c5..458c87e03f8 100644 --- a/tests/tck/features/schema/Schema.feature +++ b/tests/tck/features/schema/Schema.feature @@ -1,7 +1,6 @@ # Copyright (c) 2020 vesoft inc. All rights reserved. # # This source code is licensed under Apache 2.0 License. -@li Feature: Insert string vid of vertex and edge Scenario: insert vertex and edge test From ed85801f3daabd6755ea6c90a2d7bc47328e7f03 Mon Sep 17 00:00:00 2001 From: heroicNeZha <25311962+heroicNeZha@users.noreply.github.com> Date: Wed, 1 Dec 2021 17:24:43 +0800 Subject: [PATCH 3/5] support `chinese` --- src/parser/scanner.lex | 4 ++-- tests/tck/features/schema/Schema.feature | 12 ++++++------ 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/src/parser/scanner.lex b/src/parser/scanner.lex index 35aacb8c3bb..53eb9d131a2 100644 --- a/src/parser/scanner.lex +++ b/src/parser/scanner.lex @@ -470,8 +470,8 @@ CHINESE_LABEL ({U2}{U}|{U3}{U}{U}|{U4}{U}{U}{U})+ // Must match /* */ throw GraphParser::syntax_error(*yylloc, "unterminated comment"); } -{CHINESE_LABEL} { - yylval->strval = new std::string(yytext, yyleng); +\`{CHINESE_LABEL}\` { + yylval->strval = new std::string(yytext + 1, yyleng - 2); if (yylval->strval->size() > MAX_STRING) { auto error = "Out of range of the LABEL length, " "the max length of LABEL is " + diff --git a/tests/tck/features/schema/Schema.feature b/tests/tck/features/schema/Schema.feature index 458c87e03f8..92623988dde 100644 --- a/tests/tck/features/schema/Schema.feature +++ b/tests/tck/features/schema/Schema.feature @@ -772,7 +772,7 @@ Feature: Insert string vid of vertex and edge # chinese tag and chinese prop When executing query: """ - CREATE TAG 队伍(名字 string); + CREATE TAG `队伍`(`名字` string); """ Then the execution should be successful # show chinese tags @@ -786,13 +786,13 @@ Feature: Insert string vid of vertex and edge # alter chinese tag When executing query: """ - ALTER TAG 队伍 ADD (类别 string); + ALTER TAG `队伍` ADD (`类别` string); """ Then the execution should be successful # desc chinese tag When executing query: """ - DESCRIBE TAG 队伍 + DESCRIBE TAG `队伍` """ Then the result should be, in any order: | Field | Type | Null | Default | Comment | @@ -801,7 +801,7 @@ Feature: Insert string vid of vertex and edge # chinese edge and chinese prop When executing query: """ - CREATE EDGE 服役(); + CREATE EDGE `服役`(); """ Then the execution should be successful # show chinese edge @@ -815,13 +815,13 @@ Feature: Insert string vid of vertex and edge # alter chinese edge When executing query: """ - ALTER EDGE 服役 ADD (时间 timestamp); + ALTER EDGE `服役` ADD (`时间` timestamp); """ Then the execution should be successful # desc chinese edge When executing query: """ - DESCRIBE EDGE 服役 + DESCRIBE EDGE `服役` """ Then the result should be, in any order: | Field | Type | Null | Default | Comment | From 42b3ed1b1d73a6093e04e22e5d8b353d00d1013a Mon Sep 17 00:00:00 2001 From: heroicNeZha <25311962+heroicNeZha@users.noreply.github.com> Date: Thu, 2 Dec 2021 11:04:22 +0800 Subject: [PATCH 4/5] add test cases --- tests/tck/features/schema/Schema.feature | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/tests/tck/features/schema/Schema.feature b/tests/tck/features/schema/Schema.feature index 92623988dde..13a095973de 100644 --- a/tests/tck/features/schema/Schema.feature +++ b/tests/tck/features/schema/Schema.feature @@ -769,6 +769,12 @@ Feature: Insert string vid of vertex and edge ALTER EDGE edge_not_null_default1 CHANGE (name FIXED_STRING(10) DEFAULT 10) """ Then a ExecutionError should be raised at runtime: Invalid param! + # chinese tag without quote mark + When executing query: + """ + CREATE TAG 队伍(名字 string); + """ + Then a SyntaxError should be raised at runtime: # chinese tag and chinese prop When executing query: """ From ba483c2701d6dbb395f520d25444ca666488d88f Mon Sep 17 00:00:00 2001 From: heroicNeZha <25311962+heroicNeZha@users.noreply.github.com> Date: Thu, 2 Dec 2021 13:53:50 +0800 Subject: [PATCH 5/5] fix encode bug --- tests/tck/features/schema/Schema.feature | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/tck/features/schema/Schema.feature b/tests/tck/features/schema/Schema.feature index 13a095973de..f30f79a6d27 100644 --- a/tests/tck/features/schema/Schema.feature +++ b/tests/tck/features/schema/Schema.feature @@ -772,7 +772,7 @@ Feature: Insert string vid of vertex and edge # chinese tag without quote mark When executing query: """ - CREATE TAG 队伍(名字 string); + CREATE TAG 队伍( 名字 string); """ Then a SyntaxError should be raised at runtime: # chinese tag and chinese prop