From a77bd571f62a602f45c2da58ce5f52d89265646d Mon Sep 17 00:00:00 2001 From: Tor Brede Vekterli Date: Wed, 9 Sep 2020 12:14:34 +0000 Subject: Support 'id' as field name in C++ document selection lexing/parsing 'id' is normally a reserved keyword and wasn't explicitly allowed for the purpose of field name identifiers. Change `ID` to be lexed as a string token to allow for preserving the original casing when used as an identifier. --- document/src/tests/documentselectparsertest.cpp | 9 +++++-- .../src/vespa/document/select/grammar/lexer.ll | 2 +- .../src/vespa/document/select/grammar/parser.yy | 29 ++++++++++++++++------ 3 files changed, 29 insertions(+), 11 deletions(-) diff --git a/document/src/tests/documentselectparsertest.cpp b/document/src/tests/documentselectparsertest.cpp index 6fd9ab80faa..30b2bfbb1b4 100644 --- a/document/src/tests/documentselectparsertest.cpp +++ b/document/src/tests/documentselectparsertest.cpp @@ -94,9 +94,11 @@ void DocumentSelectParserTest::SetUp() Struct("usergroup.header"), Struct("usergroup.body")); builder.document(875463456, "user", - Struct("user.header"), Struct("user.body")); + Struct("user.header").addField("id", DataType::T_INT), + Struct("user.body")); builder.document(567463442, "group", - Struct("group.header"), Struct("group.body")); + Struct("group.header").addField("iD", DataType::T_INT), + Struct("group.body")); _repo = std::make_unique(builder.config()); _parser = std::make_unique(*_repo, _bucketIdFactory); @@ -1455,6 +1457,9 @@ TEST_F(DocumentSelectParserTest, special_tokens_are_allowed_as_freestanding_iden EXPECT_EQ("(== (ID id.user) (FIELD user user))", parse_to_tree("id.user == user.user")); EXPECT_EQ("(NOT (DOCTYPE group))", parse_to_tree("not group")); EXPECT_EQ("(== (ID id.group) (FIELD group group))", parse_to_tree("id.group == group.group")); + EXPECT_EQ("(== (FIELD user id) (ID id.user))", parse_to_tree("user.id == id.user")); + // Case is preserved for special ID field + EXPECT_EQ("(== (FIELD group iD) (ID id.user))", parse_to_tree("group.iD == id.user")); } TEST_F(DocumentSelectParserTest, test_can_build_field_value_from_field_expr_node) diff --git a/document/src/vespa/document/select/grammar/lexer.ll b/document/src/vespa/document/select/grammar/lexer.ll index 1222aac02a2..d52cf0db7a9 100644 --- a/document/src/vespa/document/select/grammar/lexer.ll +++ b/document/src/vespa/document/select/grammar/lexer.ll @@ -119,7 +119,6 @@ SQ_STRING \'(\\([\\tnfr']|x{HEXDIGIT}{2})|[^'\\])*\' \[{WS}*(${IDCHARS}|{DECIMAL}){WS}*\] STRING_TOKEN(FP_ARRAY_LOOKUP) /* Primary tokens are case insensitive */ -(?i:"id") NAMED_TOKEN(ID) (?i:"null") NAMED_TOKEN(NULL) (?i:"true") NAMED_TOKEN(TRUE) (?i:"false") NAMED_TOKEN(FALSE) @@ -128,6 +127,7 @@ SQ_STRING \'(\\([\\tnfr']|x{HEXDIGIT}{2})|[^'\\])*\' (?i:"not") NAMED_TOKEN(NOT) /* We expose the verbatim input as the token value, as these may also be used for identifiers... */ +(?i:"id") STRING_TOKEN(ID) (?i:"user") STRING_TOKEN(USER) (?i:"group") STRING_TOKEN(GROUP) (?i:"scheme") STRING_TOKEN(SCHEME) diff --git a/document/src/vespa/document/select/grammar/parser.yy b/document/src/vespa/document/select/grammar/parser.yy index 9d5b5825330..9e4a1d1a222 100644 --- a/document/src/vespa/document/select/grammar/parser.yy +++ b/document/src/vespa/document/select/grammar/parser.yy @@ -50,7 +50,6 @@ %token LE "<=" %token GT ">" %token LT "<" -%token ID %token NOW_FUNC /* @@ -73,7 +72,7 @@ %token FP_MAP_LOOKUP FP_ARRAY_LOOKUP %token FLOAT %token INTEGER -%token USER GROUP SCHEME NAMESPACE SPECIFIC BUCKET GID TYPE +%token ID USER GROUP SCHEME NAMESPACE SPECIFIC BUCKET GID TYPE %type ident mangled_ident %type bool_ @@ -84,7 +83,7 @@ %type field_spec %destructor { delete $$; } IDENTIFIER STRING FP_MAP_LOOKUP FP_ARRAY_LOOKUP -%destructor { delete $$; } USER GROUP SCHEME NAMESPACE SPECIFIC BUCKET GID TYPE +%destructor { delete $$; } ID USER GROUP SCHEME NAMESPACE SPECIFIC BUCKET GID TYPE %destructor { delete $$; } null_ bool_ number string doc_type ident id_arg id_spec %destructor { delete $$; } variable mangled_ident field_spec value arith_expr %destructor { delete $$; } comparison leaf logical_expr expression @@ -244,9 +243,18 @@ id_arg ; id_spec - : ID %prec NON_DOT { $$ = new IdValueNode(bucket_id_factory, "id", ""); } /* Prefer shifting instead of reducing */ - | ID "." id_arg { $$ = new IdValueNode(bucket_id_factory, "id", *steal($3)); } - | ID "." IDENTIFIER "(" ")" { $$ = new FunctionValueNode(*steal($3), std::make_unique(bucket_id_factory, "id", "")); } + : ID %prec NON_DOT { + (void)steal($1); // Explicitly discard. + $$ = new IdValueNode(bucket_id_factory, "id", ""); // Prefer shifting instead of reducing. + } + | ID "." id_arg { + (void)steal($1); // Explicitly discard. + $$ = new IdValueNode(bucket_id_factory, "id", *steal($3)); + } + | ID "." IDENTIFIER "(" ")" { + (void)steal($1); // Explicitly discard. + $$ = new FunctionValueNode(*steal($3), std::make_unique(bucket_id_factory, "id", "")); + } ; variable @@ -254,12 +262,17 @@ variable ; /* FIXME this is a horrible leftover of post-parsed fieldpath processing */ - /* At least we verify structural integrity at initial parse-time now... */ - /* Post-parsing should be replaced with an actual parse-time built AST! */ + /* At least we verify structural integrity at initial parse-time now... */ + /* Post-parsing should be replaced with an actual parse-time built AST! */ + /* This rule is only used after matching an initial valid identifier, so */ + /* we add some special casing of lexer keywords that today are allowed as */ + /* regular field names (but not as document type names). Not pretty, but */ + /* it avoids parser ambiguities. */ mangled_ident : ident { $$ = $1; } | mangled_ident FP_MAP_LOOKUP { $1->append(*steal($2)); $$ = $1; } | mangled_ident FP_ARRAY_LOOKUP { $1->append(*steal($2)); $$ = $1; } + | ID { $$ = $1; } ; field_spec -- cgit v1.2.3 From b0f3636fcc9fab70547cae12f9b6529b238cf1be Mon Sep 17 00:00:00 2001 From: Tor Brede Vekterli Date: Wed, 9 Sep 2020 14:17:16 +0200 Subject: Verify existing semantics of 'id' as field name in Java parser --- .../com/yahoo/document/select/DocumentSelectorTestCase.java | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/document/src/test/java/com/yahoo/document/select/DocumentSelectorTestCase.java b/document/src/test/java/com/yahoo/document/select/DocumentSelectorTestCase.java index feeac3d9da0..5e5e2394e49 100644 --- a/document/src/test/java/com/yahoo/document/select/DocumentSelectorTestCase.java +++ b/document/src/test/java/com/yahoo/document/select/DocumentSelectorTestCase.java @@ -88,8 +88,12 @@ public class DocumentSelectorTestCase { manager.registerDocumentType(new DocumentType("andornot")); manager.registerDocumentType(new DocumentType("idid")); manager.registerDocumentType(new DocumentType("usergroup")); - manager.registerDocumentType(new DocumentType("user")); - manager.registerDocumentType(new DocumentType("group")); + var userType = new DocumentType("user"); + userType.addField("id", DataType.INT); + manager.registerDocumentType(userType); + var groupType = new DocumentType("group"); + groupType.addField("iD", DataType.INT); // For checking case preservation + manager.registerDocumentType(groupType); } @Test @@ -157,6 +161,8 @@ public class DocumentSelectorTestCase { assertParse(null, "true or or_t or ortype"); assertParse(null, "user or group"); assertParse(null, "user.foo or group.bar"); + assertParse("user.id == id.user", "user.id == id.user"); + assertParse("group.iD == id.user", "group.iD == id.user"); // Casing is preserved } @Test -- cgit v1.2.3