summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author Henning Baldersheim <balder@yahoo-inc.com> 2020-09-10 09:18:11 +0200
committer GitHub <noreply@github.com> 2020-09-10 09:18:11 +0200
commit 20984d06571d15e99765a4678a630e3b102c7b6c (patch)
tree 2915cd824d8d10df48f539f3a90f5e3b0568c3b5
parent 366162504449bc93187b6d7ce8b47f0fe99e8437 (diff)
parent c70947f3e6b49d2757ae93e4b03d3f402f712de9 (diff)
Merge pull request #14361 from vespa-engine/revert-14347-revert-14338-vekterli/support-id-as-field-name-in-cpp-docsel-parser
Revert "Revert "Support 'id' as field name in C++ document selection lexing/parsing" MERGEOK"
-rw-r--r-- document/src/test/java/com/yahoo/document/select/DocumentSelectorTestCase.java 10
-rw-r--r-- document/src/tests/documentselectparsertest.cpp 9
-rw-r--r-- document/src/vespa/document/select/grammar/lexer.ll 2
-rw-r--r-- document/src/vespa/document/select/grammar/parser.yy 29
4 files changed, 37 insertions, 13 deletions
diff --git a/document/src/test/java/com/yahoo/document/select/DocumentSelectorTestCase.java b/document/src/test/java/com/yahoo/document/select/DocumentSelectorTestCase.java
index feeac3d9da0..5e5e2394e49 100644
--- a/document/src/test/java/com/yahoo/document/select/DocumentSelectorTestCase.java
+++ b/document/src/test/java/com/yahoo/document/select/DocumentSelectorTestCase.java
@@ -88,8 +88,12 @@ public class DocumentSelectorTestCase {
manager.registerDocumentType(new DocumentType("andornot"));
manager.registerDocumentType(new DocumentType("idid"));
manager.registerDocumentType(new DocumentType("usergroup"));
- manager.registerDocumentType(new DocumentType("user"));
- manager.registerDocumentType(new DocumentType("group"));
+ var userType = new DocumentType("user");
+ userType.addField("id", DataType.INT);
+ manager.registerDocumentType(userType);
+ var groupType = new DocumentType("group");
+ groupType.addField("iD", DataType.INT); // For checking case preservation
+ manager.registerDocumentType(groupType);
}
@Test
@@ -157,6 +161,8 @@ public class DocumentSelectorTestCase {
assertParse(null, "true or or_t or ortype");
assertParse(null, "user or group");
assertParse(null, "user.foo or group.bar");
+ assertParse("user.id == id.user", "user.id == id.user");
+ assertParse("group.iD == id.user", "group.iD == id.user"); // Casing is preserved
}
@Test
diff --git a/document/src/tests/documentselectparsertest.cpp b/document/src/tests/documentselectparsertest.cpp
index 6fd9ab80faa..30b2bfbb1b4 100644
--- a/document/src/tests/documentselectparsertest.cpp
+++ b/document/src/tests/documentselectparsertest.cpp
@@ -94,9 +94,11 @@ void DocumentSelectParserTest::SetUp()
Struct("usergroup.header"),
Struct("usergroup.body"));
builder.document(875463456, "user",
- Struct("user.header"), Struct("user.body"));
+ Struct("user.header").addField("id", DataType::T_INT),
+ Struct("user.body"));
builder.document(567463442, "group",
- Struct("group.header"), Struct("group.body"));
+ Struct("group.header").addField("iD", DataType::T_INT),
+ Struct("group.body"));
_repo = std::make_unique<DocumentTypeRepo>(builder.config());
_parser = std::make_unique<select::Parser>(*_repo, _bucketIdFactory);
@@ -1455,6 +1457,9 @@ TEST_F(DocumentSelectParserTest, special_tokens_are_allowed_as_freestanding_iden
EXPECT_EQ("(== (ID id.user) (FIELD user user))", parse_to_tree("id.user == user.user"));
EXPECT_EQ("(NOT (DOCTYPE group))", parse_to_tree("not group"));
EXPECT_EQ("(== (ID id.group) (FIELD group group))", parse_to_tree("id.group == group.group"));
+ EXPECT_EQ("(== (FIELD user id) (ID id.user))", parse_to_tree("user.id == id.user"));
+ // Case is preserved for special ID field
+ EXPECT_EQ("(== (FIELD group iD) (ID id.user))", parse_to_tree("group.iD == id.user"));
}
TEST_F(DocumentSelectParserTest, test_can_build_field_value_from_field_expr_node)
diff --git a/document/src/vespa/document/select/grammar/lexer.ll b/document/src/vespa/document/select/grammar/lexer.ll
index 1222aac02a2..d52cf0db7a9 100644
--- a/document/src/vespa/document/select/grammar/lexer.ll
+++ b/document/src/vespa/document/select/grammar/lexer.ll
@@ -119,7 +119,6 @@ SQ_STRING \'(\\([\\tnfr']|x{HEXDIGIT}{2})|[^'\\])*\'
\[{WS}*(${IDCHARS}|{DECIMAL}){WS}*\] STRING_TOKEN(FP_ARRAY_LOOKUP)
/* Primary tokens are case insensitive */
-(?i:"id") NAMED_TOKEN(ID)
(?i:"null") NAMED_TOKEN(NULL)
(?i:"true") NAMED_TOKEN(TRUE)
(?i:"false") NAMED_TOKEN(FALSE)
@@ -128,6 +127,7 @@ SQ_STRING \'(\\([\\tnfr']|x{HEXDIGIT}{2})|[^'\\])*\'
(?i:"not") NAMED_TOKEN(NOT)
/* We expose the verbatim input as the token value, as these may also be used for identifiers... */
+(?i:"id") STRING_TOKEN(ID)
(?i:"user") STRING_TOKEN(USER)
(?i:"group") STRING_TOKEN(GROUP)
(?i:"scheme") STRING_TOKEN(SCHEME)
diff --git a/document/src/vespa/document/select/grammar/parser.yy b/document/src/vespa/document/select/grammar/parser.yy
index 9d5b5825330..9e4a1d1a222 100644
--- a/document/src/vespa/document/select/grammar/parser.yy
+++ b/document/src/vespa/document/select/grammar/parser.yy
@@ -50,7 +50,6 @@
%token LE "<="
%token GT ">"
%token LT "<"
-%token ID
%token NOW_FUNC
/*
@@ -73,7 +72,7 @@
%token <string_val> FP_MAP_LOOKUP FP_ARRAY_LOOKUP
%token <double_val> FLOAT
%token <i64_val> INTEGER
-%token <string_val> USER GROUP SCHEME NAMESPACE SPECIFIC BUCKET GID TYPE
+%token <string_val> ID USER GROUP SCHEME NAMESPACE SPECIFIC BUCKET GID TYPE
%type <string_val> ident mangled_ident
%type <abstract_node> bool_
@@ -84,7 +83,7 @@
%type <field_expr_node> field_spec
%destructor { delete $$; } IDENTIFIER STRING FP_MAP_LOOKUP FP_ARRAY_LOOKUP
-%destructor { delete $$; } USER GROUP SCHEME NAMESPACE SPECIFIC BUCKET GID TYPE
+%destructor { delete $$; } ID USER GROUP SCHEME NAMESPACE SPECIFIC BUCKET GID TYPE
%destructor { delete $$; } null_ bool_ number string doc_type ident id_arg id_spec
%destructor { delete $$; } variable mangled_ident field_spec value arith_expr
%destructor { delete $$; } comparison leaf logical_expr expression
@@ -244,9 +243,18 @@ id_arg
;
id_spec
- : ID %prec NON_DOT { $$ = new IdValueNode(bucket_id_factory, "id", ""); } /* Prefer shifting instead of reducing */
- | ID "." id_arg { $$ = new IdValueNode(bucket_id_factory, "id", *steal<string>($3)); }
- | ID "." IDENTIFIER "(" ")" { $$ = new FunctionValueNode(*steal<string>($3), std::make_unique<IdValueNode>(bucket_id_factory, "id", "")); }
+ : ID %prec NON_DOT {
+ (void)steal<string>($1); // Explicitly discard.
+ $$ = new IdValueNode(bucket_id_factory, "id", ""); // Prefer shifting instead of reducing.
+ }
+ | ID "." id_arg {
+ (void)steal<string>($1); // Explicitly discard.
+ $$ = new IdValueNode(bucket_id_factory, "id", *steal<string>($3));
+ }
+ | ID "." IDENTIFIER "(" ")" {
+ (void)steal<string>($1); // Explicitly discard.
+ $$ = new FunctionValueNode(*steal<string>($3), std::make_unique<IdValueNode>(bucket_id_factory, "id", ""));
+ }
;
variable
@@ -254,12 +262,17 @@ variable
;
/* FIXME this is a horrible leftover of post-parsed fieldpath processing */
- /* At least we verify structural integrity at initial parse-time now... */
- /* Post-parsing should be replaced with an actual parse-time built AST! */
+ /* At least we verify structural integrity at initial parse-time now... */
+ /* Post-parsing should be replaced with an actual parse-time built AST! */
+ /* This rule is only used after matching an initial valid identifier, so */
+ /* we add some special casing of lexer keywords that today are allowed as */
+ /* regular field names (but not as document type names). Not pretty, but */
+ /* it avoids parser ambiguities. */
mangled_ident
: ident { $$ = $1; }
| mangled_ident FP_MAP_LOOKUP { $1->append(*steal<string>($2)); $$ = $1; }
| mangled_ident FP_ARRAY_LOOKUP { $1->append(*steal<string>($2)); $$ = $1; }
+ | ID { $$ = $1; }
;
field_spec