summaryrefslogtreecommitdiffstats
path: root/document
diff options
context:
space:
mode:
authorTor Brede Vekterli <vekterli@yahoo-inc.com>2017-10-16 15:40:50 +0000
committerTor Brede Vekterli <vekterli@yahoo-inc.com>2017-11-14 13:39:49 +0000
commit5519e892e22c7f6fd8da068ff7ab876ee3161faf (patch)
tree88e5b2be5d8f265317b2a03b6b39e2395ce628bc /document
parent69fcc4a227ffb93c5a825077421d462b1427ba24 (diff)
Use new C++ document selection parser
Based on Flex/Bison and replaces old Spirit.Classic parser. New parser is pure and does not require any locking, unlike the previous implementation. This also removes parsing of the deprecated searchcolumn feature. Adds build-time dependencies on Flex and Bison.
Diffstat (limited to 'document')
-rw-r--r--document/src/tests/bucketselectortest.cpp2
-rw-r--r--document/src/tests/documentselectparsertest.cpp510
-rw-r--r--document/src/vespa/document/bucket/bucketselector.cpp26
-rw-r--r--document/src/vespa/document/select/.gitignore3
-rw-r--r--document/src/vespa/document/select/CMakeLists.txt16
-rw-r--r--document/src/vespa/document/select/branch.cpp6
-rw-r--r--document/src/vespa/document/select/cloningvisitor.cpp12
-rw-r--r--document/src/vespa/document/select/cloningvisitor.h1
-rw-r--r--document/src/vespa/document/select/constant.cpp24
-rw-r--r--document/src/vespa/document/select/constant.h9
-rw-r--r--document/src/vespa/document/select/gid_filter.cpp1
-rw-r--r--document/src/vespa/document/select/grammar/lexer.ll182
-rw-r--r--document/src/vespa/document/select/grammar/parser.yy374
-rw-r--r--document/src/vespa/document/select/node.h3
-rw-r--r--document/src/vespa/document/select/orderingselector.cpp1
-rw-r--r--document/src/vespa/document/select/parse_utils.cpp37
-rw-r--r--document/src/vespa/document/select/parse_utils.h17
-rw-r--r--document/src/vespa/document/select/parser.cpp1498
-rw-r--r--document/src/vespa/document/select/parser.h45
-rw-r--r--document/src/vespa/document/select/parsing_failed_exception.cpp9
-rw-r--r--document/src/vespa/document/select/parsing_failed_exception.h10
-rw-r--r--document/src/vespa/document/select/scanner.h21
-rw-r--r--document/src/vespa/document/select/traversingvisitor.cpp6
-rw-r--r--document/src/vespa/document/select/traversingvisitor.h1
-rw-r--r--document/src/vespa/document/select/valuenodes.cpp128
-rw-r--r--document/src/vespa/document/select/valuenodes.h90
-rw-r--r--document/src/vespa/document/select/visitor.h3
27 files changed, 1238 insertions, 1797 deletions
diff --git a/document/src/tests/bucketselectortest.cpp b/document/src/tests/bucketselectortest.cpp
index 0f8520745f1..e0857a32dba 100644
--- a/document/src/tests/bucketselectortest.cpp
+++ b/document/src/tests/bucketselectortest.cpp
@@ -85,8 +85,6 @@ void BucketSelectorTest::testSimple()
ASSERT_BUCKET_COUNT("id.bucket == 0x4000000000000258", 1u); // Bucket 600
ASSERT_BUCKET_COUNT("(testdoctype1 and id.bucket=0)", 1u);
- ASSERT_BUCKET_COUNT("searchcolumn.3 = 1", 21845u);
-
// Check that the correct buckets is found
ASSERT_BUCKET("id = \"userdoc:ns:123:foobar\"",
document::BucketId(58, 123));
diff --git a/document/src/tests/documentselectparsertest.cpp b/document/src/tests/documentselectparsertest.cpp
index c5715ae5114..db7b48cdc3a 100644
--- a/document/src/tests/documentselectparsertest.cpp
+++ b/document/src/tests/documentselectparsertest.cpp
@@ -1,6 +1,5 @@
// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
-
#include <cppunit/TestFixture.h>
#include <cppunit/extensions/HelperMacros.h>
#include <vespa/document/repo/configbuilder.h>
@@ -12,12 +11,16 @@
#include <vespa/document/select/visitor.h>
#include <vespa/document/select/bodyfielddetector.h>
#include <vespa/document/select/valuenode.h>
+#include <vespa/document/select/valuenodes.h>
#include <vespa/document/select/branch.h>
#include <vespa/document/select/simpleparser.h>
#include <vespa/document/select/constant.h>
#include <vespa/document/select/invalidconstant.h>
#include <vespa/document/select/doctype.h>
#include <vespa/document/select/compare.h>
+#include <vespa/document/select/parse_utils.h>
+#include <vespa/vespalib/util/exceptions.h>
+#include <limits>
using namespace document::config_builder;
@@ -34,6 +37,15 @@ class DocumentSelectParserTest : public CppUnit::TestFixture {
CPPUNIT_TEST(testThatComplexFieldValuesHaveCorrectFieldNames);
CPPUNIT_TEST(testBodyFieldDetection);
CPPUNIT_TEST(testDocumentUpdates);
+ CPPUNIT_TEST(test_syntax_error_reporting);
+ CPPUNIT_TEST(test_operator_precedence);
+ CPPUNIT_TEST(test_token_used_as_ident_preserves_casing);
+ CPPUNIT_TEST(test_ambiguous_field_spec_expression_is_handled_correctly);
+ CPPUNIT_TEST(test_can_build_field_value_from_field_expr_node);
+ CPPUNIT_TEST(test_can_build_function_call_from_field_expr_node);
+ CPPUNIT_TEST(test_function_call_on_doctype_throws_exception);
+ CPPUNIT_TEST(test_parse_utilities_handle_well_formed_input);
+ CPPUNIT_TEST(test_parse_utilities_handle_malformed_input);
CPPUNIT_TEST_SUITE_END();
BucketIdFactory _bucketIdFactory;
@@ -51,11 +63,13 @@ class DocumentSelectParserTest : public CppUnit::TestFixture {
const std::string& hstr);
std::unique_ptr<select::FieldValueNode>
- parseFieldValue(const std::string expression);
+ parseFieldValue(const std::string& expression);
template <typename ContainsType>
select::ResultList doParse(const vespalib::stringref& expr,
const ContainsType& t);
+
+ std::string parse_to_tree(const std::string& str);
public:
DocumentSelectParserTest()
@@ -88,7 +102,15 @@ public:
void testDocumentUpdates2();
void testDocumentUpdates3();
void testDocumentUpdates4();
- void testDocumentUpdates5();
+ void test_syntax_error_reporting();
+ void test_operator_precedence();
+ void test_token_used_as_ident_preserves_casing();
+ void test_ambiguous_field_spec_expression_is_handled_correctly();
+ void test_can_build_field_value_from_field_expr_node();
+ void test_can_build_function_call_from_field_expr_node();
+ void test_function_call_on_doctype_throws_exception();
+ void test_parse_utilities_handle_well_formed_input();
+ void test_parse_utilities_handle_malformed_input();
};
CPPUNIT_TEST_SUITE_REGISTRATION(DocumentSelectParserTest);
@@ -111,9 +133,9 @@ void DocumentSelectParserTest::setUp()
builder.document(-1673092522, "usergroup",
Struct("usergroup.header"),
Struct("usergroup.body"));
- _repo.reset(new DocumentTypeRepo(builder.config()));
+ _repo = std::make_unique<DocumentTypeRepo>(builder.config());
- _parser.reset(new select::Parser(*_repo, _bucketIdFactory));
+ _parser = std::make_unique<select::Parser>(*_repo, _bucketIdFactory);
}
Document::SP DocumentSelectParserTest::createDoc(
@@ -319,11 +341,45 @@ void verifyParse(const std::string& query, const char* expected = 0) {
}
}
+void DocumentSelectParserTest::test_syntax_error_reporting() { // Pins the exact error text expected for a set of malformed selections.
+ createDocs();
+
+ verifyFailedParse("testdoctype1.headerval == aaa", "ParsingFailedException: " // bare identifier on the right-hand side; message expects a '.' to follow it
+ "syntax error, unexpected end of input, expecting . at column 30 "
+ "when parsing selection 'testdoctype1.headerval == aaa'");
+ // TODO improve error reporting of broken escape sequences. Current error messages
+ // are not too helpful since we simply fail to parse the string token altogether.
+ verifyFailedParse("testdoctype1.headerval == \"tab\\x0notcomplete\"", // \x escape followed by only one hex digit
+ "ParsingFailedException: Unexpected character: '\\\"' at column 27 "
+ "when parsing selection 'testdoctype1.headerval == \"tab\\x0notcomplete\"'");
+ verifyFailedParse("testdoctype1.headerval == \"tab\\ysf\"", // \y escape inside the string literal
+ "ParsingFailedException: Unexpected character: '\\\"' at column 27 "
+ "when parsing selection 'testdoctype1.headerval == \"tab\\ysf\"'");
+ // Test illegal operator
+ verifyFailedParse("testdoctype1.headerval <> 12", "ParsingFailedException: syntax error, "
+ "unexpected > at column 25 when parsing selection 'testdoctype1.headerval <> 12'");
+
+ // This will trigger a missing doctype error instead of syntax error, as "fal"
+ // will be reduced into a doctype rule.
+ verifyFailedParse("fal se", "ParsingFailedException: Document type 'fal' "
+ "not found at column 1 when parsing selection 'fal se'");
+
+ verifyFailedParse("mytype", "ParsingFailedException: Document type 'mytype' not found"); // NOTE(review): no column suffix here; presumably verifyFailedParse matches a prefix - confirm
+
+ verifyFailedParse("mytype.foo.bar", "ParsingFailedException: Document type 'mytype' not found"); // same unknown doctype, used as the root of a field path
+
+ verifyFailedParse("testdoctype1 == 8", "ParsingFailedException: syntax error, unexpected ==, " // a comparison directly after a bare doctype is a syntax error
+ "expecting end of input at column 14 when parsing selection 'testdoctype1 == 8'");
+
+ verifyFailedParse("(1 + 2)", "ParsingFailedException: expected field spec, " // an arithmetic expression on its own is rejected as a selection
+ "doctype, bool or comparison at column 1 when parsing selection '(1 + 2)'");
+}
+
void DocumentSelectParserTest::testParseTerminals()
{
createDocs();
- // Test number value
+ // Test number value
verifyParse("", "true");
verifyParse("testdoctype1.headerval == 123");
verifyParse("testdoctype1.headerval == +123.53", "testdoctype1.headerval == 123.53");
@@ -332,10 +388,8 @@ void DocumentSelectParserTest::testParseTerminals()
"testdoctype1.headerval == 2.34124e+08");
verifyParse("testdoctype1.headerval == -234123.523E-3",
"testdoctype1.headerval == -234.124");
- verifyFailedParse("testdoctype1.headerval == aaa", "ParsingFailedException: "
- "Unexpected token at position 23 ('== aaa') in query "
- "'testdoctype1.headerval == aaa', at fullParse in ");
- // Test string value
+
+ // Test string value
verifyParse("testdoctype1.headerval == \"test\"");
std::unique_ptr<select::Node> node(
_parser->parse("testdoctype1.headerval == \"test\""));
@@ -345,64 +399,46 @@ void DocumentSelectParserTest::testParseTerminals()
dynamic_cast<const select::FieldValueNode&>(compnode.getLeft()));
const select::StringValueNode& vnode(
dynamic_cast<const select::StringValueNode&>(compnode.getRight()));
- /*
- CPPUNIT_ASSERT_EQUAL(vespalib::string("testdoctype1"),
- fnode.getDocType()->getName());
- */
+
CPPUNIT_ASSERT_EQUAL(vespalib::string("headerval"), fnode.getFieldName());
CPPUNIT_ASSERT_EQUAL(vespalib::string("test"), vnode.getValue());
- // Test whitespace
+ // Test whitespace
verifyParse("testdoctype1.headerval == \"te st \"");
verifyParse(" \t testdoctype1.headerval\t== \t \"test\"\t",
"testdoctype1.headerval == \"test\"");
- // Test escaping
+ // Test escaping
verifyParse("testdoctype1.headerval == \"tab\\ttest\"");
verifyParse("testdoctype1.headerval == \"tab\\x09test\"",
"testdoctype1.headerval == \"tab\\ttest\"");
verifyParse("testdoctype1.headerval == \"tab\\x055test\"");
- verifyFailedParse("testdoctype1.headerval == \"tab\\x0notcomplete\"",
- "ParsingFailedException: Unexpected token at position 23 "
- "('== \"tab\\x0') in query 'testdoctype1.headerval == \"tab\\x0notcomplete\"', "
- "at fullParse in ");
- verifyFailedParse("testdoctype1.headerval == \"tab\\ysf\"",
- "ParsingFailedException: Unexpected token at position 23 "
- "('== \"tab\\ys') in query 'testdoctype1.headerval == \"tab\\ysf\"', "
- "at fullParse in ");
node = _parser->parse("testdoctype1.headerval == \"\\tt\\x48 \\n\"");
select::Compare& escapednode(dynamic_cast<select::Compare&>(*node));
const select::StringValueNode& escval(
dynamic_cast<const select::StringValueNode&>(escapednode.getRight()));
CPPUNIT_ASSERT_EQUAL(vespalib::string("\ttH \n"), escval.getValue());
- // Test illegal operator
- verifyFailedParse("testdoctype1.headerval <> 12", "ParsingFailedException: Unexpected"
- " token at position 23 ('<> 12') in query 'testdoctype1.headerval <> 12', at");
- // Test <= <, > >=
+ // Test <= <, > >=
verifyParse("testdoctype1.headerval >= 123");
verifyParse("testdoctype1.headerval > 123");
verifyParse("testdoctype1.headerval <= 123");
verifyParse("testdoctype1.headerval < 123");
verifyParse("testdoctype1.headerval != 123");
- // Test defined
+ // Test defined
verifyParse("testdoctype1.headerval", "testdoctype1.headerval != null");
- // Test bools
- verifyParse("TRUE");
- verifyParse("FALSE");
+ // Test bools
+ verifyParse("TRUE", "true");
+ verifyParse("FALSE", "false");
verifyParse("true");
verifyParse("false");
- verifyParse("faLSe");
- verifyFailedParse("fal se", "ParsingFailedException: Unexpected token at "
- "position 4 ('se') in query 'fal se', at");
+ verifyParse("faLSe", "false");
- // Test document types
+ // Test document types
verifyParse("testdoctype1");
- verifyFailedParse("mytype", "ParsingFailedException: Document type mytype "
- "not found");
verifyParse("_test_doctype3_");
verifyParse("_test_doctype3_._only_in_child_ == 0");
- // Test document id with simple parser.
+ // Test document id with simple parser.
verifySimpleParse("id == \"userdoc:ns:mytest\"");
verifySimpleParse("id.namespace == \"myspace\"");
verifySimpleParse("id.scheme == \"userdoc\"");
@@ -411,7 +447,7 @@ void DocumentSelectParserTest::testParseTerminals()
verifySimpleParse("id.user == 1234");
verifySimpleParse("id.user == 0x12456ab", "id.user == 19158699");
- // Test document id
+ // Test document id
verifyParse("id == \"userdoc:ns:mytest\"");
verifyParse("id.namespace == \"myspace\"");
verifyParse("id.scheme == \"userdoc\"");
@@ -429,28 +465,23 @@ void DocumentSelectParserTest::testParseTerminals()
"id.bucket == -9223372036854775566");
verifyParse("id.gid == \"gid(0xd755743aea262650274d70f0)\"");
- // Test search column
- verifyParse("searchcolumn.10 == 2");
-
- // Test other operators
+ // Test other operators
verifyParse("id.scheme = \"*doc\"");
verifyParse("testdoctype1.hstringval =~ \"(john|barry|shrek)\"");
- // Verify functions
+ // Verify functions
verifyParse("id.hash() == 124");
verifyParse("id.specific.hash() == 124");
verifyParse("testdoctype1.hstringval.lowercase() == \"chang\"");
verifyParse("testdoctype1.hstringval.lowercase().hash() == 124");
- verifyFailedParse("testdoctype1 == 8", "ParsingFailedException: Unexpected token"
- " at position 13 ('== 8') in query 'testdoctype1 == 8', at fullParse in ");
verifyParse("testdoctype1.hintval > now()");
verifyParse("testdoctype1.hintval > now().abs()");
- // Value grouping
+ // Value grouping
verifyParse("(123) < (200)");
verifyParse("(\"hmm\") < (id.scheme)");
- // Arithmetics
+ // Arithmetics
verifyParse("1 + 2 > 1");
verifyParse("1 - 2 > 1");
verifyParse("1 * 2 > 1");
@@ -459,11 +490,11 @@ void DocumentSelectParserTest::testParseTerminals()
verifyParse("(1 + 2) * (4 - 2) == 1");
verifyParse("23 + 643 / 34 % 10 > 34");
- // CJK stuff
+ // CJK stuff
verifyParse("testdoctype1.hstringval = \"\xE4\xB8\xBA\xE4\xBB\x80\"",
"testdoctype1.hstringval = \"\\xe4\\xb8\\xba\\xe4\\xbb\\x80\"");
- // Strange doctype names
+ // Strange doctype names
verifyParse("notandor");
verifyParse("ornotand");
verifyParse("andornot");
@@ -475,16 +506,16 @@ void DocumentSelectParserTest::testParseBranches()
{
createDocs();
- verifyParse("TRUE or FALSE aNd FALSE oR TRUE");
- verifyParse("TRUE and FALSE or FALSE and TRUE");
- verifyParse("TRUE or FALSE and FALSE or TRUE");
- verifyParse("(TRUE or FALSE) and (FALSE or TRUE)");
+ verifyParse("TRUE or FALSE aNd FALSE oR TRUE", "true or false and false or true");
+ verifyParse("TRUE and FALSE or FALSE and TRUE", "true and false or false and true");
+ verifyParse("TRUE or FALSE and FALSE or TRUE", "true or false and false or true");
+ verifyParse("(TRUE or FALSE) and (FALSE or TRUE)", "(true or false) and (false or true)");
verifyParse("true or (not false) and not true");
- // Test number branching with node branches
+ // Test number branching with node branches
verifyParse("((243) < 300 and (\"FOO\").lowercase() == (\"foo\"))");
- // Strange doctype names
+ // Strange doctype names
verifyParse("notandor and ornotand");
verifyParse("ornotand or andornot");
verifyParse("not andornot");
@@ -554,7 +585,7 @@ void DocumentSelectParserTest::testOperators0()
std::cerr << ost.str() << "\n";
} // */
- // Check that comparison operators work.
+ // Check that comparison operators work.
PARSE("", *_doc[0], True);
PARSE("30 < 10", *_doc[0], False);
PARSE("10 < 30", *_doc[0], True);
@@ -593,7 +624,7 @@ void DocumentSelectParserTest::testOperators1()
{
createDocs();
- // Mix of types should within numbers, but otherwise not match
+ // Mix of types should within numbers, but otherwise not match
PARSE("30 < 10.2", *_doc[0], False);
PARSE("10.2 < 30", *_doc[0], True);
PARSE("30 < \"foo\"", *_doc[0], Invalid);
@@ -606,7 +637,7 @@ void DocumentSelectParserTest::testOperators1()
PARSE("14.3 == null", *_doc[0], False);
PARSE("null = 0", *_doc[0], False);
- // Field values
+ // Field values
PARSE("testdoctype1.headerval = 24", *_doc[0], True);
PARSE("testdoctype1.headerval = 24", *_doc[1], False);
PARSE("testdoctype1.headerval = 13", *_doc[0], False);
@@ -625,11 +656,11 @@ void DocumentSelectParserTest::testOperators1()
PARSE("testdoctype1.byteweightedset == 7", *_doc[1], False);
PARSE("testdoctype1.byteweightedset == 5", *_doc[1], True);
- // Document types
+ // Document types
PARSE("testdoctype1", *_doc[0], True);
PARSE("testdoctype2", *_doc[0], False);
- // Inherited doctypes
+ // Inherited doctypes
PARSE("testdoctype2", *_doc[4], True);
PARSE("testdoctype2", *_doc[3], False);
PARSE("testdoctype1", *_doc[4], True);
@@ -640,7 +671,7 @@ void DocumentSelectParserTest::testOperators2()
{
createDocs();
- // Id values
+ // Id values
PARSEI("id == \"doc:myspace:anything\"", *_doc[0], True);
PARSEI(" iD== \"doc:myspace:anything\" ", *_doc[0], True);
PARSEI("id == \"doc:myspa:nything\"", *_doc[0], False);
@@ -702,16 +733,13 @@ void DocumentSelectParserTest::testOperators3()
PARSEI("id.user = 1234", *_doc[8], True);
PARSEI("id.group == \"1234\"", *_doc[8], True);
PARSEI("id.group == \"mygroup\"", *_doc[9], True);
-
- // Searchcolumn policy
- PARSE("searchcolumn.10 == 8", *_doc[0], True);
}
void DocumentSelectParserTest::testOperators4()
{
createDocs();
- // Branch operators
+ // Branch operators
PARSEI("true and false", *_doc[0], False);
PARSEI("true and true", *_doc[0], True);
PARSEI("true or false", *_doc[0], True);
@@ -723,7 +751,7 @@ void DocumentSelectParserTest::testOperators4()
PARSEI("true and not false or false", *_doc[0], True);
PARSEI("((243 < 300) and (\"FOO\".lowercase() == \"foo\"))", *_doc[0], True);
- // Invalid branching. testdoctype1.content = 1 is invalid
+ // Invalid branching. testdoctype1.content = 1 is invalid
PARSE("testdoctype1.content = 1 and true", *_doc[0], Invalid);
PARSE("testdoctype1.content = 1 or true", *_doc[0], True);
PARSE("testdoctype1.content = 1 and false", *_doc[0], False);
@@ -738,7 +766,7 @@ void DocumentSelectParserTest::testOperators5()
{
createDocs();
- // Functions
+ // Functions
PARSE("testdoctype1.hstringval.lowercase() == \"Yet\"", *_doc[3], False);
PARSE("testdoctype1.hstringval.lowercase() == \"yet\"", *_doc[3], True);
PARSE("testdoctype1.hfloatval.lowercase() == \"yet\"", *_doc[3], Invalid);
@@ -754,7 +782,7 @@ void DocumentSelectParserTest::testOperators5()
PARSE("now() < 1311862500", *_doc[10], False);
PARSE("now() > 1611862500", *_doc[10], False);
- // Arithmetics
+ // Arithmetics
PARSEI("id.specific.hash() % 10 = 8", *_doc[0], True);
PARSEI("id.specific.hash() % 10 = 2", *_doc[0], False);
PARSEI("\"foo\" + \"bar\" = \"foobar\"", *_doc[0], True);
@@ -767,14 +795,18 @@ void DocumentSelectParserTest::testOperators6()
{
createDocs();
- // CJK
- // Assuming the characters " \ ? * is not used as part of CJK tokens
+ // CJK
+ // Assuming the characters " \ ? * is not used as part of CJK tokens
PARSE("testdoctype1.content=\"\xE4\xB8\xBA\xE4\xBB\x80\"", *_doc[3], True);
PARSE("testdoctype1.content=\"\xE4\xB7\xBA\xE4\xBB\x80\"", *_doc[3], False);
- // Structs and arrays
+ // Structs and arrays
PARSE("testdoctype1.mystruct", *_doc[0], False);
PARSE("testdoctype1.mystruct", *_doc[1], True);
+ PARSE("(testdoctype1.mystruct)", *_doc[0], False);
+ PARSE("(testdoctype1.mystruct)", *_doc[1], True);
+ PARSE("(((testdoctype1.mystruct)))", *_doc[0], False);
+ PARSE("(((testdoctype1.mystruct)))", *_doc[1], True);
PARSE("testdoctype1.mystruct", *_doc[2], False);
PARSE("testdoctype1.mystruct == testdoctype1.mystruct", *_doc[0], True);
PARSE("testdoctype1.mystruct == testdoctype1.mystruct", *_doc[1], True);
@@ -812,6 +844,7 @@ void DocumentSelectParserTest::testOperators7()
PARSE("testdoctype1.structarray", *_doc[1], True);
PARSE("testdoctype1.structarray.key == 15", *_doc[1], True);
PARSE("testdoctype1.structarray[1].key == 16", *_doc[1], True);
+ PARSE("testdoctype1.structarray[1].key", *_doc[1], True); // "key is set?" expr
PARSE("testdoctype1.structarray[1].key = 16", *_doc[1], True);
PARSE("testdoctype1.structarray.value == \"structval1\"", *_doc[0], False);
PARSE("testdoctype1.structarray[4].value == \"structval1\"", *_doc[0], False);
@@ -952,7 +985,6 @@ namespace {
void visitArithmeticValueNode(const select::ArithmeticValueNode &) override {}
void visitFunctionValueNode(const select::FunctionValueNode &) override {}
void visitIdValueNode(const select::IdValueNode &) override {}
- void visitSearchColumnValueNode(const select::SearchColumnValueNode &) override {}
void visitFieldValueNode(const select::FieldValueNode &) override {}
void visitFloatValueNode(const select::FloatValueNode &) override {}
void visitVariableValueNode(const select::VariableValueNode &) override {}
@@ -977,15 +1009,14 @@ void DocumentSelectParserTest::testVisitor()
TestVisitor v;
root->visit(v);
+
std::string expected =
- "OR(CONSTANT(true), "
- "AND(DOCTYPE(testdoctype1), "
- "AND(OR(NOT(COMPARE(id.user = 12)), "
- "COMPARE(testdoctype1.hstringval = \"ola\")), "
- "COMPARE(testdoctype1.headerval != null)"
- ")"
- ")"
- ")";
+ "OR(CONSTANT(true), "
+ "AND(AND(DOCTYPE(testdoctype1), "
+ "OR(NOT(COMPARE(id.user = 12)), "
+ "COMPARE(testdoctype1.hstringval = \"ola\"))), "
+ "COMPARE(testdoctype1.headerval != null)))";
+
CPPUNIT_ASSERT_EQUAL(expected, v.getVisitString());
}
@@ -1093,13 +1124,15 @@ void DocumentSelectParserTest::testDocumentUpdates0()
PARSEI("\"\" =~ \"\"", *_update[0], True);
PARSEI("30 = 10", *_update[0], False);
PARSEI("30 = 30", *_update[0], True);
+ PARSEI("(30 = 10)", *_update[0], False);
+ PARSEI("(30 = 30)", *_update[0], True);
}
void DocumentSelectParserTest::testDocumentUpdates1()
{
createDocs();
- // Mix of types should within numbers, but otherwise not match
+ // Mix of types should within numbers, but otherwise not match
PARSEI("30 < 10.2", *_update[0], False);
PARSEI("10.2 < 30", *_update[0], True);
PARSEI("30 < \"foo\"", *_update[0], Invalid);
@@ -1112,17 +1145,18 @@ void DocumentSelectParserTest::testDocumentUpdates1()
PARSEI("14.3 == null", *_update[0], False);
PARSEI("null = 0", *_update[0], False);
- // Field values
+ // Field values
PARSE("testdoctype1.headerval = 24", *_update[0], Invalid);
PARSE("testdoctype1.hfloatval = 2.0", *_update[0], Invalid);
PARSE("testdoctype1.content = \"bar\"", *_update[0], Invalid);
PARSE("testdoctype1.hstringval == testdoctype1.content", *_update[0], Invalid);
- // Document types
+ // Document types
PARSE("testdoctype1", *_update[0], True);
+ PARSE("(testdoctype1)", *_update[0], True);
PARSE("testdoctype2", *_update[0], False);
- // Inherited doctypes
+ // Inherited doctypes
PARSE("testdoctype2", *_update[4], True);
PARSE("testdoctype2", *_update[3], False);
PARSE("testdoctype1", *_update[4], True);
@@ -1133,7 +1167,7 @@ void DocumentSelectParserTest::testDocumentUpdates2()
{
createDocs();
- // Id values
+ // Id values
PARSEI("id == \"doc:myspace:anything\"", *_update[0], True);
PARSEI(" iD== \"doc:myspace:anything\" ", *_update[0], True);
PARSEI("id == \"doc:myspa:nything\"", *_update[0], False);
@@ -1159,7 +1193,7 @@ void DocumentSelectParserTest::testDocumentUpdates3()
{
createDocs();
- // Branch operators
+ // Branch operators
PARSEI("true and false", *_update[0], False);
PARSEI("true and true", *_update[0], True);
PARSEI("true or false", *_update[0], True);
@@ -1171,7 +1205,7 @@ void DocumentSelectParserTest::testDocumentUpdates3()
PARSEI("true and not false or false", *_update[0], True);
PARSEI("((243 < 300) and (\"FOO\".lowercase() == \"foo\"))", *_update[0], True);
- // Invalid branching. testdoctype1.content = 1 is invalid
+ // Invalid branching. testdoctype1.content = 1 is invalid
PARSE("testdoctype1.content = 1 and true", *_update[0], Invalid);
PARSE("testdoctype1.content = 1 or true", *_update[0], True);
PARSE("testdoctype1.content = 1 and false", *_update[0], False);
@@ -1186,7 +1220,7 @@ void DocumentSelectParserTest::testDocumentUpdates4()
{
createDocs();
- // Functions
+ // Functions
PARSEI("\"bar\".hash() == -2012135647395072713", *_update[0], True);
PARSEI("\"bar\".hash().abs() == 2012135647395072713", *_update[0], True);
PARSEI("null.hash() == 123", *_update[0], Invalid);
@@ -1195,7 +1229,7 @@ void DocumentSelectParserTest::testDocumentUpdates4()
PARSEI("\"foo\".hash() == 123", *_update[0], False);
PARSEI("(234).hash() == 123", *_update[0], False);
- // Arithmetics
+ // Arithmetics
PARSEI("id.specific.hash() % 10 = 8", *_update[0], True);
PARSEI("id.specific.hash() % 10 = 2", *_update[0], False);
PARSEI("\"foo\" + \"bar\" = \"foobar\"", *_update[0], True);
@@ -1221,7 +1255,7 @@ void DocumentSelectParserTest::testUtf8()
}
std::unique_ptr<select::FieldValueNode>
-DocumentSelectParserTest::parseFieldValue(const std::string expression) {
+DocumentSelectParserTest::parseFieldValue(const std::string& expression) {
return std::unique_ptr<select::FieldValueNode>(dynamic_cast<select::FieldValueNode *>(
dynamic_cast<const select::Compare &>(*_parser->parse(expression)).getLeft().clone().release()));
}
@@ -1246,4 +1280,296 @@ void DocumentSelectParserTest::testThatComplexFieldValuesHaveCorrectFieldNames()
parseFieldValue("testdoctype1.headerval.meow.meow{test}")->getRealFieldName());
}
+namespace {
+
+class OperatorVisitor : public select::Visitor { // Renders a selection tree as a parenthesised prefix-notation string.
+private:
+ std::ostringstream data; // accumulates the rendered text across visit callbacks
+public:
+ void visitConstant(const select::Constant& node) override {
+ data << node; // relies on the node's own stream output
+ }
+
+ void
+ visitInvalidConstant(const select::InvalidConstant& node) override {
+ (void) node; // silence the unused-parameter warning
+ assert(false); // reaching this callback is treated as a failure
+ }
+
+ void visitDocumentType(const select::DocType& node) override {
+ data << "(DOCTYPE " << node << ")";
+ }
+
+ void visitComparison(const select::Compare& node) override {
+ data << '(' << node.getOperator() << ' '; // operator first, then left and right operands
+ node.getLeft().visit(*this);
+ data << ' ';
+ node.getRight().visit(*this);
+ data << ')';
+ }
+
+ void visitAndBranch(const select::And& node) override {
+ data << "(AND ";
+ node.getLeft().visit(*this);
+ data << " ";
+ node.getRight().visit(*this);
+ data << ")";
+ }
+
+ void visitOrBranch(const select::Or& node) override {
+ data << "(OR ";
+ node.getLeft().visit(*this);
+ data << " ";
+ node.getRight().visit(*this);
+ data << ")";
+ }
+
+ void visitNotBranch(const select::Not& node) override {
+ data << "(NOT ";
+ node.getChild().visit(*this);
+ data << ")";
+ }
+
+ void visitArithmeticValueNode(const select::ArithmeticValueNode& node) override {
+ data << '(' << node.getOperatorName() << ' '; // same prefix layout as comparisons
+ node.getLeft().visit(*this);
+ data << ' ';
+ node.getRight().visit(*this);
+ data << ')';
+ }
+ void visitFunctionValueNode(const select::FunctionValueNode& node) override {
+ data << '(' << node.getFunctionName() << ' '; // function name, then the value it is applied to
+ node.getChild().visit(*this);
+ data << ')';
+ }
+ void visitIdValueNode(const select::IdValueNode& node) override {
+ data << "(ID " << node.toString() << ')';
+ }
+ void visitFieldValueNode(const select::FieldValueNode& node) override {
+ data << "(FIELD " << node.getDocType() << ' ' << node.getFieldName() << ')'; // doctype and field name, space separated
+ }
+ void visitFloatValueNode(const select::FloatValueNode& node) override {
+ data << node.getValue(); // formatted with the stream's default settings
+ }
+ void visitVariableValueNode(const select::VariableValueNode& node) override {
+ data << "(VAR " << node.getVariableName() << ')';
+ }
+ void visitIntegerValueNode(const select::IntegerValueNode& node) override {
+ data << node.getValue();
+ }
+ void visitCurrentTimeValueNode(const select::CurrentTimeValueNode&) override {} // writes nothing for current-time nodes
+ void visitStringValueNode(const select::StringValueNode& str) override {
+ data << '"' << str.getValue() << '"'; // value is wrapped in double quotes, written as-is
+ }
+ void visitNullValueNode(const select::NullValueNode&) override {
+ data << "null";
+ }
+ void visitInvalidValueNode(const select::InvalidValueNode&) override {
+ data << "INVALID!";
+ }
+
+ std::string visit_string() { return data.str(); } // everything rendered so far
+};
+
+template <typename NodeType> // NodeType must provide visit(), called here with an OperatorVisitor
+std::string node_to_string(const NodeType& node) { // Returns the OperatorVisitor rendering of node.
+ OperatorVisitor v; // fresh visitor, so the output only covers this node
+ node.visit(v);
+ return v.visit_string();
+}
+
+}
+
+std::string DocumentSelectParserTest::parse_to_tree(const std::string& str) { // Parses str with _parser and returns the tree in prefix notation.
+ std::unique_ptr<select::Node> root(_parser->parse(str)); // root owns the parsed tree for the duration of the call
+ return node_to_string(*root);
+}
+
+void DocumentSelectParserTest::test_operator_precedence() { // Checks operator precedence and associativity via the prefix rendering of parsed trees.
+ createDocs();
+ using namespace std::string_literals; // enables the ""s suffix so expected values are std::string
+
+ CPPUNIT_ASSERT_EQUAL("(AND true false)"s, parse_to_tree("true and false"));
+ CPPUNIT_ASSERT_EQUAL("(AND (NOT false) true)"s, parse_to_tree("not false and true")); // NOT binds tighter than AND
+ CPPUNIT_ASSERT_EQUAL("(NOT (AND false true))"s, parse_to_tree("not (false and true)")); // parentheses override that
+ CPPUNIT_ASSERT_EQUAL("(NOT (DOCTYPE testdoctype1))"s, parse_to_tree("not testdoctype1"));
+ CPPUNIT_ASSERT_EQUAL("(NOT (DOCTYPE (testdoctype1)))"s, parse_to_tree("not (testdoctype1)")); // source parentheses appear in the doctype node's printed form
+ CPPUNIT_ASSERT_EQUAL("(NOT (DOCTYPE (testdoctype1)))"s, parse_to_tree("(not (testdoctype1))"));
+ CPPUNIT_ASSERT_EQUAL("(OR (== 1 2) (== 3 4))"s, parse_to_tree("1==2 or 3==4")); // comparisons bind tighter than OR
+ CPPUNIT_ASSERT_EQUAL("(!= (+ (+ 1 2) 3) 0)"s, parse_to_tree("1+2+3 != 0")); // addition groups left to right
+ CPPUNIT_ASSERT_EQUAL("(!= (+ (+ 1.1 2.2) 3.3) 4.4)"s, parse_to_tree("1.1+2.2+3.3 != 4.4"));
+ CPPUNIT_ASSERT_EQUAL("(!= (- (- 1 2) 3) 0)"s, parse_to_tree("1-2-3 != 0")); // subtraction groups left to right
+ CPPUNIT_ASSERT_EQUAL("(!= (+ (+ 1 2) 3) 0)"s, parse_to_tree("1 + 2 + 3 != 0")); // same tree as the unspaced form above
+ CPPUNIT_ASSERT_EQUAL("(!= (+ 1 (* 2 3)) 0)"s, parse_to_tree("1 + 2 * 3 != 0")); // * binds tighter than +
+ CPPUNIT_ASSERT_EQUAL("(!= (- (/ (* 1 2) 3) 4) 0)"s, parse_to_tree("1 * 2 / 3 - 4 != 0")); // * and / group left to right, ahead of -
+ CPPUNIT_ASSERT_EQUAL("(!= (/ (* 1 2) (- 3 4)) 0)"s, parse_to_tree("1 * 2 / (3 - 4) != 0"));
+ CPPUNIT_ASSERT_EQUAL("(OR (AND true (NOT (== 1 2))) false)"s, // NOT applies to the whole comparison; AND binds tighter than OR
+ parse_to_tree("true and not 1 == 2 or false"));
+ CPPUNIT_ASSERT_EQUAL("(AND (AND (AND (< 1 2) (> 3 4)) (<= 5 6)) (>= 7 8))"s, // AND chains group left to right
+ parse_to_tree("1 < 2 and 3 > 4 and 5 <= 6 and 7 >= 8"));
+ CPPUNIT_ASSERT_EQUAL("(OR (AND (AND (< 1 2) (> 3 4)) (<= 5 6)) (>= 7 8))"s,
+ parse_to_tree("1 < 2 and 3 > 4 and 5 <= 6 or 7 >= 8"));
+ CPPUNIT_ASSERT_EQUAL("(OR (AND (< 1 2) (> 3 4)) (AND (<= 5 6) (>= 7 8)))"s,
+ parse_to_tree("1 < 2 and 3 > 4 or 5 <= 6 and 7 >= 8"));
+ // Unary plus is simply ignored by the parser.
+ CPPUNIT_ASSERT_EQUAL("(== 1 -2)"s, parse_to_tree("+1==-2"));
+ CPPUNIT_ASSERT_EQUAL("(== 1.23 -2.56)"s, parse_to_tree("+1.23==-2.56"));
+ CPPUNIT_ASSERT_EQUAL("(== (+ 1 2) (- 3 -4))"s, parse_to_tree("1 + +2==3 - -4"));
+ CPPUNIT_ASSERT_EQUAL("(== (+ 1 2) (- 3 -4))"s, parse_to_tree("1++2==3--4")); // "++" and "--" parse as a binary operator followed by a sign
+
+ // Due to the way parentheses are handled by the AST, ((foo)) always gets
+ // reduced down to (foo).
+ CPPUNIT_ASSERT_EQUAL("(DOCTYPE (testdoctype1))"s, parse_to_tree("(((testdoctype1)))"));
+ CPPUNIT_ASSERT_EQUAL("(AND (DOCTYPE (testdoctype1)) (DOCTYPE (testdoctype2)))"s,
+ parse_to_tree("((((testdoctype1))) and ((testdoctype2)))"));
+
+ CPPUNIT_ASSERT_EQUAL("(== (ID id) \"foo\")"s, parse_to_tree("id == 'foo'")); // single-quoted input is rendered with double quotes by the visitor
+ CPPUNIT_ASSERT_EQUAL("(== (ID id.group) \"foo\")"s, parse_to_tree("id.group == 'foo'"));
+ // id_spec function apply
+ CPPUNIT_ASSERT_EQUAL("(== (hash (ID id)) 12345)"s, parse_to_tree("id.hash() == 12345"));
+ // Combination of id_spec function apply and arith_expr function apply
+ CPPUNIT_ASSERT_EQUAL("(== (abs (hash (ID id))) 12345)"s, parse_to_tree("id.hash().abs() == 12345"));
+}
+
+void DocumentSelectParserTest::test_token_used_as_ident_preserves_casing() { // Checks that keyword-like variable names keep the casing they were written with.
+ createDocs();
+ using namespace std::string_literals; // enables the ""s suffix so expected values are std::string
+
+ // TYPE, SCHEME, ORDER etc are tokens that may also be used as identifiers
+ // without introducing parsing ambiguities. In this context their original
+ // casing should be preserved.
+ CPPUNIT_ASSERT_EQUAL("(== (VAR Type) 123)"s, parse_to_tree("$Type == 123")); // mixed case kept
+ CPPUNIT_ASSERT_EQUAL("(== (VAR giD) 123)"s, parse_to_tree("$giD == 123"));
+ CPPUNIT_ASSERT_EQUAL("(== (VAR ORDER) 123)"s, parse_to_tree("$ORDER == 123")); // all upper case kept
+}
+
+void DocumentSelectParserTest::test_ambiguous_field_spec_expression_is_handled_correctly() {
+    createDocs();
+
+    // Regression guard: an earlier iteration of the LR(1) grammar had a
+    // reduce/reduce conflict between logical_expr and arith_expr on the token
+    // sequence '(' field_spec ')', so parenthesized field specs were not parsed
+    // as expected. A bare field spec must still be rewritten to "field != null".
+    CPPUNIT_ASSERT_EQUAL(std::string("(!= (FIELD testdoctype1 foo) null)"),
+                         parse_to_tree("(testdoctype1.foo)"));
+    CPPUNIT_ASSERT_EQUAL(
+            std::string("(AND (!= (FIELD testdoctype1 foo) null) (!= (FIELD testdoctype1 bar) null))"),
+            parse_to_tree("(testdoctype1.foo) AND (testdoctype1.bar)"));
+}
+
+void DocumentSelectParserTest::test_can_build_field_value_from_field_expr_node() {
+    using select::FieldExprNode;
+    {
+        // One level: a doctype node wrapped by a single field name node.
+        auto field_node = std::make_unique<FieldExprNode>(
+                std::make_unique<FieldExprNode>("mydoctype"), "foo");
+        auto field_value = field_node->convert_to_field_value();
+        CPPUNIT_ASSERT_EQUAL(vespalib::string("mydoctype"), field_value->getDocType());
+        CPPUNIT_ASSERT_EQUAL(vespalib::string("foo"), field_value->getFieldName());
+    }
+    {
+        // Two levels: the nested field names are expected to be joined with '.'.
+        auto doc_type_node = std::make_unique<FieldExprNode>("mydoctype");
+        auto outer_field = std::make_unique<FieldExprNode>(std::move(doc_type_node), "foo");
+        auto inner_field = std::make_unique<FieldExprNode>(std::move(outer_field), "bar");
+        auto field_value = inner_field->convert_to_field_value();
+        CPPUNIT_ASSERT_EQUAL(vespalib::string("mydoctype"), field_value->getDocType());
+        CPPUNIT_ASSERT_EQUAL(vespalib::string("foo.bar"), field_value->getFieldName());
+    }
+}
+
+void DocumentSelectParserTest::test_can_build_function_call_from_field_expr_node() {
+    using select::FieldExprNode;
+    {
+        // Models the expression doctype.foo.lowercase(): the trailing node names
+        // the function and everything before it forms the field it is applied to.
+        // Applying a function directly to the doctype is unsupported
+        // (see test_function_call_on_doctype_throws_exception).
+        auto doc_type_node = std::make_unique<FieldExprNode>("mydoctype");
+        auto field_node = std::make_unique<FieldExprNode>(std::move(doc_type_node), "foo");
+        auto call_node = std::make_unique<FieldExprNode>(std::move(field_node), "lowercase");
+        auto function_call = call_node->convert_to_function_call();
+        CPPUNIT_ASSERT_EQUAL(vespalib::string("lowercase"), function_call->getFunctionName());
+        // TODO vespalib::string?
+        CPPUNIT_ASSERT_EQUAL(std::string("(FIELD mydoctype foo)"),
+                             node_to_string(function_call->getChild()));
+    }
+}
+
+void DocumentSelectParserTest::test_function_call_on_doctype_throws_exception() {
+    using select::FieldExprNode;
+    // Models "mydoctype.lowercase()", i.e. a function applied directly to the
+    // document type with no field in between. This must be rejected.
+    auto lhs = std::make_unique<FieldExprNode>("mydoctype");
+    auto root = std::make_unique<FieldExprNode>(std::move(lhs), "lowercase");
+    try {
+        root->convert_to_function_call();
+        // Without an explicit failure here the test would pass silently if the
+        // conversion stopped throwing. CPPUNIT_FAIL raises a CppUnit exception,
+        // which the handler below does not catch.
+        CPPUNIT_FAIL("Expected IllegalArgumentException when calling a function directly on a document type");
+    } catch (const vespalib::IllegalArgumentException& e) {
+        CPPUNIT_ASSERT_EQUAL(vespalib::string("Cannot call function 'lowercase' directly on document type"),
+                             e.getMessage());
+    }
+}
+
+namespace {
+
+// Runs select::util::parse_i64 on `str` and asserts that the returned success
+// flag equals `expect_ok`. The parsed value is compared against
+// `expected_output` only when success is expected.
+void check_parse_i64(vespalib::stringref str, bool expect_ok, int64_t expected_output) {
+    int64_t out = 0;
+    bool ok = select::util::parse_i64(str.data(), str.size(), out);
+    CPPUNIT_ASSERT_EQUAL_MESSAGE("Parsing did not return expected success status for i64 input " + str, expect_ok, ok);
+    if (expect_ok) {
+        CPPUNIT_ASSERT_EQUAL_MESSAGE("Parse output not as expected for i64 input " + str, expected_output, out);
+    }
+}
+
+// Same contract as check_parse_i64, but for select::util::parse_hex_i64.
+// Callers in this file pass the hex digits without any "0x" prefix.
+void check_parse_hex_i64(vespalib::stringref str, bool expect_ok, int64_t expected_output) {
+    int64_t out = 0;
+    bool ok = select::util::parse_hex_i64(str.data(), str.size(), out);
+    CPPUNIT_ASSERT_EQUAL_MESSAGE("Parsing did not return expected success status for hex i64 input " + str, expect_ok, ok);
+    if (expect_ok) {
+        CPPUNIT_ASSERT_EQUAL_MESSAGE("Parse output not as expected for hex i64 input " + str, expected_output, out);
+    }
+}
+
+// Same contract as check_parse_i64, but for select::util::parse_double.
+// The failure message now says "double"; it previously claimed a "hex i64"
+// input, which pointed at the wrong parser when a double case failed.
+void check_parse_double(vespalib::stringref str, bool expect_ok, double expected_output) {
+    double out = 0;
+    bool ok = select::util::parse_double(str.data(), str.size(), out);
+    CPPUNIT_ASSERT_EQUAL_MESSAGE("Parsing did not return expected success status for double input " + str, expect_ok, ok);
+    if (expect_ok) {
+        CPPUNIT_ASSERT_EQUAL_MESSAGE("Parse output not as expected for double input " + str, expected_output, out);
+    }
+}
+
+}
+
+void DocumentSelectParserTest::test_parse_utilities_handle_well_formed_input() {
+    struct I64Case { const char* input; int64_t expected; };
+    struct DoubleCase { const char* input; double expected; };
+
+    // Base-10 integers, up to and including the largest i64 value.
+    const I64Case decimal_cases[] = {
+        {"0", 0},
+        {"1", 1},
+        {"9223372036854775807", INT64_MAX},
+    };
+    for (const auto& c : decimal_cases) {
+        check_parse_i64(c.input, true, c.expected);
+    }
+
+    // Hex digits only; the 0x prefix is _not_ part of the parser input.
+    const I64Case hex_cases[] = {
+        {"0", 0},
+        {"1", 1},
+        {"f", 15},
+        {"F", 15},
+        {"ffffffff", UINT32_MAX},
+        {"7FFFFFFFFFFFFFFF", INT64_MAX},
+        // Hex input is parsed as u64 internally and then converted, so all
+        // bits set comes out as -1.
+        {"ffffffffffffffff", -1},
+    };
+    for (const auto& c : hex_cases) {
+        check_parse_hex_i64(c.input, true, c.expected);
+    }
+
+    const DoubleCase double_cases[] = {
+        {"1.0", 1.0},
+        {"1.", 1.0},
+        {"1.79769e+308", 1.79769e+308}, // DBL_MAX
+    };
+    for (const auto& c : double_cases) {
+        check_parse_double(c.input, true, c.expected);
+    }
+}
+
+void DocumentSelectParserTest::test_parse_utilities_handle_malformed_input() {
+    // Inputs the base-10 i64 parser must reject.
+    const char* const bad_decimals[] = {
+        "9223372036854775808",  // INT64_MAX + 1
+        "18446744073709551615", // UINT64_MAX
+        "",
+        "bjarne",
+        "1x",
+    };
+    for (const char* input : bad_decimals) {
+        check_parse_i64(input, false, 0);
+    }
+
+    // Inputs the hex parser must reject.
+    const char* const bad_hex[] = {
+        "",
+        "g",
+        "0x1",
+        "ffffffffffffffff1",
+    };
+    for (const char* input : bad_hex) {
+        check_parse_hex_i64(input, false, 0);
+    }
+
+    check_parse_double("1.x", false, 0.0);
+    // TODO double outside representable range returns Inf, but we probably would
+    // like this to trigger a parse failure?
+    check_parse_double("1.79769e+309", true, std::numeric_limits<double>::infinity());
+}
+
} // document
diff --git a/document/src/vespa/document/bucket/bucketselector.cpp b/document/src/vespa/document/bucket/bucketselector.cpp
index 5ded691269a..ceb231a483c 100644
--- a/document/src/vespa/document/bucket/bucketselector.cpp
+++ b/document/src/vespa/document/bucket/bucketselector.cpp
@@ -137,27 +137,6 @@ using namespace document::select;
}
}
- void compare(const select::SearchColumnValueNode& node,
- const select::ValueNode& valnode,
- const select::Operator& op) {
- if (op == FunctionOperator::EQ || op == document::select::GlobOperator::GLOB) {
- int bucketCount = 1 << 16;
- const IntegerValueNode* val(
- dynamic_cast<const IntegerValueNode*>(&valnode));
-
- int64_t rval = val->getValue();
-
- for (int i = 0; i < bucketCount; i++) {
- int64_t column = node.getValue(BucketId(16, i));
- if (column == rval) {
- _buckets.push_back(BucketId(16, i));
- }
- }
-
- _unknown = false;
- }
- }
-
void visitComparison(const document::select::Compare& node) override {
if (node.getOperator() != document::select::FunctionOperator::EQ &&
node.getOperator() != document::select::GlobOperator::GLOB)
@@ -166,12 +145,8 @@ using namespace document::select;
}
const IdValueNode* lid(dynamic_cast<const IdValueNode*>(
&node.getLeft()));
- const SearchColumnValueNode* sc(dynamic_cast<const SearchColumnValueNode*>(
- &node.getLeft()));
if (lid) {
compare(*lid, node.getRight(), node.getOperator());
- } else if (sc) {
- compare(*sc, node.getRight(), node.getOperator());
} else {
const IdValueNode* rid(dynamic_cast<const IdValueNode*>(
&node.getRight()));
@@ -187,7 +162,6 @@ using namespace document::select;
void visitArithmeticValueNode(const ArithmeticValueNode &) override {}
void visitFunctionValueNode(const FunctionValueNode &) override {}
void visitIdValueNode(const IdValueNode &) override {}
- void visitSearchColumnValueNode(const SearchColumnValueNode &) override {}
void visitFieldValueNode(const FieldValueNode &) override {}
void visitFloatValueNode(const FloatValueNode &) override {}
void visitVariableValueNode(const VariableValueNode &) override {}
diff --git a/document/src/vespa/document/select/.gitignore b/document/src/vespa/document/select/.gitignore
index 5f004816692..919eb5c7ca9 100644
--- a/document/src/vespa/document/select/.gitignore
+++ b/document/src/vespa/document/select/.gitignore
@@ -2,3 +2,6 @@ Makefile
.depend*
.*.swp
*.So
+*.hxx
+*.cxx
+*.hh
diff --git a/document/src/vespa/document/select/CMakeLists.txt b/document/src/vespa/document/select/CMakeLists.txt
index 0e94fa0f530..eba5ddde40c 100644
--- a/document/src/vespa/document/select/CMakeLists.txt
+++ b/document/src/vespa/document/select/CMakeLists.txt
@@ -1,4 +1,14 @@
# Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+find_package(BISON REQUIRED)
+find_package(FLEX REQUIRED)
+
+BISON_TARGET(DocSelParser grammar/parser.yy ${CMAKE_CURRENT_BINARY_DIR}/parser.cxx)
+FLEX_TARGET(DocSelLexer grammar/lexer.ll ${CMAKE_CURRENT_BINARY_DIR}/lexer.cxx)
+
+ADD_FLEX_BISON_DEPENDENCY(DocSelLexer DocSelParser)
+include_directories(${CMAKE_CURRENT_BINARY_DIR})
+
vespa_add_library(document_select OBJECT
SOURCES
bodyfielddetector.cpp
@@ -13,7 +23,6 @@ vespa_add_library(document_select OBJECT
operator.cpp
orderingselector.cpp
orderingspecification.cpp
- parser.cpp
result.cpp
resultset.cpp
resultlist.cpp
@@ -22,6 +31,11 @@ vespa_add_library(document_select OBJECT
value.cpp
valuenode.cpp
valuenodes.cpp
+ parser.cpp
+ parse_utils.cpp
+ parsing_failed_exception.cpp
+ ${BISON_DocSelParser_OUTPUTS}
+ ${FLEX_DocSelLexer_OUTPUTS}
AFTER
document_documentconfig
)
diff --git a/document/src/vespa/document/select/branch.cpp b/document/src/vespa/document/select/branch.cpp
index 7f6ad252471..5b28fad0df8 100644
--- a/document/src/vespa/document/select/branch.cpp
+++ b/document/src/vespa/document/select/branch.cpp
@@ -9,7 +9,7 @@ namespace document {
namespace select {
And::And(std::unique_ptr<Node> left, std::unique_ptr<Node> right, const char* name)
- : Branch(name ? name : "AND"),
+ : Branch(name ? name : "and"),
_left(std::move(left)),
_right(std::move(right))
{
@@ -55,7 +55,7 @@ And::trace(const Context& context, std::ostream& out) const
}
Or::Or(std::unique_ptr<Node> left, std::unique_ptr<Node> right, const char* name)
- : Branch(name ? name : "OR"),
+ : Branch(name ? name : "or"),
_left(std::move(left)),
_right(std::move(right))
{
@@ -101,7 +101,7 @@ Or::trace(const Context& context, std::ostream& out) const
}
Not::Not(std::unique_ptr<Node> child, const char* name)
- : Branch(name ? name : "NOT"),
+ : Branch(name ? name : "not"),
_child(std::move(child))
{
assert(_child.get());
diff --git a/document/src/vespa/document/select/cloningvisitor.cpp b/document/src/vespa/document/select/cloningvisitor.cpp
index 4011cbdeea1..d695e3ec83d 100644
--- a/document/src/vespa/document/select/cloningvisitor.cpp
+++ b/document/src/vespa/document/select/cloningvisitor.cpp
@@ -162,7 +162,7 @@ CloningVisitor::visitConstant(const Constant &expr)
_priority = ConstPriority;
bool val = expr.getConstantValue();
_resultSet.add(val ? Result::True : Result::False);
- _node.reset(new Constant(val ? "true" : "false"));
+ _node.reset(new Constant(val));
}
@@ -199,16 +199,6 @@ CloningVisitor::visitIdValueNode(const IdValueNode &expr)
void
-CloningVisitor::visitSearchColumnValueNode(const SearchColumnValueNode &expr)
-{
- _constVal = false;
- ++_fieldNodes; // needs document id, thus needs document
- _valueNode = expr.clone();
- _priority = SearchColPriority;
-}
-
-
-void
CloningVisitor::visitFieldValueNode(const FieldValueNode &expr)
{
_constVal = false;
diff --git a/document/src/vespa/document/select/cloningvisitor.h b/document/src/vespa/document/select/cloningvisitor.h
index ff74af1201d..2c5f94c20a4 100644
--- a/document/src/vespa/document/select/cloningvisitor.h
+++ b/document/src/vespa/document/select/cloningvisitor.h
@@ -60,7 +60,6 @@ public:
void visitInvalidConstant(const InvalidConstant &expr) override;
void visitDocumentType(const DocType &expr) override;
void visitIdValueNode(const IdValueNode &expr) override;
- void visitSearchColumnValueNode(const SearchColumnValueNode &expr) override;
void visitFieldValueNode(const FieldValueNode &expr) override;
void visitFloatValueNode(const FloatValueNode &expr) override;
void visitVariableValueNode(const VariableValueNode &expr) override;
diff --git a/document/src/vespa/document/select/constant.cpp b/document/src/vespa/document/select/constant.cpp
index 02821180337..8d1445f40a0 100644
--- a/document/src/vespa/document/select/constant.cpp
+++ b/document/src/vespa/document/select/constant.cpp
@@ -7,28 +7,10 @@
namespace document::select {
-Constant::Constant(const vespalib::stringref & value)
- : Node(value),
- _value(false)
+Constant::Constant(bool value)
+ : Node(value ? "true" : "false"), // TODO remove required name from Node
+ _value(value)
{
- if (value.size() == 4 &&
- (value[0] & 0xdf) == 'T' &&
- (value[1] & 0xdf) == 'R' &&
- (value[2] & 0xdf) == 'U' &&
- (value[3] & 0xdf) == 'E')
- {
- _value = true;
- } else if (value.size() == 5 &&
- (value[0] & 0xdf) == 'F' &&
- (value[1] & 0xdf) == 'A' &&
- (value[2] & 0xdf) == 'L' &&
- (value[3] & 0xdf) == 'S' &&
- (value[4] & 0xdf) == 'E')
- {
- _value = false;
- } else {
- assert(false);
- }
}
ResultList
diff --git a/document/src/vespa/document/select/constant.h b/document/src/vespa/document/select/constant.h
index 08be5c95ec7..46a98ed7eaa 100644
--- a/document/src/vespa/document/select/constant.h
+++ b/document/src/vespa/document/select/constant.h
@@ -5,9 +5,8 @@
*
* @brief Class describing a constant in the select tree.
*
- * @author H�kon Humberset
+ * @author Håkon Humberset
* @date 2005-06-07
- * @version $Id$
*/
#pragma once
@@ -23,7 +22,7 @@ private:
bool _value;
public:
- explicit Constant(const vespalib::stringref & value);
+ explicit Constant(bool value);
ResultList contains(const Context&) const override {
return ResultList(Result::get(_value));
@@ -32,8 +31,8 @@ public:
ResultList trace(const Context&, std::ostream& trace) const override;
void print(std::ostream& out, bool verbose, const std::string& indent) const override;
void visit(Visitor& v) const override;
- bool getConstantValue() const { return _value; }
- Node::UP clone() const override { return wrapParens(new Constant(_name)); }
+ bool getConstantValue() const noexcept { return _value; }
+ Node::UP clone() const override { return wrapParens(new Constant(_value)); }
};
diff --git a/document/src/vespa/document/select/gid_filter.cpp b/document/src/vespa/document/select/gid_filter.cpp
index ce3045564ba..71a57a0886a 100644
--- a/document/src/vespa/document/select/gid_filter.cpp
+++ b/document/src/vespa/document/select/gid_filter.cpp
@@ -22,7 +22,6 @@ struct NoOpVisitor : Visitor {
void visitArithmeticValueNode(const ArithmeticValueNode&) override {}
void visitFunctionValueNode(const FunctionValueNode&) override {}
void visitIdValueNode(const IdValueNode&) override {}
- void visitSearchColumnValueNode(const SearchColumnValueNode&) override {}
void visitFieldValueNode(const FieldValueNode&) override {}
void visitFloatValueNode(const FloatValueNode&) override {}
void visitVariableValueNode(const VariableValueNode&) override {}
diff --git a/document/src/vespa/document/select/grammar/lexer.ll b/document/src/vespa/document/select/grammar/lexer.ll
new file mode 100644
index 00000000000..8cd5638c122
--- /dev/null
+++ b/document/src/vespa/document/select/grammar/lexer.ll
@@ -0,0 +1,182 @@
+ /* Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. */
+
+ /* We use the .*xx-suffix to denote a build-time generated file */
+%option outfile="lexer.cxx"
+%option header-file="lexer.hxx"
+
+%option c++
+ /* Uncomment to enable debug tracing of parsing */
+ /* %option debug */
+%option 8bit warn nodefault
+%option noyywrap nounput
+%option yyclass="document::select::DocSelScanner"
+
+ /* Used to track source locations, see https://github.com/bingmann/flex-bison-cpp-example/blob/master/src/scanner.ll */
+%{
+#define YY_USER_ACTION yyloc->columns(yyleng);
+%}
+
+%{
+
+#include "parser.hxx"
+#include <vespa/document/select/scanner.h>
+#include <vespa/document/select/parse_utils.h>
+#include <vespa/document/util/stringutil.h>
+#include <vespa/vespalib/stllike/string.h>
+#include <string>
+#include <cstdlib>
+
+#undef YY_DECL
+#define YY_DECL int document::select::DocSelScanner::yylex( \
+ document::select::DocSelParser::semantic_type* yylval, \
+ document::select::DocSelParser::location_type* yyloc)
+
+using token = document::select::DocSelParser::token;
+using string = vespalib::string;
+
+// Inspired by https://coldfix.eu/2015/05/16/bison-c++11/
+
+#define YIELD_TOKEN(name, field_name, value) \
+ yylval->field_name = value; \
+ return token::T_##name;
+
+#define INT_TOKEN(name, value) YIELD_TOKEN(name, i64_val, value)
+#define STRING_TOKEN(name) YIELD_TOKEN(name, string_val, new string(yytext, yyleng))
+#define CONST_STR_TOKEN(name, value) YIELD_TOKEN(name, const_str_val, value)
+#define TAGGED_TOKEN INT_TOKEN
+
+#define NAMED_TOKEN(name) return token::T_##name;
+
+%}
+
+ /* Lexer fragments, used as part of token patterns */
+
+SIGN [+-]
+DECIMAL [0-9]+
+HEXDIGIT [0-9a-fA-F]
+HEX 0[xX]{HEXDIGIT}{1,16}
+OCTAL 0[0-7]*
+EXPONENT [eE][+-]?[0-9]+
+IDCHARS [a-zA-Z_][a-zA-Z_0-9_]*
+WS [ \f\r\t]
+
+ /* It is weird that you can't do \' inside "" and vice versa, but that's the StringUtil::unescape logic today... */
+DQ_STRING \"(\\([\\tnfr"]|x{HEXDIGIT}{2})|[^"\\])*\"
+SQ_STRING \'(\\([\\tnfr']|x{HEXDIGIT}{2})|[^'\\])*\'
+
+%%
+
+ /* Code to take place at the beginning of yylex() */
+%{
+ // TODO move to YY_USER_ACTION instead?
+ yyloc->step();
+%}
+
+ /* TODO support length suffixes? supported in JavaCC grammar, but not in legacy Spirit grammar... */
+{HEX} {
+ // TODO replace with std::from_chars() once compiler support is there
+ if (!util::parse_hex_i64(yytext + 2, yyleng - 2, yylval->i64_val)) { // Skip 0[xX] prefix
+ throw_parser_syntax_error(*yyloc, "Not a valid 64-bit hex integer: " + std::string(yytext, yyleng));
+ }
+ return token::T_INTEGER;
+}
+
+ /* Sign is handled explicitly in the parser to avoid lexing ambiguities for expressions such as "1 -2" */
+{DECIMAL} {
+ if (!util::parse_i64(yytext, yyleng, yylval->i64_val)) {
+ throw_parser_syntax_error(*yyloc, "Not a valid signed 64-bit integer: " + std::string(yytext, yyleng));
+ }
+ return token::T_INTEGER;
+}
+
+ /*
+ * We use a strict definition of floats when lexing, i.e. we require a dot
+ * in order to remove ambiguities with the base 10 integer token.
+ */
+[0-9]+(\.[0-9]*){EXPONENT}?[fFdD]? {
+ if (!util::parse_double(yytext, yyleng, yylval->double_val)) {
+ throw_parser_syntax_error(*yyloc, "Not a valid floating point number: " + std::string(yytext, yyleng));
+ }
+ return token::T_FLOAT;
+}
+
+({DQ_STRING}|{SQ_STRING}) {
+ // Always slice off start and end quote chars
+ yylval->string_val = new string(yytext + 1, yyleng - 2);
+ return token::T_STRING;
+}
+
+ /* FIXME this is a syntactic hack to "flatten" fieldpath map and array lookups into a single token
+ rather than match these structurally in the parser itself. This is due to the way fieldpaths
+ are handled in the legacy AST (i.e. as strings, not structures), and this must be changed first
+ before we can fix this. */
+ /* Field path expressions do not support any other escapes than double quote char */
+ /* TODO {WS} does not include newline, do we need to support that here? */
+\{{WS}*($?{IDCHARS}|{DECIMAL}|\"([^\\\"]|\\\")*\"){WS}*\} STRING_TOKEN(FP_MAP_LOOKUP)
+\[{WS}*(${IDCHARS}|{DECIMAL}){WS}*\] STRING_TOKEN(FP_ARRAY_LOOKUP)
+
+ /* Primary tokens are case insensitive */
+(?i:"id") NAMED_TOKEN(ID)
+(?i:"null") NAMED_TOKEN(NULL)
+(?i:"true") NAMED_TOKEN(TRUE)
+(?i:"false") NAMED_TOKEN(FALSE)
+(?i:"and") NAMED_TOKEN(AND)
+(?i:"or") NAMED_TOKEN(OR)
+(?i:"not") NAMED_TOKEN(NOT)
+
+ /* We expose the verbatim input as the token value, as these may also be used for identifiers... */
+(?i:"user") STRING_TOKEN(USER)
+(?i:"group") STRING_TOKEN(GROUP)
+(?i:"scheme") STRING_TOKEN(SCHEME)
+(?i:"namespace") STRING_TOKEN(NAMESPACE)
+(?i:"specific") STRING_TOKEN(SPECIFIC)
+(?i:"bucket") STRING_TOKEN(BUCKET)
+(?i:"gid") STRING_TOKEN(GID)
+(?i:"type") STRING_TOKEN(TYPE)
+(?i:"order") STRING_TOKEN(ORDER)
+
+"now\(\)" NAMED_TOKEN(NOW_FUNC) /* This _is_ case-sensitive in the legacy parser */
+
+ /* Binary operators */
+ /* TODO INT_TOKEN with code directly from selection operator node? Or direct operator object ptr? */
+"=" NAMED_TOKEN(GLOB)
+"=~" NAMED_TOKEN(REGEX)
+"==" NAMED_TOKEN(EQ)
+"!=" NAMED_TOKEN(NE)
+">=" NAMED_TOKEN(GE)
+"<=" NAMED_TOKEN(LE)
+">" NAMED_TOKEN(GT)
+"<" NAMED_TOKEN(LT)
+
+"$" NAMED_TOKEN(DOLLAR)
+"." NAMED_TOKEN(DOT)
+"(" NAMED_TOKEN(LPAREN)
+")" NAMED_TOKEN(RPAREN)
+"," NAMED_TOKEN(COMMA)
+"+" NAMED_TOKEN(PLUS)
+"-" NAMED_TOKEN(MINUS)
+"*" NAMED_TOKEN(MULTIPLY)
+"/" NAMED_TOKEN(DIVIDE)
+"%" NAMED_TOKEN(MODULO)
+
+{IDCHARS} STRING_TOKEN(IDENTIFIER)
+
+\n {
+ // Bump the line counter of the tracked source location. The pattern matches
+ // exactly one newline char, so yyleng is 1 here.
+ yyloc->lines(yyleng);
+ yyloc->step();
+ // NOTE(review): this hands the raw '\n' char code to the parser as a token.
+ // The grammar declares no such token, so presumably a newline inside an
+ // expression ends up as a syntax error -- confirm that this is intended
+ // rather than treating newlines like the other whitespace in {WS}.
+ return yytext[0];
+}
+
+{WS} {
+ yyloc->step();
+}
+
+ /*
+ * Everything that hasn't already matched is an error. Throw exception immediately with the exact
+ * char to avoid getting auto-generated error messages with "unexpected $undefined" due to the
+ * resulting token not matching any existing, explicitly named tokens.
+ */
+. { throw_parser_syntax_error(*yyloc, "Unexpected character: '" + StringUtil::escape(vespalib::string(yytext, 1)) + "'"); }
+
+%%
+
diff --git a/document/src/vespa/document/select/grammar/parser.yy b/document/src/vespa/document/select/grammar/parser.yy
new file mode 100644
index 00000000000..baf987355c9
--- /dev/null
+++ b/document/src/vespa/document/select/grammar/parser.yy
@@ -0,0 +1,374 @@
+ /* Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. */
+
+%output "parser.cxx"
+%defines "parser.hxx"
+
+ /* Skeleton implementation included as part of the generated source. Note: _not_ covered by the GPL. */
+%skeleton "lalr1.cc"
+
+%require "3.0"
+
+ /* Uncomment to enable debugging of lexer invocations */
+ /*%debug*/
+
+%locations
+
+%define parse.error verbose
+%define parse.assert
+
+%define api.token.prefix {T_}
+%define api.namespace {document::select}
+%define parser_class_name {DocSelParser}
+
+ /*
+ * Due to current Bison variant support not being move-enabled (and our AST ptrs being move-only),
+ * we have to use good old POD unions for our rule results. Note that we have to use %destructor
+ * for all ptrs to ensure cleanup.
+ */
+%union {
+ int64_t i64_val;
+ double double_val;
+ const char* const_str_val;
+ vespalib::string* string_val;
+ Constant* constant_node;
+ ValueNode* value_node;
+ FieldExprNode* field_expr_node;
+ Node* abstract_node;
+}
+
+%token END 0 "end of input"
+%token NULL
+%token TRUE
+%token FALSE
+%token AND
+%token OR
+%token NOT
+
+ /* Specify aliases for several tokens for ease of use and better error reporting */
+%token GLOB "="
+%token REGEX "=~"
+%token EQ "=="
+%token NE "!="
+%token GE ">="
+%token LE "<="
+%token GT ">"
+%token LT "<"
+%token ID
+%token NOW_FUNC
+
+ /*
+ * Tokens that we only mention by alias in the grammar rules, but which we define
+ * explicitly to improve error reporting
+ */
+%token DOLLAR "$"
+%token DOT "."
+%token LPAREN "("
+%token RPAREN ")"
+%token COMMA ","
+%token PLUS "+"
+%token MINUS "-"
+%token MULTIPLY "*"
+%token DIVIDE "/"
+%token MODULO "%"
+
+%token <string_val> IDENTIFIER
+%token <string_val> STRING
+%token <string_val> FP_MAP_LOOKUP FP_ARRAY_LOOKUP
+%token <double_val> FLOAT
+%token <i64_val> INTEGER
+%token <string_val> USER GROUP SCHEME NAMESPACE SPECIFIC BUCKET GID TYPE ORDER
+
+%type <string_val> ident mangled_ident
+%type <abstract_node> bool_
+ /* TODO 'leaf' is a bad name for something that isn't a leaf... */
+%type <abstract_node> expression comparison logical_expr leaf doc_type
+%type <string_val> id_arg
+%type <value_node> number null_ value string arith_expr id_spec variable
+%type <field_expr_node> field_spec
+
+%destructor { delete $$; } IDENTIFIER STRING FP_MAP_LOOKUP FP_ARRAY_LOOKUP
+%destructor { delete $$; } USER GROUP SCHEME NAMESPACE SPECIFIC BUCKET GID TYPE ORDER
+%destructor { delete $$; } null_ bool_ number string doc_type ident id_arg id_spec
+%destructor { delete $$; } variable mangled_ident field_spec value arith_expr
+%destructor { delete $$; } comparison leaf logical_expr expression
+
+%start entry
+
+%parse-param {DocSelScanner& scanner}
+%parse-param {const BucketIdFactory& bucket_id_factory}
+%parse-param {const DocumentTypeRepo& doc_type_repo}
+%parse-param {std::unique_ptr<Node>& recv_expr}
+
+ /* Generated parser header file verbatim */
+%code requires {
+
+#include "location.hh"
+#include <vespa/document/select/constant.h>
+#include <vespa/document/select/branch.h>
+#include <vespa/document/select/compare.h>
+#include <vespa/document/select/valuenodes.h>
+#include <vespa/vespalib/stllike/string.h>
+#include <memory>
+
+namespace document {
+class BucketIdFactory;
+class DocumentTypeRepo;
+}
+
+namespace document::select {
+class DocSelScanner;
+class Node;
+class Constant;
+class ValueNode;
+}
+
+}
+
+%code {
+
+// Bison has some chunky destructors that trigger inlining warnings. Disable warning
+// for this translation unit, since we can't really do much about the code it generates.
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Winline"
+
+#include <vespa/document/bucket/bucketidfactory.h>
+#include <vespa/document/repo/documenttyperepo.h>
+#include <vespa/document/select/scanner.h>
+#include <vespa/document/select/constant.h>
+#include <vespa/document/select/branch.h>
+#include <vespa/document/select/compare.h>
+#include <vespa/document/select/doctype.h>
+#include <vespa/document/select/valuenodes.h>
+#include <vespa/document/util/stringutil.h>
+#include <vespa/vespalib/util/exceptions.h>
+#include <string>
+#include <iostream>
+#include <sstream>
+#include <memory>
+
+using string = vespalib::string;
+
+// Moves ownership of a raw pointer held in a Bison semantic value slot into a
+// std::unique_ptr and clears the slot. Clearing matters: if an exception
+// unwinds the parser, its cleanup code runs the %destructor on whatever is
+// still stored on the stack, and a non-nulled slot would then be deleted a
+// second time. Not pretty, but that is the price of raw pointers in the union.
+template <typename T>
+std::unique_ptr<T> steal(T*& ptr) noexcept {
+    T* const raw = ptr;
+    ptr = nullptr;
+    return std::unique_ptr<T>(raw);
+}
+
+// yylex tokenization must defer to scanner instance given to parser
+#undef yylex
+#define yylex scanner.yylex
+
+}
+
+%code provides {
+
+// This cute little indirection is to get around the syntax_error constructor
+// being defined as inline and therefore not being available outside the
+// auto-generated parser source file.
+[[noreturn]] void throw_parser_syntax_error(const document::select::DocSelParser::location_type& loc,
+ const std::string& msg);
+
+}
+
+
+%left OR
+%left AND
+%left EQ NE LT GT LE GE GLOB REGEX
+%left PLUS MINUS
+%left MULTIPLY DIVIDE
+%left MODULO /* Matches legacy parser recursive descent precedence */
+%precedence NEG
+%right UNOT
+%left NON_DOT
+%precedence DOT /* Used to give higher precedence to id.foo vs id expressions. Re: "dangling else" problem */
+
+%%
+
+null_
+ : NULL { $$ = new NullValueNode(); }
+ ;
+
+bool_
+ : TRUE { $$ = new Constant(true); }
+ | FALSE { $$ = new Constant(false); }
+ ;
+
+number
+ : INTEGER { $$ = new IntegerValueNode($1, false); }
+ | FLOAT { $$ = new FloatValueNode($1); }
+ ;
+
+string
+ : STRING { {
+ try {
+ $$ = new StringValueNode(StringUtil::unescape(*steal<string>($1)));
+ } catch (const vespalib::IllegalArgumentException& exc) {
+ throw syntax_error(@$, exc.getMessage());
+ }
+ } }
+ ;
+
+doc_type
+ : ident {
+ if (doc_type_repo.getDocumentType(*$1) == nullptr) {
+ throw syntax_error(@$, vespalib::make_string("Document type '%s' not found", $1->c_str()));
+ }
+ $$ = new DocType(*steal<string>($1));
+ }
+ ;
+
+ident
+ : IDENTIFIER { $$ = $1; }
+ | SCHEME { $$ = $1; }
+ | TYPE { $$ = $1; }
+ | NAMESPACE { $$ = $1; }
+ | SPECIFIC { $$ = $1; }
+ | BUCKET { $$ = $1; }
+ | GID { $$ = $1; }
+ | ORDER { $$ = $1; }
+ ;
+
+id_arg
+ : USER { $$ = $1; }
+ | GROUP { $$ = $1; }
+ | SCHEME { $$ = $1; }
+ | NAMESPACE { $$ = $1; }
+ | SPECIFIC { $$ = $1; }
+ | BUCKET { $$ = $1; }
+ | GID { $$ = $1; }
+ | TYPE { $$ = $1; }
+ ;
+
+id_spec
+ : ID %prec NON_DOT { $$ = new IdValueNode(bucket_id_factory, "id", ""); } /* Prefer shifting instead of reducing */
+ | ID "." id_arg { $$ = new IdValueNode(bucket_id_factory, "id", *steal<string>($3)); }
+ | ID "." IDENTIFIER "(" ")" { $$ = new FunctionValueNode(*steal<string>($3), std::make_unique<IdValueNode>(bucket_id_factory, "id", "")); }
+ | ID "." ORDER "(" INTEGER "," INTEGER ")" { $$ = new IdValueNode(bucket_id_factory, "id", *steal<string>($3), $5, $7); }
+ ;
+
+variable
+ : "$" ident { $$ = new VariableValueNode(*steal<string>($2)); }
+ ;
+
+ /* FIXME this is a horrible leftover of post-parsed fieldpath processing */
+ /* At least we verify structural integrity at initial parse-time now... */
+ /* Post-parsing should be replaced with an actual parse-time built AST! */
+mangled_ident
+ : ident { $$ = $1; }
+ | mangled_ident FP_MAP_LOOKUP { $1->append(*steal<string>($2)); $$ = $1; }
+ | mangled_ident FP_ARRAY_LOOKUP { $1->append(*steal<string>($2)); $$ = $1; }
+ ;
+
+field_spec
+ : ident "." mangled_ident {
+ if (doc_type_repo.getDocumentType(*$1) == nullptr) {
+ throw syntax_error(@$, vespalib::make_string("Document type '%s' not found", $1->c_str()));
+ }
+ $$ = new FieldExprNode(std::make_unique<FieldExprNode>(*steal<string>($1)), *steal<string>($3));
+ }
+ | field_spec "." mangled_ident { $$ = new FieldExprNode(steal<FieldExprNode>($1), *steal<string>($3)); }
+ ;
+
+value
+ : null_ { $$ = $1; }
+ | string { $$ = $1; }
+ | id_spec { $$ = $1; }
+ | variable { $$ = $1; }
+ | NOW_FUNC { $$ = new CurrentTimeValueNode(); }
+ ;
+
+arith_expr
+ : value { $$ = $1; }
+ | number { $$ = $1; }
+ /* JavaCC and legacy parsers don't support unary plus/minus for _expressions_, just for numbers. So we have to fudge this a bit. */
+ | "-" number %prec NEG {
+ if (dynamic_cast<IntegerValueNode*>($2) != nullptr) {
+ $$ = new IntegerValueNode(- static_cast<IntegerValueNode&>(*steal<ValueNode>($2)).getValue(), false);
+ } else {
+ $$ = new FloatValueNode(- dynamic_cast<FloatValueNode&>(*steal<ValueNode>($2)).getValue());
+ }
+ }
+ | "+" number %prec NEG { $$ = $2; }
+ | field_spec { $$ = steal<FieldExprNode>($1)->convert_to_field_value().release(); }
+ | field_spec "(" ")" { $$ = steal<FieldExprNode>($1)->convert_to_function_call().release(); }
+ | arith_expr "+" arith_expr { $$ = new ArithmeticValueNode(steal<ValueNode>($1), "+", steal<ValueNode>($3)); }
+ | arith_expr "-" arith_expr { $$ = new ArithmeticValueNode(steal<ValueNode>($1), "-", steal<ValueNode>($3)); }
+ | arith_expr "*" arith_expr { $$ = new ArithmeticValueNode(steal<ValueNode>($1), "*", steal<ValueNode>($3)); }
+ | arith_expr "/" arith_expr { $$ = new ArithmeticValueNode(steal<ValueNode>($1), "/", steal<ValueNode>($3)); }
+ | arith_expr "%" arith_expr { $$ = new ArithmeticValueNode(steal<ValueNode>($1), "%", steal<ValueNode>($3)); }
+ | "(" arith_expr ")" { $$ = $2; $$->setParentheses(); }
+ | arith_expr "." IDENTIFIER "(" ")" { $$ = new FunctionValueNode(*steal<string>($3), steal<ValueNode>($1)); } /* FIXME shift/reduce conflict */
+ ;
+
+comparison
+ : arith_expr EQ arith_expr { $$ = new Compare(steal<ValueNode>($1), FunctionOperator::EQ, steal<ValueNode>($3), bucket_id_factory); }
+ | arith_expr NE arith_expr { $$ = new Compare(steal<ValueNode>($1), FunctionOperator::NE, steal<ValueNode>($3), bucket_id_factory); }
+ | arith_expr GE arith_expr { $$ = new Compare(steal<ValueNode>($1), FunctionOperator::GEQ, steal<ValueNode>($3), bucket_id_factory); }
+ | arith_expr LE arith_expr { $$ = new Compare(steal<ValueNode>($1), FunctionOperator::LEQ, steal<ValueNode>($3), bucket_id_factory); }
+ | arith_expr GT arith_expr { $$ = new Compare(steal<ValueNode>($1), FunctionOperator::GT, steal<ValueNode>($3), bucket_id_factory); }
+ | arith_expr LT arith_expr { $$ = new Compare(steal<ValueNode>($1), FunctionOperator::LT, steal<ValueNode>($3), bucket_id_factory); }
+ | arith_expr GLOB arith_expr { $$ = new Compare(steal<ValueNode>($1), GlobOperator::GLOB, steal<ValueNode>($3), bucket_id_factory); }
+ | arith_expr REGEX arith_expr { $$ = new Compare(steal<ValueNode>($1), RegexOperator::REGEX, steal<ValueNode>($3), bucket_id_factory); }
+ ;
+
+leaf
+ : bool_ { $$ = $1; }
+ | comparison { $$ = $1; }
+ | doc_type { $$ = $1; }
+ | arith_expr { /* Actually field_spec, see comment below..! */
+ // Grammar-wise, we _do not_ accept arbitrary arith_exprs at this level. But the
+ // selection grammar as it stands is otherwise ambiguous with LR(1) parsing.
+ // More specifically, if we used field_spec instead of arith_expr here, the parser
+ // state machine cannot decide what to do if it has processed the sequence '(' field_spec
+ // and sees the next token of ')'. Since both logical_expr and arith_expr allows for
+ // parenthesis expression recursion, the reduce step may produce either of these and
+ // is therefore technically undefined. By using arith_expr instead for this rule, all
+ // '(' field_spec ')' sequences result in an arith_expr rule match and the reduce/reduce
+ // conflict goes away. We can then do a sneaky "run-time" type check to ensure we only
+ // get the expected node from the rule.
+ // It's not pretty, but it avoids an undefined grammar (which is much less pretty!).
+ auto node = steal<ValueNode>($1);
+ if (dynamic_cast<FieldValueNode*>(node.get()) == nullptr) {
+ throw syntax_error(@$, "expected field spec, doctype, bool or comparison");
+ }
+ // Implicit rewrite to non-null comparison node
+ $$ = new Compare(std::move(node),
+ FunctionOperator::NE,
+ std::make_unique<NullValueNode>(),
+ bucket_id_factory);
+ }
+ ;
+
+logical_expr
+ : leaf { $$ = $1; }
+ | logical_expr AND logical_expr { $$ = new And(steal<Node>($1), steal<Node>($3)); }
+ | logical_expr OR logical_expr { $$ = new Or(steal<Node>($1), steal<Node>($3)); }
+ | NOT logical_expr %prec UNOT { $$ = new Not(steal<Node>($2)); }
+ | "(" logical_expr ")" { $$ = $2; $$->setParentheses(); }
+ ;
+
+expression
+ : logical_expr { $$ = $1; }
+ ;
+
+entry
+ : expression END { recv_expr = steal<Node>($1); }
+ | END { recv_expr = std::make_unique<Constant>(true); }
+ ;
+
+%%
+
+// Error reporting member of the generated parser class. Rather than printing
+// anything, it converts the report into a syntax_error exception carrying the
+// given source location `l` and message `what`.
+void document::select::DocSelParser::error(const location_type& l, const std::string& what) {
+ throw syntax_error(l, what);
+}
+
+// Out-of-line helper that throws DocSelParser::syntax_error with the given
+// location and message. It exists so that code outside this generated source
+// file (the lexer calls it) can raise a syntax error even though the
+// syntax_error constructor itself is defined inline here. Never returns.
+void throw_parser_syntax_error(const document::select::DocSelParser::location_type& loc, const std::string& msg) {
+ throw document::select::DocSelParser::syntax_error(loc, msg);
+}
+
+#pragma GCC diagnostic pop
diff --git a/document/src/vespa/document/select/node.h b/document/src/vespa/document/select/node.h
index eab46e824c8..83e2ea3542d 100644
--- a/document/src/vespa/document/select/node.h
+++ b/document/src/vespa/document/select/node.h
@@ -5,9 +5,8 @@
*
* @brief Base class for all nodes in the document selection tree.
*
- * @author H�kon Humberset
+ * @author Håkon Humberset
* @date 2005-06-07
- * @version $Id$
*/
#pragma once
diff --git a/document/src/vespa/document/select/orderingselector.cpp b/document/src/vespa/document/select/orderingselector.cpp
index bf8e96c0533..836647aab26 100644
--- a/document/src/vespa/document/select/orderingselector.cpp
+++ b/document/src/vespa/document/select/orderingselector.cpp
@@ -137,7 +137,6 @@ namespace {
void visitArithmeticValueNode(const ArithmeticValueNode &) override {}
void visitFunctionValueNode(const FunctionValueNode &) override {}
void visitIdValueNode(const IdValueNode &) override {}
- void visitSearchColumnValueNode(const SearchColumnValueNode &) override {}
void visitFieldValueNode(const FieldValueNode &) override {}
void visitFloatValueNode(const FloatValueNode &) override {}
void visitVariableValueNode(const VariableValueNode &) override {}
diff --git a/document/src/vespa/document/select/parse_utils.cpp b/document/src/vespa/document/select/parse_utils.cpp
new file mode 100644
index 00000000000..ab4ce2f6d4a
--- /dev/null
+++ b/document/src/vespa/document/select/parse_utils.cpp
@@ -0,0 +1,37 @@
+// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include "parse_utils.h"
+#include <boost/spirit/include/qi.hpp>
+
+namespace document::select::util {
+
+namespace qi = boost::spirit::qi;
+
+// TODO replace use of Spirit.Qi with std::from_chars (<charconv>) when available.
+// Note: these parsers are all pure, reentrant and without locking.
+bool parse_hex_i64(const char* str, size_t len, int64_t& out) {
+ const char* iter = str;
+ const char* end = str + len;
+ // Legacy parser parses hex numbers as u64 rather than i64 (then implicitly
+ // converts), so we do the same thing here to avoid change of semantics.
+ using u64_hex_parser = qi::uint_parser<uint64_t, 16, 1, 16>;
+ u64_hex_parser u64_hex;
+ uint64_t tmp = 0;
+ const bool ok = qi::parse(iter, end, u64_hex, tmp);
+ out = static_cast<int64_t>(tmp);
+ return (ok && (iter == end));
+}
+bool parse_i64(const char* str, size_t len, int64_t& out) {
+ const char* iter = str;
+ const char* end = str + len;
+ const bool ok = qi::parse(iter, end, qi::long_long, out);
+ return (ok && (iter == end));
+}
+bool parse_double(const char* str, size_t len, double& out) {
+ const char* iter = str;
+ const char* end = str + len;
+ const bool ok = qi::parse(iter, end, qi::double_, out);
+ return (ok && (iter == end));
+}
+
+}
diff --git a/document/src/vespa/document/select/parse_utils.h b/document/src/vespa/document/select/parse_utils.h
new file mode 100644
index 00000000000..38c36dfe94a
--- /dev/null
+++ b/document/src/vespa/document/select/parse_utils.h
@@ -0,0 +1,17 @@
+// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+#include <stdint.h>
+#include <stddef.h>
+
+namespace document::select::util {
+
+// Fast, locale-independent numeric parse helpers for Flex lexing.
+
+// All parse_* functions return true only if the entire [str, str + len) range parses as a number; false otherwise.
+// Value of `out` is undefined if return value is false.
+bool parse_hex_i64(const char* str, size_t len, int64_t& out);
+bool parse_i64(const char* str, size_t len, int64_t& out);
+bool parse_double(const char* str, size_t len, double& out);
+
+} \ No newline at end of file
diff --git a/document/src/vespa/document/select/parser.cpp b/document/src/vespa/document/select/parser.cpp
index ceaf0b0c438..9f015409011 100644
--- a/document/src/vespa/document/select/parser.cpp
+++ b/document/src/vespa/document/select/parser.cpp
@@ -1,1493 +1,33 @@
// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
-
#include "parser.h"
-#include "branch.h"
-#include "compare.h"
-#include "constant.h"
-#include "operator.h"
-#include "doctype.h"
-#include "valuenode.h"
-#include "simpleparser.h"
-
-#include <vespa/document/repo/documenttyperepo.h>
+#include "scanner.h"
#include <vespa/document/base/exceptions.h>
#include <vespa/document/util/stringutil.h>
-#include <vespa/vespalib/stllike/asciistream.h>
-#include <vespa/vespalib/locale/c.h>
-#include <boost/spirit/include/classic_chset.hpp>
-#include <boost/spirit/include/classic_core.hpp>
-#include <boost/spirit/include/classic_escape_char.hpp>
-#include <boost/spirit/include/classic_grammar_def.hpp>
-#include <boost/spirit/include/classic_parse_tree.hpp>
-#include <boost/spirit/include/classic_tree_to_xml.hpp>
-#include <iostream>
-#include <map>
+#include <vespa/vespalib/util/stringfmt.h>
#include <sstream>
-using boost::spirit::classic::tree_node;
-using document::DocumentTypeRepo;
-using std::unique_ptr;
-using std::cerr;
-using std::endl;
-using std::istringstream;
-using std::ostringstream;
-using vespalib::IllegalStateException;
-
-/*
- * This cannot be part of a plugin. boost contains constructs causing
- * compiler to generate calls to atexit().
- */
-
-#define parse_assert(a)
-
-namespace document {
-namespace select {
-
-VESPA_IMPLEMENT_EXCEPTION(ParsingFailedException, vespalib::Exception);
-
-Parser::Parser(const DocumentTypeRepo& repo,
- const BucketIdFactory& bucketIdFactory)
- : _repo(repo),
- _bucketIdFactory(bucketIdFactory)
-{
-}
-
-namespace {
-
-/**
- * Defines the grammar for the document selection text format.
- */
-struct DocSelectionGrammar
- : public boost::spirit::classic::grammar<DocSelectionGrammar>
-{
- /** Node identifiers (value 0 should not be used) */
- enum ids { id_nil=1, id_bool, id_number, id_string,
- id_doctype, id_fieldname, id_function, id_idarg, id_searchcolumnarg,
- id_operator, id_idspec, id_searchcolumnspec, id_fieldspec, id_value,
- id_valuefuncadd, id_valuefuncmul, id_valuefuncmod,
- id_valuegroup, id_arithmvalue,
- id_comparison, id_leaf, id_not, id_and,
- id_or, id_group, id_order, id_expression, id_variable };
-
- const DocumentTypeRepo &_repo;
- const BucketIdFactory& _bucketIdFactory;
-
- DocSelectionGrammar(const DocumentTypeRepo& repo,
- const BucketIdFactory& bucketIdFactory)
- : _repo(repo),
- _bucketIdFactory(bucketIdFactory) {}
-
- const BucketIdFactory& getBucketIdFactory() const
- { return _bucketIdFactory; }
-
- /** Grammar base types. To be able to retrieve different grammars. */
- template <typename Scanner>
- struct gram_base {
- typedef typename boost::spirit::classic::rule<Scanner,
- boost::spirit::classic::parser_tag<id_nil> > rule_nil;
- typedef typename boost::spirit::classic::rule<Scanner,
- boost::spirit::classic::parser_tag<id_bool> > rule_bool;
- typedef typename boost::spirit::classic::rule<Scanner,
- boost::spirit::classic::parser_tag<id_number> > rule_number;
- typedef typename boost::spirit::classic::rule<Scanner,
- boost::spirit::classic::parser_tag<id_string> > rule_string;
- typedef typename boost::spirit::classic::rule<Scanner,
- boost::spirit::classic::parser_tag<id_doctype> > rule_doctype;
- typedef typename boost::spirit::classic::rule<Scanner,
- boost::spirit::classic::parser_tag<id_fieldname> > rule_fieldname;
- typedef typename boost::spirit::classic::rule<Scanner,
- boost::spirit::classic::parser_tag<id_function> > rule_function;
- typedef typename boost::spirit::classic::rule<Scanner,
- boost::spirit::classic::parser_tag<id_idarg> > rule_idarg;
- typedef typename boost::spirit::classic::rule<Scanner,
- boost::spirit::classic::parser_tag<id_searchcolumnarg> > rule_searchcolumnarg;
- typedef typename boost::spirit::classic::rule<Scanner,
- boost::spirit::classic::parser_tag<id_operator> > rule_operator;
- typedef typename boost::spirit::classic::rule<Scanner,
- boost::spirit::classic::parser_tag<id_idspec> > rule_idspec;
- typedef typename boost::spirit::classic::rule<Scanner,
- boost::spirit::classic::parser_tag<id_searchcolumnspec> > rule_searchcolumnspec;
- typedef typename boost::spirit::classic::rule<Scanner,
- boost::spirit::classic::parser_tag<id_fieldspec> > rule_fieldspec;
- typedef typename boost::spirit::classic::rule<Scanner,
- boost::spirit::classic::parser_tag<id_value> > rule_value;
- typedef typename boost::spirit::classic::rule<Scanner,
- boost::spirit::classic::parser_tag<id_valuefuncadd> > rule_valuefuncadd;
- typedef typename boost::spirit::classic::rule<Scanner,
- boost::spirit::classic::parser_tag<id_valuefuncmul> > rule_valuefuncmul;
- typedef typename boost::spirit::classic::rule<Scanner,
- boost::spirit::classic::parser_tag<id_valuefuncmod> > rule_valuefuncmod;
- typedef typename boost::spirit::classic::rule<Scanner,
- boost::spirit::classic::parser_tag<id_valuegroup> > rule_valuegroup;
- typedef typename boost::spirit::classic::rule<Scanner,
- boost::spirit::classic::parser_tag<id_arithmvalue> > rule_arithmvalue;
- typedef typename boost::spirit::classic::rule<Scanner,
- boost::spirit::classic::parser_tag<id_comparison> > rule_comparison;
- typedef typename boost::spirit::classic::rule<Scanner,
- boost::spirit::classic::parser_tag<id_leaf> > rule_leaf;
- typedef typename boost::spirit::classic::rule<Scanner,
- boost::spirit::classic::parser_tag<id_not> > rule_not;
- typedef typename boost::spirit::classic::rule<Scanner,
- boost::spirit::classic::parser_tag<id_and> > rule_and;
- typedef typename boost::spirit::classic::rule<Scanner,
- boost::spirit::classic::parser_tag<id_or> > rule_or;
- typedef typename boost::spirit::classic::rule<Scanner,
- boost::spirit::classic::parser_tag<id_group> > rule_group;
- typedef typename boost::spirit::classic::rule<Scanner,
- boost::spirit::classic::parser_tag<id_order> > rule_order;
- typedef typename boost::spirit::classic::rule<Scanner,
- boost::spirit::classic::parser_tag<id_expression> > rule_expression;
- typedef typename boost::spirit::classic::rule<Scanner,
- boost::spirit::classic::parser_tag<id_variable> > rule_variable;
- typedef boost::spirit::classic::grammar_def<rule_expression,
- rule_leaf,
- rule_arithmvalue> type;
- };
-
- template <typename Scanner>
- struct definition : gram_base<Scanner>::type
- {
- typename gram_base<Scanner>::rule_nil _nil;
- typename gram_base<Scanner>::rule_bool _bool;
- typename gram_base<Scanner>::rule_number _number;
- typename gram_base<Scanner>::rule_string _string;
- typename gram_base<Scanner>::rule_doctype _doctype;
- typename gram_base<Scanner>::rule_fieldname _fieldname;
- typename gram_base<Scanner>::rule_function _function;
- typename gram_base<Scanner>::rule_idarg _idarg;
- typename gram_base<Scanner>::rule_searchcolumnarg _searchcolumnarg;
- typename gram_base<Scanner>::rule_operator _operator;
- typename gram_base<Scanner>::rule_idspec _idspec;
- typename gram_base<Scanner>::rule_searchcolumnspec _searchcolumnspec;
- typename gram_base<Scanner>::rule_fieldspec _fieldspec;
- typename gram_base<Scanner>::rule_value _value;
- typename gram_base<Scanner>::rule_valuefuncadd _valuefuncadd;
- typename gram_base<Scanner>::rule_valuefuncmul _valuefuncmul;
- typename gram_base<Scanner>::rule_valuefuncmod _valuefuncmod;
- typename gram_base<Scanner>::rule_valuegroup _valuegroup;
- typename gram_base<Scanner>::rule_arithmvalue _arithmvalue;
- typename gram_base<Scanner>::rule_comparison _comparison;
- typename gram_base<Scanner>::rule_leaf _leaf;
- typename gram_base<Scanner>::rule_not _not;
- typename gram_base<Scanner>::rule_and _and;
- typename gram_base<Scanner>::rule_or _or;
- typename gram_base<Scanner>::rule_group _group;
- typename gram_base<Scanner>::rule_order _order;
- typename gram_base<Scanner>::rule_expression _expression;
- typename gram_base<Scanner>::rule_variable _variable;
+namespace document::select {
- definition(const DocSelectionGrammar&)
- : _nil(),
- _bool(),
- _number(),
- _string(),
- _doctype(),
- _fieldname(),
- _function(),
- _idarg(),
- _operator(),
- _idspec(),
- _searchcolumnspec(),
- _fieldspec(),
- _value(),
- _valuefuncadd(),
- _valuefuncmul(),
- _valuefuncmod(),
- _valuegroup(),
- _arithmvalue(),
- _comparison(),
- _leaf(),
- _not(),
- _and(),
- _or(),
- _group(),
- _order(),
- _expression(),
- _variable()
- {
- using namespace boost::spirit::classic;
+std::unique_ptr<Node> Parser::parse(const std::string& str) const {
+ try {
+ std::istringstream ss(str);
+ DocSelScanner scanner(&ss);
- boost::spirit::classic::uint_parser<uint64_t, 16, 1, -1> hexvalue;
-
- // Initialize primitives
- _nil = lexeme_d[ as_lower_d["null"] ];
- _bool = lexeme_d[ as_lower_d["true"] | as_lower_d["false"] ];
- _number = lexeme_d[ str_p("0x") >> hexvalue ] | lexeme_d[ real_p ];
- _string = ( lexeme_d[
- ( no_node_d[ ch_p('"') ] >>
- token_node_d[ *( ~chset<>("\\\"\x00-\x1f\x7f-\xff") |
- ( '\\' >> ( ch_p('\\') | 't' | 'n' | 'f' | 'r' | '"' |
- (ch_p('x') >> xdigit_p >> xdigit_p) ) ) ) ] >>
- no_node_d[ ch_p('"') ] ) |
- ( no_node_d[ ch_p('\'') ] >>
- token_node_d[ *( ~chset<>("\\'\x00-\x1f\x7f-\xff") |
- ( '\\' >> ( ch_p('\\') | 't' | 'n' | 'f' | 'r' | '\'' |
- (ch_p('x') >> xdigit_p >> xdigit_p) ) ) ) ] >>
- no_node_d[ ch_p('\'') ] )
- ] );
- _doctype = lexeme_d[ token_node_d[ chset<>("_A-Za-z")
- >> *(chset<>("_A-Za-z0-9")) ]];
- _fieldname = lexeme_d[ token_node_d[chset<>("_A-Za-z")
- >> *(chset<>("_A-Za-z0-9{}[]$"))
- ]];
- _function = lexeme_d[ token_node_d[ chset<>("A-Za-z")
- >> *(chset<>("A-Za-z0-9")) ]
- >> no_node_d[ str_p("()") ] ];
-
- _order = as_lower_d["order"]
- >> no_node_d[ ch_p('(') ]
- >> _number
- >> no_node_d[ ch_p(',') ]
- >> _number
- >> no_node_d[ ch_p(')') ];
-
- _idarg = (as_lower_d[ "scheme"] | as_lower_d[ "namespace"] |
- as_lower_d[ "specific" ] | as_lower_d[ "user" ] |
- as_lower_d[ "group" ] | as_lower_d[ "bucket" ] |
- as_lower_d[ "gid" ] | as_lower_d["type"] | _order);
-
- _searchcolumnarg = lexeme_d[ token_node_d[ *(chset<>("_A-Za-z0-9")) ]];
- _operator = (str_p(">=") | ">" | "==" | "=~" | "="
- | "<=" | "<" | "!=");
- // Derived
- _idspec = as_lower_d["id"]
- >> !(no_node_d[ ch_p('.') ] >> _idarg);
- _searchcolumnspec = as_lower_d["searchcolumn"]
- >> !(no_node_d[ ch_p('.') ] >> _searchcolumnarg);
- _fieldspec = _doctype
- >> +( no_node_d[ ch_p('.') ] >> (_function | _fieldname));
- _variable = lexeme_d[ token_node_d[chset<>("$")
- >> *(chset<>("A-Za-z0-9"))
- ]];
- _value = (_valuegroup | _function | _nil | _number | _string
- | _idspec | _searchcolumnspec | _fieldspec | _variable)
- >> *(no_node_d[ ch_p('.') ] >> _function);
- _valuefuncmod = (_valuegroup | _value)
- >> +( ch_p('%')
- >> (_valuegroup | _value) );
- _valuefuncmul = (_valuefuncmod | _valuegroup | _value)
- >> +( (ch_p('*') | ch_p('/'))
- >> (_valuefuncmod | _valuegroup | _value));
- _valuefuncadd
- = (_valuefuncmul | _valuefuncmod | _valuegroup | _value)
- >> +((ch_p('+') | ch_p('-'))
- >> (_valuefuncmul | _valuefuncmod | _valuegroup |
- _value));
- _valuegroup = no_node_d[ ch_p('(') ] >> _arithmvalue
- >> no_node_d[ ch_p(')') ]
- >> *(no_node_d[ ch_p('.') ] >> _function);
- _arithmvalue = (_valuefuncadd | _valuefuncmul | _valuefuncmod
- | _valuegroup | _value);
- _comparison = _arithmvalue >> _operator >> _arithmvalue;
- _leaf = _bool | _comparison | _fieldspec | _doctype;
-
- _not = (as_lower_d["not"] >> _group)
- | (lexeme_d[ as_lower_d["not"] >> no_node_d[ space_p ] ] >> _leaf);
- _and = (_not | _group | _leaf)
- >> as_lower_d["and"] >> (_and | _not | _group | _leaf);
- _or = (_and | _not | _group | _leaf)
- >> as_lower_d["or"] >> (_or | _and | _not | _group | _leaf);
- _group = no_node_d[ ch_p('(') ]
- >> (_or | _and | _not | _group | _leaf)
- >> no_node_d[ ch_p(')') ];
-
- _expression = !(_or | _and | _not | _group | _leaf) >> end_p;
-
- this->start_parsers(_expression, _leaf, _arithmvalue);
+ std::unique_ptr<Node> root;
+ DocSelParser parser(scanner, _bucket_id_factory, _doc_type_repo, root);
+ if (parser.parse() != 0) {
+ throw ParsingFailedException(
+ vespalib::make_string("Unknown parse failure while parsing selection '%s'", str.c_str()),
+ VESPA_STRLOC);
}
- };
-
-};
-
-template<typename T>
-std::unique_ptr<Node>
-parseTree(DocSelectionGrammar& grammar, tree_node<T>& root) {
- return parseNode(grammar, root);
-}
-
-template<typename T>
-std::unique_ptr<Node>
-parseNode(DocSelectionGrammar& grammar, tree_node<T>& node) {
- switch (node.value.id().to_long()) {
- case DocSelectionGrammar::id_or:
- return parseOr(grammar, node);
- case DocSelectionGrammar::id_and:
- return parseAnd(grammar, node);
- case DocSelectionGrammar::id_not:
- return parseNot(grammar, node);
- case DocSelectionGrammar::id_group:
- {
- std::unique_ptr<Node> n(parseNode(grammar, node.children[0]));
- n->setParentheses();
- return n;
- }
- case DocSelectionGrammar::id_leaf:
- case DocSelectionGrammar::id_value:
- parse_assert(node.children.size() == 1);
- return parseNode(grammar, node.children[0]);
- case DocSelectionGrammar::id_expression:
- if (node.children.size() == 1) {
- return parseNode(grammar, node.children[0]);
- }
- parse_assert(node.children.size() == 0);
- return std::unique_ptr<Node>(new Constant("true"));
- case DocSelectionGrammar::id_bool:
- return parseBool(grammar, node);
- case DocSelectionGrammar::id_comparison:
- return parseComparison(grammar, node);
- case DocSelectionGrammar::id_fieldspec:
- return parseFieldSpec(grammar, node);
- case DocSelectionGrammar::id_doctype:
- return parseDocType(grammar, node);
- }
- vespalib::asciistream ost;
- ost << "Received unhandled nodetype "
- << node.value.id().to_long() << " in parseNode()\n";
- throw IllegalStateException(ost.str(), VESPA_STRLOC);
-}
-
-template<typename T>
-std::unique_ptr<Node>
-parseOr(DocSelectionGrammar& grammar, tree_node<T>& node) {
- parse_assert(node.value.id().to_long() == grammar.id_or);
- parse_assert(node.children.size() == 3);
- vespalib::string op(node.children[1].value.begin(),
- node.children[1].value.end());
- return std::unique_ptr<Node>(new Or(
- parseNode(grammar, node.children[0]),
- parseNode(grammar, node.children[2]),
- op.c_str()));
-}
-
-template<typename T>
-std::unique_ptr<Node>
-parseAnd(DocSelectionGrammar& grammar, tree_node<T>& node) {
- parse_assert(node.value.id().to_long() == grammar.id_and);
- parse_assert(node.children.size() == 3);
- vespalib::string op(node.children[1].value.begin(),
- node.children[1].value.end());
- return std::unique_ptr<Node>(new And(
- parseNode(grammar, node.children[0]),
- parseNode(grammar, node.children[2]),
- op.c_str()));
-}
-
-template<typename T>
-std::unique_ptr<Node>
-parseNot(DocSelectionGrammar& grammar, tree_node<T>& node) {
- parse_assert(node.value.id().to_long() == grammar.id_not);
- parse_assert(node.children.size() == 2);
- vespalib::string op(node.children[0].value.begin(),
- node.children[0].value.end());
- return std::unique_ptr<Node>(new Not(
- parseNode(grammar, node.children[1]), op.c_str()));
-}
-
-template<typename T>
-std::unique_ptr<Node>
-parseBool(DocSelectionGrammar& grammar, tree_node<T>& node) {
- (void) grammar;
- parse_assert(node.value.id().to_long() == grammar.id_bool);
- parse_assert(node.children.size() == 1);
- parse_assert(node.children[0].value.id().to_long() == grammar.id_bool);
- parse_assert(node.children[0].children.size() == 0);
- vespalib::string s(node.children[0].value.begin(), node.children[0].value.end());
- return std::unique_ptr<Node>(new Constant(s));
-}
-
-template<typename T>
-std::unique_ptr<Node>
-parseComparison(DocSelectionGrammar& grammar, tree_node<T>& node) {
- parse_assert(node.value.id().to_long() == grammar.id_comparison);
- parse_assert(node.children.size() == 3);
- parse_assert(node.children[1].children.size() == 1);
- vespalib::string op(node.children[1].children[0].value.begin(),
- node.children[1].children[0].value.end());
- return std::unique_ptr<Node>(new Compare(
- parseArithmValue(grammar, node.children[0]),
- Operator::get(op),
- parseArithmValue(grammar, node.children[2]),
- grammar.getBucketIdFactory()));
-}
-
-template<typename T>
-std::unique_ptr<Node>
-parseFieldSpec(DocSelectionGrammar& grammar, tree_node<T>& node) {
- parse_assert(node.value.id().to_long() == grammar.id_fieldspec);
- return std::unique_ptr<Node>(new Compare(
- parseFieldSpecValue(grammar, node),
- Operator::get("!="),
- std::unique_ptr<ValueNode>(new NullValueNode("null")),
- grammar.getBucketIdFactory()));
-}
-
-template<typename T>
-std::unique_ptr<ValueNode>
-parseVariable(DocSelectionGrammar& grammar, tree_node<T>& node) {
- (void) grammar;
- parse_assert(node.value.id().to_long() == grammar.id_variable);
- vespalib::string varName(node.children[0].value.begin(),
- node.children[0].value.end());
- return std::unique_ptr<ValueNode>(new VariableValueNode(varName.substr(1)));
-}
-
-template<typename T>
-std::unique_ptr<ValueNode>
-parseGlobValueFunction(DocSelectionGrammar& grammar, tree_node<T>& node) {
- (void) grammar;
- parse_assert(node.value.id().to_long() == grammar.id_function);
- vespalib::string varName(node.children[0].value.begin(),
- node.children[0].value.end());
- if (varName == "now") {
- return std::unique_ptr<ValueNode>(new CurrentTimeValueNode);
+ return root;
+ } catch (const DocSelParser::syntax_error& err) {
+ throw ParsingFailedException(
+ vespalib::make_string("%s at column %u when parsing selection '%s'",
+ err.what(), err.location.begin.column, str.c_str()),
+ VESPA_STRLOC);
}
- throw ParsingFailedException("Unexpected function name '" + varName
- + "' found.", VESPA_STRLOC);
}
-template<typename T>
-std::unique_ptr<Node>
-parseDocType(DocSelectionGrammar& grammar, tree_node<T>& node) {
- parse_assert(node.value.id().to_long() == grammar.id_doctype);
- parse_assert(node.children.size() == 1);
- parse_assert(node.children[0].value.id().to_long() == grammar.id_doctype);
- parse_assert(node.children[0].children.size() == 0);
- vespalib::string doctype(node.children[0].value.begin(),
- node.children[0].value.end());
- // Verify existance of any version of document
- if (!grammar._repo.getDocumentType(doctype)) {
- throw ParsingFailedException("Document type " + doctype + " not found",
- VESPA_STRLOC);
- }
- return std::unique_ptr<Node>(new DocType(doctype));
-}
-
-template<typename T>
-std::unique_ptr<ValueNode>
-addFunctions(DocSelectionGrammar& grammar, tree_node<T>& node,
- std::unique_ptr<ValueNode> src, uint32_t index)
-{
- (void) grammar;
- while (index < node.children.size()) {
- parse_assert(node.children[index].value.id().to_long()
- == grammar.id_function);
- vespalib::string func(node.children[index].children[0].value.begin(),
- node.children[index].children[0].value.end());
- std::unique_ptr<ValueNode> fnode(new FunctionValueNode(func, std::move(src)));
- src = std::move(fnode);
- ++index;
- }
- return std::move(src);
-}
-
-template<typename T>
-std::unique_ptr<ValueNode>
-parseArithmValue(DocSelectionGrammar& grammar, tree_node<T>& node) {
- switch (node.value.id().to_long()) {
- case DocSelectionGrammar::id_arithmvalue:
- parse_assert(node.children.size() == 1);
- return parseArithmValue(grammar, node.children[0]);
- case DocSelectionGrammar::id_value:
- return parseValue(grammar, node);
- case DocSelectionGrammar::id_valuegroup:
- return parseValueGroup(grammar, node);
- case DocSelectionGrammar::id_valuefuncadd:
- case DocSelectionGrammar::id_valuefuncmul:
- case DocSelectionGrammar::id_valuefuncmod:
- return parseValueArithmetics(grammar, node);
- }
- vespalib::asciistream ost;
- ost << "Received unhandled nodetype "
- << node.value.id().to_long()
- << " in parseArithmValue()\n";
- throw IllegalStateException(ost.str(), VESPA_STRLOC);
-}
-
-template<typename T>
-std::unique_ptr<ValueNode>
-parseValueArithmetics(DocSelectionGrammar& grammar, tree_node<T>& node) {
- parse_assert(node.children.size() >= 3 && node.children.size() % 2 == 1);
- std::unique_ptr<ValueNode> lhs(parseArithmValue(grammar, node.children[0]));
- for (unsigned int i = 1; i < node.children.size(); i += 2) {
- vespalib::string op(node.children[i].value.begin(),
- node.children[i].value.end());
- std::unique_ptr<ValueNode> rhs(parseArithmValue(grammar,
- node.children[i + 1]));
- std::unique_ptr<ValueNode> res(
- new ArithmeticValueNode(std::move(lhs), op, std::move(rhs)));
- lhs = std::move(res);
- }
- return lhs;
-}
-
-template<typename T>
-std::unique_ptr<ValueNode>
-parseValueGroup(DocSelectionGrammar& grammar, tree_node<T>& node) {
- parse_assert(node.value.id().to_long() == grammar.id_valuegroup);
- parse_assert(node.children.size() >= 1);
- std::unique_ptr<ValueNode> result(
- parseArithmValue(grammar, node.children[0]));
- result->setParentheses();
- return addFunctions(grammar, node, std::move(result), 1);
-}
-
-template<typename T>
-std::unique_ptr<ValueNode>
-parseValue(DocSelectionGrammar& grammar, tree_node<T>& node) {
- parse_assert(node.value.id().to_long() == grammar.id_value);
- parse_assert(node.children.size() >= 1);
- std::unique_ptr<ValueNode> result;
- switch (node.children[0].value.id().to_long()) {
- case DocSelectionGrammar::id_nil:
- result = parseNilValue(grammar, node.children[0]);
- break;
- case DocSelectionGrammar::id_idspec:
- result = parseIdSpecValue(grammar, node.children[0]);
- break;
- case DocSelectionGrammar::id_searchcolumnspec:
- result = parseSearchColumnSpecValue(grammar, node.children[0]);
- break;
- case DocSelectionGrammar::id_fieldspec:
- result = parseFieldSpecValue(grammar, node.children[0]);
- break;
- case DocSelectionGrammar::id_number:
- result = parseNumberValue(grammar, node.children[0]);
- break;
- case DocSelectionGrammar::id_string:
- result = parseStringValue(grammar, node.children[0]);
- break;
- case DocSelectionGrammar::id_valuegroup:
- result = parseValueGroup(grammar, node.children[0]);
- break;
- case DocSelectionGrammar::id_variable:
- result = parseVariable(grammar, node.children[0]);
- break;
- case DocSelectionGrammar::id_function:
- result = parseGlobValueFunction(grammar, node.children[0]);
- break;
- default:
- vespalib::asciistream ost;
- ost << "Received unhandled nodetype "
- << node.children[0].value.id().to_long()
- << " in parseValue(), from node of type "
- << node.value.id().to_long() << "\n";
- throw IllegalStateException(ost.str(), VESPA_STRLOC);
- }
- return addFunctions(grammar, node, std::move(result), 1);
-}
-
-template<typename T>
-std::unique_ptr<ValueNode>
-parseNilValue(DocSelectionGrammar& grammar, tree_node<T>& node) {
- (void) grammar;
- parse_assert(node.value.id().to_long() == grammar.id_nil);
- parse_assert(node.children.size() == 1);
- parse_assert(node.children[0].children.size() == 0);
- vespalib::string op(node.children[0].value.begin(),
- node.children[0].value.end());
- return std::unique_ptr<ValueNode>(new NullValueNode(op));
-}
-
-template<typename T>
-std::unique_ptr<ValueNode>
-parseIdSpecValue(DocSelectionGrammar& grammar, tree_node<T>& node) {
- parse_assert(node.value.id().to_long() == grammar.id_idspec);
- parse_assert(node.children.size() >= 1);
- parse_assert(node.children[0].children.size() == 0);
- vespalib::string id(node.children[0].value.begin(),
- node.children[0].value.end());
- if (node.children.size() == 1) {
- return std::unique_ptr<ValueNode>(
- new IdValueNode(grammar.getBucketIdFactory(), id, ""));
- }
-
- vespalib::string type;
-
- int widthBits = -1;
- int divisionBits = -1;
-
- if (node.children[1].children[0].value.id().to_long() == grammar.id_order) {
- tree_node<T>& ordernode(node.children[1].children[0]);
- type = vespalib::string(ordernode.children[0].value.begin(),
- ordernode.children[0].value.end());
-
- vespalib::string val = vespalib::string(
- ordernode.children[1].children[0].value.begin(),
- ordernode.children[1].children[0].value.end());
- widthBits = atoi(val.c_str());
-
- val = vespalib::string(ordernode.children[2].children[0].value.begin(),
- ordernode.children[2].children[0].value.end());
- divisionBits = atoi(val.c_str());
- } else {
- type = vespalib::string(node.children[1].children[0].value.begin(),
- node.children[1].children[0].value.end());
- }
-
- return std::unique_ptr<ValueNode>(
- new IdValueNode(grammar.getBucketIdFactory(), id, type,
- widthBits, divisionBits));
-}
-
-template<typename T>
-std::unique_ptr<ValueNode>
-parseSearchColumnSpecValue(DocSelectionGrammar& grammar, tree_node<T>& node) {
- parse_assert(node.value.id().to_long() == grammar.id_searchcolumnspec);
- parse_assert(node.children.size() == 2);
- parse_assert(node.children[0].children.size() == 0);
- parse_assert(node.children[1].value.id().to_long() == grammar.id_searchcolumnarg);
-
- vespalib::string id(node.children[0].value.begin(),
- node.children[0].value.end());
- parse_assert(node.children.size() == 2);
-
- vespalib::string val = vespalib::string(node.children[1].children[0].value.begin(),
- node.children[1].children[0].value.end());
- return std::unique_ptr<ValueNode>(new SearchColumnValueNode(
- grammar.getBucketIdFactory(), id, atoi(val.c_str())));
-}
-
-template<typename T>
-std::unique_ptr<ValueNode>
-parseFieldSpecValue(DocSelectionGrammar& grammar, tree_node<T>& node) {
- parse_assert(node.value.id().to_long() == grammar.id_fieldspec);
- parse_assert(node.children.size() >= 2);
- parse_assert(node.children[0].value.id().to_long() == grammar.id_doctype);
- vespalib::string doctype(node.children[0].children[0].value.begin(),
- node.children[0].children[0].value.end());
- // Verify that document type exist at any version
- if (!grammar._repo.getDocumentType(doctype)) {
- throw ParsingFailedException("Document type " + doctype + " not found",
- VESPA_STRLOC);
- }
- std::unique_ptr<ValueNode> value;
- uint32_t iterator = 2;
-
- parse_assert(node.children[1].value.id().to_long() == grammar.id_fieldname);
- vespalib::string field(node.children[1].children[0].value.begin(),
- node.children[1].children[0].value.end());
- while (iterator < node.children.size()
- && node.children[iterator].value.id().to_long() == grammar.id_fieldname)
- {
- field += "." + vespalib::string(
- node.children[iterator].children[0].value.begin(),
- node.children[iterator].children[0].value.end());
- ++iterator;
- }
- value.reset(new FieldValueNode(doctype, field));
-
- for (; iterator<node.children.size(); ++iterator) {
- std::unique_ptr<ValueNode> child(std::move(value));
- vespalib::string function(node.children[iterator].children[0].value.begin(),
- node.children[iterator].children[0].value.end());
- parse_assert(node.children[iterator].value.id().to_long() == grammar.id_function);
- value.reset(new FunctionValueNode(function, std::move(child)));
- }
- return value;
-}
-
-template<typename T>
-std::unique_ptr<ValueNode>
-parseNumberValue(DocSelectionGrammar& grammar, tree_node<T>& node) {
- (void) grammar;
- parse_assert(node.value.id().to_long() == grammar.id_number);
- vespalib::string sval;
- int base = 10;
- if (node.children.size() == 2) {
- base = 16;
- sval = vespalib::string(node.children[1].value.begin(),
- node.children[1].value.end());
- parse_assert(node.children[0].value.id().to_long() == grammar.id_number);
- parse_assert(node.children[1].value.id().to_long() == grammar.id_number);
- } else {
- parse_assert(node.children.size() == 1);
- sval = vespalib::string(node.children[0].value.begin(),
- node.children[0].value.end());
- parse_assert(node.children[0].value.id().to_long() == grammar.id_number);
- }
- if (sval.find('.') != vespalib::string::npos) {
- char* endptr;
- double val = vespalib::locale::c::strtod(sval.c_str(), &endptr);
- if (*endptr == '\0') {
- return std::unique_ptr<ValueNode>(new FloatValueNode(val));
- }
- } else {
- char* endptr;
- int64_t val;
- if (base == 16) {
- val = strtoull(sval.c_str(), &endptr, base);
- } else {
- val = strtoll(sval.c_str(), &endptr, base);
- }
- if (*endptr == '\0') {
- return std::unique_ptr<ValueNode>(new IntegerValueNode(val, false));
- }
- }
- vespalib::string error = "'" + sval + "' is not a valid number.";
- throw ParsingFailedException(error, VESPA_STRLOC);
-}
-
-template<typename T>
-std::unique_ptr<ValueNode>
-parseStringValue(DocSelectionGrammar& grammar, tree_node<T>& node) {
- (void) grammar;
- parse_assert(node.value.id().to_long() == grammar.id_string);
- if (node.children.size() == 0) {
- return std::unique_ptr<ValueNode>(new StringValueNode(""));
- }
- parse_assert(node.children.size() == 1);
- parse_assert(node.children[0].value.id().to_long() == grammar.id_string);
- vespalib::string val(node.children[0].value.begin(),
- node.children[0].value.end());
- return std::unique_ptr<ValueNode>(new StringValueNode(StringUtil::unescape(val)));
-}
-
-template<typename Tree>
-void printSpiritTree(std::ostream& out, Tree tree, const vespalib::string& query,
- const DocSelectionGrammar& grammar) {
- using boost::spirit::classic::parser_id;
-
- std::map<parser_id, vespalib::string> names;
- names[parser_id(grammar.id_bool)] = "bool";
- names[parser_id(grammar.id_number)] = "number";
- names[parser_id(grammar.id_string)] = "string";
- names[parser_id(grammar.id_doctype)] = "doctype";
- names[parser_id(grammar.id_fieldname)] = "fieldname";
- names[parser_id(grammar.id_function)] = "function";
- names[parser_id(grammar.id_idarg)] = "idarg";
- names[parser_id(grammar.id_searchcolumnarg)] = "searchcolumnarg";
- names[parser_id(grammar.id_operator)] = "operator";
- names[parser_id(grammar.id_idspec)] = "idspec";
- names[parser_id(grammar.id_searchcolumnspec)] = "searchcolumnspec";
- names[parser_id(grammar.id_fieldspec)] = "fieldspec";
- names[parser_id(grammar.id_value)] = "value";
- names[parser_id(grammar.id_valuefuncadd)] = "valuefuncadd";
- names[parser_id(grammar.id_valuefuncmul)] = "valuefuncmul";
- names[parser_id(grammar.id_valuefuncmod)] = "valuefuncmod";
- names[parser_id(grammar.id_valuegroup)] = "valuegroup";
- names[parser_id(grammar.id_arithmvalue)] = "arithmvalue";
- names[parser_id(grammar.id_comparison)] = "comparison";
- names[parser_id(grammar.id_leaf)] = "leaf";
- names[parser_id(grammar.id_not)] = "not";
- names[parser_id(grammar.id_and)] = "and";
- names[parser_id(grammar.id_or)] = "or";
- names[parser_id(grammar.id_group)] = "group";
- names[parser_id(grammar.id_expression)] = "expression";
- tree_to_xml(out, tree, query.c_str(), names);
-}
-
-template<typename Parser>
-bool testExpr(const DocumentTypeRepo& repo,
- const BucketIdFactory& factory,
- const vespalib::string& expression, const Parser& parser,
- const vespalib::string& result)
-{
- //std::cerr << "Testing expression '" << expression << "'.\n";
- using boost::spirit::classic::space_p;
-
- DocSelectionGrammar grammar(repo, factory);
- boost::spirit::classic::tree_parse_info<> info;
- info = pt_parse(expression.c_str(), parser,
- space_p);
- std::ostringstream ost;
- printSpiritTree(ost, info.trees, expression, grammar);
- if (!info.full) {
- cerr << "Expression '" << expression
- << "' wasn't completely parsed\n"
- << ost.str() << "\n";
- return false;
- }
- vespalib::string httpexpr = expression;
- vespalib::string::size_type index;
- while ((index = httpexpr.find('>')) != vespalib::string::npos) {
- httpexpr = httpexpr.substr(0,index) + "&gt;"
- + httpexpr.substr(index+1);
- }
- vespalib::string fullresult = "<?xml version=\"1.0\" encoding=\"ISO-8859-1\"?>\n"
- "<!DOCTYPE parsetree SYSTEM \"parsetree.dtd\">\n"
- "<!-- " + httpexpr + " -->\n" + result;
- //if (ost.str() != fullresult) {
- if (fullresult != ost.str()) {
- cerr << "Parsing expression '" << expression << "', expected\n"
- << fullresult << "\nbut got\n" << ost.str() << "\n";
- return false;
- }
- return true;
-}
-
-bool test(const DocumentTypeRepo& repo,
- const BucketIdFactory& bucketIdFactory)
-{
- //std::cerr << "\n\nTESTING DOCUMENT SELECT PARSER\n\n";
- DocSelectionGrammar grammar(repo, bucketIdFactory);
-
- using boost::spirit::classic::space_p;
-
- // Parser two is the arithmvalue..
- // idspec, fieldspec, number & stringval, + - * / % ()
- testExpr(repo, bucketIdFactory, "3.14", grammar.use_parser<2>(),
- "<parsetree version=\"1.0\">\n"
- " <parsenode rule=\"arithmvalue\">\n"
- " <parsenode rule=\"value\">\n"
- " <parsenode rule=\"number\">\n"
- " <parsenode rule=\"number\">\n"
- " <value>3.14</value>\n"
- " </parsenode>\n"
- " </parsenode>\n"
- " </parsenode>\n"
- " </parsenode>\n"
- "</parsetree>\n");
- testExpr(repo, bucketIdFactory, "-999", grammar.use_parser<2>(),
- "<parsetree version=\"1.0\">\n"
- " <parsenode rule=\"arithmvalue\">\n"
- " <parsenode rule=\"value\">\n"
- " <parsenode rule=\"number\">\n"
- " <parsenode rule=\"number\">\n"
- " <value>-999</value>\n"
- " </parsenode>\n"
- " </parsenode>\n"
- " </parsenode>\n"
- " </parsenode>\n"
- "</parsetree>\n");
- testExpr(repo, bucketIdFactory, "15e4", grammar.use_parser<2>(),
- "<parsetree version=\"1.0\">\n"
- " <parsenode rule=\"arithmvalue\">\n"
- " <parsenode rule=\"value\">\n"
- " <parsenode rule=\"number\">\n"
- " <parsenode rule=\"number\">\n"
- " <value>15e4</value>\n"
- " </parsenode>\n"
- " </parsenode>\n"
- " </parsenode>\n"
- " </parsenode>\n"
- "</parsetree>\n");
- testExpr(repo, bucketIdFactory, "3.4e-4", grammar.use_parser<2>(),
- "<parsetree version=\"1.0\">\n"
- " <parsenode rule=\"arithmvalue\">\n"
- " <parsenode rule=\"value\">\n"
- " <parsenode rule=\"number\">\n"
- " <parsenode rule=\"number\">\n"
- " <value>3.4e-4</value>\n"
- " </parsenode>\n"
- " </parsenode>\n"
- " </parsenode>\n"
- " </parsenode>\n"
- "</parsetree>\n");
- testExpr(repo, bucketIdFactory, "\" Test \"", grammar.use_parser<2>(),
- "<parsetree version=\"1.0\">\n"
- " <parsenode rule=\"arithmvalue\">\n"
- " <parsenode rule=\"value\">\n"
- " <parsenode rule=\"string\">\n"
- " <parsenode rule=\"string\">\n"
- " <value> Test </value>\n"
- " </parsenode>\n"
- " </parsenode>\n"
- " </parsenode>\n"
- " </parsenode>\n"
- "</parsetree>\n");
- testExpr(repo, bucketIdFactory, "id", grammar.use_parser<2>(),
- "<parsetree version=\"1.0\">\n"
- " <parsenode rule=\"arithmvalue\">\n"
- " <parsenode rule=\"value\">\n"
- " <parsenode rule=\"idspec\">\n"
- " <parsenode rule=\"idspec\">\n"
- " <value>id</value>\n"
- " </parsenode>\n"
- " </parsenode>\n"
- " </parsenode>\n"
- " </parsenode>\n"
- "</parsetree>\n");
- testExpr(repo, bucketIdFactory, "id.namespace",
- grammar.use_parser<2>(),
- "<parsetree version=\"1.0\">\n"
- " <parsenode rule=\"arithmvalue\">\n"
- " <parsenode rule=\"value\">\n"
- " <parsenode rule=\"idspec\">\n"
- " <parsenode rule=\"idspec\">\n"
- " <value>id</value>\n"
- " </parsenode>\n"
- " <parsenode rule=\"idarg\">\n"
- " <parsenode rule=\"idarg\">\n"
- " <value>namespace</value>\n"
- " </parsenode>\n"
- " </parsenode>\n"
- " </parsenode>\n"
- " </parsenode>\n"
- " </parsenode>\n"
- "</parsetree>\n");
- testExpr(repo, bucketIdFactory, "id.hash()", grammar.use_parser<2>(),
- "<parsetree version=\"1.0\">\n"
- " <parsenode rule=\"arithmvalue\">\n"
- " <parsenode rule=\"value\">\n"
- " <parsenode rule=\"idspec\">\n"
- " <parsenode rule=\"idspec\">\n"
- " <value>id</value>\n"
- " </parsenode>\n"
- " </parsenode>\n"
- " <parsenode rule=\"function\">\n"
- " <parsenode rule=\"function\">\n"
- " <value>hash</value>\n"
- " </parsenode>\n"
- " </parsenode>\n"
- " </parsenode>\n"
- " </parsenode>\n"
- "</parsetree>\n");
- testExpr(repo, bucketIdFactory,
- "id.namespace.hash()", grammar.use_parser<2>(),
- "<parsetree version=\"1.0\">\n"
- " <parsenode rule=\"arithmvalue\">\n"
- " <parsenode rule=\"value\">\n"
- " <parsenode rule=\"idspec\">\n"
- " <parsenode rule=\"idspec\">\n"
- " <value>id</value>\n"
- " </parsenode>\n"
- " <parsenode rule=\"idarg\">\n"
- " <parsenode rule=\"idarg\">\n"
- " <value>namespace</value>\n"
- " </parsenode>\n"
- " </parsenode>\n"
- " </parsenode>\n"
- " <parsenode rule=\"function\">\n"
- " <parsenode rule=\"function\">\n"
- " <value>hash</value>\n"
- " </parsenode>\n"
- " </parsenode>\n"
- " </parsenode>\n"
- " </parsenode>\n"
- "</parsetree>\n");
- testExpr(repo, bucketIdFactory,
- "music.artist", grammar.use_parser<2>(),
- "<parsetree version=\"1.0\">\n"
- " <parsenode rule=\"arithmvalue\">\n"
- " <parsenode rule=\"value\">\n"
- " <parsenode rule=\"fieldspec\">\n"
- " <parsenode rule=\"doctype\">\n"
- " <parsenode rule=\"doctype\">\n"
- " <value>music</value>\n"
- " </parsenode>\n"
- " </parsenode>\n"
- " <parsenode rule=\"fieldname\">\n"
- " <parsenode rule=\"fieldname\">\n"
- " <value>artist</value>\n"
- " </parsenode>\n"
- " </parsenode>\n"
- " </parsenode>\n"
- " </parsenode>\n"
- " </parsenode>\n"
- "</parsetree>\n");
- testExpr(repo, bucketIdFactory,
- "music.artist.lowercase()", grammar.use_parser<2>(),
- "<parsetree version=\"1.0\">\n"
- " <parsenode rule=\"arithmvalue\">\n"
- " <parsenode rule=\"value\">\n"
- " <parsenode rule=\"fieldspec\">\n"
- " <parsenode rule=\"doctype\">\n"
- " <parsenode rule=\"doctype\">\n"
- " <value>music</value>\n"
- " </parsenode>\n"
- " </parsenode>\n"
- " <parsenode rule=\"fieldname\">\n"
- " <parsenode rule=\"fieldname\">\n"
- " <value>artist</value>\n"
- " </parsenode>\n"
- " </parsenode>\n"
- " <parsenode rule=\"function\">\n"
- " <parsenode rule=\"function\">\n"
- " <value>lowercase</value>\n"
- " </parsenode>\n"
- " </parsenode>\n"
- " </parsenode>\n"
- " </parsenode>\n"
- " </parsenode>\n"
- "</parsetree>\n");
- testExpr(repo, bucketIdFactory, "(43)", grammar.use_parser<2>(),
- "<parsetree version=\"1.0\">\n"
- " <parsenode rule=\"arithmvalue\">\n"
- " <parsenode rule=\"valuegroup\">\n"
- " <parsenode rule=\"arithmvalue\">\n"
- " <parsenode rule=\"value\">\n"
- " <parsenode rule=\"number\">\n"
- " <parsenode rule=\"number\">\n"
- " <value>43</value>\n"
- " </parsenode>\n"
- " </parsenode>\n"
- " </parsenode>\n"
- " </parsenode>\n"
- " </parsenode>\n"
- " </parsenode>\n"
- "</parsetree>\n");
- testExpr(repo, bucketIdFactory,
- "1 + 2 * 3 - 10 % 2 / 3", grammar.use_parser<2>(),
- "<parsetree version=\"1.0\">\n"
- " <parsenode rule=\"arithmvalue\">\n"
- " <parsenode rule=\"valuefuncadd\">\n"
- " <parsenode rule=\"value\">\n"
- " <parsenode rule=\"number\">\n"
- " <parsenode rule=\"number\">\n"
- " <value>1</value>\n"
- " </parsenode>\n"
- " </parsenode>\n"
- " </parsenode>\n"
- " <parsenode rule=\"valuefuncadd\">\n"
- " <value>+</value>\n"
- " </parsenode>\n"
- " <parsenode rule=\"valuefuncmul\">\n"
- " <parsenode rule=\"value\">\n"
- " <parsenode rule=\"number\">\n"
- " <parsenode rule=\"number\">\n"
- " <value>2</value>\n"
- " </parsenode>\n"
- " </parsenode>\n"
- " </parsenode>\n"
- " <parsenode rule=\"valuefuncmul\">\n"
- " <value>*</value>\n"
- " </parsenode>\n"
- " <parsenode rule=\"value\">\n"
- " <parsenode rule=\"number\">\n"
- " <parsenode rule=\"number\">\n"
- " <value>3</value>\n"
- " </parsenode>\n"
- " </parsenode>\n"
- " </parsenode>\n"
- " </parsenode>\n"
- " <parsenode rule=\"valuefuncadd\">\n"
- " <value>-</value>\n"
- " </parsenode>\n"
- " <parsenode rule=\"valuefuncmul\">\n"
- " <parsenode rule=\"valuefuncmod\">\n"
- " <parsenode rule=\"value\">\n"
- " <parsenode rule=\"number\">\n"
- " <parsenode rule=\"number\">\n"
- " <value>10</value>\n"
- " </parsenode>\n"
- " </parsenode>\n"
- " </parsenode>\n"
- " <parsenode rule=\"valuefuncmod\">\n"
- " <value>%</value>\n"
- " </parsenode>\n"
- " <parsenode rule=\"value\">\n"
- " <parsenode rule=\"number\">\n"
- " <parsenode rule=\"number\">\n"
- " <value>2</value>\n"
- " </parsenode>\n"
- " </parsenode>\n"
- " </parsenode>\n"
- " </parsenode>\n"
- " <parsenode rule=\"valuefuncmul\">\n"
- " <value>/</value>\n"
- " </parsenode>\n"
- " <parsenode rule=\"value\">\n"
- " <parsenode rule=\"number\">\n"
- " <parsenode rule=\"number\">\n"
- " <value>3</value>\n"
- " </parsenode>\n"
- " </parsenode>\n"
- " </parsenode>\n"
- " </parsenode>\n"
- " </parsenode>\n"
- " </parsenode>\n"
- "</parsetree>\n");
- testExpr(repo, bucketIdFactory, "(43 + 14) / 34",
- grammar.use_parser<2>(),
- "<parsetree version=\"1.0\">\n"
- " <parsenode rule=\"arithmvalue\">\n"
- " <parsenode rule=\"valuefuncmul\">\n"
- " <parsenode rule=\"valuegroup\">\n"
- " <parsenode rule=\"arithmvalue\">\n"
- " <parsenode rule=\"valuefuncadd\">\n"
- " <parsenode rule=\"value\">\n"
- " <parsenode rule=\"number\">\n"
- " <parsenode rule=\"number\">\n"
- " <value>43</value>\n"
- " </parsenode>\n"
- " </parsenode>\n"
- " </parsenode>\n"
- " <parsenode rule=\"valuefuncadd\">\n"
- " <value>+</value>\n"
- " </parsenode>\n"
- " <parsenode rule=\"value\">\n"
- " <parsenode rule=\"number\">\n"
- " <parsenode rule=\"number\">\n"
- " <value>14</value>\n"
- " </parsenode>\n"
- " </parsenode>\n"
- " </parsenode>\n"
- " </parsenode>\n"
- " </parsenode>\n"
- " </parsenode>\n"
- " <parsenode rule=\"valuefuncmul\">\n"
- " <value>/</value>\n"
- " </parsenode>\n"
- " <parsenode rule=\"value\">\n"
- " <parsenode rule=\"number\">\n"
- " <parsenode rule=\"number\">\n"
- " <value>34</value>\n"
- " </parsenode>\n"
- " </parsenode>\n"
- " </parsenode>\n"
- " </parsenode>\n"
- " </parsenode>\n"
- "</parsetree>\n");
- testExpr(repo, bucketIdFactory, "34 * (3 - 1) % 4",
- grammar.use_parser<2>(),
- "<parsetree version=\"1.0\">\n"
- " <parsenode rule=\"arithmvalue\">\n"
- " <parsenode rule=\"valuefuncmul\">\n"
- " <parsenode rule=\"value\">\n"
- " <parsenode rule=\"number\">\n"
- " <parsenode rule=\"number\">\n"
- " <value>34</value>\n"
- " </parsenode>\n"
- " </parsenode>\n"
- " </parsenode>\n"
- " <parsenode rule=\"valuefuncmul\">\n"
- " <value>*</value>\n"
- " </parsenode>\n"
- " <parsenode rule=\"valuefuncmod\">\n"
- " <parsenode rule=\"valuegroup\">\n"
- " <parsenode rule=\"arithmvalue\">\n"
- " <parsenode rule=\"valuefuncadd\">\n"
- " <parsenode rule=\"value\">\n"
- " <parsenode rule=\"number\">\n"
- " <parsenode rule=\"number\">\n"
- " <value>3</value>\n"
- " </parsenode>\n"
- " </parsenode>\n"
- " </parsenode>\n"
- " <parsenode rule=\"valuefuncadd\">\n"
- " <value>-</value>\n"
- " </parsenode>\n"
- " <parsenode rule=\"value\">\n"
- " <parsenode rule=\"number\">\n"
- " <parsenode rule=\"number\">\n"
- " <value>1</value>\n"
- " </parsenode>\n"
- " </parsenode>\n"
- " </parsenode>\n"
- " </parsenode>\n"
- " </parsenode>\n"
- " </parsenode>\n"
- " <parsenode rule=\"valuefuncmod\">\n"
- " <value>%</value>\n"
- " </parsenode>\n"
- " <parsenode rule=\"value\">\n"
- " <parsenode rule=\"number\">\n"
- " <parsenode rule=\"number\">\n"
- " <value>4</value>\n"
- " </parsenode>\n"
- " </parsenode>\n"
- " </parsenode>\n"
- " </parsenode>\n"
- " </parsenode>\n"
- " </parsenode>\n"
- "</parsetree>\n");
-
- // Parser 1 is a leaf. bool, comparison, fieldspec, doctype
- testExpr(repo, bucketIdFactory, "true", grammar.use_parser<1>(),
- "<parsetree version=\"1.0\">\n"
- " <parsenode rule=\"leaf\">\n"
- " <parsenode rule=\"bool\">\n"
- " <parsenode rule=\"bool\">\n"
- " <value>true</value>\n"
- " </parsenode>\n"
- " </parsenode>\n"
- " </parsenode>\n"
- "</parsetree>\n");
- testExpr(repo, bucketIdFactory, "false", grammar.use_parser<1>(),
- "<parsetree version=\"1.0\">\n"
- " <parsenode rule=\"leaf\">\n"
- " <parsenode rule=\"bool\">\n"
- " <parsenode rule=\"bool\">\n"
- " <value>false</value>\n"
- " </parsenode>\n"
- " </parsenode>\n"
- " </parsenode>\n"
- "</parsetree>\n");
- testExpr(repo, bucketIdFactory, "music.test", grammar.use_parser<1>(),
- "<parsetree version=\"1.0\">\n"
- " <parsenode rule=\"leaf\">\n"
- " <parsenode rule=\"fieldspec\">\n"
- " <parsenode rule=\"doctype\">\n"
- " <parsenode rule=\"doctype\">\n"
- " <value>music</value>\n"
- " </parsenode>\n"
- " </parsenode>\n"
- " <parsenode rule=\"fieldname\">\n"
- " <parsenode rule=\"fieldname\">\n"
- " <value>test</value>\n"
- " </parsenode>\n"
- " </parsenode>\n"
- " </parsenode>\n"
- " </parsenode>\n"
- "</parsetree>\n");
- testExpr(repo, bucketIdFactory, "music", grammar.use_parser<1>(),
- "<parsetree version=\"1.0\">\n"
- " <parsenode rule=\"leaf\">\n"
- " <parsenode rule=\"doctype\">\n"
- " <parsenode rule=\"doctype\">\n"
- " <value>music</value>\n"
- " </parsenode>\n"
- " </parsenode>\n"
- " </parsenode>\n"
- "</parsetree>\n");
- testExpr(repo, bucketIdFactory,
- "music.artist = \"*john*\"", grammar.use_parser<1>(),
- "<parsetree version=\"1.0\">\n"
- " <parsenode rule=\"leaf\">\n"
- " <parsenode rule=\"comparison\">\n"
- " <parsenode rule=\"arithmvalue\">\n"
- " <parsenode rule=\"value\">\n"
- " <parsenode rule=\"fieldspec\">\n"
- " <parsenode rule=\"doctype\">\n"
- " <parsenode rule=\"doctype\">\n"
- " <value>music</value>\n"
- " </parsenode>\n"
- " </parsenode>\n"
- " <parsenode rule=\"fieldname\">\n"
- " <parsenode rule=\"fieldname\">\n"
- " <value>artist</value>\n"
- " </parsenode>\n"
- " </parsenode>\n"
- " </parsenode>\n"
- " </parsenode>\n"
- " </parsenode>\n"
- " <parsenode rule=\"operator\">\n"
- " <parsenode rule=\"operator\">\n"
- " <value>=</value>\n"
- " </parsenode>\n"
- " </parsenode>\n"
- " <parsenode rule=\"arithmvalue\">\n"
- " <parsenode rule=\"value\">\n"
- " <parsenode rule=\"string\">\n"
- " <parsenode rule=\"string\">\n"
- " <value>*john*</value>\n"
- " </parsenode>\n"
- " </parsenode>\n"
- " </parsenode>\n"
- " </parsenode>\n"
- " </parsenode>\n"
- " </parsenode>\n"
- "</parsetree>\n");
- testExpr(repo, bucketIdFactory,
- "music.length >= 180", grammar.use_parser<1>(),
- "<parsetree version=\"1.0\">\n"
- " <parsenode rule=\"leaf\">\n"
- " <parsenode rule=\"comparison\">\n"
- " <parsenode rule=\"arithmvalue\">\n"
- " <parsenode rule=\"value\">\n"
- " <parsenode rule=\"fieldspec\">\n"
- " <parsenode rule=\"doctype\">\n"
- " <parsenode rule=\"doctype\">\n"
- " <value>music</value>\n"
- " </parsenode>\n"
- " </parsenode>\n"
- " <parsenode rule=\"fieldname\">\n"
- " <parsenode rule=\"fieldname\">\n"
- " <value>length</value>\n"
- " </parsenode>\n"
- " </parsenode>\n"
- " </parsenode>\n"
- " </parsenode>\n"
- " </parsenode>\n"
- " <parsenode rule=\"operator\">\n"
- " <parsenode rule=\"operator\">\n"
- " <value>&gt;=</value>\n"
- " </parsenode>\n"
- " </parsenode>\n"
- " <parsenode rule=\"arithmvalue\">\n"
- " <parsenode rule=\"value\">\n"
- " <parsenode rule=\"number\">\n"
- " <parsenode rule=\"number\">\n"
- " <value>180</value>\n"
- " </parsenode>\n"
- " </parsenode>\n"
- " </parsenode>\n"
- " </parsenode>\n"
- " </parsenode>\n"
- " </parsenode>\n"
- "</parsetree>\n");
-
- // Parser 0 - The whole expression
- testExpr(repo, bucketIdFactory,
- "true oR nOt false And true", grammar.use_parser<0>(),
- "<parsetree version=\"1.0\">\n"
- " <parsenode rule=\"expression\">\n"
- " <parsenode rule=\"or\">\n"
- " <parsenode rule=\"leaf\">\n"
- " <parsenode rule=\"bool\">\n"
- " <parsenode rule=\"bool\">\n"
- " <value>true</value>\n"
- " </parsenode>\n"
- " </parsenode>\n"
- " </parsenode>\n"
- " <parsenode rule=\"or\">\n"
- " <value>oR</value>\n"
- " </parsenode>\n"
- " <parsenode rule=\"and\">\n"
- " <parsenode rule=\"not\">\n"
- " <parsenode rule=\"not\">\n"
- " <value>nOt</value>\n"
- " </parsenode>\n"
- " <parsenode rule=\"leaf\">\n"
- " <parsenode rule=\"bool\">\n"
- " <parsenode rule=\"bool\">\n"
- " <value>false</value>\n"
- " </parsenode>\n"
- " </parsenode>\n"
- " </parsenode>\n"
- " </parsenode>\n"
- " <parsenode rule=\"and\">\n"
- " <value>And</value>\n"
- " </parsenode>\n"
- " <parsenode rule=\"leaf\">\n"
- " <parsenode rule=\"bool\">\n"
- " <parsenode rule=\"bool\">\n"
- " <value>true</value>\n"
- " </parsenode>\n"
- " </parsenode>\n"
- " </parsenode>\n"
- " </parsenode>\n"
- " </parsenode>\n"
- " </parsenode>\n"
- "</parsetree>\n");
- testExpr(repo, bucketIdFactory,
- "(true oR false) aNd true", grammar.use_parser<0>(),
- "<parsetree version=\"1.0\">\n"
- " <parsenode rule=\"expression\">\n"
- " <parsenode rule=\"and\">\n"
- " <parsenode rule=\"group\">\n"
- " <parsenode rule=\"or\">\n"
- " <parsenode rule=\"leaf\">\n"
- " <parsenode rule=\"bool\">\n"
- " <parsenode rule=\"bool\">\n"
- " <value>true</value>\n"
- " </parsenode>\n"
- " </parsenode>\n"
- " </parsenode>\n"
- " <parsenode rule=\"or\">\n"
- " <value>oR</value>\n"
- " </parsenode>\n"
- " <parsenode rule=\"leaf\">\n"
- " <parsenode rule=\"bool\">\n"
- " <parsenode rule=\"bool\">\n"
- " <value>false</value>\n"
- " </parsenode>\n"
- " </parsenode>\n"
- " </parsenode>\n"
- " </parsenode>\n"
- " </parsenode>\n"
- " <parsenode rule=\"and\">\n"
- " <value>aNd</value>\n"
- " </parsenode>\n"
- " <parsenode rule=\"leaf\">\n"
- " <parsenode rule=\"bool\">\n"
- " <parsenode rule=\"bool\">\n"
- " <value>true</value>\n"
- " </parsenode>\n"
- " </parsenode>\n"
- " </parsenode>\n"
- " </parsenode>\n"
- " </parsenode>\n"
- "</parsetree>\n");
- testExpr(repo, bucketIdFactory,
- "iddoc or not(notand and ornot)", grammar.use_parser<0>(),
- "<parsetree version=\"1.0\">\n"
- " <parsenode rule=\"expression\">\n"
- " <parsenode rule=\"or\">\n"
- " <parsenode rule=\"leaf\">\n"
- " <parsenode rule=\"doctype\">\n"
- " <parsenode rule=\"doctype\">\n"
- " <value>iddoc</value>\n"
- " </parsenode>\n"
- " </parsenode>\n"
- " </parsenode>\n"
- " <parsenode rule=\"or\">\n"
- " <value>or</value>\n"
- " </parsenode>\n"
- " <parsenode rule=\"not\">\n"
- " <parsenode rule=\"not\">\n"
- " <value>not</value>\n"
- " </parsenode>\n"
- " <parsenode rule=\"group\">\n"
- " <parsenode rule=\"and\">\n"
- " <parsenode rule=\"leaf\">\n"
- " <parsenode rule=\"doctype\">\n"
- " <parsenode rule=\"doctype\">\n"
- " <value>notand</value>\n"
- " </parsenode>\n"
- " </parsenode>\n"
- " </parsenode>\n"
- " <parsenode rule=\"and\">\n"
- " <value>and</value>\n"
- " </parsenode>\n"
- " <parsenode rule=\"leaf\">\n"
- " <parsenode rule=\"doctype\">\n"
- " <parsenode rule=\"doctype\">\n"
- " <value>ornot</value>\n"
- " </parsenode>\n"
- " </parsenode>\n"
- " </parsenode>\n"
- " </parsenode>\n"
- " </parsenode>\n"
- " </parsenode>\n"
- " </parsenode>\n"
- " </parsenode>\n"
- "</parsetree>\n");
- return true;
-}
-
-}
-
-vespalib::Lock Parser::_G_parseLock;
-
-unique_ptr<Node> Parser::parse(const vespalib::stringref & s)
-{
-
- simple::SelectionParser simple(_bucketIdFactory);
- if (simple.parse(s) && simple.getRemaining().empty()) {
- Node::UP tmp(simple.getNode());
- assert(tmp.get() != NULL);
- return tmp;
- } else {
- return fullParse(s);
- }
-}
-
-unique_ptr<Node> Parser::fullParse(const vespalib::stringref & s)
-{
- static bool haveTested = test(_repo, _bucketIdFactory); if (haveTested) {}
- try{
- vespalib::LockGuard guard(_G_parseLock);
- DocSelectionGrammar grammar(_repo, _bucketIdFactory);
- boost::spirit::classic::tree_parse_info<> info
- = pt_parse(&s[0], &s[0]+s.size(),
- grammar.use_parser<0>(), boost::spirit::classic::space_p);
- if (!info.full) {
- vespalib::string unexpected(info.stop);
- unsigned int position = s.size() - unexpected.size();
- if (unexpected.size() > 10) {
- unexpected = unexpected.substr(0,10);
- }
- vespalib::asciistream ost;
- ost << "Unexpected token at position " << position << " ('"
- << unexpected << "') in query '" << s << "',";
- throw ParsingFailedException(ost.str(), VESPA_STRLOC);
- }
- parse_assert(info.trees.size() == 1);
- //printSpiritTree(std::cerr, info.trees, s, grammar);
- return parseTree(grammar, info.trees[0]);
- } catch (ParsingFailedException& e) {
- throw;
- } catch (vespalib::Exception& e) {
- throw ParsingFailedException("Parsing failed. See cause exception.",
- e, VESPA_STRLOC);
- } catch (std::exception& e) {
- cerr << "Parser::parse() internal error: "
- << e.what() << endl;
- throw; // Program will abort when this tries to go out..
- }
- return unique_ptr<Node>();
}
-} // select
-} // document
diff --git a/document/src/vespa/document/select/parser.h b/document/src/vespa/document/select/parser.h
index 4df00d64bf3..35d710298c3 100644
--- a/document/src/vespa/document/select/parser.h
+++ b/document/src/vespa/document/select/parser.h
@@ -1,37 +1,40 @@
// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
-
#pragma once
#include "node.h"
+#include "parsing_failed_exception.h"
#include <vespa/document/bucket/bucketidfactory.h>
-#include <vespa/vespalib/util/exception.h>
-#include <vespa/vespalib/util/sync.h>
-
-namespace document {
-class DocumentTypeRepo;
-
-namespace select {
-
-VESPA_DEFINE_EXCEPTION(ParsingFailedException, vespalib::Exception);
-
+#include <vespa/document/repo/documenttyperepo.h>
+#include <memory>
+#include <string>
+
+namespace document::select {
+
+/**
+ * Document selection parser built around Flex/Bison. O(n) on input size
+ * and non-locking.
+ *
+ * Thread safety: same as a std::vector
+ */
class Parser {
+ const DocumentTypeRepo&_doc_type_repo; // Reference bound in the constructor; the repo is not owned and must outlive this Parser.
+ const BucketIdFactory& _bucket_id_factory; // Reference bound in the constructor; the factory is not owned and must outlive this Parser.
public:
- Parser(const DocumentTypeRepo&, const BucketIdFactory& bucketIdFactory);
+ Parser(const DocumentTypeRepo& repo, const BucketIdFactory& bucket_id_factory) // Inline constructor: only stores the two references, performs no other work.
+ : _doc_type_repo(repo),
+ _bucket_id_factory(bucket_id_factory)
+ {}
/**
* Returns a newly allocated AST root node representing the selection
* if parsing is successful. Otherwise, ParsingFailedException will be
* thrown.
+ *
+ * Thread safe, assuming referenced DocumentTypeRepo and BucketIdFactory
+ * instances are immutable.
*/
- std::unique_ptr<Node> parse(const vespalib::stringref& s);
-
-private:
- std::unique_ptr<Node> fullParse(const vespalib::stringref& s);
- static vespalib::Lock _G_parseLock;
- const DocumentTypeRepo& _repo;
- const BucketIdFactory& _bucketIdFactory;
+ std::unique_ptr<Node> parse(const std::string& str) const; // Declared here only (defined out of line); const, so it cannot modify the Parser's own members.
};
-} // select
-} // parser
+}
diff --git a/document/src/vespa/document/select/parsing_failed_exception.cpp b/document/src/vespa/document/select/parsing_failed_exception.cpp
new file mode 100644
index 00000000000..ce02389ed46
--- /dev/null
+++ b/document/src/vespa/document/select/parsing_failed_exception.cpp
@@ -0,0 +1,9 @@
+// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include "parsing_failed_exception.h"
+#include <vespa/document/base/exceptions.h>
+
+namespace document::select {
+
+VESPA_IMPLEMENT_EXCEPTION(ParsingFailedException, vespalib::Exception); // NOTE(review): presumably emits the out-of-line definitions for the class declared via VESPA_DEFINE_EXCEPTION in parsing_failed_exception.h — confirm against the macro.
+
+} \ No newline at end of file
diff --git a/document/src/vespa/document/select/parsing_failed_exception.h b/document/src/vespa/document/select/parsing_failed_exception.h
new file mode 100644
index 00000000000..54138a492e8
--- /dev/null
+++ b/document/src/vespa/document/select/parsing_failed_exception.h
@@ -0,0 +1,10 @@
+// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+#include <vespa/vespalib/util/exception.h>
+
+namespace document::select {
+
+VESPA_DEFINE_EXCEPTION(ParsingFailedException, vespalib::Exception); // Exception type that Parser::parse is documented to throw on failure; NOTE(review): presumably declared with vespalib::Exception as its base — confirm against the macro.
+
+}
diff --git a/document/src/vespa/document/select/scanner.h b/document/src/vespa/document/select/scanner.h
new file mode 100644
index 00000000000..5aa9ea1c8d3
--- /dev/null
+++ b/document/src/vespa/document/select/scanner.h
@@ -0,0 +1,21 @@
+// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+#if !defined(yyFlexLexerOnce)
+# include <FlexLexer.h>
+#endif
+
+#include "parser.hxx"
+#include "location.hh"
+#include <iosfwd>
+
+namespace document::select {
+
+class DocSelScanner final : yyFlexLexer { // Lexer wrapper for the selection parser; no access specifier is given, so the yyFlexLexer base is inherited privately.
+public:
+ explicit DocSelScanner(std::istream* in) : yyFlexLexer(in) {} // Passes the input stream pointer straight through to the base lexer.
+ ~DocSelScanner() override = default;
+ int yylex(DocSelParser::semantic_type* yylval, DocSelParser::location_type* yyloc); // Declared only; NOTE(review): presumably implemented by the Flex-generated scanner (grammar/lexer.ll) and invoked by DocSelParser — confirm.
+};
+
+}
diff --git a/document/src/vespa/document/select/traversingvisitor.cpp b/document/src/vespa/document/select/traversingvisitor.cpp
index b8f34540b29..26de6093ddf 100644
--- a/document/src/vespa/document/select/traversingvisitor.cpp
+++ b/document/src/vespa/document/select/traversingvisitor.cpp
@@ -73,12 +73,6 @@ TraversingVisitor::visitIdValueNode(const IdValueNode &)
void
-TraversingVisitor::visitSearchColumnValueNode(const SearchColumnValueNode &)
-{
-}
-
-
-void
TraversingVisitor::visitFieldValueNode(const FieldValueNode &)
{
}
diff --git a/document/src/vespa/document/select/traversingvisitor.h b/document/src/vespa/document/select/traversingvisitor.h
index 43d10cfcaa2..f8b0377b102 100644
--- a/document/src/vespa/document/select/traversingvisitor.h
+++ b/document/src/vespa/document/select/traversingvisitor.h
@@ -21,7 +21,6 @@ public:
void visitInvalidConstant(const InvalidConstant &) override;
void visitDocumentType(const DocType &) override;
void visitIdValueNode(const IdValueNode &) override;
- void visitSearchColumnValueNode(const SearchColumnValueNode &) override;
void visitFieldValueNode(const FieldValueNode &) override;
void visitFloatValueNode(const FloatValueNode &) override;
void visitVariableValueNode(const VariableValueNode &) override;
diff --git a/document/src/vespa/document/select/valuenodes.cpp b/document/src/vespa/document/select/valuenodes.cpp
index 479896f9124..a0bbde0d21b 100644
--- a/document/src/vespa/document/select/valuenodes.cpp
+++ b/document/src/vespa/document/select/valuenodes.cpp
@@ -15,7 +15,6 @@
#include <iomanip>
#include <sys/time.h>
-
#include <vespa/log/log.h>
LOG_SETUP(".document.select.valuenode");
@@ -61,10 +60,7 @@ InvalidValueNode::print(std::ostream& out, bool verbose,
if (hadParentheses()) out << ')';
}
-NullValueNode::NullValueNode(const vespalib::stringref & name)
- : _name(name)
-{ }
-
+NullValueNode::NullValueNode() {} // No name is stored any more; NullValueNode::print writes the fixed text "null" instead.
void
NullValueNode::visit(Visitor &visitor) const
@@ -79,7 +75,7 @@ NullValueNode::print(std::ostream& out, bool verbose,
{
(void) verbose; (void) indent;
if (hadParentheses()) out << '(';
- out << _name;
+ out << "null";
if (hadParentheses()) out << ')';
}
@@ -678,88 +674,6 @@ IdValueNode::print(std::ostream& out, bool verbose,
if (hadParentheses()) out << ')';
}
-SearchColumnValueNode::SearchColumnValueNode(
- const BucketIdFactory& bucketIdFactory,
- const vespalib::stringref & name, int numColumns)
- : _bucketIdFactory(bucketIdFactory),
- _id(name),
- _numColumns(numColumns),
- _distribution(std::make_unique<BucketDistribution>(_numColumns, 16))
-{
-}
-
-int64_t
-SearchColumnValueNode::getValue(const BucketId& id) const
-{
- return _distribution->getColumn(id);
-}
-
-
-std::unique_ptr<Value>
-SearchColumnValueNode::getValue(const Context& context) const
-{
- if (context._doc != NULL) {
- return getValue(context._doc->getId());
- } else if (context._docId != NULL) {
- return getValue(*context._docId);
- } else {
- return getValue(context._docUpdate->getId());
- }
-}
-
-
-std::unique_ptr<Value>
-SearchColumnValueNode::getValue(const DocumentId& id) const
-{
- return std::unique_ptr<Value>(new IntegerValue(
- getValue(_bucketIdFactory.getBucketId(id)), false));
-}
-
-
-std::unique_ptr<Value>
-SearchColumnValueNode::traceValue(const Context& context,
- std::ostream &out) const
-{
- if (context._doc != NULL) {
- return traceValue(context._doc->getId(), out);
- } else if (context._docId != NULL) {
- return traceValue(*context._docId, out);
- } else {
- return traceValue(context._docUpdate->getId(), out);
- }
-}
-
-
-std::unique_ptr<Value>
-SearchColumnValueNode::traceValue(const DocumentId& id,
- std::ostream& out) const
-{
- std::unique_ptr<Value> result(new IntegerValue(
- getValue(_bucketIdFactory.getBucketId(id)), false));
- out << "Resolved search column of doc \"" << id << "\" to " << *result
- << "\n";
- return result;
-}
-
-
-void
-SearchColumnValueNode::visit(Visitor &visitor) const
-{
- visitor.visitSearchColumnValueNode(*this);
-}
-
-
-void
-SearchColumnValueNode::print(std::ostream& out, bool verbose,
- const std::string& indent) const
-{
- (void) verbose; (void) indent;
- if (hadParentheses()) out << '(';
- out << _id;
- out << '.' << _numColumns;
- if (hadParentheses()) out << ')';
-}
-
namespace {
union HashUnion {
unsigned char _key[16];
@@ -1176,5 +1090,43 @@ ArithmeticValueNode::print(std::ostream& out, bool verbose,
if (hadParentheses()) out << ')';
}
+std::unique_ptr<FieldValueNode> FieldExprNode::convert_to_field_value() const { // Collapses this expression chain into a single FieldValueNode.
+ const auto& doctype = resolve_doctype(); // string held by the leftmost node of the chain
+ // FIXME deprecate manual post-parsing of field expressions in favor of
+ // actually using the structural parser in the way nature intended.
+ vespalib::string mangled_expression; // filled with the chain components joined by '.'; the leftmost (doctype) node is skipped when this node has a left-hand side
+ build_mangled_expression(mangled_expression);
+ return std::make_unique<FieldValueNode>(doctype, mangled_expression);
+}
+
+std::unique_ptr<FunctionValueNode> FieldExprNode::convert_to_function_call() const { // Builds a FunctionValueNode from _right_expr (function name) and _left_expr (operand).
+ // Right hand expr string contains function call, lhs contains field spec on which
+ // the function is to be invoked.
+ if ((_left_expr == nullptr) || (_left_expr->_left_expr == nullptr)) { // chain has fewer than three nodes, so there is no field spec between doctype and function
+ throw vespalib::IllegalArgumentException(
+ vespalib::make_string("Cannot call function '%s' directly on document type", _right_expr.c_str()));
+ }
+ auto lhs = _left_expr->convert_to_field_value(); // everything left of the function name becomes the field value operand
+ const auto& function_name = _right_expr;
+ return std::make_unique<FunctionValueNode>(function_name, std::move(lhs));
+}
+
+void FieldExprNode::build_mangled_expression(vespalib::string& dest) const { // Appends the chain components to dest, separated by '.', in left-to-right order.
+ // Leftmost node is doctype, which should not be emitted as part of mangled expression.
+ if (_left_expr && _left_expr->_left_expr) { // recurse only when the left node is not itself the leftmost node
+ _left_expr->build_mangled_expression(dest);
+ dest.push_back('.');
+ }
+ dest.append(_right_expr); // NOTE(review): a node without any left-hand side appends its own string here, i.e. the doctype
+}
+
+const vespalib::string& FieldExprNode::resolve_doctype() const { // Returns the string stored in the leftmost node of this chain.
+ const auto* leftmost = this;
+ while (leftmost->_left_expr) { // follow left links until reaching a node that has none
+ leftmost = leftmost->_left_expr.get();
+ }
+ return leftmost->_right_expr; // reference to a member of a node in this chain, not a copy
+}
+
}
diff --git a/document/src/vespa/document/select/valuenodes.h b/document/src/vespa/document/select/valuenodes.h
index 0464159b85f..4257516d227 100644
--- a/document/src/vespa/document/select/valuenodes.h
+++ b/document/src/vespa/document/select/valuenodes.h
@@ -35,9 +35,8 @@ public:
class NullValueNode : public ValueNode
{
- vespalib::string _name;
public:
- NullValueNode(const vespalib::stringref & name);
+ NullValueNode();
std::unique_ptr<Value> getValue(const Context&) const override {
return std::unique_ptr<Value>(new NullValue());
@@ -48,7 +47,7 @@ public:
void visit(Visitor& visitor) const override;
ValueNode::UP clone() const override {
- return wrapParens(new NullValueNode(_name));
+ return wrapParens(new NullValueNode());
}
};
@@ -56,7 +55,7 @@ class StringValueNode : public ValueNode
{
vespalib::string _value;
public:
- StringValueNode(const vespalib::stringref & val);
+ explicit StringValueNode(const vespalib::stringref & val);
const vespalib::string& getValue() const { return _value; }
@@ -115,6 +114,7 @@ class VariableValueNode : public ValueNode
{
vespalib::string _value;
public:
+ // TODO stringref
VariableValueNode(const vespalib::string & variableName) : _value(variableName) {}
const vespalib::string& getVariableName() const { return _value; }
@@ -183,6 +183,59 @@ private:
void initFieldPath(const DocumentType&) const;
};
+class FunctionValueNode;
+
+// Left-linked chain of expression components; the leftmost node holds the document type. Only used by the parser
+// to build a partial field expression. Never part of an AST returned to the caller.
+class FieldExprNode final : public ValueNode {
+ std::unique_ptr<FieldExprNode> _left_expr; // owned chain to the left of this component; empty for the leftmost node
+ vespalib::string _right_expr; // string of this component (the doctype when this is the leftmost node)
+public:
+ explicit FieldExprNode(const vespalib::string& doctype) : _left_expr(), _right_expr(doctype) {} // creates a leftmost node
+ FieldExprNode(std::unique_ptr<FieldExprNode> left_expr, vespalib::stringref right_expr) // takes ownership of left_expr and adds one component to its right
+ : _left_expr(std::move(left_expr)), _right_expr(right_expr)
+ {}
+ FieldExprNode(const FieldExprNode &) = delete; // not copyable; clone() is the way to duplicate a chain
+ FieldExprNode & operator = (const FieldExprNode &) = delete;
+ FieldExprNode(FieldExprNode &&) = default;
+ FieldExprNode & operator = (FieldExprNode &&) = default;
+ ~FieldExprNode() = default;
+
+ std::unique_ptr<FieldValueNode> convert_to_field_value() const; // whole chain as one field value node
+ std::unique_ptr<FunctionValueNode> convert_to_function_call() const; // rightmost component as function name, rest as its operand
+private:
+ void build_mangled_expression(vespalib::string& dest) const;
+ const vespalib::string& resolve_doctype() const;
+
+ // ValueNode overrides that are not used for this node; they return empty pointers or do nothing.
+ std::unique_ptr<Value> getValue(const Context& context) const override {
+ (void) context;
+ return std::unique_ptr<Value>();
+ }
+ std::unique_ptr<Value> traceValue(const Context &context, std::ostream& out) const override {
+ (void) context;
+ (void) out;
+ return std::unique_ptr<Value>();
+ }
+ void print(std::ostream& out, bool verbose, const std::string& indent) const override {
+ (void) out;
+ (void) verbose;
+ (void) indent;
+ }
+ void visit(Visitor& visitor) const override {
+ (void) visitor;
+ }
+
+ ValueNode::UP clone() const override { // copies this node and, recursively, its left-hand chain
+ if (_left_expr) {
+ return wrapParens(new FieldExprNode(std::unique_ptr<FieldExprNode>(
+ static_cast<FieldExprNode*>(_left_expr->clone().release())), _right_expr)); // downcast assumes wrapParens hands back the FieldExprNode it was given - TODO confirm
+ } else {
+ return wrapParens(new FieldExprNode(_right_expr));
+ }
+ }
+};
+
class IdValueNode : public ValueNode
{
public:
@@ -222,35 +275,6 @@ private:
int _divisionBits;
};
-class SearchColumnValueNode : public ValueNode
-{
-public:
- SearchColumnValueNode(const BucketIdFactory& bucketIdFactory,
- const vespalib::stringref & name,
- int numColumns);
-
- int getColumns() { return _numColumns; }
-
- std::unique_ptr<Value> getValue(const Context& context) const override;
- std::unique_ptr<Value> getValue(const DocumentId& id) const;
- std::unique_ptr<Value> traceValue(const Context& context, std::ostream &out) const override;
- std::unique_ptr<Value> traceValue(const DocumentId& val, std::ostream& out) const;
-
- int64_t getValue(const BucketId& bucketId) const;
- void print(std::ostream& out, bool verbose, const std::string& indent) const override;
- void visit(Visitor& visitor) const override;
-
- ValueNode::UP clone() const override {
- return wrapParens(new SearchColumnValueNode(_bucketIdFactory, _id, _numColumns));
-}
-
-private:
- const BucketIdFactory& _bucketIdFactory;
- vespalib::string _id;
- int _numColumns;
- std::unique_ptr<BucketDistribution> _distribution;
-};
-
class FunctionValueNode : public ValueNode
{
public:
diff --git a/document/src/vespa/document/select/visitor.h b/document/src/vespa/document/select/visitor.h
index c89f0f24a6f..762d47c7c35 100644
--- a/document/src/vespa/document/select/visitor.h
+++ b/document/src/vespa/document/select/visitor.h
@@ -70,9 +70,6 @@ public:
visitIdValueNode(const IdValueNode &) = 0;
virtual void
- visitSearchColumnValueNode(const SearchColumnValueNode &) = 0;
-
- virtual void
visitFieldValueNode(const FieldValueNode &) = 0;
virtual void