diff options
Diffstat (limited to 'document')
30 files changed, 1289 insertions, 1803 deletions
diff --git a/document/pom.xml b/document/pom.xml index 81b21487314..10f71218422 100644 --- a/document/pom.xml +++ b/document/pom.xml @@ -9,6 +9,7 @@ <groupId>com.yahoo.vespa</groupId> <artifactId>parent</artifactId> <version>6-SNAPSHOT</version> + <relativePath>../parent/pom.xml</relativePath> </parent> <artifactId>document</artifactId> <version>6-SNAPSHOT</version> diff --git a/document/src/main/java/com/yahoo/document/idstring/IdIdString.java b/document/src/main/java/com/yahoo/document/idstring/IdIdString.java index 7fcb530b347..44ff08c73f0 100644 --- a/document/src/main/java/com/yahoo/document/idstring/IdIdString.java +++ b/document/src/main/java/com/yahoo/document/idstring/IdIdString.java @@ -62,6 +62,9 @@ public class IdIdString extends IdString { if (hasSetLocation) { throw new IllegalArgumentException("Illegal key combination in " + keyValues); } + if (value.isEmpty()) { + throw new IllegalArgumentException("ID location value for 'n=' key is empty"); + } location = Long.parseLong(value); hasSetLocation = true; hasNumber = true; @@ -70,6 +73,9 @@ public class IdIdString extends IdString { if (hasSetLocation) { throw new IllegalArgumentException("Illegal key combination in " + keyValues); } + if (value.isEmpty()) { + throw new IllegalArgumentException("ID location value for 'g=' key is empty"); + } location = makeLocation(value); hasSetLocation = true; hasGroup = true; diff --git a/document/src/test/java/com/yahoo/document/DocumentIdTestCase.java b/document/src/test/java/com/yahoo/document/DocumentIdTestCase.java index 79a10bc72e4..bd769889363 100644 --- a/document/src/test/java/com/yahoo/document/DocumentIdTestCase.java +++ b/document/src/test/java/com/yahoo/document/DocumentIdTestCase.java @@ -1,9 +1,12 @@ // Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
package com.yahoo.document; -import com.yahoo.document.*; import com.yahoo.document.idstring.*; import com.yahoo.vespa.objects.BufferSerializer; +import org.junit.Before; +import org.junit.Rule; +import org.junit.Test; +import org.junit.rules.ExpectedException; import java.math.BigInteger; @@ -12,16 +15,20 @@ import java.util.regex.Pattern; import java.util.Arrays; import static org.hamcrest.Matchers.containsString; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; import static org.junit.Assert.assertThat; +import static org.junit.Assert.assertTrue; +import static org.junit.Assert.fail; -public class DocumentIdTestCase extends junit.framework.TestCase { +public class DocumentIdTestCase { DocumentTypeManager manager = new DocumentTypeManager(); - public DocumentIdTestCase(String name) { - super(name); - } + @Rule + public ExpectedException expectedException = ExpectedException.none(); - protected void setUp() { + @Before + public void setUp() { DocumentType testDocType = new DocumentType("testdoc"); testDocType.addHeaderField("intattr", DataType.INT); @@ -33,6 +40,7 @@ public class DocumentIdTestCase extends junit.framework.TestCase { manager.registerDocumentType(testDocType); } + @Test public void testCompareTo() { DocumentId docId1 = new Document(manager.getDocumentType("testdoc"), new DocumentId("doc:testdoc:http://www.uio.no/")).getId(); DocumentId docId2 = new Document(manager.getDocumentType("testdoc"), new DocumentId("doc:testdoc:http://www.uio.no/")).getId(); @@ -56,6 +64,7 @@ public class DocumentIdTestCase extends junit.framework.TestCase { } } + @Test public void testValidInvalidUriSchemes() { try { //valid URIs @@ -92,9 +101,23 @@ public class DocumentIdTestCase extends junit.framework.TestCase { checkInvalidUri("id:namespace:type:n=0,g=foo:foo"); } + @Test + public void empty_user_location_value_throws_exception() { + expectedException.expect(IllegalArgumentException.class); + expectedException.expectMessage("ID 
location value for 'n=' key is empty"); + new DocumentId("id:namespace:type:n=:foo"); + } + + @Test + public void empty_group_location_value_throws_exception() { + expectedException.expect(IllegalArgumentException.class); + expectedException.expectMessage("ID location value for 'g=' key is empty"); + new DocumentId("id:namespace:type:g=:foo"); + } //Compares globalId with C++ implementation located in // ~document-HEAD/document/src/tests/cpp-globalidbucketids.txt + @Test public void testCalculateGlobalId() throws IOException{ String file = "src/tests/cpp-globalidbucketids.txt"; @@ -135,6 +158,7 @@ public class DocumentIdTestCase extends junit.framework.TestCase { //Compares bucketId with C++ implementation located in // ~document-HEAD/document/src/tests/cpp-globalidbucketids.txt + @Test public void testGetBucketId() throws IOException{ String file = "src/tests/cpp-globalidbucketids.txt"; BufferedReader fr = new BufferedReader(new FileReader(file)); @@ -153,6 +177,7 @@ public class DocumentIdTestCase extends junit.framework.TestCase { fr.close(); } + @Test public void testGroupdoc() { try { //valid @@ -166,11 +191,13 @@ public class DocumentIdTestCase extends junit.framework.TestCase { } } + @Test public void testInvalidGroupdoc() { checkInvalidUri("grouppdoc:blabla:something"); checkInvalidUri("groupdoc:blablasomething"); } + @Test public void testUriNamespace() { DocumentId docId = new DocumentId("doc:bar:foo"); assertEquals("doc:bar:foo", docId.toString()); @@ -217,6 +244,7 @@ public class DocumentIdTestCase extends junit.framework.TestCase { assertEquals(1268182861, ((OrderDocIdString)docId.getScheme()).getOrdering()); } + @Test public void testIdStrings() { DocumentId docId; docId = new DocumentId(new DocIdString("test", "baaaa")); @@ -240,6 +268,7 @@ public class DocumentIdTestCase extends junit.framework.TestCase { assertEquals("type", docId.getDocType()); } + @Test public void testIdStringFeatures() { DocumentId none = new DocumentId("id:ns:type::foo"); 
assertFalse(none.getScheme().hasGroup()); @@ -276,6 +305,7 @@ public class DocumentIdTestCase extends junit.framework.TestCase { assertEquals(42, order.getScheme().getNumber()); } + @Test public void testHashCodeOfGids() { DocumentId docId0 = new DocumentId("doc:blabla:0"); byte[] docId0Gid = docId0.getGlobalId(); @@ -295,6 +325,7 @@ public class DocumentIdTestCase extends junit.framework.TestCase { assertEquals(Arrays.hashCode(docId0Gid), Arrays.hashCode(docId0CopyGid)); } + @Test public void testDocumentIdCanOnlyContainTextCharacters() throws UnsupportedEncodingException { assertExceptionWhenConstructing(new byte[]{105, 100, 58, 97, 58, 98, 58, 58, 0, 99}, // "id:a:b::0x0c" "illegal code point 0x0"); @@ -313,6 +344,7 @@ public class DocumentIdTestCase extends junit.framework.TestCase { } } + @Test public void testSerializedDocumentIdCanContainNonTextCharacter() throws UnsupportedEncodingException { String strId = new String(new byte[]{105, 100, 58, 97, 58, 98, 58, 58, 7, 99}); // "id:a:b::0x7c" DocumentId docId = DocumentId.createFromSerialized(strId); @@ -328,6 +360,7 @@ public class DocumentIdTestCase extends junit.framework.TestCase { } } + @Test public void testSerializedDocumentIdCannotContainZeroByte() throws UnsupportedEncodingException { String strId = new String(new byte[]{105, 100, 58, 97, 58, 98, 58, 58, 0, 99}); // "id:a:b::0x0c" try { diff --git a/document/src/tests/bucketselectortest.cpp b/document/src/tests/bucketselectortest.cpp index 0f8520745f1..e0857a32dba 100644 --- a/document/src/tests/bucketselectortest.cpp +++ b/document/src/tests/bucketselectortest.cpp @@ -85,8 +85,6 @@ void BucketSelectorTest::testSimple() ASSERT_BUCKET_COUNT("id.bucket == 0x4000000000000258", 1u); // Bucket 600 ASSERT_BUCKET_COUNT("(testdoctype1 and id.bucket=0)", 1u); - ASSERT_BUCKET_COUNT("searchcolumn.3 = 1", 21845u); - // Check that the correct buckets is found ASSERT_BUCKET("id = \"userdoc:ns:123:foobar\"", document::BucketId(58, 123)); diff --git 
a/document/src/tests/documentselectparsertest.cpp b/document/src/tests/documentselectparsertest.cpp index c5715ae5114..db7b48cdc3a 100644 --- a/document/src/tests/documentselectparsertest.cpp +++ b/document/src/tests/documentselectparsertest.cpp @@ -1,6 +1,5 @@ // Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. - #include <cppunit/TestFixture.h> #include <cppunit/extensions/HelperMacros.h> #include <vespa/document/repo/configbuilder.h> @@ -12,12 +11,16 @@ #include <vespa/document/select/visitor.h> #include <vespa/document/select/bodyfielddetector.h> #include <vespa/document/select/valuenode.h> +#include <vespa/document/select/valuenodes.h> #include <vespa/document/select/branch.h> #include <vespa/document/select/simpleparser.h> #include <vespa/document/select/constant.h> #include <vespa/document/select/invalidconstant.h> #include <vespa/document/select/doctype.h> #include <vespa/document/select/compare.h> +#include <vespa/document/select/parse_utils.h> +#include <vespa/vespalib/util/exceptions.h> +#include <limits> using namespace document::config_builder; @@ -34,6 +37,15 @@ class DocumentSelectParserTest : public CppUnit::TestFixture { CPPUNIT_TEST(testThatComplexFieldValuesHaveCorrectFieldNames); CPPUNIT_TEST(testBodyFieldDetection); CPPUNIT_TEST(testDocumentUpdates); + CPPUNIT_TEST(test_syntax_error_reporting); + CPPUNIT_TEST(test_operator_precedence); + CPPUNIT_TEST(test_token_used_as_ident_preserves_casing); + CPPUNIT_TEST(test_ambiguous_field_spec_expression_is_handled_correctly); + CPPUNIT_TEST(test_can_build_field_value_from_field_expr_node); + CPPUNIT_TEST(test_can_build_function_call_from_field_expr_node); + CPPUNIT_TEST(test_function_call_on_doctype_throws_exception); + CPPUNIT_TEST(test_parse_utilities_handle_well_formed_input); + CPPUNIT_TEST(test_parse_utilities_handle_malformed_input); CPPUNIT_TEST_SUITE_END(); BucketIdFactory _bucketIdFactory; @@ -51,11 +63,13 @@ class 
DocumentSelectParserTest : public CppUnit::TestFixture { const std::string& hstr); std::unique_ptr<select::FieldValueNode> - parseFieldValue(const std::string expression); + parseFieldValue(const std::string& expression); template <typename ContainsType> select::ResultList doParse(const vespalib::stringref& expr, const ContainsType& t); + + std::string parse_to_tree(const std::string& str); public: DocumentSelectParserTest() @@ -88,7 +102,15 @@ public: void testDocumentUpdates2(); void testDocumentUpdates3(); void testDocumentUpdates4(); - void testDocumentUpdates5(); + void test_syntax_error_reporting(); + void test_operator_precedence(); + void test_token_used_as_ident_preserves_casing(); + void test_ambiguous_field_spec_expression_is_handled_correctly(); + void test_can_build_field_value_from_field_expr_node(); + void test_can_build_function_call_from_field_expr_node(); + void test_function_call_on_doctype_throws_exception(); + void test_parse_utilities_handle_well_formed_input(); + void test_parse_utilities_handle_malformed_input(); }; CPPUNIT_TEST_SUITE_REGISTRATION(DocumentSelectParserTest); @@ -111,9 +133,9 @@ void DocumentSelectParserTest::setUp() builder.document(-1673092522, "usergroup", Struct("usergroup.header"), Struct("usergroup.body")); - _repo.reset(new DocumentTypeRepo(builder.config())); + _repo = std::make_unique<DocumentTypeRepo>(builder.config()); - _parser.reset(new select::Parser(*_repo, _bucketIdFactory)); + _parser = std::make_unique<select::Parser>(*_repo, _bucketIdFactory); } Document::SP DocumentSelectParserTest::createDoc( @@ -319,11 +341,45 @@ void verifyParse(const std::string& query, const char* expected = 0) { } } +void DocumentSelectParserTest::test_syntax_error_reporting() { + createDocs(); + + verifyFailedParse("testdoctype1.headerval == aaa", "ParsingFailedException: " + "syntax error, unexpected end of input, expecting . 
at column 30 " + "when parsing selection 'testdoctype1.headerval == aaa'"); + // TODO improve error reporting of broken escape sequences. Current error messages + // are not too helpful since we simply fail to parse the string token altogether. + verifyFailedParse("testdoctype1.headerval == \"tab\\x0notcomplete\"", + "ParsingFailedException: Unexpected character: '\\\"' at column 27 " + "when parsing selection 'testdoctype1.headerval == \"tab\\x0notcomplete\"'"); + verifyFailedParse("testdoctype1.headerval == \"tab\\ysf\"", + "ParsingFailedException: Unexpected character: '\\\"' at column 27 " + "when parsing selection 'testdoctype1.headerval == \"tab\\ysf\"'"); + // Test illegal operator + verifyFailedParse("testdoctype1.headerval <> 12", "ParsingFailedException: syntax error, " + "unexpected > at column 25 when parsing selection 'testdoctype1.headerval <> 12'"); + + // This will trigger a missing doctype error instead of syntax error, as "fal" + // will be reduced into a doctype rule. 
+ verifyFailedParse("fal se", "ParsingFailedException: Document type 'fal' " + "not found at column 1 when parsing selection 'fal se'"); + + verifyFailedParse("mytype", "ParsingFailedException: Document type 'mytype' not found"); + + verifyFailedParse("mytype.foo.bar", "ParsingFailedException: Document type 'mytype' not found"); + + verifyFailedParse("testdoctype1 == 8", "ParsingFailedException: syntax error, unexpected ==, " + "expecting end of input at column 14 when parsing selection 'testdoctype1 == 8'"); + + verifyFailedParse("(1 + 2)", "ParsingFailedException: expected field spec, " + "doctype, bool or comparison at column 1 when parsing selection '(1 + 2)'"); +} + void DocumentSelectParserTest::testParseTerminals() { createDocs(); - // Test number value + // Test number value verifyParse("", "true"); verifyParse("testdoctype1.headerval == 123"); verifyParse("testdoctype1.headerval == +123.53", "testdoctype1.headerval == 123.53"); @@ -332,10 +388,8 @@ void DocumentSelectParserTest::testParseTerminals() "testdoctype1.headerval == 2.34124e+08"); verifyParse("testdoctype1.headerval == -234123.523E-3", "testdoctype1.headerval == -234.124"); - verifyFailedParse("testdoctype1.headerval == aaa", "ParsingFailedException: " - "Unexpected token at position 23 ('== aaa') in query " - "'testdoctype1.headerval == aaa', at fullParse in "); - // Test string value + + // Test string value verifyParse("testdoctype1.headerval == \"test\""); std::unique_ptr<select::Node> node( _parser->parse("testdoctype1.headerval == \"test\"")); @@ -345,64 +399,46 @@ void DocumentSelectParserTest::testParseTerminals() dynamic_cast<const select::FieldValueNode&>(compnode.getLeft())); const select::StringValueNode& vnode( dynamic_cast<const select::StringValueNode&>(compnode.getRight())); - /* - CPPUNIT_ASSERT_EQUAL(vespalib::string("testdoctype1"), - fnode.getDocType()->getName()); - */ + CPPUNIT_ASSERT_EQUAL(vespalib::string("headerval"), fnode.getFieldName()); 
CPPUNIT_ASSERT_EQUAL(vespalib::string("test"), vnode.getValue()); - // Test whitespace + // Test whitespace verifyParse("testdoctype1.headerval == \"te st \""); verifyParse(" \t testdoctype1.headerval\t== \t \"test\"\t", "testdoctype1.headerval == \"test\""); - // Test escaping + // Test escaping verifyParse("testdoctype1.headerval == \"tab\\ttest\""); verifyParse("testdoctype1.headerval == \"tab\\x09test\"", "testdoctype1.headerval == \"tab\\ttest\""); verifyParse("testdoctype1.headerval == \"tab\\x055test\""); - verifyFailedParse("testdoctype1.headerval == \"tab\\x0notcomplete\"", - "ParsingFailedException: Unexpected token at position 23 " - "('== \"tab\\x0') in query 'testdoctype1.headerval == \"tab\\x0notcomplete\"', " - "at fullParse in "); - verifyFailedParse("testdoctype1.headerval == \"tab\\ysf\"", - "ParsingFailedException: Unexpected token at position 23 " - "('== \"tab\\ys') in query 'testdoctype1.headerval == \"tab\\ysf\"', " - "at fullParse in "); node = _parser->parse("testdoctype1.headerval == \"\\tt\\x48 \\n\""); select::Compare& escapednode(dynamic_cast<select::Compare&>(*node)); const select::StringValueNode& escval( dynamic_cast<const select::StringValueNode&>(escapednode.getRight())); CPPUNIT_ASSERT_EQUAL(vespalib::string("\ttH \n"), escval.getValue()); - // Test illegal operator - verifyFailedParse("testdoctype1.headerval <> 12", "ParsingFailedException: Unexpected" - " token at position 23 ('<> 12') in query 'testdoctype1.headerval <> 12', at"); - // Test <= <, > >= + // Test <= <, > >= verifyParse("testdoctype1.headerval >= 123"); verifyParse("testdoctype1.headerval > 123"); verifyParse("testdoctype1.headerval <= 123"); verifyParse("testdoctype1.headerval < 123"); verifyParse("testdoctype1.headerval != 123"); - // Test defined + // Test defined verifyParse("testdoctype1.headerval", "testdoctype1.headerval != null"); - // Test bools - verifyParse("TRUE"); - verifyParse("FALSE"); + // Test bools + verifyParse("TRUE", "true"); + 
verifyParse("FALSE", "false"); verifyParse("true"); verifyParse("false"); - verifyParse("faLSe"); - verifyFailedParse("fal se", "ParsingFailedException: Unexpected token at " - "position 4 ('se') in query 'fal se', at"); + verifyParse("faLSe", "false"); - // Test document types + // Test document types verifyParse("testdoctype1"); - verifyFailedParse("mytype", "ParsingFailedException: Document type mytype " - "not found"); verifyParse("_test_doctype3_"); verifyParse("_test_doctype3_._only_in_child_ == 0"); - // Test document id with simple parser. + // Test document id with simple parser. verifySimpleParse("id == \"userdoc:ns:mytest\""); verifySimpleParse("id.namespace == \"myspace\""); verifySimpleParse("id.scheme == \"userdoc\""); @@ -411,7 +447,7 @@ void DocumentSelectParserTest::testParseTerminals() verifySimpleParse("id.user == 1234"); verifySimpleParse("id.user == 0x12456ab", "id.user == 19158699"); - // Test document id + // Test document id verifyParse("id == \"userdoc:ns:mytest\""); verifyParse("id.namespace == \"myspace\""); verifyParse("id.scheme == \"userdoc\""); @@ -429,28 +465,23 @@ void DocumentSelectParserTest::testParseTerminals() "id.bucket == -9223372036854775566"); verifyParse("id.gid == \"gid(0xd755743aea262650274d70f0)\""); - // Test search column - verifyParse("searchcolumn.10 == 2"); - - // Test other operators + // Test other operators verifyParse("id.scheme = \"*doc\""); verifyParse("testdoctype1.hstringval =~ \"(john|barry|shrek)\""); - // Verify functions + // Verify functions verifyParse("id.hash() == 124"); verifyParse("id.specific.hash() == 124"); verifyParse("testdoctype1.hstringval.lowercase() == \"chang\""); verifyParse("testdoctype1.hstringval.lowercase().hash() == 124"); - verifyFailedParse("testdoctype1 == 8", "ParsingFailedException: Unexpected token" - " at position 13 ('== 8') in query 'testdoctype1 == 8', at fullParse in "); verifyParse("testdoctype1.hintval > now()"); verifyParse("testdoctype1.hintval > now().abs()"); - // 
Value grouping + // Value grouping verifyParse("(123) < (200)"); verifyParse("(\"hmm\") < (id.scheme)"); - // Arithmetics + // Arithmetics verifyParse("1 + 2 > 1"); verifyParse("1 - 2 > 1"); verifyParse("1 * 2 > 1"); @@ -459,11 +490,11 @@ void DocumentSelectParserTest::testParseTerminals() verifyParse("(1 + 2) * (4 - 2) == 1"); verifyParse("23 + 643 / 34 % 10 > 34"); - // CJK stuff + // CJK stuff verifyParse("testdoctype1.hstringval = \"\xE4\xB8\xBA\xE4\xBB\x80\"", "testdoctype1.hstringval = \"\\xe4\\xb8\\xba\\xe4\\xbb\\x80\""); - // Strange doctype names + // Strange doctype names verifyParse("notandor"); verifyParse("ornotand"); verifyParse("andornot"); @@ -475,16 +506,16 @@ void DocumentSelectParserTest::testParseBranches() { createDocs(); - verifyParse("TRUE or FALSE aNd FALSE oR TRUE"); - verifyParse("TRUE and FALSE or FALSE and TRUE"); - verifyParse("TRUE or FALSE and FALSE or TRUE"); - verifyParse("(TRUE or FALSE) and (FALSE or TRUE)"); + verifyParse("TRUE or FALSE aNd FALSE oR TRUE", "true or false and false or true"); + verifyParse("TRUE and FALSE or FALSE and TRUE", "true and false or false and true"); + verifyParse("TRUE or FALSE and FALSE or TRUE", "true or false and false or true"); + verifyParse("(TRUE or FALSE) and (FALSE or TRUE)", "(true or false) and (false or true)"); verifyParse("true or (not false) and not true"); - // Test number branching with node branches + // Test number branching with node branches verifyParse("((243) < 300 and (\"FOO\").lowercase() == (\"foo\"))"); - // Strange doctype names + // Strange doctype names verifyParse("notandor and ornotand"); verifyParse("ornotand or andornot"); verifyParse("not andornot"); @@ -554,7 +585,7 @@ void DocumentSelectParserTest::testOperators0() std::cerr << ost.str() << "\n"; } // */ - // Check that comparison operators work. + // Check that comparison operators work. 
PARSE("", *_doc[0], True); PARSE("30 < 10", *_doc[0], False); PARSE("10 < 30", *_doc[0], True); @@ -593,7 +624,7 @@ void DocumentSelectParserTest::testOperators1() { createDocs(); - // Mix of types should within numbers, but otherwise not match + // Mix of types should within numbers, but otherwise not match PARSE("30 < 10.2", *_doc[0], False); PARSE("10.2 < 30", *_doc[0], True); PARSE("30 < \"foo\"", *_doc[0], Invalid); @@ -606,7 +637,7 @@ void DocumentSelectParserTest::testOperators1() PARSE("14.3 == null", *_doc[0], False); PARSE("null = 0", *_doc[0], False); - // Field values + // Field values PARSE("testdoctype1.headerval = 24", *_doc[0], True); PARSE("testdoctype1.headerval = 24", *_doc[1], False); PARSE("testdoctype1.headerval = 13", *_doc[0], False); @@ -625,11 +656,11 @@ void DocumentSelectParserTest::testOperators1() PARSE("testdoctype1.byteweightedset == 7", *_doc[1], False); PARSE("testdoctype1.byteweightedset == 5", *_doc[1], True); - // Document types + // Document types PARSE("testdoctype1", *_doc[0], True); PARSE("testdoctype2", *_doc[0], False); - // Inherited doctypes + // Inherited doctypes PARSE("testdoctype2", *_doc[4], True); PARSE("testdoctype2", *_doc[3], False); PARSE("testdoctype1", *_doc[4], True); @@ -640,7 +671,7 @@ void DocumentSelectParserTest::testOperators2() { createDocs(); - // Id values + // Id values PARSEI("id == \"doc:myspace:anything\"", *_doc[0], True); PARSEI(" iD== \"doc:myspace:anything\" ", *_doc[0], True); PARSEI("id == \"doc:myspa:nything\"", *_doc[0], False); @@ -702,16 +733,13 @@ void DocumentSelectParserTest::testOperators3() PARSEI("id.user = 1234", *_doc[8], True); PARSEI("id.group == \"1234\"", *_doc[8], True); PARSEI("id.group == \"mygroup\"", *_doc[9], True); - - // Searchcolumn policy - PARSE("searchcolumn.10 == 8", *_doc[0], True); } void DocumentSelectParserTest::testOperators4() { createDocs(); - // Branch operators + // Branch operators PARSEI("true and false", *_doc[0], False); PARSEI("true and true", 
*_doc[0], True); PARSEI("true or false", *_doc[0], True); @@ -723,7 +751,7 @@ void DocumentSelectParserTest::testOperators4() PARSEI("true and not false or false", *_doc[0], True); PARSEI("((243 < 300) and (\"FOO\".lowercase() == \"foo\"))", *_doc[0], True); - // Invalid branching. testdoctype1.content = 1 is invalid + // Invalid branching. testdoctype1.content = 1 is invalid PARSE("testdoctype1.content = 1 and true", *_doc[0], Invalid); PARSE("testdoctype1.content = 1 or true", *_doc[0], True); PARSE("testdoctype1.content = 1 and false", *_doc[0], False); @@ -738,7 +766,7 @@ void DocumentSelectParserTest::testOperators5() { createDocs(); - // Functions + // Functions PARSE("testdoctype1.hstringval.lowercase() == \"Yet\"", *_doc[3], False); PARSE("testdoctype1.hstringval.lowercase() == \"yet\"", *_doc[3], True); PARSE("testdoctype1.hfloatval.lowercase() == \"yet\"", *_doc[3], Invalid); @@ -754,7 +782,7 @@ void DocumentSelectParserTest::testOperators5() PARSE("now() < 1311862500", *_doc[10], False); PARSE("now() > 1611862500", *_doc[10], False); - // Arithmetics + // Arithmetics PARSEI("id.specific.hash() % 10 = 8", *_doc[0], True); PARSEI("id.specific.hash() % 10 = 2", *_doc[0], False); PARSEI("\"foo\" + \"bar\" = \"foobar\"", *_doc[0], True); @@ -767,14 +795,18 @@ void DocumentSelectParserTest::testOperators6() { createDocs(); - // CJK - // Assuming the characters " \ ? * is not used as part of CJK tokens + // CJK + // Assuming the characters " \ ? 
* is not used as part of CJK tokens PARSE("testdoctype1.content=\"\xE4\xB8\xBA\xE4\xBB\x80\"", *_doc[3], True); PARSE("testdoctype1.content=\"\xE4\xB7\xBA\xE4\xBB\x80\"", *_doc[3], False); - // Structs and arrays + // Structs and arrays PARSE("testdoctype1.mystruct", *_doc[0], False); PARSE("testdoctype1.mystruct", *_doc[1], True); + PARSE("(testdoctype1.mystruct)", *_doc[0], False); + PARSE("(testdoctype1.mystruct)", *_doc[1], True); + PARSE("(((testdoctype1.mystruct)))", *_doc[0], False); + PARSE("(((testdoctype1.mystruct)))", *_doc[1], True); PARSE("testdoctype1.mystruct", *_doc[2], False); PARSE("testdoctype1.mystruct == testdoctype1.mystruct", *_doc[0], True); PARSE("testdoctype1.mystruct == testdoctype1.mystruct", *_doc[1], True); @@ -812,6 +844,7 @@ void DocumentSelectParserTest::testOperators7() PARSE("testdoctype1.structarray", *_doc[1], True); PARSE("testdoctype1.structarray.key == 15", *_doc[1], True); PARSE("testdoctype1.structarray[1].key == 16", *_doc[1], True); + PARSE("testdoctype1.structarray[1].key", *_doc[1], True); // "key is set?" 
expr PARSE("testdoctype1.structarray[1].key = 16", *_doc[1], True); PARSE("testdoctype1.structarray.value == \"structval1\"", *_doc[0], False); PARSE("testdoctype1.structarray[4].value == \"structval1\"", *_doc[0], False); @@ -952,7 +985,6 @@ namespace { void visitArithmeticValueNode(const select::ArithmeticValueNode &) override {} void visitFunctionValueNode(const select::FunctionValueNode &) override {} void visitIdValueNode(const select::IdValueNode &) override {} - void visitSearchColumnValueNode(const select::SearchColumnValueNode &) override {} void visitFieldValueNode(const select::FieldValueNode &) override {} void visitFloatValueNode(const select::FloatValueNode &) override {} void visitVariableValueNode(const select::VariableValueNode &) override {} @@ -977,15 +1009,14 @@ void DocumentSelectParserTest::testVisitor() TestVisitor v; root->visit(v); + std::string expected = - "OR(CONSTANT(true), " - "AND(DOCTYPE(testdoctype1), " - "AND(OR(NOT(COMPARE(id.user = 12)), " - "COMPARE(testdoctype1.hstringval = \"ola\")), " - "COMPARE(testdoctype1.headerval != null)" - ")" - ")" - ")"; + "OR(CONSTANT(true), " + "AND(AND(DOCTYPE(testdoctype1), " + "OR(NOT(COMPARE(id.user = 12)), " + "COMPARE(testdoctype1.hstringval = \"ola\"))), " + "COMPARE(testdoctype1.headerval != null)))"; + CPPUNIT_ASSERT_EQUAL(expected, v.getVisitString()); } @@ -1093,13 +1124,15 @@ void DocumentSelectParserTest::testDocumentUpdates0() PARSEI("\"\" =~ \"\"", *_update[0], True); PARSEI("30 = 10", *_update[0], False); PARSEI("30 = 30", *_update[0], True); + PARSEI("(30 = 10)", *_update[0], False); + PARSEI("(30 = 30)", *_update[0], True); } void DocumentSelectParserTest::testDocumentUpdates1() { createDocs(); - // Mix of types should within numbers, but otherwise not match + // Mix of types should within numbers, but otherwise not match PARSEI("30 < 10.2", *_update[0], False); PARSEI("10.2 < 30", *_update[0], True); PARSEI("30 < \"foo\"", *_update[0], Invalid); @@ -1112,17 +1145,18 @@ void 
DocumentSelectParserTest::testDocumentUpdates1() PARSEI("14.3 == null", *_update[0], False); PARSEI("null = 0", *_update[0], False); - // Field values + // Field values PARSE("testdoctype1.headerval = 24", *_update[0], Invalid); PARSE("testdoctype1.hfloatval = 2.0", *_update[0], Invalid); PARSE("testdoctype1.content = \"bar\"", *_update[0], Invalid); PARSE("testdoctype1.hstringval == testdoctype1.content", *_update[0], Invalid); - // Document types + // Document types PARSE("testdoctype1", *_update[0], True); + PARSE("(testdoctype1)", *_update[0], True); PARSE("testdoctype2", *_update[0], False); - // Inherited doctypes + // Inherited doctypes PARSE("testdoctype2", *_update[4], True); PARSE("testdoctype2", *_update[3], False); PARSE("testdoctype1", *_update[4], True); @@ -1133,7 +1167,7 @@ void DocumentSelectParserTest::testDocumentUpdates2() { createDocs(); - // Id values + // Id values PARSEI("id == \"doc:myspace:anything\"", *_update[0], True); PARSEI(" iD== \"doc:myspace:anything\" ", *_update[0], True); PARSEI("id == \"doc:myspa:nything\"", *_update[0], False); @@ -1159,7 +1193,7 @@ void DocumentSelectParserTest::testDocumentUpdates3() { createDocs(); - // Branch operators + // Branch operators PARSEI("true and false", *_update[0], False); PARSEI("true and true", *_update[0], True); PARSEI("true or false", *_update[0], True); @@ -1171,7 +1205,7 @@ void DocumentSelectParserTest::testDocumentUpdates3() PARSEI("true and not false or false", *_update[0], True); PARSEI("((243 < 300) and (\"FOO\".lowercase() == \"foo\"))", *_update[0], True); - // Invalid branching. testdoctype1.content = 1 is invalid + // Invalid branching. 
testdoctype1.content = 1 is invalid PARSE("testdoctype1.content = 1 and true", *_update[0], Invalid); PARSE("testdoctype1.content = 1 or true", *_update[0], True); PARSE("testdoctype1.content = 1 and false", *_update[0], False); @@ -1186,7 +1220,7 @@ void DocumentSelectParserTest::testDocumentUpdates4() { createDocs(); - // Functions + // Functions PARSEI("\"bar\".hash() == -2012135647395072713", *_update[0], True); PARSEI("\"bar\".hash().abs() == 2012135647395072713", *_update[0], True); PARSEI("null.hash() == 123", *_update[0], Invalid); @@ -1195,7 +1229,7 @@ void DocumentSelectParserTest::testDocumentUpdates4() PARSEI("\"foo\".hash() == 123", *_update[0], False); PARSEI("(234).hash() == 123", *_update[0], False); - // Arithmetics + // Arithmetics PARSEI("id.specific.hash() % 10 = 8", *_update[0], True); PARSEI("id.specific.hash() % 10 = 2", *_update[0], False); PARSEI("\"foo\" + \"bar\" = \"foobar\"", *_update[0], True); @@ -1221,7 +1255,7 @@ void DocumentSelectParserTest::testUtf8() } std::unique_ptr<select::FieldValueNode> -DocumentSelectParserTest::parseFieldValue(const std::string expression) { +DocumentSelectParserTest::parseFieldValue(const std::string& expression) { return std::unique_ptr<select::FieldValueNode>(dynamic_cast<select::FieldValueNode *>( dynamic_cast<const select::Compare &>(*_parser->parse(expression)).getLeft().clone().release())); } @@ -1246,4 +1280,296 @@ void DocumentSelectParserTest::testThatComplexFieldValuesHaveCorrectFieldNames() parseFieldValue("testdoctype1.headerval.meow.meow{test}")->getRealFieldName()); } +namespace { + +class OperatorVisitor : public select::Visitor { +private: + std::ostringstream data; +public: + void visitConstant(const select::Constant& node) override { + data << node; + } + + void + visitInvalidConstant(const select::InvalidConstant& node) override { + (void) node; + assert(false); + } + + void visitDocumentType(const select::DocType& node) override { + data << "(DOCTYPE " << node << ")"; + } + + void 
visitComparison(const select::Compare& node) override { + data << '(' << node.getOperator() << ' '; + node.getLeft().visit(*this); + data << ' '; + node.getRight().visit(*this); + data << ')'; + } + + void visitAndBranch(const select::And& node) override { + data << "(AND "; + node.getLeft().visit(*this); + data << " "; + node.getRight().visit(*this); + data << ")"; + } + + void visitOrBranch(const select::Or& node) override { + data << "(OR "; + node.getLeft().visit(*this); + data << " "; + node.getRight().visit(*this); + data << ")"; + } + + void visitNotBranch(const select::Not& node) override { + data << "(NOT "; + node.getChild().visit(*this); + data << ")"; + } + + void visitArithmeticValueNode(const select::ArithmeticValueNode& node) override { + data << '(' << node.getOperatorName() << ' '; + node.getLeft().visit(*this); + data << ' '; + node.getRight().visit(*this); + data << ')'; + } + void visitFunctionValueNode(const select::FunctionValueNode& node) override { + data << '(' << node.getFunctionName() << ' '; + node.getChild().visit(*this); + data << ')'; + } + void visitIdValueNode(const select::IdValueNode& node) override { + data << "(ID " << node.toString() << ')'; + } + void visitFieldValueNode(const select::FieldValueNode& node) override { + data << "(FIELD " << node.getDocType() << ' ' << node.getFieldName() << ')'; + } + void visitFloatValueNode(const select::FloatValueNode& node) override { + data << node.getValue(); + } + void visitVariableValueNode(const select::VariableValueNode& node) override { + data << "(VAR " << node.getVariableName() << ')'; + } + void visitIntegerValueNode(const select::IntegerValueNode& node) override { + data << node.getValue(); + } + void visitCurrentTimeValueNode(const select::CurrentTimeValueNode&) override {} + void visitStringValueNode(const select::StringValueNode& str) override { + data << '"' << str.getValue() << '"'; + } + void visitNullValueNode(const select::NullValueNode&) override { + data << "null"; + } 
+ void visitInvalidValueNode(const select::InvalidValueNode&) override { + data << "INVALID!"; + } + + std::string visit_string() { return data.str(); } +}; + +template <typename NodeType> +std::string node_to_string(const NodeType& node) { + OperatorVisitor v; + node.visit(v); + return v.visit_string(); +} + +} + +std::string DocumentSelectParserTest::parse_to_tree(const std::string& str) { + std::unique_ptr<select::Node> root(_parser->parse(str)); + return node_to_string(*root); +} + +void DocumentSelectParserTest::test_operator_precedence() { + createDocs(); + using namespace std::string_literals; + + CPPUNIT_ASSERT_EQUAL("(AND true false)"s, parse_to_tree("true and false")); + CPPUNIT_ASSERT_EQUAL("(AND (NOT false) true)"s, parse_to_tree("not false and true")); + CPPUNIT_ASSERT_EQUAL("(NOT (AND false true))"s, parse_to_tree("not (false and true)")); + CPPUNIT_ASSERT_EQUAL("(NOT (DOCTYPE testdoctype1))"s, parse_to_tree("not testdoctype1")); + CPPUNIT_ASSERT_EQUAL("(NOT (DOCTYPE (testdoctype1)))"s, parse_to_tree("not (testdoctype1)")); + CPPUNIT_ASSERT_EQUAL("(NOT (DOCTYPE (testdoctype1)))"s, parse_to_tree("(not (testdoctype1))")); + CPPUNIT_ASSERT_EQUAL("(OR (== 1 2) (== 3 4))"s, parse_to_tree("1==2 or 3==4")); + CPPUNIT_ASSERT_EQUAL("(!= (+ (+ 1 2) 3) 0)"s, parse_to_tree("1+2+3 != 0")); + CPPUNIT_ASSERT_EQUAL("(!= (+ (+ 1.1 2.2) 3.3) 4.4)"s, parse_to_tree("1.1+2.2+3.3 != 4.4")); + CPPUNIT_ASSERT_EQUAL("(!= (- (- 1 2) 3) 0)"s, parse_to_tree("1-2-3 != 0")); + CPPUNIT_ASSERT_EQUAL("(!= (+ (+ 1 2) 3) 0)"s, parse_to_tree("1 + 2 + 3 != 0")); + CPPUNIT_ASSERT_EQUAL("(!= (+ 1 (* 2 3)) 0)"s, parse_to_tree("1 + 2 * 3 != 0")); + CPPUNIT_ASSERT_EQUAL("(!= (- (/ (* 1 2) 3) 4) 0)"s, parse_to_tree("1 * 2 / 3 - 4 != 0")); + CPPUNIT_ASSERT_EQUAL("(!= (/ (* 1 2) (- 3 4)) 0)"s, parse_to_tree("1 * 2 / (3 - 4) != 0")); + CPPUNIT_ASSERT_EQUAL("(OR (AND true (NOT (== 1 2))) false)"s, + parse_to_tree("true and not 1 == 2 or false")); + CPPUNIT_ASSERT_EQUAL("(AND (AND (AND (< 1 2) (> 3 
4)) (<= 5 6)) (>= 7 8))"s, + parse_to_tree("1 < 2 and 3 > 4 and 5 <= 6 and 7 >= 8")); + CPPUNIT_ASSERT_EQUAL("(OR (AND (AND (< 1 2) (> 3 4)) (<= 5 6)) (>= 7 8))"s, + parse_to_tree("1 < 2 and 3 > 4 and 5 <= 6 or 7 >= 8")); + CPPUNIT_ASSERT_EQUAL("(OR (AND (< 1 2) (> 3 4)) (AND (<= 5 6) (>= 7 8)))"s, + parse_to_tree("1 < 2 and 3 > 4 or 5 <= 6 and 7 >= 8")); + // Unary plus is simply ignored by the parser. + CPPUNIT_ASSERT_EQUAL("(== 1 -2)"s, parse_to_tree("+1==-2")); + CPPUNIT_ASSERT_EQUAL("(== 1.23 -2.56)"s, parse_to_tree("+1.23==-2.56")); + CPPUNIT_ASSERT_EQUAL("(== (+ 1 2) (- 3 -4))"s, parse_to_tree("1 + +2==3 - -4")); + CPPUNIT_ASSERT_EQUAL("(== (+ 1 2) (- 3 -4))"s, parse_to_tree("1++2==3--4")); + + // Due to the way parentheses are handled by the AST, ((foo)) always gets + // reduced down to (foo). + CPPUNIT_ASSERT_EQUAL("(DOCTYPE (testdoctype1))"s, parse_to_tree("(((testdoctype1)))")); + CPPUNIT_ASSERT_EQUAL("(AND (DOCTYPE (testdoctype1)) (DOCTYPE (testdoctype2)))"s, + parse_to_tree("((((testdoctype1))) and ((testdoctype2)))")); + + CPPUNIT_ASSERT_EQUAL("(== (ID id) \"foo\")"s, parse_to_tree("id == 'foo'")); + CPPUNIT_ASSERT_EQUAL("(== (ID id.group) \"foo\")"s, parse_to_tree("id.group == 'foo'")); + // id_spec function apply + CPPUNIT_ASSERT_EQUAL("(== (hash (ID id)) 12345)"s, parse_to_tree("id.hash() == 12345")); + // Combination of id_spec function apply and arith_expr function apply + CPPUNIT_ASSERT_EQUAL("(== (abs (hash (ID id))) 12345)"s, parse_to_tree("id.hash().abs() == 12345")); +} + +void DocumentSelectParserTest::test_token_used_as_ident_preserves_casing() { + createDocs(); + using namespace std::string_literals; + + // TYPE, SCHEME, ORDER etc are tokens that may also be used as identifiers + // without introducing parsing ambiguities. In this context their original + // casing should be preserved. 
+ CPPUNIT_ASSERT_EQUAL("(== (VAR Type) 123)"s, parse_to_tree("$Type == 123")); + CPPUNIT_ASSERT_EQUAL("(== (VAR giD) 123)"s, parse_to_tree("$giD == 123")); + CPPUNIT_ASSERT_EQUAL("(== (VAR ORDER) 123)"s, parse_to_tree("$ORDER == 123")); +} + +void DocumentSelectParserTest::test_ambiguous_field_spec_expression_is_handled_correctly() { + createDocs(); + using namespace std::string_literals; + // In earlier revisions of LR(1)-grammar, this triggered a reduce/reduce conflict between + // logical_expr and arith_expr for the sequence '(' field_spec ')', which failed to + // parse in an expected manner. Test that we don't get regressions here. + CPPUNIT_ASSERT_EQUAL("(!= (FIELD testdoctype1 foo) null)"s, parse_to_tree("(testdoctype1.foo)")); + CPPUNIT_ASSERT_EQUAL("(AND (!= (FIELD testdoctype1 foo) null) (!= (FIELD testdoctype1 bar) null))"s, + parse_to_tree("(testdoctype1.foo) AND (testdoctype1.bar)")); +} + +void DocumentSelectParserTest::test_can_build_field_value_from_field_expr_node() { + using select::FieldExprNode; + { + // Simple field expression + auto lhs = std::make_unique<FieldExprNode>("mydoctype"); + auto root = std::make_unique<FieldExprNode>(std::move(lhs), "foo"); + auto fv = root->convert_to_field_value(); + CPPUNIT_ASSERT_EQUAL(vespalib::string("mydoctype"), fv->getDocType()); + CPPUNIT_ASSERT_EQUAL(vespalib::string("foo"), fv->getFieldName()); + } + { + // Nested field expression + auto lhs1 = std::make_unique<FieldExprNode>("mydoctype"); + auto lhs2 = std::make_unique<FieldExprNode>(std::move(lhs1), "foo"); + auto root = std::make_unique<FieldExprNode>(std::move(lhs2), "bar"); + auto fv = root->convert_to_field_value(); + CPPUNIT_ASSERT_EQUAL(vespalib::string("mydoctype"), fv->getDocType()); + CPPUNIT_ASSERT_EQUAL(vespalib::string("foo.bar"), fv->getFieldName()); + } +} + +void DocumentSelectParserTest::test_can_build_function_call_from_field_expr_node() { + using select::FieldExprNode; + { + // doctype.foo.lowercase() + // Note that calling 
lowercase() directly on the doctype is not supported + // (see test_function_call_on_doctype_throws_exception) + auto lhs1 = std::make_unique<FieldExprNode>("mydoctype"); + auto lhs2 = std::make_unique<FieldExprNode>(std::move(lhs1), "foo"); + auto root = std::make_unique<FieldExprNode>(std::move(lhs2), "lowercase"); + auto func = root->convert_to_function_call(); + CPPUNIT_ASSERT_EQUAL(vespalib::string("lowercase"), func->getFunctionName()); + // TODO vespalib::string? + CPPUNIT_ASSERT_EQUAL(std::string("(FIELD mydoctype foo)"), node_to_string(func->getChild())); + } +} + +void DocumentSelectParserTest::test_function_call_on_doctype_throws_exception() { + using select::FieldExprNode; + auto lhs = std::make_unique<FieldExprNode>("mydoctype"); + auto root = std::make_unique<FieldExprNode>(std::move(lhs), "lowercase"); + try { + root->convert_to_function_call(); + // Only reached if no exception was thrown; fail explicitly so the test cannot pass vacuously. + CPPUNIT_FAIL("Expected IllegalArgumentException when calling a function directly on a document type"); + } catch (const vespalib::IllegalArgumentException& e) { + CPPUNIT_ASSERT_EQUAL(vespalib::string("Cannot call function 'lowercase' directly on document type"), + e.getMessage()); + } +} + +namespace { + +void check_parse_i64(vespalib::stringref str, bool expect_ok, int64_t expected_output) { + int64_t out = 0; + bool ok = select::util::parse_i64(str.data(), str.size(), out); + CPPUNIT_ASSERT_EQUAL_MESSAGE("Parsing did not returned expected success status for i64 input " + str, expect_ok, ok); + if (expect_ok) { + CPPUNIT_ASSERT_EQUAL_MESSAGE("Parse output not as expected for i64 input " + str, expected_output, out); + } +} + +void check_parse_hex_i64(vespalib::stringref str, bool expect_ok, int64_t expected_output) { + int64_t out = 0; + bool ok = select::util::parse_hex_i64(str.data(), str.size(), out); + CPPUNIT_ASSERT_EQUAL_MESSAGE("Parsing did not returned expected success status for hex i64 input " + str, expect_ok, ok); + if (expect_ok) { + CPPUNIT_ASSERT_EQUAL_MESSAGE("Parse output not as expected for hex i64 input " + str, expected_output, out); + } +} + +void check_parse_double(vespalib::stringref 
str, bool expect_ok, double expected_output) { + // Parses str with parse_double and checks the success flag and, when success is expected, the parsed value. + double out = 0; + bool ok = select::util::parse_double(str.data(), str.size(), out); + CPPUNIT_ASSERT_EQUAL_MESSAGE("Parsing did not returned expected success status for double input " + str, expect_ok, ok); + if (expect_ok) { + CPPUNIT_ASSERT_EQUAL_MESSAGE("Parse output not as expected for double input " + str, expected_output, out); + } +} + +} + +void DocumentSelectParserTest::test_parse_utilities_handle_well_formed_input() { + check_parse_i64("0", true, 0); + check_parse_i64("1", true, 1); + check_parse_i64("9223372036854775807", true, INT64_MAX); + + // Note: 0x prefix is _not_ included + check_parse_hex_i64("0", true, 0); + check_parse_hex_i64("1", true, 1); + check_parse_hex_i64("f", true, 15); + check_parse_hex_i64("F", true, 15); + check_parse_hex_i64("ffffffff", true, UINT32_MAX); + check_parse_hex_i64("7FFFFFFFFFFFFFFF", true, INT64_MAX); + // We actually parse as u64 internally, then convert + check_parse_hex_i64("ffffffffffffffff", true, -1); + + check_parse_double("1.0", true, 1.0); + check_parse_double("1.", true, 1.0); + check_parse_double("1.79769e+308", true, 1.79769e+308); // DBL_MAX +} + +void DocumentSelectParserTest::test_parse_utilities_handle_malformed_input() { + check_parse_i64("9223372036854775808", false, 0); // INT64_MAX + 1 + check_parse_i64("18446744073709551615", false, 0); // UINT64_MAX + check_parse_i64("", false, 0); + check_parse_i64("bjarne", false, 0); + check_parse_i64("1x", false, 0); + + check_parse_hex_i64("", false, 0); + check_parse_hex_i64("g", false, 0); + check_parse_hex_i64("0x1", false, 0); + check_parse_hex_i64("ffffffffffffffff1", false, 0); + + check_parse_double("1.x", false, 0.0); + // TODO double outside representable range returns Inf, but we probably would + // like this to trigger a parse failure? 
+ check_parse_double("1.79769e+309", true, std::numeric_limits<double>::infinity()); +} + } // document diff --git a/document/src/vespa/document/bucket/bucketselector.cpp b/document/src/vespa/document/bucket/bucketselector.cpp index 5ded691269a..ceb231a483c 100644 --- a/document/src/vespa/document/bucket/bucketselector.cpp +++ b/document/src/vespa/document/bucket/bucketselector.cpp @@ -137,27 +137,6 @@ using namespace document::select; } } - void compare(const select::SearchColumnValueNode& node, - const select::ValueNode& valnode, - const select::Operator& op) { - if (op == FunctionOperator::EQ || op == document::select::GlobOperator::GLOB) { - int bucketCount = 1 << 16; - const IntegerValueNode* val( - dynamic_cast<const IntegerValueNode*>(&valnode)); - - int64_t rval = val->getValue(); - - for (int i = 0; i < bucketCount; i++) { - int64_t column = node.getValue(BucketId(16, i)); - if (column == rval) { - _buckets.push_back(BucketId(16, i)); - } - } - - _unknown = false; - } - } - void visitComparison(const document::select::Compare& node) override { if (node.getOperator() != document::select::FunctionOperator::EQ && node.getOperator() != document::select::GlobOperator::GLOB) @@ -166,12 +145,8 @@ using namespace document::select; } const IdValueNode* lid(dynamic_cast<const IdValueNode*>( &node.getLeft())); - const SearchColumnValueNode* sc(dynamic_cast<const SearchColumnValueNode*>( - &node.getLeft())); if (lid) { compare(*lid, node.getRight(), node.getOperator()); - } else if (sc) { - compare(*sc, node.getRight(), node.getOperator()); } else { const IdValueNode* rid(dynamic_cast<const IdValueNode*>( &node.getRight())); @@ -187,7 +162,6 @@ using namespace document::select; void visitArithmeticValueNode(const ArithmeticValueNode &) override {} void visitFunctionValueNode(const FunctionValueNode &) override {} void visitIdValueNode(const IdValueNode &) override {} - void visitSearchColumnValueNode(const SearchColumnValueNode &) override {} void 
visitFieldValueNode(const FieldValueNode &) override {} void visitFloatValueNode(const FloatValueNode &) override {} void visitVariableValueNode(const VariableValueNode &) override {} diff --git a/document/src/vespa/document/select/.gitignore b/document/src/vespa/document/select/.gitignore index 5f004816692..919eb5c7ca9 100644 --- a/document/src/vespa/document/select/.gitignore +++ b/document/src/vespa/document/select/.gitignore @@ -2,3 +2,6 @@ Makefile .depend* .*.swp *.So +*.hxx +*.cxx +*.hh diff --git a/document/src/vespa/document/select/CMakeLists.txt b/document/src/vespa/document/select/CMakeLists.txt index 0e94fa0f530..6dadd35e98a 100644 --- a/document/src/vespa/document/select/CMakeLists.txt +++ b/document/src/vespa/document/select/CMakeLists.txt @@ -1,4 +1,14 @@ # Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +find_package(BISON REQUIRED) +find_package(FLEX REQUIRED) + +BISON_TARGET(DocSelParser grammar/parser.yy ${CMAKE_CURRENT_BINARY_DIR}/parser.cxx) +FLEX_TARGET(DocSelLexer grammar/lexer.ll ${CMAKE_CURRENT_BINARY_DIR}/lexer.cxx) + +ADD_FLEX_BISON_DEPENDENCY(DocSelLexer DocSelParser) +include_directories(${CMAKE_CURRENT_BINARY_DIR}) + vespa_add_library(document_select OBJECT SOURCES bodyfielddetector.cpp @@ -13,7 +23,6 @@ vespa_add_library(document_select OBJECT operator.cpp orderingselector.cpp orderingspecification.cpp - parser.cpp result.cpp resultset.cpp resultlist.cpp @@ -22,6 +31,14 @@ vespa_add_library(document_select OBJECT value.cpp valuenode.cpp valuenodes.cpp + parser.cpp + parse_utils.cpp + parsing_failed_exception.cpp + ${BISON_DocSelParser_OUTPUTS} + ${FLEX_DocSelLexer_OUTPUTS} AFTER document_documentconfig ) + +#TODO Remove once we have a recently new flex compiler. 
At least 2.5.38/39 or 2.6 +set_source_files_properties(${FLEX_DocSelLexer_OUTPUTS} PROPERTIES COMPILE_FLAGS -Wno-register) diff --git a/document/src/vespa/document/select/branch.cpp b/document/src/vespa/document/select/branch.cpp index 7f6ad252471..5b28fad0df8 100644 --- a/document/src/vespa/document/select/branch.cpp +++ b/document/src/vespa/document/select/branch.cpp @@ -9,7 +9,7 @@ namespace document { namespace select { And::And(std::unique_ptr<Node> left, std::unique_ptr<Node> right, const char* name) - : Branch(name ? name : "AND"), + : Branch(name ? name : "and"), _left(std::move(left)), _right(std::move(right)) { @@ -55,7 +55,7 @@ And::trace(const Context& context, std::ostream& out) const } Or::Or(std::unique_ptr<Node> left, std::unique_ptr<Node> right, const char* name) - : Branch(name ? name : "OR"), + : Branch(name ? name : "or"), _left(std::move(left)), _right(std::move(right)) { @@ -101,7 +101,7 @@ Or::trace(const Context& context, std::ostream& out) const } Not::Not(std::unique_ptr<Node> child, const char* name) - : Branch(name ? name : "NOT"), + : Branch(name ? name : "not"), _child(std::move(child)) { assert(_child.get()); diff --git a/document/src/vespa/document/select/cloningvisitor.cpp b/document/src/vespa/document/select/cloningvisitor.cpp index 4011cbdeea1..d695e3ec83d 100644 --- a/document/src/vespa/document/select/cloningvisitor.cpp +++ b/document/src/vespa/document/select/cloningvisitor.cpp @@ -162,7 +162,7 @@ CloningVisitor::visitConstant(const Constant &expr) _priority = ConstPriority; bool val = expr.getConstantValue(); _resultSet.add(val ? Result::True : Result::False); - _node.reset(new Constant(val ? 
"true" : "false")); + _node.reset(new Constant(val)); } @@ -199,16 +199,6 @@ CloningVisitor::visitIdValueNode(const IdValueNode &expr) void -CloningVisitor::visitSearchColumnValueNode(const SearchColumnValueNode &expr) -{ - _constVal = false; - ++_fieldNodes; // needs document id, thus needs document - _valueNode = expr.clone(); - _priority = SearchColPriority; -} - - -void CloningVisitor::visitFieldValueNode(const FieldValueNode &expr) { _constVal = false; diff --git a/document/src/vespa/document/select/cloningvisitor.h b/document/src/vespa/document/select/cloningvisitor.h index ff74af1201d..2c5f94c20a4 100644 --- a/document/src/vespa/document/select/cloningvisitor.h +++ b/document/src/vespa/document/select/cloningvisitor.h @@ -60,7 +60,6 @@ public: void visitInvalidConstant(const InvalidConstant &expr) override; void visitDocumentType(const DocType &expr) override; void visitIdValueNode(const IdValueNode &expr) override; - void visitSearchColumnValueNode(const SearchColumnValueNode &expr) override; void visitFieldValueNode(const FieldValueNode &expr) override; void visitFloatValueNode(const FloatValueNode &expr) override; void visitVariableValueNode(const VariableValueNode &expr) override; diff --git a/document/src/vespa/document/select/constant.cpp b/document/src/vespa/document/select/constant.cpp index 02821180337..8d1445f40a0 100644 --- a/document/src/vespa/document/select/constant.cpp +++ b/document/src/vespa/document/select/constant.cpp @@ -7,28 +7,10 @@ namespace document::select { -Constant::Constant(const vespalib::stringref & value) - : Node(value), - _value(false) +Constant::Constant(bool value) + : Node(value ? 
"true" : "false"), // TODO remove required name from Node + _value(value) { - if (value.size() == 4 && - (value[0] & 0xdf) == 'T' && - (value[1] & 0xdf) == 'R' && - (value[2] & 0xdf) == 'U' && - (value[3] & 0xdf) == 'E') - { - _value = true; - } else if (value.size() == 5 && - (value[0] & 0xdf) == 'F' && - (value[1] & 0xdf) == 'A' && - (value[2] & 0xdf) == 'L' && - (value[3] & 0xdf) == 'S' && - (value[4] & 0xdf) == 'E') - { - _value = false; - } else { - assert(false); - } } ResultList diff --git a/document/src/vespa/document/select/constant.h b/document/src/vespa/document/select/constant.h index 08be5c95ec7..46a98ed7eaa 100644 --- a/document/src/vespa/document/select/constant.h +++ b/document/src/vespa/document/select/constant.h @@ -5,9 +5,8 @@ * * @brief Class describing a constant in the select tree. * - * @author H�kon Humberset + * @author Håkon Humberset * @date 2005-06-07 - * @version $Id$ */ #pragma once @@ -23,7 +22,7 @@ private: bool _value; public: - explicit Constant(const vespalib::stringref & value); + explicit Constant(bool value); ResultList contains(const Context&) const override { return ResultList(Result::get(_value)); @@ -32,8 +31,8 @@ public: ResultList trace(const Context&, std::ostream& trace) const override; void print(std::ostream& out, bool verbose, const std::string& indent) const override; void visit(Visitor& v) const override; - bool getConstantValue() const { return _value; } - Node::UP clone() const override { return wrapParens(new Constant(_name)); } + bool getConstantValue() const noexcept { return _value; } + Node::UP clone() const override { return wrapParens(new Constant(_value)); } }; diff --git a/document/src/vespa/document/select/gid_filter.cpp b/document/src/vespa/document/select/gid_filter.cpp index ce3045564ba..71a57a0886a 100644 --- a/document/src/vespa/document/select/gid_filter.cpp +++ b/document/src/vespa/document/select/gid_filter.cpp @@ -22,7 +22,6 @@ struct NoOpVisitor : Visitor { void visitArithmeticValueNode(const 
ArithmeticValueNode&) override {} void visitFunctionValueNode(const FunctionValueNode&) override {} void visitIdValueNode(const IdValueNode&) override {} - void visitSearchColumnValueNode(const SearchColumnValueNode&) override {} void visitFieldValueNode(const FieldValueNode&) override {} void visitFloatValueNode(const FloatValueNode&) override {} void visitVariableValueNode(const VariableValueNode&) override {} diff --git a/document/src/vespa/document/select/grammar/lexer.ll b/document/src/vespa/document/select/grammar/lexer.ll new file mode 100644 index 00000000000..8cd5638c122 --- /dev/null +++ b/document/src/vespa/document/select/grammar/lexer.ll @@ -0,0 +1,182 @@ + /* Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. */ + + /* We use the .*xx-suffix to denote a build-time generated file */ +%option outfile="lexer.cxx" +%option header-file="lexer.hxx" + +%option c++ + /* Uncomment to enable debug tracing of parsing */ + /* %option debug */ +%option 8bit warn nodefault +%option noyywrap nounput +%option yyclass="document::select::DocSelScanner" + + /* Used to track source locations, see https://github.com/bingmann/flex-bison-cpp-example/blob/master/src/scanner.ll */ +%{ +#define YY_USER_ACTION yyloc->columns(yyleng); +%} + +%{ + +#include "parser.hxx" +#include <vespa/document/select/scanner.h> +#include <vespa/document/select/parse_utils.h> +#include <vespa/document/util/stringutil.h> +#include <vespa/vespalib/stllike/string.h> +#include <string> +#include <cstdlib> + +#undef YY_DECL +#define YY_DECL int document::select::DocSelScanner::yylex( \ + document::select::DocSelParser::semantic_type* yylval, \ + document::select::DocSelParser::location_type* yyloc) + +using token = document::select::DocSelParser::token; +using string = vespalib::string; + +// Inspired by https://coldfix.eu/2015/05/16/bison-c++11/ + +#define YIELD_TOKEN(name, field_name, value) \ + yylval->field_name = value; \ + return 
token::T_##name; + +#define INT_TOKEN(name, value) YIELD_TOKEN(name, i64_val, value) +#define STRING_TOKEN(name) YIELD_TOKEN(name, string_val, new string(yytext, yyleng)) +#define CONST_STR_TOKEN(name, value) YIELD_TOKEN(name, const_str_val, value) +#define TAGGED_TOKEN INT_TOKEN + +#define NAMED_TOKEN(name) return token::T_##name; + +%} + + /* Lexer fragments, used as part of token patterns */ + +SIGN [+-] +DECIMAL [0-9]+ +HEXDIGIT [0-9a-fA-F] +HEX 0[xX]{HEXDIGIT}{1,16} +OCTAL 0[0-7]* +EXPONENT [eE][+-]?[0-9]+ +IDCHARS [a-zA-Z_][a-zA-Z_0-9_]* +WS [ \f\r\t] + + /* It is weird that you can't do \' inside "" and vice versa, but that's the StringUtil::unescape logic today... */ +DQ_STRING \"(\\([\\tnfr"]|x{HEXDIGIT}{2})|[^"\\])*\" +SQ_STRING \'(\\([\\tnfr']|x{HEXDIGIT}{2})|[^'\\])*\' + +%% + + /* Code to take place at the beginning of yylex() */ +%{ + // TODO move to YY_USER_ACTION instead? + yyloc->step(); +%} + + /* TODO support length suffixes? supported in JavaCC grammar, but not in legacy Spirit grammar... */ +{HEX} { + // TODO replace with std::from_chars() once compiler support is there (NOTE(review): original said std::from_string(), which does not exist - confirm intent) + if (!util::parse_hex_i64(yytext + 2, yyleng - 2, yylval->i64_val)) { // Skip 0[xX] prefix + throw_parser_syntax_error(*yyloc, "Not a valid 64-bit hex integer: " + std::string(yytext, yyleng)); + } + return token::T_INTEGER; +} + + /* Sign is handled explicitly in the parser to avoid lexing ambiguities for expressions such as "1 -2" */ +{DECIMAL} { + if (!util::parse_i64(yytext, yyleng, yylval->i64_val)) { + throw_parser_syntax_error(*yyloc, "Not a valid signed 64-bit integer: " + std::string(yytext, yyleng)); + } + return token::T_INTEGER; +} + + /* + * We use a strict definition of floats when lexing, i.e. we require a dot + * in order to remove ambiguities with the base 10 integer token. + */ +[0-9]+(\.[0-9]*){EXPONENT}?[fFdD]? 
{ + if (!util::parse_double(yytext, yyleng, yylval->double_val)) { + throw_parser_syntax_error(*yyloc, "Not a valid floating point number: " + std::string(yytext, yyleng)); + } + return token::T_FLOAT; +} + +({DQ_STRING}|{SQ_STRING}) { + // Always slice off start and end quote chars + yylval->string_val = new string(yytext + 1, yyleng - 2); + return token::T_STRING; +} + + /* FIXME this is a syntactic hack to "flatten" fieldpath map and array lookups into a single token + rather than match these structurally in the parser itself. This is due to the way fieldpaths + are handled in the legacy AST (i.e. as strings, not structures), and this must be changed first + before we can fix this. */ + /* Field path expressions do not support any other escapes than double quote char */ + /* TODO {WS} does not include newline, do we need to support that here? */ +\{{WS}*($?{IDCHARS}|{DECIMAL}|\"([^\\\"]|\\\")*\"){WS}*\} STRING_TOKEN(FP_MAP_LOOKUP) +\[{WS}*(${IDCHARS}|{DECIMAL}){WS}*\] STRING_TOKEN(FP_ARRAY_LOOKUP) + + /* Primary tokens are case insensitive */ +(?i:"id") NAMED_TOKEN(ID) +(?i:"null") NAMED_TOKEN(NULL) +(?i:"true") NAMED_TOKEN(TRUE) +(?i:"false") NAMED_TOKEN(FALSE) +(?i:"and") NAMED_TOKEN(AND) +(?i:"or") NAMED_TOKEN(OR) +(?i:"not") NAMED_TOKEN(NOT) + + /* We expose the verbatim input as the token value, as these may also be used for identifiers... */ +(?i:"user") STRING_TOKEN(USER) +(?i:"group") STRING_TOKEN(GROUP) +(?i:"scheme") STRING_TOKEN(SCHEME) +(?i:"namespace") STRING_TOKEN(NAMESPACE) +(?i:"specific") STRING_TOKEN(SPECIFIC) +(?i:"bucket") STRING_TOKEN(BUCKET) +(?i:"gid") STRING_TOKEN(GID) +(?i:"type") STRING_TOKEN(TYPE) +(?i:"order") STRING_TOKEN(ORDER) + +"now\(\)" NAMED_TOKEN(NOW_FUNC) /* This _is_ case-sensitive in the legacy parser */ + + /* Binary operators */ + /* TODO INT_TOKEN with code directly from selection operator node? Or direct operator object ptr? 
*/ +"=" NAMED_TOKEN(GLOB) +"=~" NAMED_TOKEN(REGEX) +"==" NAMED_TOKEN(EQ) +"!=" NAMED_TOKEN(NE) +">=" NAMED_TOKEN(GE) +"<=" NAMED_TOKEN(LE) +">" NAMED_TOKEN(GT) +"<" NAMED_TOKEN(LT) + +"$" NAMED_TOKEN(DOLLAR) +"." NAMED_TOKEN(DOT) +"(" NAMED_TOKEN(LPAREN) +")" NAMED_TOKEN(RPAREN) +"," NAMED_TOKEN(COMMA) +"+" NAMED_TOKEN(PLUS) +"-" NAMED_TOKEN(MINUS) +"*" NAMED_TOKEN(MULTIPLY) +"/" NAMED_TOKEN(DIVIDE) +"%" NAMED_TOKEN(MODULO) + +{IDCHARS} STRING_TOKEN(IDENTIFIER) + +\n { + yyloc->lines(yyleng); + yyloc->step(); + return yytext[0]; +} + +{WS} { + yyloc->step(); +} + + /* + * Everything that hasn't already matched is an error. Throw exception immediately with the exact + * char to avoid getting auto-generated error messages with "unexpected $undefined" due to the + * resulting token not matching any existing, explicitly named tokens. + */ +. { throw_parser_syntax_error(*yyloc, "Unexpected character: '" + StringUtil::escape(vespalib::string(yytext, 1)) + "'"); } + +%% + diff --git a/document/src/vespa/document/select/grammar/parser.yy b/document/src/vespa/document/select/grammar/parser.yy new file mode 100644 index 00000000000..baf987355c9 --- /dev/null +++ b/document/src/vespa/document/select/grammar/parser.yy @@ -0,0 +1,374 @@ + /* Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. */ + +%output "parser.cxx" +%defines "parser.hxx" + + /* Skeleton implementation included as part of the generated source. Note: _not_ covered by the GPL. */ +%skeleton "lalr1.cc" + +%require "3.0" + + /* Uncomment to enable debugging of lexer invocations */ + /*%debug*/ + +%locations + +%define parse.error verbose +%define parse.assert + +%define api.token.prefix {T_} +%define api.namespace {document::select} +%define parser_class_name {DocSelParser} + + /* + * Due to current Bison variant support not being move-enabled (and our AST ptrs being move-only), + * we have to use good old POD unions for our rule results. 
Note that we have to use %destructor + * for all ptrs to ensure cleanup. + */ +%union { + int64_t i64_val; + double double_val; + const char* const_str_val; + vespalib::string* string_val; + Constant* constant_node; + ValueNode* value_node; + FieldExprNode* field_expr_node; + Node* abstract_node; +} + +%token END 0 "end of input" +%token NULL +%token TRUE +%token FALSE +%token AND +%token OR +%token NOT + + /* Specify aliases for several tokens for ease of use and better error reporting */ +%token GLOB "=" +%token REGEX "=~" +%token EQ "==" +%token NE "!=" +%token GE ">=" +%token LE "<=" +%token GT ">" +%token LT "<" +%token ID +%token NOW_FUNC + + /* + * Tokens that we only mention by alias in the grammar rules, but which we define + * explicitly to improve error reporting + */ +%token DOLLAR "$" +%token DOT "." +%token LPAREN "(" +%token RPAREN ")" +%token COMMA "," +%token PLUS "+" +%token MINUS "-" +%token MULTIPLY "*" +%token DIVIDE "/" +%token MODULO "%" + +%token <string_val> IDENTIFIER +%token <string_val> STRING +%token <string_val> FP_MAP_LOOKUP FP_ARRAY_LOOKUP +%token <double_val> FLOAT +%token <i64_val> INTEGER +%token <string_val> USER GROUP SCHEME NAMESPACE SPECIFIC BUCKET GID TYPE ORDER + +%type <string_val> ident mangled_ident +%type <abstract_node> bool_ + /* TODO 'leaf' is a bad name for something that isn't a leaf... 
*/ +%type <abstract_node> expression comparison logical_expr leaf doc_type +%type <string_val> id_arg +%type <value_node> number null_ value string arith_expr id_spec variable +%type <field_expr_node> field_spec + +%destructor { delete $$; } IDENTIFIER STRING FP_MAP_LOOKUP FP_ARRAY_LOOKUP +%destructor { delete $$; } USER GROUP SCHEME NAMESPACE SPECIFIC BUCKET GID TYPE ORDER +%destructor { delete $$; } null_ bool_ number string doc_type ident id_arg id_spec +%destructor { delete $$; } variable mangled_ident field_spec value arith_expr +%destructor { delete $$; } comparison leaf logical_expr expression + +%start entry + +%parse-param {DocSelScanner& scanner} +%parse-param {const BucketIdFactory& bucket_id_factory} +%parse-param {const DocumentTypeRepo& doc_type_repo} +%parse-param {std::unique_ptr<Node>& recv_expr} + + /* Generated parser header file verbatim */ +%code requires { + +#include "location.hh" +#include <vespa/document/select/constant.h> +#include <vespa/document/select/branch.h> +#include <vespa/document/select/compare.h> +#include <vespa/document/select/valuenodes.h> +#include <vespa/vespalib/stllike/string.h> +#include <memory> + +namespace document { +class BucketIdFactory; +class DocumentTypeRepo; +} + +namespace document::select { +class DocSelScanner; +class Node; +class Constant; +class ValueNode; +} + +} + +%code { + +// Bison has some chunky destructors that trigger inlining warnings. Disable warning +// for this translation unit, since we can't really do much about the code it generates. 
+#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Winline" + +#include <vespa/document/bucket/bucketidfactory.h> +#include <vespa/document/repo/documenttyperepo.h> +#include <vespa/document/select/scanner.h> +#include <vespa/document/select/constant.h> +#include <vespa/document/select/branch.h> +#include <vespa/document/select/compare.h> +#include <vespa/document/select/doctype.h> +#include <vespa/document/select/valuenodes.h> +#include <vespa/document/util/stringutil.h> +#include <vespa/vespalib/util/exceptions.h> +#include <string> +#include <iostream> +#include <sstream> +#include <memory> + +using string = vespalib::string; + +// Wrap grabbing pointers from sub-rules in a way that nulls out the +// stored attribute from the Bison stack. Otherwise, exception cleanup +// in the parser code will attempt to double-delete the pointee. +// Yes, it's not beautiful, but that's life when you're dealing with raw pointers. +template <typename T> +std::unique_ptr<T> steal(T*& ptr) noexcept { + std::unique_ptr<T> owned(ptr); + ptr = nullptr; + return owned; +} + +// yylex tokenization must defer to scanner instance given to parser +#undef yylex +#define yylex scanner.yylex + +} + +%code provides { + +// This cute little indirection is to get around the syntax_error constructor +// being defined as inline and therefore not being available outside the +// auto-generated parser source file. +[[noreturn]] void throw_parser_syntax_error(const document::select::DocSelParser::location_type& loc, + const std::string& msg); + +} + + +%left OR +%left AND +%left EQ NE LT GT LE GE GLOB REGEX +%left PLUS MINUS +%left MULTIPLY DIVIDE +%left MODULO /* Matches legacy parser recursive descent precedence */ +%precedence NEG +%right UNOT +%left NON_DOT +%precedence DOT /* Used to give higher precedence to id.foo vs id expressions. 
Re: "dangling else" problem */ + +%% + +null_ + : NULL { $$ = new NullValueNode(); } + ; + +bool_ + : TRUE { $$ = new Constant(true); } + | FALSE { $$ = new Constant(false); } + ; + +number + : INTEGER { $$ = new IntegerValueNode($1, false); } + | FLOAT { $$ = new FloatValueNode($1); } + ; + +string + : STRING { { + try { + $$ = new StringValueNode(StringUtil::unescape(*steal<string>($1))); + } catch (const vespalib::IllegalArgumentException& exc) { + throw syntax_error(@$, exc.getMessage()); + } + } } + ; + +doc_type + : ident { + if (doc_type_repo.getDocumentType(*$1) == nullptr) { + throw syntax_error(@$, vespalib::make_string("Document type '%s' not found", $1->c_str())); + } + $$ = new DocType(*steal<string>($1)); + } + ; + +ident + : IDENTIFIER { $$ = $1; } + | SCHEME { $$ = $1; } + | TYPE { $$ = $1; } + | NAMESPACE { $$ = $1; } + | SPECIFIC { $$ = $1; } + | BUCKET { $$ = $1; } + | GID { $$ = $1; } + | ORDER { $$ = $1; } + ; + +id_arg + : USER { $$ = $1; } + | GROUP { $$ = $1; } + | SCHEME { $$ = $1; } + | NAMESPACE { $$ = $1; } + | SPECIFIC { $$ = $1; } + | BUCKET { $$ = $1; } + | GID { $$ = $1; } + | TYPE { $$ = $1; } + ; + +id_spec + : ID %prec NON_DOT { $$ = new IdValueNode(bucket_id_factory, "id", ""); } /* Prefer shifting instead of reducing */ + | ID "." id_arg { $$ = new IdValueNode(bucket_id_factory, "id", *steal<string>($3)); } + | ID "." IDENTIFIER "(" ")" { $$ = new FunctionValueNode(*steal<string>($3), std::make_unique<IdValueNode>(bucket_id_factory, "id", "")); } + | ID "." ORDER "(" INTEGER "," INTEGER ")" { $$ = new IdValueNode(bucket_id_factory, "id", *steal<string>($3), $5, $7); } + ; + +variable + : "$" ident { $$ = new VariableValueNode(*steal<string>($2)); } + ; + + /* FIXME this is a horrible leftover of post-parsed fieldpath processing */ + /* At least we verify structural integrity at initial parse-time now... */ + /* Post-parsing should be replaced with an actual parse-time built AST! 
*/ +mangled_ident + : ident { $$ = $1; } + | mangled_ident FP_MAP_LOOKUP { $1->append(*steal<string>($2)); $$ = $1; } + | mangled_ident FP_ARRAY_LOOKUP { $1->append(*steal<string>($2)); $$ = $1; } + ; + +field_spec + : ident "." mangled_ident { + if (doc_type_repo.getDocumentType(*$1) == nullptr) { + throw syntax_error(@$, vespalib::make_string("Document type '%s' not found", $1->c_str())); + } + $$ = new FieldExprNode(std::make_unique<FieldExprNode>(*steal<string>($1)), *steal<string>($3)); + } + | field_spec "." mangled_ident { $$ = new FieldExprNode(steal<FieldExprNode>($1), *steal<string>($3)); } + ; + +value + : null_ { $$ = $1; } + | string { $$ = $1; } + | id_spec { $$ = $1; } + | variable { $$ = $1; } + | NOW_FUNC { $$ = new CurrentTimeValueNode(); } + ; + +arith_expr + : value { $$ = $1; } + | number { $$ = $1; } + /* JavaCC and legacy parsers don't support unary plus/minus for _expressions_, just for numbers. So we have to fudge this a bit. */ + | "-" number %prec NEG { + if (dynamic_cast<IntegerValueNode*>($2) != nullptr) { + $$ = new IntegerValueNode(- static_cast<IntegerValueNode&>(*steal<ValueNode>($2)).getValue(), false); + } else { + $$ = new FloatValueNode(- dynamic_cast<FloatValueNode&>(*steal<ValueNode>($2)).getValue()); + } + } + | "+" number %prec NEG { $$ = $2; } + | field_spec { $$ = steal<FieldExprNode>($1)->convert_to_field_value().release(); } + | field_spec "(" ")" { $$ = steal<FieldExprNode>($1)->convert_to_function_call().release(); } + | arith_expr "+" arith_expr { $$ = new ArithmeticValueNode(steal<ValueNode>($1), "+", steal<ValueNode>($3)); } + | arith_expr "-" arith_expr { $$ = new ArithmeticValueNode(steal<ValueNode>($1), "-", steal<ValueNode>($3)); } + | arith_expr "*" arith_expr { $$ = new ArithmeticValueNode(steal<ValueNode>($1), "*", steal<ValueNode>($3)); } + | arith_expr "/" arith_expr { $$ = new ArithmeticValueNode(steal<ValueNode>($1), "/", steal<ValueNode>($3)); } + | arith_expr "%" arith_expr { $$ = new 
ArithmeticValueNode(steal<ValueNode>($1), "%", steal<ValueNode>($3)); } + | "(" arith_expr ")" { $$ = $2; $$->setParentheses(); } + | arith_expr "." IDENTIFIER "(" ")" { $$ = new FunctionValueNode(*steal<string>($3), steal<ValueNode>($1)); } /* FIXME shift/reduce conflict */ + ; + +comparison + : arith_expr EQ arith_expr { $$ = new Compare(steal<ValueNode>($1), FunctionOperator::EQ, steal<ValueNode>($3), bucket_id_factory); } + | arith_expr NE arith_expr { $$ = new Compare(steal<ValueNode>($1), FunctionOperator::NE, steal<ValueNode>($3), bucket_id_factory); } + | arith_expr GE arith_expr { $$ = new Compare(steal<ValueNode>($1), FunctionOperator::GEQ, steal<ValueNode>($3), bucket_id_factory); } + | arith_expr LE arith_expr { $$ = new Compare(steal<ValueNode>($1), FunctionOperator::LEQ, steal<ValueNode>($3), bucket_id_factory); } + | arith_expr GT arith_expr { $$ = new Compare(steal<ValueNode>($1), FunctionOperator::GT, steal<ValueNode>($3), bucket_id_factory); } + | arith_expr LT arith_expr { $$ = new Compare(steal<ValueNode>($1), FunctionOperator::LT, steal<ValueNode>($3), bucket_id_factory); } + | arith_expr GLOB arith_expr { $$ = new Compare(steal<ValueNode>($1), GlobOperator::GLOB, steal<ValueNode>($3), bucket_id_factory); } + | arith_expr REGEX arith_expr { $$ = new Compare(steal<ValueNode>($1), RegexOperator::REGEX, steal<ValueNode>($3), bucket_id_factory); } + ; + +leaf + : bool_ { $$ = $1; } + | comparison { $$ = $1; } + | doc_type { $$ = $1; } + | arith_expr { /* Actually field_spec, see comment below..! */ + // Grammar-wise, we _do not_ accept arbitrary arith_exprs at this level. But the + // selection grammar as it stands is otherwise ambiguous with LR(1) parsing. + // More specifically, if we used field_spec instead of arith_expr here, the parser + // state machine cannot decide what to do if it has processed the sequence '(' field_spec + // and sees the next token of ')'. 
Since both logical_expr and arith_expr allow for + // parenthesized expression recursion, the reduce step may produce either of these and + // is therefore technically undefined. By using arith_expr instead for this rule, all + // '(' field_spec ')' sequences result in an arith_expr rule match and the reduce/reduce + // conflict goes away. We can then do a sneaky "run-time" type check to ensure we only + // get the expected node from the rule. + // It's not pretty, but it avoids an undefined grammar (which is much less pretty!). + auto node = steal<ValueNode>($1); + if (dynamic_cast<FieldValueNode*>(node.get()) == nullptr) { + throw syntax_error(@$, "expected field spec, doctype, bool or comparison"); + } + // Implicit rewrite to non-null comparison node + $$ = new Compare(std::move(node), + FunctionOperator::NE, + std::make_unique<NullValueNode>(), + bucket_id_factory); + } + ; + +logical_expr + : leaf { $$ = $1; } + | logical_expr AND logical_expr { $$ = new And(steal<Node>($1), steal<Node>($3)); } + | logical_expr OR logical_expr { $$ = new Or(steal<Node>($1), steal<Node>($3)); } + | NOT logical_expr %prec UNOT { $$ = new Not(steal<Node>($2)); } + | "(" logical_expr ")" { $$ = $2; $$->setParentheses(); } + ; + +expression + : logical_expr { $$ = $1; } + ; + +entry + : expression END { recv_expr = steal<Node>($1); } + | END { recv_expr = std::make_unique<Constant>(true); } + ; + +%% + +void document::select::DocSelParser::error(const location_type& l, const std::string& what) { + throw syntax_error(l, what); +} + +void throw_parser_syntax_error(const document::select::DocSelParser::location_type& loc, const std::string& msg) { + throw document::select::DocSelParser::syntax_error(loc, msg); +} + +#pragma GCC diagnostic pop diff --git a/document/src/vespa/document/select/node.h b/document/src/vespa/document/select/node.h index eab46e824c8..83e2ea3542d 100644 --- a/document/src/vespa/document/select/node.h +++ b/document/src/vespa/document/select/node.h @@ -5,9 +5,8 
@@ * * @brief Base class for all nodes in the document selection tree. * - * @author H�kon Humberset + * @author Håkon Humberset * @date 2005-06-07 - * @version $Id$ */ #pragma once diff --git a/document/src/vespa/document/select/orderingselector.cpp b/document/src/vespa/document/select/orderingselector.cpp index bf8e96c0533..836647aab26 100644 --- a/document/src/vespa/document/select/orderingselector.cpp +++ b/document/src/vespa/document/select/orderingselector.cpp @@ -137,7 +137,6 @@ namespace { void visitArithmeticValueNode(const ArithmeticValueNode &) override {} void visitFunctionValueNode(const FunctionValueNode &) override {} void visitIdValueNode(const IdValueNode &) override {} - void visitSearchColumnValueNode(const SearchColumnValueNode &) override {} void visitFieldValueNode(const FieldValueNode &) override {} void visitFloatValueNode(const FloatValueNode &) override {} void visitVariableValueNode(const VariableValueNode &) override {} diff --git a/document/src/vespa/document/select/parse_utils.cpp b/document/src/vespa/document/select/parse_utils.cpp new file mode 100644 index 00000000000..ab4ce2f6d4a --- /dev/null +++ b/document/src/vespa/document/select/parse_utils.cpp @@ -0,0 +1,37 @@ +// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include "parse_utils.h" +#include <boost/spirit/include/qi.hpp> + +namespace document::select::util { + +namespace qi = boost::spirit::qi; + +// TODO replace use of Spirit.Qi with std::from_chars when available. +// Note: these parsers are all pure, reentrant and without locking. +bool parse_hex_i64(const char* str, size_t len, int64_t& out) { + const char* iter = str; + const char* end = str + len; + // Legacy parser parses hex numbers as u64 rather than i64 (then implicitly + // converts), so we do the same thing here to avoid change of semantics. 
+ using u64_hex_parser = qi::uint_parser<uint64_t, 16, 1, 16>; + u64_hex_parser u64_hex; + uint64_t tmp = 0; + const bool ok = qi::parse(iter, end, u64_hex, tmp); + out = static_cast<int64_t>(tmp); + return (ok && (iter == end)); +} +bool parse_i64(const char* str, size_t len, int64_t& out) { + const char* iter = str; + const char* end = str + len; + const bool ok = qi::parse(iter, end, qi::long_long, out); + return (ok && (iter == end)); +} +bool parse_double(const char* str, size_t len, double& out) { + const char* iter = str; + const char* end = str + len; + const bool ok = qi::parse(iter, end, qi::double_, out); + return (ok && (iter == end)); +} + +} diff --git a/document/src/vespa/document/select/parse_utils.h b/document/src/vespa/document/select/parse_utils.h new file mode 100644 index 00000000000..38c36dfe94a --- /dev/null +++ b/document/src/vespa/document/select/parse_utils.h @@ -0,0 +1,17 @@ +// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#pragma once + +#include <stdint.h> +#include <stddef.h> + +namespace document::select::util { + +// Fast, locale-independent numeric parse helpers for Flex lexing. + +// For all parse_* functions, returns true if parsing is successful. False otherwise. +// Value of `out` is undefined if return value is false. +bool parse_hex_i64(const char* str, size_t len, int64_t& out); +bool parse_i64(const char* str, size_t len, int64_t& out); +bool parse_double(const char* str, size_t len, double& out); + +}
\ No newline at end of file diff --git a/document/src/vespa/document/select/parser.cpp b/document/src/vespa/document/select/parser.cpp index ceaf0b0c438..9f015409011 100644 --- a/document/src/vespa/document/select/parser.cpp +++ b/document/src/vespa/document/select/parser.cpp @@ -1,1493 +1,33 @@ // Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. - #include "parser.h" -#include "branch.h" -#include "compare.h" -#include "constant.h" -#include "operator.h" -#include "doctype.h" -#include "valuenode.h" -#include "simpleparser.h" - -#include <vespa/document/repo/documenttyperepo.h> +#include "scanner.h" #include <vespa/document/base/exceptions.h> #include <vespa/document/util/stringutil.h> -#include <vespa/vespalib/stllike/asciistream.h> -#include <vespa/vespalib/locale/c.h> -#include <boost/spirit/include/classic_chset.hpp> -#include <boost/spirit/include/classic_core.hpp> -#include <boost/spirit/include/classic_escape_char.hpp> -#include <boost/spirit/include/classic_grammar_def.hpp> -#include <boost/spirit/include/classic_parse_tree.hpp> -#include <boost/spirit/include/classic_tree_to_xml.hpp> -#include <iostream> -#include <map> +#include <vespa/vespalib/util/stringfmt.h> #include <sstream> -using boost::spirit::classic::tree_node; -using document::DocumentTypeRepo; -using std::unique_ptr; -using std::cerr; -using std::endl; -using std::istringstream; -using std::ostringstream; -using vespalib::IllegalStateException; - -/* - * This cannot be part of a plugin. boost contains constructs causing - * compiler to generate calls to atexit(). 
- */ - -#define parse_assert(a) - -namespace document { -namespace select { - -VESPA_IMPLEMENT_EXCEPTION(ParsingFailedException, vespalib::Exception); - -Parser::Parser(const DocumentTypeRepo& repo, - const BucketIdFactory& bucketIdFactory) - : _repo(repo), - _bucketIdFactory(bucketIdFactory) -{ -} - -namespace { - -/** - * Defines the grammar for the document selection text format. - */ -struct DocSelectionGrammar - : public boost::spirit::classic::grammar<DocSelectionGrammar> -{ - /** Node identifiers (value 0 should not be used) */ - enum ids { id_nil=1, id_bool, id_number, id_string, - id_doctype, id_fieldname, id_function, id_idarg, id_searchcolumnarg, - id_operator, id_idspec, id_searchcolumnspec, id_fieldspec, id_value, - id_valuefuncadd, id_valuefuncmul, id_valuefuncmod, - id_valuegroup, id_arithmvalue, - id_comparison, id_leaf, id_not, id_and, - id_or, id_group, id_order, id_expression, id_variable }; - - const DocumentTypeRepo &_repo; - const BucketIdFactory& _bucketIdFactory; - - DocSelectionGrammar(const DocumentTypeRepo& repo, - const BucketIdFactory& bucketIdFactory) - : _repo(repo), - _bucketIdFactory(bucketIdFactory) {} - - const BucketIdFactory& getBucketIdFactory() const - { return _bucketIdFactory; } - - /** Grammar base types. To be able to retrieve different grammars. 
*/ - template <typename Scanner> - struct gram_base { - typedef typename boost::spirit::classic::rule<Scanner, - boost::spirit::classic::parser_tag<id_nil> > rule_nil; - typedef typename boost::spirit::classic::rule<Scanner, - boost::spirit::classic::parser_tag<id_bool> > rule_bool; - typedef typename boost::spirit::classic::rule<Scanner, - boost::spirit::classic::parser_tag<id_number> > rule_number; - typedef typename boost::spirit::classic::rule<Scanner, - boost::spirit::classic::parser_tag<id_string> > rule_string; - typedef typename boost::spirit::classic::rule<Scanner, - boost::spirit::classic::parser_tag<id_doctype> > rule_doctype; - typedef typename boost::spirit::classic::rule<Scanner, - boost::spirit::classic::parser_tag<id_fieldname> > rule_fieldname; - typedef typename boost::spirit::classic::rule<Scanner, - boost::spirit::classic::parser_tag<id_function> > rule_function; - typedef typename boost::spirit::classic::rule<Scanner, - boost::spirit::classic::parser_tag<id_idarg> > rule_idarg; - typedef typename boost::spirit::classic::rule<Scanner, - boost::spirit::classic::parser_tag<id_searchcolumnarg> > rule_searchcolumnarg; - typedef typename boost::spirit::classic::rule<Scanner, - boost::spirit::classic::parser_tag<id_operator> > rule_operator; - typedef typename boost::spirit::classic::rule<Scanner, - boost::spirit::classic::parser_tag<id_idspec> > rule_idspec; - typedef typename boost::spirit::classic::rule<Scanner, - boost::spirit::classic::parser_tag<id_searchcolumnspec> > rule_searchcolumnspec; - typedef typename boost::spirit::classic::rule<Scanner, - boost::spirit::classic::parser_tag<id_fieldspec> > rule_fieldspec; - typedef typename boost::spirit::classic::rule<Scanner, - boost::spirit::classic::parser_tag<id_value> > rule_value; - typedef typename boost::spirit::classic::rule<Scanner, - boost::spirit::classic::parser_tag<id_valuefuncadd> > rule_valuefuncadd; - typedef typename boost::spirit::classic::rule<Scanner, - 
boost::spirit::classic::parser_tag<id_valuefuncmul> > rule_valuefuncmul; - typedef typename boost::spirit::classic::rule<Scanner, - boost::spirit::classic::parser_tag<id_valuefuncmod> > rule_valuefuncmod; - typedef typename boost::spirit::classic::rule<Scanner, - boost::spirit::classic::parser_tag<id_valuegroup> > rule_valuegroup; - typedef typename boost::spirit::classic::rule<Scanner, - boost::spirit::classic::parser_tag<id_arithmvalue> > rule_arithmvalue; - typedef typename boost::spirit::classic::rule<Scanner, - boost::spirit::classic::parser_tag<id_comparison> > rule_comparison; - typedef typename boost::spirit::classic::rule<Scanner, - boost::spirit::classic::parser_tag<id_leaf> > rule_leaf; - typedef typename boost::spirit::classic::rule<Scanner, - boost::spirit::classic::parser_tag<id_not> > rule_not; - typedef typename boost::spirit::classic::rule<Scanner, - boost::spirit::classic::parser_tag<id_and> > rule_and; - typedef typename boost::spirit::classic::rule<Scanner, - boost::spirit::classic::parser_tag<id_or> > rule_or; - typedef typename boost::spirit::classic::rule<Scanner, - boost::spirit::classic::parser_tag<id_group> > rule_group; - typedef typename boost::spirit::classic::rule<Scanner, - boost::spirit::classic::parser_tag<id_order> > rule_order; - typedef typename boost::spirit::classic::rule<Scanner, - boost::spirit::classic::parser_tag<id_expression> > rule_expression; - typedef typename boost::spirit::classic::rule<Scanner, - boost::spirit::classic::parser_tag<id_variable> > rule_variable; - typedef boost::spirit::classic::grammar_def<rule_expression, - rule_leaf, - rule_arithmvalue> type; - }; - - template <typename Scanner> - struct definition : gram_base<Scanner>::type - { - typename gram_base<Scanner>::rule_nil _nil; - typename gram_base<Scanner>::rule_bool _bool; - typename gram_base<Scanner>::rule_number _number; - typename gram_base<Scanner>::rule_string _string; - typename gram_base<Scanner>::rule_doctype _doctype; - typename 
gram_base<Scanner>::rule_fieldname _fieldname; - typename gram_base<Scanner>::rule_function _function; - typename gram_base<Scanner>::rule_idarg _idarg; - typename gram_base<Scanner>::rule_searchcolumnarg _searchcolumnarg; - typename gram_base<Scanner>::rule_operator _operator; - typename gram_base<Scanner>::rule_idspec _idspec; - typename gram_base<Scanner>::rule_searchcolumnspec _searchcolumnspec; - typename gram_base<Scanner>::rule_fieldspec _fieldspec; - typename gram_base<Scanner>::rule_value _value; - typename gram_base<Scanner>::rule_valuefuncadd _valuefuncadd; - typename gram_base<Scanner>::rule_valuefuncmul _valuefuncmul; - typename gram_base<Scanner>::rule_valuefuncmod _valuefuncmod; - typename gram_base<Scanner>::rule_valuegroup _valuegroup; - typename gram_base<Scanner>::rule_arithmvalue _arithmvalue; - typename gram_base<Scanner>::rule_comparison _comparison; - typename gram_base<Scanner>::rule_leaf _leaf; - typename gram_base<Scanner>::rule_not _not; - typename gram_base<Scanner>::rule_and _and; - typename gram_base<Scanner>::rule_or _or; - typename gram_base<Scanner>::rule_group _group; - typename gram_base<Scanner>::rule_order _order; - typename gram_base<Scanner>::rule_expression _expression; - typename gram_base<Scanner>::rule_variable _variable; +namespace document::select { - definition(const DocSelectionGrammar&) - : _nil(), - _bool(), - _number(), - _string(), - _doctype(), - _fieldname(), - _function(), - _idarg(), - _operator(), - _idspec(), - _searchcolumnspec(), - _fieldspec(), - _value(), - _valuefuncadd(), - _valuefuncmul(), - _valuefuncmod(), - _valuegroup(), - _arithmvalue(), - _comparison(), - _leaf(), - _not(), - _and(), - _or(), - _group(), - _order(), - _expression(), - _variable() - { - using namespace boost::spirit::classic; +std::unique_ptr<Node> Parser::parse(const std::string& str) const { + try { + std::istringstream ss(str); + DocSelScanner scanner(&ss); - boost::spirit::classic::uint_parser<uint64_t, 16, 1, -1> hexvalue; - 
- // Initialize primitives - _nil = lexeme_d[ as_lower_d["null"] ]; - _bool = lexeme_d[ as_lower_d["true"] | as_lower_d["false"] ]; - _number = lexeme_d[ str_p("0x") >> hexvalue ] | lexeme_d[ real_p ]; - _string = ( lexeme_d[ - ( no_node_d[ ch_p('"') ] >> - token_node_d[ *( ~chset<>("\\\"\x00-\x1f\x7f-\xff") | - ( '\\' >> ( ch_p('\\') | 't' | 'n' | 'f' | 'r' | '"' | - (ch_p('x') >> xdigit_p >> xdigit_p) ) ) ) ] >> - no_node_d[ ch_p('"') ] ) | - ( no_node_d[ ch_p('\'') ] >> - token_node_d[ *( ~chset<>("\\'\x00-\x1f\x7f-\xff") | - ( '\\' >> ( ch_p('\\') | 't' | 'n' | 'f' | 'r' | '\'' | - (ch_p('x') >> xdigit_p >> xdigit_p) ) ) ) ] >> - no_node_d[ ch_p('\'') ] ) - ] ); - _doctype = lexeme_d[ token_node_d[ chset<>("_A-Za-z") - >> *(chset<>("_A-Za-z0-9")) ]]; - _fieldname = lexeme_d[ token_node_d[chset<>("_A-Za-z") - >> *(chset<>("_A-Za-z0-9{}[]$")) - ]]; - _function = lexeme_d[ token_node_d[ chset<>("A-Za-z") - >> *(chset<>("A-Za-z0-9")) ] - >> no_node_d[ str_p("()") ] ]; - - _order = as_lower_d["order"] - >> no_node_d[ ch_p('(') ] - >> _number - >> no_node_d[ ch_p(',') ] - >> _number - >> no_node_d[ ch_p(')') ]; - - _idarg = (as_lower_d[ "scheme"] | as_lower_d[ "namespace"] | - as_lower_d[ "specific" ] | as_lower_d[ "user" ] | - as_lower_d[ "group" ] | as_lower_d[ "bucket" ] | - as_lower_d[ "gid" ] | as_lower_d["type"] | _order); - - _searchcolumnarg = lexeme_d[ token_node_d[ *(chset<>("_A-Za-z0-9")) ]]; - _operator = (str_p(">=") | ">" | "==" | "=~" | "=" - | "<=" | "<" | "!="); - // Derived - _idspec = as_lower_d["id"] - >> !(no_node_d[ ch_p('.') ] >> _idarg); - _searchcolumnspec = as_lower_d["searchcolumn"] - >> !(no_node_d[ ch_p('.') ] >> _searchcolumnarg); - _fieldspec = _doctype - >> +( no_node_d[ ch_p('.') ] >> (_function | _fieldname)); - _variable = lexeme_d[ token_node_d[chset<>("$") - >> *(chset<>("A-Za-z0-9")) - ]]; - _value = (_valuegroup | _function | _nil | _number | _string - | _idspec | _searchcolumnspec | _fieldspec | _variable) - >> *(no_node_d[ 
ch_p('.') ] >> _function); - _valuefuncmod = (_valuegroup | _value) - >> +( ch_p('%') - >> (_valuegroup | _value) ); - _valuefuncmul = (_valuefuncmod | _valuegroup | _value) - >> +( (ch_p('*') | ch_p('/')) - >> (_valuefuncmod | _valuegroup | _value)); - _valuefuncadd - = (_valuefuncmul | _valuefuncmod | _valuegroup | _value) - >> +((ch_p('+') | ch_p('-')) - >> (_valuefuncmul | _valuefuncmod | _valuegroup | - _value)); - _valuegroup = no_node_d[ ch_p('(') ] >> _arithmvalue - >> no_node_d[ ch_p(')') ] - >> *(no_node_d[ ch_p('.') ] >> _function); - _arithmvalue = (_valuefuncadd | _valuefuncmul | _valuefuncmod - | _valuegroup | _value); - _comparison = _arithmvalue >> _operator >> _arithmvalue; - _leaf = _bool | _comparison | _fieldspec | _doctype; - - _not = (as_lower_d["not"] >> _group) - | (lexeme_d[ as_lower_d["not"] >> no_node_d[ space_p ] ] >> _leaf); - _and = (_not | _group | _leaf) - >> as_lower_d["and"] >> (_and | _not | _group | _leaf); - _or = (_and | _not | _group | _leaf) - >> as_lower_d["or"] >> (_or | _and | _not | _group | _leaf); - _group = no_node_d[ ch_p('(') ] - >> (_or | _and | _not | _group | _leaf) - >> no_node_d[ ch_p(')') ]; - - _expression = !(_or | _and | _not | _group | _leaf) >> end_p; - - this->start_parsers(_expression, _leaf, _arithmvalue); + std::unique_ptr<Node> root; + DocSelParser parser(scanner, _bucket_id_factory, _doc_type_repo, root); + if (parser.parse() != 0) { + throw ParsingFailedException( + vespalib::make_string("Unknown parse failure while parsing selection '%s'", str.c_str()), + VESPA_STRLOC); } - }; - -}; - -template<typename T> -std::unique_ptr<Node> -parseTree(DocSelectionGrammar& grammar, tree_node<T>& root) { - return parseNode(grammar, root); -} - -template<typename T> -std::unique_ptr<Node> -parseNode(DocSelectionGrammar& grammar, tree_node<T>& node) { - switch (node.value.id().to_long()) { - case DocSelectionGrammar::id_or: - return parseOr(grammar, node); - case DocSelectionGrammar::id_and: - return 
parseAnd(grammar, node); - case DocSelectionGrammar::id_not: - return parseNot(grammar, node); - case DocSelectionGrammar::id_group: - { - std::unique_ptr<Node> n(parseNode(grammar, node.children[0])); - n->setParentheses(); - return n; - } - case DocSelectionGrammar::id_leaf: - case DocSelectionGrammar::id_value: - parse_assert(node.children.size() == 1); - return parseNode(grammar, node.children[0]); - case DocSelectionGrammar::id_expression: - if (node.children.size() == 1) { - return parseNode(grammar, node.children[0]); - } - parse_assert(node.children.size() == 0); - return std::unique_ptr<Node>(new Constant("true")); - case DocSelectionGrammar::id_bool: - return parseBool(grammar, node); - case DocSelectionGrammar::id_comparison: - return parseComparison(grammar, node); - case DocSelectionGrammar::id_fieldspec: - return parseFieldSpec(grammar, node); - case DocSelectionGrammar::id_doctype: - return parseDocType(grammar, node); - } - vespalib::asciistream ost; - ost << "Received unhandled nodetype " - << node.value.id().to_long() << " in parseNode()\n"; - throw IllegalStateException(ost.str(), VESPA_STRLOC); -} - -template<typename T> -std::unique_ptr<Node> -parseOr(DocSelectionGrammar& grammar, tree_node<T>& node) { - parse_assert(node.value.id().to_long() == grammar.id_or); - parse_assert(node.children.size() == 3); - vespalib::string op(node.children[1].value.begin(), - node.children[1].value.end()); - return std::unique_ptr<Node>(new Or( - parseNode(grammar, node.children[0]), - parseNode(grammar, node.children[2]), - op.c_str())); -} - -template<typename T> -std::unique_ptr<Node> -parseAnd(DocSelectionGrammar& grammar, tree_node<T>& node) { - parse_assert(node.value.id().to_long() == grammar.id_and); - parse_assert(node.children.size() == 3); - vespalib::string op(node.children[1].value.begin(), - node.children[1].value.end()); - return std::unique_ptr<Node>(new And( - parseNode(grammar, node.children[0]), - parseNode(grammar, node.children[2]), - 
op.c_str())); -} - -template<typename T> -std::unique_ptr<Node> -parseNot(DocSelectionGrammar& grammar, tree_node<T>& node) { - parse_assert(node.value.id().to_long() == grammar.id_not); - parse_assert(node.children.size() == 2); - vespalib::string op(node.children[0].value.begin(), - node.children[0].value.end()); - return std::unique_ptr<Node>(new Not( - parseNode(grammar, node.children[1]), op.c_str())); -} - -template<typename T> -std::unique_ptr<Node> -parseBool(DocSelectionGrammar& grammar, tree_node<T>& node) { - (void) grammar; - parse_assert(node.value.id().to_long() == grammar.id_bool); - parse_assert(node.children.size() == 1); - parse_assert(node.children[0].value.id().to_long() == grammar.id_bool); - parse_assert(node.children[0].children.size() == 0); - vespalib::string s(node.children[0].value.begin(), node.children[0].value.end()); - return std::unique_ptr<Node>(new Constant(s)); -} - -template<typename T> -std::unique_ptr<Node> -parseComparison(DocSelectionGrammar& grammar, tree_node<T>& node) { - parse_assert(node.value.id().to_long() == grammar.id_comparison); - parse_assert(node.children.size() == 3); - parse_assert(node.children[1].children.size() == 1); - vespalib::string op(node.children[1].children[0].value.begin(), - node.children[1].children[0].value.end()); - return std::unique_ptr<Node>(new Compare( - parseArithmValue(grammar, node.children[0]), - Operator::get(op), - parseArithmValue(grammar, node.children[2]), - grammar.getBucketIdFactory())); -} - -template<typename T> -std::unique_ptr<Node> -parseFieldSpec(DocSelectionGrammar& grammar, tree_node<T>& node) { - parse_assert(node.value.id().to_long() == grammar.id_fieldspec); - return std::unique_ptr<Node>(new Compare( - parseFieldSpecValue(grammar, node), - Operator::get("!="), - std::unique_ptr<ValueNode>(new NullValueNode("null")), - grammar.getBucketIdFactory())); -} - -template<typename T> -std::unique_ptr<ValueNode> -parseVariable(DocSelectionGrammar& grammar, tree_node<T>& node) 
{ - (void) grammar; - parse_assert(node.value.id().to_long() == grammar.id_variable); - vespalib::string varName(node.children[0].value.begin(), - node.children[0].value.end()); - return std::unique_ptr<ValueNode>(new VariableValueNode(varName.substr(1))); -} - -template<typename T> -std::unique_ptr<ValueNode> -parseGlobValueFunction(DocSelectionGrammar& grammar, tree_node<T>& node) { - (void) grammar; - parse_assert(node.value.id().to_long() == grammar.id_function); - vespalib::string varName(node.children[0].value.begin(), - node.children[0].value.end()); - if (varName == "now") { - return std::unique_ptr<ValueNode>(new CurrentTimeValueNode); + return root; + } catch (const DocSelParser::syntax_error& err) { + throw ParsingFailedException( + vespalib::make_string("%s at column %u when parsing selection '%s'", + err.what(), err.location.begin.column, str.c_str()), + VESPA_STRLOC); } - throw ParsingFailedException("Unexpected function name '" + varName - + "' found.", VESPA_STRLOC); } -template<typename T> -std::unique_ptr<Node> -parseDocType(DocSelectionGrammar& grammar, tree_node<T>& node) { - parse_assert(node.value.id().to_long() == grammar.id_doctype); - parse_assert(node.children.size() == 1); - parse_assert(node.children[0].value.id().to_long() == grammar.id_doctype); - parse_assert(node.children[0].children.size() == 0); - vespalib::string doctype(node.children[0].value.begin(), - node.children[0].value.end()); - // Verify existance of any version of document - if (!grammar._repo.getDocumentType(doctype)) { - throw ParsingFailedException("Document type " + doctype + " not found", - VESPA_STRLOC); - } - return std::unique_ptr<Node>(new DocType(doctype)); -} - -template<typename T> -std::unique_ptr<ValueNode> -addFunctions(DocSelectionGrammar& grammar, tree_node<T>& node, - std::unique_ptr<ValueNode> src, uint32_t index) -{ - (void) grammar; - while (index < node.children.size()) { - parse_assert(node.children[index].value.id().to_long() - == 
grammar.id_function); - vespalib::string func(node.children[index].children[0].value.begin(), - node.children[index].children[0].value.end()); - std::unique_ptr<ValueNode> fnode(new FunctionValueNode(func, std::move(src))); - src = std::move(fnode); - ++index; - } - return std::move(src); -} - -template<typename T> -std::unique_ptr<ValueNode> -parseArithmValue(DocSelectionGrammar& grammar, tree_node<T>& node) { - switch (node.value.id().to_long()) { - case DocSelectionGrammar::id_arithmvalue: - parse_assert(node.children.size() == 1); - return parseArithmValue(grammar, node.children[0]); - case DocSelectionGrammar::id_value: - return parseValue(grammar, node); - case DocSelectionGrammar::id_valuegroup: - return parseValueGroup(grammar, node); - case DocSelectionGrammar::id_valuefuncadd: - case DocSelectionGrammar::id_valuefuncmul: - case DocSelectionGrammar::id_valuefuncmod: - return parseValueArithmetics(grammar, node); - } - vespalib::asciistream ost; - ost << "Received unhandled nodetype " - << node.value.id().to_long() - << " in parseArithmValue()\n"; - throw IllegalStateException(ost.str(), VESPA_STRLOC); -} - -template<typename T> -std::unique_ptr<ValueNode> -parseValueArithmetics(DocSelectionGrammar& grammar, tree_node<T>& node) { - parse_assert(node.children.size() >= 3 && node.children.size() % 2 == 1); - std::unique_ptr<ValueNode> lhs(parseArithmValue(grammar, node.children[0])); - for (unsigned int i = 1; i < node.children.size(); i += 2) { - vespalib::string op(node.children[i].value.begin(), - node.children[i].value.end()); - std::unique_ptr<ValueNode> rhs(parseArithmValue(grammar, - node.children[i + 1])); - std::unique_ptr<ValueNode> res( - new ArithmeticValueNode(std::move(lhs), op, std::move(rhs))); - lhs = std::move(res); - } - return lhs; -} - -template<typename T> -std::unique_ptr<ValueNode> -parseValueGroup(DocSelectionGrammar& grammar, tree_node<T>& node) { - parse_assert(node.value.id().to_long() == grammar.id_valuegroup); - 
parse_assert(node.children.size() >= 1); - std::unique_ptr<ValueNode> result( - parseArithmValue(grammar, node.children[0])); - result->setParentheses(); - return addFunctions(grammar, node, std::move(result), 1); -} - -template<typename T> -std::unique_ptr<ValueNode> -parseValue(DocSelectionGrammar& grammar, tree_node<T>& node) { - parse_assert(node.value.id().to_long() == grammar.id_value); - parse_assert(node.children.size() >= 1); - std::unique_ptr<ValueNode> result; - switch (node.children[0].value.id().to_long()) { - case DocSelectionGrammar::id_nil: - result = parseNilValue(grammar, node.children[0]); - break; - case DocSelectionGrammar::id_idspec: - result = parseIdSpecValue(grammar, node.children[0]); - break; - case DocSelectionGrammar::id_searchcolumnspec: - result = parseSearchColumnSpecValue(grammar, node.children[0]); - break; - case DocSelectionGrammar::id_fieldspec: - result = parseFieldSpecValue(grammar, node.children[0]); - break; - case DocSelectionGrammar::id_number: - result = parseNumberValue(grammar, node.children[0]); - break; - case DocSelectionGrammar::id_string: - result = parseStringValue(grammar, node.children[0]); - break; - case DocSelectionGrammar::id_valuegroup: - result = parseValueGroup(grammar, node.children[0]); - break; - case DocSelectionGrammar::id_variable: - result = parseVariable(grammar, node.children[0]); - break; - case DocSelectionGrammar::id_function: - result = parseGlobValueFunction(grammar, node.children[0]); - break; - default: - vespalib::asciistream ost; - ost << "Received unhandled nodetype " - << node.children[0].value.id().to_long() - << " in parseValue(), from node of type " - << node.value.id().to_long() << "\n"; - throw IllegalStateException(ost.str(), VESPA_STRLOC); - } - return addFunctions(grammar, node, std::move(result), 1); -} - -template<typename T> -std::unique_ptr<ValueNode> -parseNilValue(DocSelectionGrammar& grammar, tree_node<T>& node) { - (void) grammar; - 
parse_assert(node.value.id().to_long() == grammar.id_nil); - parse_assert(node.children.size() == 1); - parse_assert(node.children[0].children.size() == 0); - vespalib::string op(node.children[0].value.begin(), - node.children[0].value.end()); - return std::unique_ptr<ValueNode>(new NullValueNode(op)); -} - -template<typename T> -std::unique_ptr<ValueNode> -parseIdSpecValue(DocSelectionGrammar& grammar, tree_node<T>& node) { - parse_assert(node.value.id().to_long() == grammar.id_idspec); - parse_assert(node.children.size() >= 1); - parse_assert(node.children[0].children.size() == 0); - vespalib::string id(node.children[0].value.begin(), - node.children[0].value.end()); - if (node.children.size() == 1) { - return std::unique_ptr<ValueNode>( - new IdValueNode(grammar.getBucketIdFactory(), id, "")); - } - - vespalib::string type; - - int widthBits = -1; - int divisionBits = -1; - - if (node.children[1].children[0].value.id().to_long() == grammar.id_order) { - tree_node<T>& ordernode(node.children[1].children[0]); - type = vespalib::string(ordernode.children[0].value.begin(), - ordernode.children[0].value.end()); - - vespalib::string val = vespalib::string( - ordernode.children[1].children[0].value.begin(), - ordernode.children[1].children[0].value.end()); - widthBits = atoi(val.c_str()); - - val = vespalib::string(ordernode.children[2].children[0].value.begin(), - ordernode.children[2].children[0].value.end()); - divisionBits = atoi(val.c_str()); - } else { - type = vespalib::string(node.children[1].children[0].value.begin(), - node.children[1].children[0].value.end()); - } - - return std::unique_ptr<ValueNode>( - new IdValueNode(grammar.getBucketIdFactory(), id, type, - widthBits, divisionBits)); -} - -template<typename T> -std::unique_ptr<ValueNode> -parseSearchColumnSpecValue(DocSelectionGrammar& grammar, tree_node<T>& node) { - parse_assert(node.value.id().to_long() == grammar.id_searchcolumnspec); - parse_assert(node.children.size() == 2); - 
parse_assert(node.children[0].children.size() == 0); - parse_assert(node.children[1].value.id().to_long() == grammar.id_searchcolumnarg); - - vespalib::string id(node.children[0].value.begin(), - node.children[0].value.end()); - parse_assert(node.children.size() == 2); - - vespalib::string val = vespalib::string(node.children[1].children[0].value.begin(), - node.children[1].children[0].value.end()); - return std::unique_ptr<ValueNode>(new SearchColumnValueNode( - grammar.getBucketIdFactory(), id, atoi(val.c_str()))); -} - -template<typename T> -std::unique_ptr<ValueNode> -parseFieldSpecValue(DocSelectionGrammar& grammar, tree_node<T>& node) { - parse_assert(node.value.id().to_long() == grammar.id_fieldspec); - parse_assert(node.children.size() >= 2); - parse_assert(node.children[0].value.id().to_long() == grammar.id_doctype); - vespalib::string doctype(node.children[0].children[0].value.begin(), - node.children[0].children[0].value.end()); - // Verify that document type exist at any version - if (!grammar._repo.getDocumentType(doctype)) { - throw ParsingFailedException("Document type " + doctype + " not found", - VESPA_STRLOC); - } - std::unique_ptr<ValueNode> value; - uint32_t iterator = 2; - - parse_assert(node.children[1].value.id().to_long() == grammar.id_fieldname); - vespalib::string field(node.children[1].children[0].value.begin(), - node.children[1].children[0].value.end()); - while (iterator < node.children.size() - && node.children[iterator].value.id().to_long() == grammar.id_fieldname) - { - field += "." 
+ vespalib::string( - node.children[iterator].children[0].value.begin(), - node.children[iterator].children[0].value.end()); - ++iterator; - } - value.reset(new FieldValueNode(doctype, field)); - - for (; iterator<node.children.size(); ++iterator) { - std::unique_ptr<ValueNode> child(std::move(value)); - vespalib::string function(node.children[iterator].children[0].value.begin(), - node.children[iterator].children[0].value.end()); - parse_assert(node.children[iterator].value.id().to_long() == grammar.id_function); - value.reset(new FunctionValueNode(function, std::move(child))); - } - return value; -} - -template<typename T> -std::unique_ptr<ValueNode> -parseNumberValue(DocSelectionGrammar& grammar, tree_node<T>& node) { - (void) grammar; - parse_assert(node.value.id().to_long() == grammar.id_number); - vespalib::string sval; - int base = 10; - if (node.children.size() == 2) { - base = 16; - sval = vespalib::string(node.children[1].value.begin(), - node.children[1].value.end()); - parse_assert(node.children[0].value.id().to_long() == grammar.id_number); - parse_assert(node.children[1].value.id().to_long() == grammar.id_number); - } else { - parse_assert(node.children.size() == 1); - sval = vespalib::string(node.children[0].value.begin(), - node.children[0].value.end()); - parse_assert(node.children[0].value.id().to_long() == grammar.id_number); - } - if (sval.find('.') != vespalib::string::npos) { - char* endptr; - double val = vespalib::locale::c::strtod(sval.c_str(), &endptr); - if (*endptr == '\0') { - return std::unique_ptr<ValueNode>(new FloatValueNode(val)); - } - } else { - char* endptr; - int64_t val; - if (base == 16) { - val = strtoull(sval.c_str(), &endptr, base); - } else { - val = strtoll(sval.c_str(), &endptr, base); - } - if (*endptr == '\0') { - return std::unique_ptr<ValueNode>(new IntegerValueNode(val, false)); - } - } - vespalib::string error = "'" + sval + "' is not a valid number."; - throw ParsingFailedException(error, VESPA_STRLOC); -} - 
-template<typename T> -std::unique_ptr<ValueNode> -parseStringValue(DocSelectionGrammar& grammar, tree_node<T>& node) { - (void) grammar; - parse_assert(node.value.id().to_long() == grammar.id_string); - if (node.children.size() == 0) { - return std::unique_ptr<ValueNode>(new StringValueNode("")); - } - parse_assert(node.children.size() == 1); - parse_assert(node.children[0].value.id().to_long() == grammar.id_string); - vespalib::string val(node.children[0].value.begin(), - node.children[0].value.end()); - return std::unique_ptr<ValueNode>(new StringValueNode(StringUtil::unescape(val))); -} - -template<typename Tree> -void printSpiritTree(std::ostream& out, Tree tree, const vespalib::string& query, - const DocSelectionGrammar& grammar) { - using boost::spirit::classic::parser_id; - - std::map<parser_id, vespalib::string> names; - names[parser_id(grammar.id_bool)] = "bool"; - names[parser_id(grammar.id_number)] = "number"; - names[parser_id(grammar.id_string)] = "string"; - names[parser_id(grammar.id_doctype)] = "doctype"; - names[parser_id(grammar.id_fieldname)] = "fieldname"; - names[parser_id(grammar.id_function)] = "function"; - names[parser_id(grammar.id_idarg)] = "idarg"; - names[parser_id(grammar.id_searchcolumnarg)] = "searchcolumnarg"; - names[parser_id(grammar.id_operator)] = "operator"; - names[parser_id(grammar.id_idspec)] = "idspec"; - names[parser_id(grammar.id_searchcolumnspec)] = "searchcolumnspec"; - names[parser_id(grammar.id_fieldspec)] = "fieldspec"; - names[parser_id(grammar.id_value)] = "value"; - names[parser_id(grammar.id_valuefuncadd)] = "valuefuncadd"; - names[parser_id(grammar.id_valuefuncmul)] = "valuefuncmul"; - names[parser_id(grammar.id_valuefuncmod)] = "valuefuncmod"; - names[parser_id(grammar.id_valuegroup)] = "valuegroup"; - names[parser_id(grammar.id_arithmvalue)] = "arithmvalue"; - names[parser_id(grammar.id_comparison)] = "comparison"; - names[parser_id(grammar.id_leaf)] = "leaf"; - names[parser_id(grammar.id_not)] = "not"; - 
names[parser_id(grammar.id_and)] = "and"; - names[parser_id(grammar.id_or)] = "or"; - names[parser_id(grammar.id_group)] = "group"; - names[parser_id(grammar.id_expression)] = "expression"; - tree_to_xml(out, tree, query.c_str(), names); -} - -template<typename Parser> -bool testExpr(const DocumentTypeRepo& repo, - const BucketIdFactory& factory, - const vespalib::string& expression, const Parser& parser, - const vespalib::string& result) -{ - //std::cerr << "Testing expression '" << expression << "'.\n"; - using boost::spirit::classic::space_p; - - DocSelectionGrammar grammar(repo, factory); - boost::spirit::classic::tree_parse_info<> info; - info = pt_parse(expression.c_str(), parser, - space_p); - std::ostringstream ost; - printSpiritTree(ost, info.trees, expression, grammar); - if (!info.full) { - cerr << "Expression '" << expression - << "' wasn't completely parsed\n" - << ost.str() << "\n"; - return false; - } - vespalib::string httpexpr = expression; - vespalib::string::size_type index; - while ((index = httpexpr.find('>')) != vespalib::string::npos) { - httpexpr = httpexpr.substr(0,index) + ">" - + httpexpr.substr(index+1); - } - vespalib::string fullresult = "<?xml version=\"1.0\" encoding=\"ISO-8859-1\"?>\n" - "<!DOCTYPE parsetree SYSTEM \"parsetree.dtd\">\n" - "<!-- " + httpexpr + " -->\n" + result; - //if (ost.str() != fullresult) { - if (fullresult != ost.str()) { - cerr << "Parsing expression '" << expression << "', expected\n" - << fullresult << "\nbut got\n" << ost.str() << "\n"; - return false; - } - return true; -} - -bool test(const DocumentTypeRepo& repo, - const BucketIdFactory& bucketIdFactory) -{ - //std::cerr << "\n\nTESTING DOCUMENT SELECT PARSER\n\n"; - DocSelectionGrammar grammar(repo, bucketIdFactory); - - using boost::spirit::classic::space_p; - - // Parser two is the arithmvalue.. 
- // idspec, fieldspec, number & stringval, + - * / % () - testExpr(repo, bucketIdFactory, "3.14", grammar.use_parser<2>(), - "<parsetree version=\"1.0\">\n" - " <parsenode rule=\"arithmvalue\">\n" - " <parsenode rule=\"value\">\n" - " <parsenode rule=\"number\">\n" - " <parsenode rule=\"number\">\n" - " <value>3.14</value>\n" - " </parsenode>\n" - " </parsenode>\n" - " </parsenode>\n" - " </parsenode>\n" - "</parsetree>\n"); - testExpr(repo, bucketIdFactory, "-999", grammar.use_parser<2>(), - "<parsetree version=\"1.0\">\n" - " <parsenode rule=\"arithmvalue\">\n" - " <parsenode rule=\"value\">\n" - " <parsenode rule=\"number\">\n" - " <parsenode rule=\"number\">\n" - " <value>-999</value>\n" - " </parsenode>\n" - " </parsenode>\n" - " </parsenode>\n" - " </parsenode>\n" - "</parsetree>\n"); - testExpr(repo, bucketIdFactory, "15e4", grammar.use_parser<2>(), - "<parsetree version=\"1.0\">\n" - " <parsenode rule=\"arithmvalue\">\n" - " <parsenode rule=\"value\">\n" - " <parsenode rule=\"number\">\n" - " <parsenode rule=\"number\">\n" - " <value>15e4</value>\n" - " </parsenode>\n" - " </parsenode>\n" - " </parsenode>\n" - " </parsenode>\n" - "</parsetree>\n"); - testExpr(repo, bucketIdFactory, "3.4e-4", grammar.use_parser<2>(), - "<parsetree version=\"1.0\">\n" - " <parsenode rule=\"arithmvalue\">\n" - " <parsenode rule=\"value\">\n" - " <parsenode rule=\"number\">\n" - " <parsenode rule=\"number\">\n" - " <value>3.4e-4</value>\n" - " </parsenode>\n" - " </parsenode>\n" - " </parsenode>\n" - " </parsenode>\n" - "</parsetree>\n"); - testExpr(repo, bucketIdFactory, "\" Test \"", grammar.use_parser<2>(), - "<parsetree version=\"1.0\">\n" - " <parsenode rule=\"arithmvalue\">\n" - " <parsenode rule=\"value\">\n" - " <parsenode rule=\"string\">\n" - " <parsenode rule=\"string\">\n" - " <value> Test </value>\n" - " </parsenode>\n" - " </parsenode>\n" - " </parsenode>\n" - " </parsenode>\n" - "</parsetree>\n"); - testExpr(repo, bucketIdFactory, "id", grammar.use_parser<2>(), 
- "<parsetree version=\"1.0\">\n" - " <parsenode rule=\"arithmvalue\">\n" - " <parsenode rule=\"value\">\n" - " <parsenode rule=\"idspec\">\n" - " <parsenode rule=\"idspec\">\n" - " <value>id</value>\n" - " </parsenode>\n" - " </parsenode>\n" - " </parsenode>\n" - " </parsenode>\n" - "</parsetree>\n"); - testExpr(repo, bucketIdFactory, "id.namespace", - grammar.use_parser<2>(), - "<parsetree version=\"1.0\">\n" - " <parsenode rule=\"arithmvalue\">\n" - " <parsenode rule=\"value\">\n" - " <parsenode rule=\"idspec\">\n" - " <parsenode rule=\"idspec\">\n" - " <value>id</value>\n" - " </parsenode>\n" - " <parsenode rule=\"idarg\">\n" - " <parsenode rule=\"idarg\">\n" - " <value>namespace</value>\n" - " </parsenode>\n" - " </parsenode>\n" - " </parsenode>\n" - " </parsenode>\n" - " </parsenode>\n" - "</parsetree>\n"); - testExpr(repo, bucketIdFactory, "id.hash()", grammar.use_parser<2>(), - "<parsetree version=\"1.0\">\n" - " <parsenode rule=\"arithmvalue\">\n" - " <parsenode rule=\"value\">\n" - " <parsenode rule=\"idspec\">\n" - " <parsenode rule=\"idspec\">\n" - " <value>id</value>\n" - " </parsenode>\n" - " </parsenode>\n" - " <parsenode rule=\"function\">\n" - " <parsenode rule=\"function\">\n" - " <value>hash</value>\n" - " </parsenode>\n" - " </parsenode>\n" - " </parsenode>\n" - " </parsenode>\n" - "</parsetree>\n"); - testExpr(repo, bucketIdFactory, - "id.namespace.hash()", grammar.use_parser<2>(), - "<parsetree version=\"1.0\">\n" - " <parsenode rule=\"arithmvalue\">\n" - " <parsenode rule=\"value\">\n" - " <parsenode rule=\"idspec\">\n" - " <parsenode rule=\"idspec\">\n" - " <value>id</value>\n" - " </parsenode>\n" - " <parsenode rule=\"idarg\">\n" - " <parsenode rule=\"idarg\">\n" - " <value>namespace</value>\n" - " </parsenode>\n" - " </parsenode>\n" - " </parsenode>\n" - " <parsenode rule=\"function\">\n" - " <parsenode rule=\"function\">\n" - " <value>hash</value>\n" - " </parsenode>\n" - " </parsenode>\n" - " </parsenode>\n" - " </parsenode>\n" - 
"</parsetree>\n"); - testExpr(repo, bucketIdFactory, - "music.artist", grammar.use_parser<2>(), - "<parsetree version=\"1.0\">\n" - " <parsenode rule=\"arithmvalue\">\n" - " <parsenode rule=\"value\">\n" - " <parsenode rule=\"fieldspec\">\n" - " <parsenode rule=\"doctype\">\n" - " <parsenode rule=\"doctype\">\n" - " <value>music</value>\n" - " </parsenode>\n" - " </parsenode>\n" - " <parsenode rule=\"fieldname\">\n" - " <parsenode rule=\"fieldname\">\n" - " <value>artist</value>\n" - " </parsenode>\n" - " </parsenode>\n" - " </parsenode>\n" - " </parsenode>\n" - " </parsenode>\n" - "</parsetree>\n"); - testExpr(repo, bucketIdFactory, - "music.artist.lowercase()", grammar.use_parser<2>(), - "<parsetree version=\"1.0\">\n" - " <parsenode rule=\"arithmvalue\">\n" - " <parsenode rule=\"value\">\n" - " <parsenode rule=\"fieldspec\">\n" - " <parsenode rule=\"doctype\">\n" - " <parsenode rule=\"doctype\">\n" - " <value>music</value>\n" - " </parsenode>\n" - " </parsenode>\n" - " <parsenode rule=\"fieldname\">\n" - " <parsenode rule=\"fieldname\">\n" - " <value>artist</value>\n" - " </parsenode>\n" - " </parsenode>\n" - " <parsenode rule=\"function\">\n" - " <parsenode rule=\"function\">\n" - " <value>lowercase</value>\n" - " </parsenode>\n" - " </parsenode>\n" - " </parsenode>\n" - " </parsenode>\n" - " </parsenode>\n" - "</parsetree>\n"); - testExpr(repo, bucketIdFactory, "(43)", grammar.use_parser<2>(), - "<parsetree version=\"1.0\">\n" - " <parsenode rule=\"arithmvalue\">\n" - " <parsenode rule=\"valuegroup\">\n" - " <parsenode rule=\"arithmvalue\">\n" - " <parsenode rule=\"value\">\n" - " <parsenode rule=\"number\">\n" - " <parsenode rule=\"number\">\n" - " <value>43</value>\n" - " </parsenode>\n" - " </parsenode>\n" - " </parsenode>\n" - " </parsenode>\n" - " </parsenode>\n" - " </parsenode>\n" - "</parsetree>\n"); - testExpr(repo, bucketIdFactory, - "1 + 2 * 3 - 10 % 2 / 3", grammar.use_parser<2>(), - "<parsetree version=\"1.0\">\n" - " <parsenode 
rule=\"arithmvalue\">\n" - " <parsenode rule=\"valuefuncadd\">\n" - " <parsenode rule=\"value\">\n" - " <parsenode rule=\"number\">\n" - " <parsenode rule=\"number\">\n" - " <value>1</value>\n" - " </parsenode>\n" - " </parsenode>\n" - " </parsenode>\n" - " <parsenode rule=\"valuefuncadd\">\n" - " <value>+</value>\n" - " </parsenode>\n" - " <parsenode rule=\"valuefuncmul\">\n" - " <parsenode rule=\"value\">\n" - " <parsenode rule=\"number\">\n" - " <parsenode rule=\"number\">\n" - " <value>2</value>\n" - " </parsenode>\n" - " </parsenode>\n" - " </parsenode>\n" - " <parsenode rule=\"valuefuncmul\">\n" - " <value>*</value>\n" - " </parsenode>\n" - " <parsenode rule=\"value\">\n" - " <parsenode rule=\"number\">\n" - " <parsenode rule=\"number\">\n" - " <value>3</value>\n" - " </parsenode>\n" - " </parsenode>\n" - " </parsenode>\n" - " </parsenode>\n" - " <parsenode rule=\"valuefuncadd\">\n" - " <value>-</value>\n" - " </parsenode>\n" - " <parsenode rule=\"valuefuncmul\">\n" - " <parsenode rule=\"valuefuncmod\">\n" - " <parsenode rule=\"value\">\n" - " <parsenode rule=\"number\">\n" - " <parsenode rule=\"number\">\n" - " <value>10</value>\n" - " </parsenode>\n" - " </parsenode>\n" - " </parsenode>\n" - " <parsenode rule=\"valuefuncmod\">\n" - " <value>%</value>\n" - " </parsenode>\n" - " <parsenode rule=\"value\">\n" - " <parsenode rule=\"number\">\n" - " <parsenode rule=\"number\">\n" - " <value>2</value>\n" - " </parsenode>\n" - " </parsenode>\n" - " </parsenode>\n" - " </parsenode>\n" - " <parsenode rule=\"valuefuncmul\">\n" - " <value>/</value>\n" - " </parsenode>\n" - " <parsenode rule=\"value\">\n" - " <parsenode rule=\"number\">\n" - " <parsenode rule=\"number\">\n" - " <value>3</value>\n" - " </parsenode>\n" - " </parsenode>\n" - " </parsenode>\n" - " </parsenode>\n" - " </parsenode>\n" - " </parsenode>\n" - "</parsetree>\n"); - testExpr(repo, bucketIdFactory, "(43 + 14) / 34", - grammar.use_parser<2>(), - "<parsetree version=\"1.0\">\n" - " <parsenode 
rule=\"arithmvalue\">\n" - " <parsenode rule=\"valuefuncmul\">\n" - " <parsenode rule=\"valuegroup\">\n" - " <parsenode rule=\"arithmvalue\">\n" - " <parsenode rule=\"valuefuncadd\">\n" - " <parsenode rule=\"value\">\n" - " <parsenode rule=\"number\">\n" - " <parsenode rule=\"number\">\n" - " <value>43</value>\n" - " </parsenode>\n" - " </parsenode>\n" - " </parsenode>\n" - " <parsenode rule=\"valuefuncadd\">\n" - " <value>+</value>\n" - " </parsenode>\n" - " <parsenode rule=\"value\">\n" - " <parsenode rule=\"number\">\n" - " <parsenode rule=\"number\">\n" - " <value>14</value>\n" - " </parsenode>\n" - " </parsenode>\n" - " </parsenode>\n" - " </parsenode>\n" - " </parsenode>\n" - " </parsenode>\n" - " <parsenode rule=\"valuefuncmul\">\n" - " <value>/</value>\n" - " </parsenode>\n" - " <parsenode rule=\"value\">\n" - " <parsenode rule=\"number\">\n" - " <parsenode rule=\"number\">\n" - " <value>34</value>\n" - " </parsenode>\n" - " </parsenode>\n" - " </parsenode>\n" - " </parsenode>\n" - " </parsenode>\n" - "</parsetree>\n"); - testExpr(repo, bucketIdFactory, "34 * (3 - 1) % 4", - grammar.use_parser<2>(), - "<parsetree version=\"1.0\">\n" - " <parsenode rule=\"arithmvalue\">\n" - " <parsenode rule=\"valuefuncmul\">\n" - " <parsenode rule=\"value\">\n" - " <parsenode rule=\"number\">\n" - " <parsenode rule=\"number\">\n" - " <value>34</value>\n" - " </parsenode>\n" - " </parsenode>\n" - " </parsenode>\n" - " <parsenode rule=\"valuefuncmul\">\n" - " <value>*</value>\n" - " </parsenode>\n" - " <parsenode rule=\"valuefuncmod\">\n" - " <parsenode rule=\"valuegroup\">\n" - " <parsenode rule=\"arithmvalue\">\n" - " <parsenode rule=\"valuefuncadd\">\n" - " <parsenode rule=\"value\">\n" - " <parsenode rule=\"number\">\n" - " <parsenode rule=\"number\">\n" - " <value>3</value>\n" - " </parsenode>\n" - " </parsenode>\n" - " </parsenode>\n" - " <parsenode rule=\"valuefuncadd\">\n" - " <value>-</value>\n" - " </parsenode>\n" - " <parsenode rule=\"value\">\n" - " <parsenode 
rule=\"number\">\n" - " <parsenode rule=\"number\">\n" - " <value>1</value>\n" - " </parsenode>\n" - " </parsenode>\n" - " </parsenode>\n" - " </parsenode>\n" - " </parsenode>\n" - " </parsenode>\n" - " <parsenode rule=\"valuefuncmod\">\n" - " <value>%</value>\n" - " </parsenode>\n" - " <parsenode rule=\"value\">\n" - " <parsenode rule=\"number\">\n" - " <parsenode rule=\"number\">\n" - " <value>4</value>\n" - " </parsenode>\n" - " </parsenode>\n" - " </parsenode>\n" - " </parsenode>\n" - " </parsenode>\n" - " </parsenode>\n" - "</parsetree>\n"); - - // Parser 1 is a leaf. bool, comparison, fieldspec, doctype - testExpr(repo, bucketIdFactory, "true", grammar.use_parser<1>(), - "<parsetree version=\"1.0\">\n" - " <parsenode rule=\"leaf\">\n" - " <parsenode rule=\"bool\">\n" - " <parsenode rule=\"bool\">\n" - " <value>true</value>\n" - " </parsenode>\n" - " </parsenode>\n" - " </parsenode>\n" - "</parsetree>\n"); - testExpr(repo, bucketIdFactory, "false", grammar.use_parser<1>(), - "<parsetree version=\"1.0\">\n" - " <parsenode rule=\"leaf\">\n" - " <parsenode rule=\"bool\">\n" - " <parsenode rule=\"bool\">\n" - " <value>false</value>\n" - " </parsenode>\n" - " </parsenode>\n" - " </parsenode>\n" - "</parsetree>\n"); - testExpr(repo, bucketIdFactory, "music.test", grammar.use_parser<1>(), - "<parsetree version=\"1.0\">\n" - " <parsenode rule=\"leaf\">\n" - " <parsenode rule=\"fieldspec\">\n" - " <parsenode rule=\"doctype\">\n" - " <parsenode rule=\"doctype\">\n" - " <value>music</value>\n" - " </parsenode>\n" - " </parsenode>\n" - " <parsenode rule=\"fieldname\">\n" - " <parsenode rule=\"fieldname\">\n" - " <value>test</value>\n" - " </parsenode>\n" - " </parsenode>\n" - " </parsenode>\n" - " </parsenode>\n" - "</parsetree>\n"); - testExpr(repo, bucketIdFactory, "music", grammar.use_parser<1>(), - "<parsetree version=\"1.0\">\n" - " <parsenode rule=\"leaf\">\n" - " <parsenode rule=\"doctype\">\n" - " <parsenode rule=\"doctype\">\n" - " <value>music</value>\n" - " 
</parsenode>\n" - " </parsenode>\n" - " </parsenode>\n" - "</parsetree>\n"); - testExpr(repo, bucketIdFactory, - "music.artist = \"*john*\"", grammar.use_parser<1>(), - "<parsetree version=\"1.0\">\n" - " <parsenode rule=\"leaf\">\n" - " <parsenode rule=\"comparison\">\n" - " <parsenode rule=\"arithmvalue\">\n" - " <parsenode rule=\"value\">\n" - " <parsenode rule=\"fieldspec\">\n" - " <parsenode rule=\"doctype\">\n" - " <parsenode rule=\"doctype\">\n" - " <value>music</value>\n" - " </parsenode>\n" - " </parsenode>\n" - " <parsenode rule=\"fieldname\">\n" - " <parsenode rule=\"fieldname\">\n" - " <value>artist</value>\n" - " </parsenode>\n" - " </parsenode>\n" - " </parsenode>\n" - " </parsenode>\n" - " </parsenode>\n" - " <parsenode rule=\"operator\">\n" - " <parsenode rule=\"operator\">\n" - " <value>=</value>\n" - " </parsenode>\n" - " </parsenode>\n" - " <parsenode rule=\"arithmvalue\">\n" - " <parsenode rule=\"value\">\n" - " <parsenode rule=\"string\">\n" - " <parsenode rule=\"string\">\n" - " <value>*john*</value>\n" - " </parsenode>\n" - " </parsenode>\n" - " </parsenode>\n" - " </parsenode>\n" - " </parsenode>\n" - " </parsenode>\n" - "</parsetree>\n"); - testExpr(repo, bucketIdFactory, - "music.length >= 180", grammar.use_parser<1>(), - "<parsetree version=\"1.0\">\n" - " <parsenode rule=\"leaf\">\n" - " <parsenode rule=\"comparison\">\n" - " <parsenode rule=\"arithmvalue\">\n" - " <parsenode rule=\"value\">\n" - " <parsenode rule=\"fieldspec\">\n" - " <parsenode rule=\"doctype\">\n" - " <parsenode rule=\"doctype\">\n" - " <value>music</value>\n" - " </parsenode>\n" - " </parsenode>\n" - " <parsenode rule=\"fieldname\">\n" - " <parsenode rule=\"fieldname\">\n" - " <value>length</value>\n" - " </parsenode>\n" - " </parsenode>\n" - " </parsenode>\n" - " </parsenode>\n" - " </parsenode>\n" - " <parsenode rule=\"operator\">\n" - " <parsenode rule=\"operator\">\n" - " <value>>=</value>\n" - " </parsenode>\n" - " </parsenode>\n" - " <parsenode 
rule=\"arithmvalue\">\n" - " <parsenode rule=\"value\">\n" - " <parsenode rule=\"number\">\n" - " <parsenode rule=\"number\">\n" - " <value>180</value>\n" - " </parsenode>\n" - " </parsenode>\n" - " </parsenode>\n" - " </parsenode>\n" - " </parsenode>\n" - " </parsenode>\n" - "</parsetree>\n"); - - // Parser 0 - The whole expression - testExpr(repo, bucketIdFactory, - "true oR nOt false And true", grammar.use_parser<0>(), - "<parsetree version=\"1.0\">\n" - " <parsenode rule=\"expression\">\n" - " <parsenode rule=\"or\">\n" - " <parsenode rule=\"leaf\">\n" - " <parsenode rule=\"bool\">\n" - " <parsenode rule=\"bool\">\n" - " <value>true</value>\n" - " </parsenode>\n" - " </parsenode>\n" - " </parsenode>\n" - " <parsenode rule=\"or\">\n" - " <value>oR</value>\n" - " </parsenode>\n" - " <parsenode rule=\"and\">\n" - " <parsenode rule=\"not\">\n" - " <parsenode rule=\"not\">\n" - " <value>nOt</value>\n" - " </parsenode>\n" - " <parsenode rule=\"leaf\">\n" - " <parsenode rule=\"bool\">\n" - " <parsenode rule=\"bool\">\n" - " <value>false</value>\n" - " </parsenode>\n" - " </parsenode>\n" - " </parsenode>\n" - " </parsenode>\n" - " <parsenode rule=\"and\">\n" - " <value>And</value>\n" - " </parsenode>\n" - " <parsenode rule=\"leaf\">\n" - " <parsenode rule=\"bool\">\n" - " <parsenode rule=\"bool\">\n" - " <value>true</value>\n" - " </parsenode>\n" - " </parsenode>\n" - " </parsenode>\n" - " </parsenode>\n" - " </parsenode>\n" - " </parsenode>\n" - "</parsetree>\n"); - testExpr(repo, bucketIdFactory, - "(true oR false) aNd true", grammar.use_parser<0>(), - "<parsetree version=\"1.0\">\n" - " <parsenode rule=\"expression\">\n" - " <parsenode rule=\"and\">\n" - " <parsenode rule=\"group\">\n" - " <parsenode rule=\"or\">\n" - " <parsenode rule=\"leaf\">\n" - " <parsenode rule=\"bool\">\n" - " <parsenode rule=\"bool\">\n" - " <value>true</value>\n" - " </parsenode>\n" - " </parsenode>\n" - " </parsenode>\n" - " <parsenode rule=\"or\">\n" - " <value>oR</value>\n" - " 
</parsenode>\n" - " <parsenode rule=\"leaf\">\n" - " <parsenode rule=\"bool\">\n" - " <parsenode rule=\"bool\">\n" - " <value>false</value>\n" - " </parsenode>\n" - " </parsenode>\n" - " </parsenode>\n" - " </parsenode>\n" - " </parsenode>\n" - " <parsenode rule=\"and\">\n" - " <value>aNd</value>\n" - " </parsenode>\n" - " <parsenode rule=\"leaf\">\n" - " <parsenode rule=\"bool\">\n" - " <parsenode rule=\"bool\">\n" - " <value>true</value>\n" - " </parsenode>\n" - " </parsenode>\n" - " </parsenode>\n" - " </parsenode>\n" - " </parsenode>\n" - "</parsetree>\n"); - testExpr(repo, bucketIdFactory, - "iddoc or not(notand and ornot)", grammar.use_parser<0>(), - "<parsetree version=\"1.0\">\n" - " <parsenode rule=\"expression\">\n" - " <parsenode rule=\"or\">\n" - " <parsenode rule=\"leaf\">\n" - " <parsenode rule=\"doctype\">\n" - " <parsenode rule=\"doctype\">\n" - " <value>iddoc</value>\n" - " </parsenode>\n" - " </parsenode>\n" - " </parsenode>\n" - " <parsenode rule=\"or\">\n" - " <value>or</value>\n" - " </parsenode>\n" - " <parsenode rule=\"not\">\n" - " <parsenode rule=\"not\">\n" - " <value>not</value>\n" - " </parsenode>\n" - " <parsenode rule=\"group\">\n" - " <parsenode rule=\"and\">\n" - " <parsenode rule=\"leaf\">\n" - " <parsenode rule=\"doctype\">\n" - " <parsenode rule=\"doctype\">\n" - " <value>notand</value>\n" - " </parsenode>\n" - " </parsenode>\n" - " </parsenode>\n" - " <parsenode rule=\"and\">\n" - " <value>and</value>\n" - " </parsenode>\n" - " <parsenode rule=\"leaf\">\n" - " <parsenode rule=\"doctype\">\n" - " <parsenode rule=\"doctype\">\n" - " <value>ornot</value>\n" - " </parsenode>\n" - " </parsenode>\n" - " </parsenode>\n" - " </parsenode>\n" - " </parsenode>\n" - " </parsenode>\n" - " </parsenode>\n" - " </parsenode>\n" - "</parsetree>\n"); - return true; -} - -} - -vespalib::Lock Parser::_G_parseLock; - -unique_ptr<Node> Parser::parse(const vespalib::stringref & s) -{ - - simple::SelectionParser simple(_bucketIdFactory); - if 
(simple.parse(s) && simple.getRemaining().empty()) { - Node::UP tmp(simple.getNode()); - assert(tmp.get() != NULL); - return tmp; - } else { - return fullParse(s); - } -} - -unique_ptr<Node> Parser::fullParse(const vespalib::stringref & s) -{ - static bool haveTested = test(_repo, _bucketIdFactory); if (haveTested) {} - try{ - vespalib::LockGuard guard(_G_parseLock); - DocSelectionGrammar grammar(_repo, _bucketIdFactory); - boost::spirit::classic::tree_parse_info<> info - = pt_parse(&s[0], &s[0]+s.size(), - grammar.use_parser<0>(), boost::spirit::classic::space_p); - if (!info.full) { - vespalib::string unexpected(info.stop); - unsigned int position = s.size() - unexpected.size(); - if (unexpected.size() > 10) { - unexpected = unexpected.substr(0,10); - } - vespalib::asciistream ost; - ost << "Unexpected token at position " << position << " ('" - << unexpected << "') in query '" << s << "',"; - throw ParsingFailedException(ost.str(), VESPA_STRLOC); - } - parse_assert(info.trees.size() == 1); - //printSpiritTree(std::cerr, info.trees, s, grammar); - return parseTree(grammar, info.trees[0]); - } catch (ParsingFailedException& e) { - throw; - } catch (vespalib::Exception& e) { - throw ParsingFailedException("Parsing failed. See cause exception.", - e, VESPA_STRLOC); - } catch (std::exception& e) { - cerr << "Parser::parse() internal error: " - << e.what() << endl; - throw; // Program will abort when this tries to go out.. - } - return unique_ptr<Node>(); } -} // select -} // document diff --git a/document/src/vespa/document/select/parser.h b/document/src/vespa/document/select/parser.h index 4df00d64bf3..35d710298c3 100644 --- a/document/src/vespa/document/select/parser.h +++ b/document/src/vespa/document/select/parser.h @@ -1,37 +1,40 @@ // Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
- #pragma once #include "node.h" +#include "parsing_failed_exception.h" #include <vespa/document/bucket/bucketidfactory.h> -#include <vespa/vespalib/util/exception.h> -#include <vespa/vespalib/util/sync.h> - -namespace document { -class DocumentTypeRepo; - -namespace select { - -VESPA_DEFINE_EXCEPTION(ParsingFailedException, vespalib::Exception); - +#include <vespa/document/repo/documenttyperepo.h> +#include <memory> +#include <string> + +namespace document::select { + +/** + * Document selection parser built around Flex/Bison. O(n) on input size + * and non-locking. + * + * Thread safety: same as a std::vector + */ class Parser { + const DocumentTypeRepo&_doc_type_repo; + const BucketIdFactory& _bucket_id_factory; public: - Parser(const DocumentTypeRepo&, const BucketIdFactory& bucketIdFactory); + Parser(const DocumentTypeRepo& repo, const BucketIdFactory& bucket_id_factory) + : _doc_type_repo(repo), + _bucket_id_factory(bucket_id_factory) + {} /** * Returns a newly allocated AST root node representing the selection * if parsing is successful. Otherwise, ParsingFailedException will be * thrown. + * + * Thread safe, assuming referenced DocumentTypeRepo and BucketIdFactory + * instances are immutable. */ - std::unique_ptr<Node> parse(const vespalib::stringref& s); - -private: - std::unique_ptr<Node> fullParse(const vespalib::stringref& s); - static vespalib::Lock _G_parseLock; - const DocumentTypeRepo& _repo; - const BucketIdFactory& _bucketIdFactory; + std::unique_ptr<Node> parse(const std::string& str) const; }; -} // select -} // parser +} diff --git a/document/src/vespa/document/select/parsing_failed_exception.cpp b/document/src/vespa/document/select/parsing_failed_exception.cpp new file mode 100644 index 00000000000..ce02389ed46 --- /dev/null +++ b/document/src/vespa/document/select/parsing_failed_exception.cpp @@ -0,0 +1,9 @@ +// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+#include "parsing_failed_exception.h" +#include <vespa/document/base/exceptions.h> + +namespace document::select { + +VESPA_IMPLEMENT_EXCEPTION(ParsingFailedException, vespalib::Exception); + +}
\ No newline at end of file diff --git a/document/src/vespa/document/select/parsing_failed_exception.h b/document/src/vespa/document/select/parsing_failed_exception.h new file mode 100644 index 00000000000..54138a492e8 --- /dev/null +++ b/document/src/vespa/document/select/parsing_failed_exception.h @@ -0,0 +1,10 @@ +// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#pragma once + +#include <vespa/vespalib/util/exception.h> + +namespace document::select { + +VESPA_DEFINE_EXCEPTION(ParsingFailedException, vespalib::Exception); + +} diff --git a/document/src/vespa/document/select/scanner.h b/document/src/vespa/document/select/scanner.h new file mode 100644 index 00000000000..5aa9ea1c8d3 --- /dev/null +++ b/document/src/vespa/document/select/scanner.h @@ -0,0 +1,21 @@ +// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#pragma once + +#if !defined(yyFlexLexerOnce) +# include <FlexLexer.h> +#endif + +#include "parser.hxx" +#include "location.hh" +#include <iosfwd> + +namespace document::select { + +class DocSelScanner final : yyFlexLexer { +public: + explicit DocSelScanner(std::istream* in) : yyFlexLexer(in) {} + ~DocSelScanner() override = default; + int yylex(DocSelParser::semantic_type* yylval, DocSelParser::location_type* yyloc); +}; + +} diff --git a/document/src/vespa/document/select/traversingvisitor.cpp b/document/src/vespa/document/select/traversingvisitor.cpp index b8f34540b29..26de6093ddf 100644 --- a/document/src/vespa/document/select/traversingvisitor.cpp +++ b/document/src/vespa/document/select/traversingvisitor.cpp @@ -73,12 +73,6 @@ TraversingVisitor::visitIdValueNode(const IdValueNode &) void -TraversingVisitor::visitSearchColumnValueNode(const SearchColumnValueNode &) -{ -} - - -void TraversingVisitor::visitFieldValueNode(const FieldValueNode &) { } diff --git 
a/document/src/vespa/document/select/traversingvisitor.h b/document/src/vespa/document/select/traversingvisitor.h index 43d10cfcaa2..f8b0377b102 100644 --- a/document/src/vespa/document/select/traversingvisitor.h +++ b/document/src/vespa/document/select/traversingvisitor.h @@ -21,7 +21,6 @@ public: void visitInvalidConstant(const InvalidConstant &) override; void visitDocumentType(const DocType &) override; void visitIdValueNode(const IdValueNode &) override; - void visitSearchColumnValueNode(const SearchColumnValueNode &) override; void visitFieldValueNode(const FieldValueNode &) override; void visitFloatValueNode(const FloatValueNode &) override; void visitVariableValueNode(const VariableValueNode &) override; diff --git a/document/src/vespa/document/select/valuenodes.cpp b/document/src/vespa/document/select/valuenodes.cpp index 479896f9124..837ebd873e3 100644 --- a/document/src/vespa/document/select/valuenodes.cpp +++ b/document/src/vespa/document/select/valuenodes.cpp @@ -15,7 +15,6 @@ #include <iomanip> #include <sys/time.h> - #include <vespa/log/log.h> LOG_SETUP(".document.select.valuenode"); @@ -61,10 +60,7 @@ InvalidValueNode::print(std::ostream& out, bool verbose, if (hadParentheses()) out << ')'; } -NullValueNode::NullValueNode(const vespalib::stringref & name) - : _name(name) -{ } - +NullValueNode::NullValueNode() {} void NullValueNode::visit(Visitor &visitor) const @@ -79,7 +75,7 @@ NullValueNode::print(std::ostream& out, bool verbose, { (void) verbose; (void) indent; if (hadParentheses()) out << '('; - out << _name; + out << "null"; if (hadParentheses()) out << ')'; } @@ -678,88 +674,6 @@ IdValueNode::print(std::ostream& out, bool verbose, if (hadParentheses()) out << ')'; } -SearchColumnValueNode::SearchColumnValueNode( - const BucketIdFactory& bucketIdFactory, - const vespalib::stringref & name, int numColumns) - : _bucketIdFactory(bucketIdFactory), - _id(name), - _numColumns(numColumns), - 
_distribution(std::make_unique<BucketDistribution>(_numColumns, 16)) -{ -} - -int64_t -SearchColumnValueNode::getValue(const BucketId& id) const -{ - return _distribution->getColumn(id); -} - - -std::unique_ptr<Value> -SearchColumnValueNode::getValue(const Context& context) const -{ - if (context._doc != NULL) { - return getValue(context._doc->getId()); - } else if (context._docId != NULL) { - return getValue(*context._docId); - } else { - return getValue(context._docUpdate->getId()); - } -} - - -std::unique_ptr<Value> -SearchColumnValueNode::getValue(const DocumentId& id) const -{ - return std::unique_ptr<Value>(new IntegerValue( - getValue(_bucketIdFactory.getBucketId(id)), false)); -} - - -std::unique_ptr<Value> -SearchColumnValueNode::traceValue(const Context& context, - std::ostream &out) const -{ - if (context._doc != NULL) { - return traceValue(context._doc->getId(), out); - } else if (context._docId != NULL) { - return traceValue(*context._docId, out); - } else { - return traceValue(context._docUpdate->getId(), out); - } -} - - -std::unique_ptr<Value> -SearchColumnValueNode::traceValue(const DocumentId& id, - std::ostream& out) const -{ - std::unique_ptr<Value> result(new IntegerValue( - getValue(_bucketIdFactory.getBucketId(id)), false)); - out << "Resolved search column of doc \"" << id << "\" to " << *result - << "\n"; - return result; -} - - -void -SearchColumnValueNode::visit(Visitor &visitor) const -{ - visitor.visitSearchColumnValueNode(*this); -} - - -void -SearchColumnValueNode::print(std::ostream& out, bool verbose, - const std::string& indent) const -{ - (void) verbose; (void) indent; - if (hadParentheses()) out << '('; - out << _id; - out << '.' 
<< _numColumns; - if (hadParentheses()) out << ')'; -} - namespace { union HashUnion { unsigned char _key[16]; @@ -1176,5 +1090,45 @@ ArithmeticValueNode::print(std::ostream& out, bool verbose, if (hadParentheses()) out << ')'; } +FieldExprNode::~FieldExprNode() = default; + +std::unique_ptr<FieldValueNode> FieldExprNode::convert_to_field_value() const { + const auto& doctype = resolve_doctype(); + // FIXME deprecate manual post-parsing of field expressions in favor of + // actually using the structural parser in the way nature intended. + vespalib::string mangled_expression; + build_mangled_expression(mangled_expression); + return std::make_unique<FieldValueNode>(doctype, mangled_expression); +} + +std::unique_ptr<FunctionValueNode> FieldExprNode::convert_to_function_call() const { + // Right hand expr string contains function call, lhs contains field spec on which + // the function is to be invoked. + if ((_left_expr == nullptr) || (_left_expr->_left_expr == nullptr)) { + throw vespalib::IllegalArgumentException( + vespalib::make_string("Cannot call function '%s' directly on document type", _right_expr.c_str())); + } + auto lhs = _left_expr->convert_to_field_value(); + const auto& function_name = _right_expr; + return std::make_unique<FunctionValueNode>(function_name, std::move(lhs)); +} + +void FieldExprNode::build_mangled_expression(vespalib::string& dest) const { + // Leftmost node is doctype, which should not be emitted as part of mangled expression. 
+ if (_left_expr && _left_expr->_left_expr) { + _left_expr->build_mangled_expression(dest); + dest.push_back('.'); + } + dest.append(_right_expr); +} + +const vespalib::string& FieldExprNode::resolve_doctype() const { + const auto* leftmost = this; + while (leftmost->_left_expr) { + leftmost = leftmost->_left_expr.get(); + } + return leftmost->_right_expr; +} + } diff --git a/document/src/vespa/document/select/valuenodes.h b/document/src/vespa/document/select/valuenodes.h index 0464159b85f..bc1ec0e01e8 100644 --- a/document/src/vespa/document/select/valuenodes.h +++ b/document/src/vespa/document/select/valuenodes.h @@ -35,9 +35,8 @@ public: class NullValueNode : public ValueNode { - vespalib::string _name; public: - NullValueNode(const vespalib::stringref & name); + NullValueNode(); std::unique_ptr<Value> getValue(const Context&) const override { return std::unique_ptr<Value>(new NullValue()); @@ -48,7 +47,7 @@ public: void visit(Visitor& visitor) const override; ValueNode::UP clone() const override { - return wrapParens(new NullValueNode(_name)); + return wrapParens(new NullValueNode()); } }; @@ -56,7 +55,7 @@ class StringValueNode : public ValueNode { vespalib::string _value; public: - StringValueNode(const vespalib::stringref & val); + explicit StringValueNode(const vespalib::stringref & val); const vespalib::string& getValue() const { return _value; } @@ -115,6 +114,7 @@ class VariableValueNode : public ValueNode { vespalib::string _value; public: + // TODO stringref VariableValueNode(const vespalib::string & variableName) : _value(variableName) {} const vespalib::string& getVariableName() const { return _value; } @@ -183,6 +183,59 @@ private: void initFieldPath(const DocumentType&) const; }; +class FunctionValueNode; + +// Only used by the parser to build a partial field expression. Never part of +// an AST tree returned to the caller. 
+class FieldExprNode final : public ValueNode { + std::unique_ptr<FieldExprNode> _left_expr; + vespalib::string _right_expr; +public: + explicit FieldExprNode(const vespalib::string& doctype) : _left_expr(), _right_expr(doctype) {} + FieldExprNode(std::unique_ptr<FieldExprNode> left_expr, vespalib::stringref right_expr) + : _left_expr(std::move(left_expr)), _right_expr(right_expr) + {} + FieldExprNode(const FieldExprNode &) = delete; + FieldExprNode & operator = (const FieldExprNode &) = delete; + FieldExprNode(FieldExprNode &&) = default; + FieldExprNode & operator = (FieldExprNode &&) = default; + ~FieldExprNode(); + + std::unique_ptr<FieldValueNode> convert_to_field_value() const; + std::unique_ptr<FunctionValueNode> convert_to_function_call() const; +private: + void build_mangled_expression(vespalib::string& dest) const; + const vespalib::string& resolve_doctype() const; + + // These are not used, can just return dummy values. + std::unique_ptr<Value> getValue(const Context& context) const override { + (void) context; + return std::unique_ptr<Value>(); + } + std::unique_ptr<Value> traceValue(const Context &context, std::ostream& out) const override { + (void) context; + (void) out; + return std::unique_ptr<Value>(); + } + void print(std::ostream& out, bool verbose, const std::string& indent) const override { + (void) out; + (void) verbose; + (void) indent; + } + void visit(Visitor& visitor) const override { + (void) visitor; + } + + ValueNode::UP clone() const override { + if (_left_expr) { + return wrapParens(new FieldExprNode(std::unique_ptr<FieldExprNode>( + static_cast<FieldExprNode*>(_left_expr->clone().release())), _right_expr)); + } else { + return wrapParens(new FieldExprNode(_right_expr)); + } + } +}; + class IdValueNode : public ValueNode { public: @@ -222,35 +275,6 @@ private: int _divisionBits; }; -class SearchColumnValueNode : public ValueNode -{ -public: - SearchColumnValueNode(const BucketIdFactory& bucketIdFactory, - const vespalib::stringref & 
name, - int numColumns); - - int getColumns() { return _numColumns; } - - std::unique_ptr<Value> getValue(const Context& context) const override; - std::unique_ptr<Value> getValue(const DocumentId& id) const; - std::unique_ptr<Value> traceValue(const Context& context, std::ostream &out) const override; - std::unique_ptr<Value> traceValue(const DocumentId& val, std::ostream& out) const; - - int64_t getValue(const BucketId& bucketId) const; - void print(std::ostream& out, bool verbose, const std::string& indent) const override; - void visit(Visitor& visitor) const override; - - ValueNode::UP clone() const override { - return wrapParens(new SearchColumnValueNode(_bucketIdFactory, _id, _numColumns)); -} - -private: - const BucketIdFactory& _bucketIdFactory; - vespalib::string _id; - int _numColumns; - std::unique_ptr<BucketDistribution> _distribution; -}; - class FunctionValueNode : public ValueNode { public: diff --git a/document/src/vespa/document/select/visitor.h b/document/src/vespa/document/select/visitor.h index c89f0f24a6f..762d47c7c35 100644 --- a/document/src/vespa/document/select/visitor.h +++ b/document/src/vespa/document/select/visitor.h @@ -70,9 +70,6 @@ public: visitIdValueNode(const IdValueNode &) = 0; virtual void - visitSearchColumnValueNode(const SearchColumnValueNode &) = 0; - - virtual void visitFieldValueNode(const FieldValueNode &) = 0; virtual void |