diff options
author | Henning Baldersheim <balder@yahoo-inc.com> | 2021-03-28 22:07:37 +0000 |
---|---|---|
committer | Henning Baldersheim <balder@yahoo-inc.com> | 2021-03-29 13:56:56 +0000 |
commit | d4712740d1281fa3a2fa945dfd3ea4c2182d663c (patch) | |
tree | 1ace22ed894ed094bbb5c1725899124e325781f0 /searchlib | |
parent | fef814ec263ce1ceca0416251b3204f43ee3ed30 (diff) |
- Let DotProduct, Wand and WeightedSet be Term nodes in the query tree, as that is what they really are.
That restricts the nodes to what they can really do and makes them significantly cheaper.
- In addition, type conversion of numeric terms is delayed until it is necessary.
As a next step, these conversions can be avoided completely.
Diffstat (limited to 'searchlib')
25 files changed, 406 insertions, 292 deletions
diff --git a/searchlib/src/tests/attribute/searchable/attribute_searchable_adapter_test.cpp b/searchlib/src/tests/attribute/searchable/attribute_searchable_adapter_test.cpp index 04b16a73029..29ef4e9e6ef 100644 --- a/searchlib/src/tests/attribute/searchable/attribute_searchable_adapter_test.cpp +++ b/searchlib/src/tests/attribute/searchable/attribute_searchable_adapter_test.cpp @@ -425,11 +425,11 @@ TEST("require that attribute dot product works") { bool fast_search = ((i & 0x1) != 0); bool strict = ((i & 0x2) != 0); MyAttributeManager attribute_manager = make_weighted_string_attribute_manager(fast_search); - SimpleDotProduct node(field, 0, Weight(1)); - node.append(Node::UP(new SimpleStringTerm("foo", "", 0, Weight(1)))); - node.append(Node::UP(new SimpleStringTerm("bar", "", 0, Weight(1)))); - node.append(Node::UP(new SimpleStringTerm("baz", "", 0, Weight(1)))); - node.append(Node::UP(new SimpleStringTerm("fox", "", 0, Weight(1)))); + SimpleDotProduct node(4, field, 0, Weight(1)); + node.addTerm("foo", Weight(1)); + node.addTerm("bar", Weight(1)); + node.addTerm("baz", Weight(1)); + node.addTerm("fox", Weight(1)); Result result = do_search(attribute_manager, node, strict); ASSERT_EQUAL(5u, result.hits.size()); if (fast_search) { @@ -457,11 +457,11 @@ TEST("require that attribute dot product can produce no hits") { bool fast_search = ((i & 0x1) != 0); bool strict = ((i & 0x2) != 0); MyAttributeManager attribute_manager = make_weighted_string_attribute_manager(fast_search); - SimpleDotProduct node(field, 0, Weight(1)); - node.append(Node::UP(new SimpleStringTerm("notfoo", "", 0, Weight(1)))); - node.append(Node::UP(new SimpleStringTerm("notbar", "", 0, Weight(1)))); - node.append(Node::UP(new SimpleStringTerm("notbaz", "", 0, Weight(1)))); - node.append(Node::UP(new SimpleStringTerm("notfox", "", 0, Weight(1)))); + SimpleDotProduct node(4, field, 0, Weight(1)); + node.addTerm("notfoo", Weight(1)); + node.addTerm("notbar", Weight(1)); + node.addTerm("notbaz", 
Weight(1)); + node.addTerm("notfox", Weight(1)); Result result = do_search(attribute_manager, node, strict); ASSERT_EQUAL(0u, result.hits.size()); EXPECT_EQUAL(0u, result.est_hits); @@ -525,11 +525,11 @@ TEST("require that attribute parallel wand works") { bool fast_search = ((i & 0x1) != 0); bool strict = ((i & 0x2) != 0); MyAttributeManager attribute_manager = make_weighted_string_attribute_manager(fast_search); - SimpleWandTerm node(field, 0, Weight(1), 10, 500, 1.5); - node.append(Node::UP(new SimpleStringTerm("foo", "", 0, Weight(1)))); - node.append(Node::UP(new SimpleStringTerm("bar", "", 0, Weight(1)))); - node.append(Node::UP(new SimpleStringTerm("baz", "", 0, Weight(1)))); - node.append(Node::UP(new SimpleStringTerm("fox", "", 0, Weight(1)))); + SimpleWandTerm node(4, field, 0, Weight(1), 10, 500, 1.5); + node.addTerm("foo", Weight(1)); + node.addTerm("bar", Weight(1)); + node.addTerm("baz", Weight(1)); + node.addTerm("fox", Weight(1)); Result result = do_search(attribute_manager, node, strict); EXPECT_FALSE(result.est_empty); if (fast_search) { @@ -561,11 +561,11 @@ TEST("require that attribute weighted set term works") { bool fast_search = ((i & 0x1) != 0); bool strict = ((i & 0x2) != 0); MyAttributeManager attribute_manager = make_weighted_string_attribute_manager(fast_search); - SimpleWeightedSetTerm node(field, 0, Weight(1)); - node.append(Node::UP(new SimpleStringTerm("foo", "", 0, Weight(10)))); - node.append(Node::UP(new SimpleStringTerm("bar", "", 0, Weight(20)))); - node.append(Node::UP(new SimpleStringTerm("baz", "", 0, Weight(30)))); - node.append(Node::UP(new SimpleStringTerm("fox", "", 0, Weight(40)))); + SimpleWeightedSetTerm node(4, field, 0, Weight(1)); + node.addTerm("foo", Weight(10)); + node.addTerm("bar", Weight(20)); + node.addTerm("baz", Weight(30)); + node.addTerm("fox", Weight(40)); Result result = do_search(attribute_manager, node, strict); EXPECT_FALSE(result.est_empty); ASSERT_EQUAL(5u, result.hits.size()); diff --git 
a/searchlib/src/tests/attribute/searchable/attribute_weighted_set_blueprint_test.cpp b/searchlib/src/tests/attribute/searchable/attribute_weighted_set_blueprint_test.cpp index fe5014b6607..328cdcf663f 100644 --- a/searchlib/src/tests/attribute/searchable/attribute_weighted_set_blueprint_test.cpp +++ b/searchlib/src/tests/attribute/searchable/attribute_weighted_set_blueprint_test.cpp @@ -89,9 +89,9 @@ struct WS { } Node::UP createNode() const { - SimpleWeightedSetTerm *node = new SimpleWeightedSetTerm("view", 0, Weight(0)); + SimpleWeightedSetTerm *node = new SimpleWeightedSetTerm(tokens.size(), "view", 0, Weight(0)); for (size_t i = 0; i < tokens.size(); ++i) { - node->append(Node::UP(new SimpleStringTerm(tokens[i].first, "view", 0, Weight(tokens[i].second)))); + node->addTerm(tokens[i].first, Weight(tokens[i].second)); } return Node::UP(node); } @@ -138,42 +138,30 @@ struct WS { } // namespace <unnamed> -class Test : public vespalib::TestApp -{ -public: - int Main() override; -}; - -int -Test::Main() -{ - TEST_INIT("attribute_weighted_set_test"); - { - MockAttributeManager manager; - setupAttributeManager(manager); - AttributeBlueprintFactory adapter; - - FakeResult expect = FakeResult() - .doc(3).elem(0).weight(30).pos(0) - .doc(5).elem(0).weight(50).pos(0) - .doc(7).elem(0).weight(70).pos(0); - WS ws = WS(manager).add("7", 70).add("5", 50).add("3", 30); - - EXPECT_TRUE(ws.isGenericSearch(adapter, "integer", true)); - EXPECT_TRUE(!ws.isGenericSearch(adapter, "integer", false)); - EXPECT_TRUE(ws.isGenericSearch(adapter, "string", true)); - EXPECT_TRUE(!ws.isGenericSearch(adapter, "string", false)); - EXPECT_TRUE(ws.isGenericSearch(adapter, "multi", true)); - EXPECT_TRUE(ws.isGenericSearch(adapter, "multi", false)); - - EXPECT_EQUAL(expect, ws.search(adapter, "integer", true)); - EXPECT_EQUAL(expect, ws.search(adapter, "integer", false)); - EXPECT_EQUAL(expect, ws.search(adapter, "string", true)); - EXPECT_EQUAL(expect, ws.search(adapter, "string", false)); - 
EXPECT_EQUAL(expect, ws.search(adapter, "multi", true)); - EXPECT_EQUAL(expect, ws.search(adapter, "multi", false)); - } - TEST_DONE(); +TEST("attribute_weighted_set_test") { + MockAttributeManager manager; + setupAttributeManager(manager); + AttributeBlueprintFactory adapter; + + FakeResult expect = FakeResult() + .doc(3).elem(0).weight(30).pos(0) + .doc(5).elem(0).weight(50).pos(0) + .doc(7).elem(0).weight(70).pos(0); + WS ws = WS(manager).add("7", 70).add("5", 50).add("3", 30); + + EXPECT_TRUE(ws.isGenericSearch(adapter, "integer", true)); + EXPECT_TRUE(!ws.isGenericSearch(adapter, "integer", false)); + EXPECT_TRUE(ws.isGenericSearch(adapter, "string", true)); + EXPECT_TRUE(!ws.isGenericSearch(adapter, "string", false)); + EXPECT_TRUE(ws.isGenericSearch(adapter, "multi", true)); + EXPECT_TRUE(ws.isGenericSearch(adapter, "multi", false)); + + EXPECT_EQUAL(expect, ws.search(adapter, "integer", true)); + EXPECT_EQUAL(expect, ws.search(adapter, "integer", false)); + EXPECT_EQUAL(expect, ws.search(adapter, "string", true)); + EXPECT_EQUAL(expect, ws.search(adapter, "string", false)); + EXPECT_EQUAL(expect, ws.search(adapter, "multi", true)); + EXPECT_EQUAL(expect, ws.search(adapter, "multi", false)); } -TEST_APPHOOK(Test); +TEST_MAIN() { TEST_RUN_ALL(); } diff --git a/searchlib/src/tests/query/customtypevisitor_test.cpp b/searchlib/src/tests/query/customtypevisitor_test.cpp index 3f7d57b7aa4..d0812a00ebf 100644 --- a/searchlib/src/tests/query/customtypevisitor_test.cpp +++ b/searchlib/src/tests/query/customtypevisitor_test.cpp @@ -15,18 +15,6 @@ using namespace search::query; namespace { -class Test : public vespalib::TestApp { - const char *current_state; - virtual void DumpState(bool) { - fprintf(stderr, "%s: ERROR: in %s\n", GetName(), current_state); - } - - template <class T> void requireThatNodeIsVisited(); - -public: - int Main() override; -}; - template <class Base> struct InitTerm : Base { InitTerm() : Base(typename Base::Type(), "view", 0, Weight(0)) {} @@ 
-49,9 +37,9 @@ struct MyStringTerm : InitTerm<StringTerm> {}; struct MySubstrTerm : InitTerm<SubstringTerm> {}; struct MySuffixTerm : InitTerm<SuffixTerm> {}; struct MyWeakAnd : WeakAnd { MyWeakAnd() : WeakAnd(1234, "view") {} }; -struct MyWeightedSetTerm : WeightedSetTerm { MyWeightedSetTerm() : WeightedSetTerm("view", 0, Weight(42)) {} }; -struct MyDotProduct : DotProduct { MyDotProduct() : DotProduct("view", 0, Weight(42)) {} }; -struct MyWandTerm : WandTerm { MyWandTerm() : WandTerm("view", 0, Weight(42), 57, 67, 77.7) {} }; +struct MyWeightedSetTerm : WeightedSetTerm { MyWeightedSetTerm() : WeightedSetTerm(0, "view", 0, Weight(42)) {} }; +struct MyDotProduct : DotProduct { MyDotProduct() : DotProduct(0, "view", 0, Weight(42)) {} }; +struct MyWandTerm : WandTerm { MyWandTerm() : WandTerm(0, "view", 0, Weight(42), 57, 67, 77.7) {} }; struct MyPredicateQuery : InitTerm<PredicateQuery> {}; struct MyRegExpTerm : InitTerm<RegExpTerm> {}; struct MyNearestNeighborTerm : NearestNeighborTerm {}; @@ -119,7 +107,7 @@ public: }; template <class T> -void Test::requireThatNodeIsVisited() { +void requireThatNodeIsVisited() { MyCustomVisitor visitor; Node::UP query(new T); visitor.isVisited<T>() = false; @@ -127,37 +115,28 @@ void Test::requireThatNodeIsVisited() { ASSERT_TRUE(visitor.isVisited<T>()); } -#define TEST_CALL(func) \ - current_state = #func; \ - func(); - -int -Test::Main() -{ - TEST_INIT("customtypevisitor_test"); - - TEST_CALL(requireThatNodeIsVisited<MyAnd>); - TEST_CALL(requireThatNodeIsVisited<MyAndNot>); - TEST_CALL(requireThatNodeIsVisited<MyNear>); - TEST_CALL(requireThatNodeIsVisited<MyONear>); - TEST_CALL(requireThatNodeIsVisited<MyOr>); - TEST_CALL(requireThatNodeIsVisited<MyPhrase>); - TEST_CALL(requireThatNodeIsVisited<MySameElement>); - TEST_CALL(requireThatNodeIsVisited<MyRangeTerm>); - TEST_CALL(requireThatNodeIsVisited<MyRank>); - TEST_CALL(requireThatNodeIsVisited<MyNumberTerm>); - TEST_CALL(requireThatNodeIsVisited<MyPrefixTerm>); - 
TEST_CALL(requireThatNodeIsVisited<MyStringTerm>); - TEST_CALL(requireThatNodeIsVisited<MySubstrTerm>); - TEST_CALL(requireThatNodeIsVisited<MySuffixTerm>); - TEST_CALL(requireThatNodeIsVisited<MyWeightedSetTerm>); - TEST_CALL(requireThatNodeIsVisited<MyDotProduct>); - TEST_CALL(requireThatNodeIsVisited<MyWandTerm>); - TEST_CALL(requireThatNodeIsVisited<MyPredicateQuery>); - TEST_CALL(requireThatNodeIsVisited<MyRegExpTerm>); - - TEST_DONE(); +TEST("customtypevisitor_test") { + + requireThatNodeIsVisited<MyAnd>(); + requireThatNodeIsVisited<MyAndNot>(); + requireThatNodeIsVisited<MyNear>(); + requireThatNodeIsVisited<MyONear>(); + requireThatNodeIsVisited<MyOr>(); + requireThatNodeIsVisited<MyPhrase>(); + requireThatNodeIsVisited<MySameElement>(); + requireThatNodeIsVisited<MyRangeTerm>(); + requireThatNodeIsVisited<MyRank>(); + requireThatNodeIsVisited<MyNumberTerm>(); + requireThatNodeIsVisited<MyPrefixTerm>(); + requireThatNodeIsVisited<MyStringTerm>(); + requireThatNodeIsVisited<MySubstrTerm>(); + requireThatNodeIsVisited<MySuffixTerm>(); + requireThatNodeIsVisited<MyWeightedSetTerm>(); + requireThatNodeIsVisited<MyDotProduct>(); + requireThatNodeIsVisited<MyWandTerm>(); + requireThatNodeIsVisited<MyPredicateQuery>(); + requireThatNodeIsVisited<MyRegExpTerm>(); } } // namespace -TEST_APPHOOK(Test); +TEST_MAIN() { TEST_RUN_ALL(); }
\ No newline at end of file diff --git a/searchlib/src/tests/query/query_visitor_test.cpp b/searchlib/src/tests/query/query_visitor_test.cpp index 946ad17352d..ef255ad6878 100644 --- a/searchlib/src/tests/query/query_visitor_test.cpp +++ b/searchlib/src/tests/query/query_visitor_test.cpp @@ -15,25 +15,6 @@ using namespace search::query; namespace { -class Test : public vespalib::TestApp { - void requireThatAllNodesCanBeVisited(); - - template <class T> void checkVisit(T *node); - -public: - int Main() override; -}; - -int -Test::Main() -{ - TEST_INIT("query_visitor_test"); - - TEST_DO(requireThatAllNodesCanBeVisited()); - - TEST_DONE(); -} - class MyVisitor : public QueryVisitor { public: @@ -69,7 +50,7 @@ public: }; template <class T> -void Test::checkVisit(T *node) { +void checkVisit(T *node) { Node::UP query(node); MyVisitor visitor; visitor.isVisited<T>() = false; @@ -77,7 +58,7 @@ void Test::checkVisit(T *node) { ASSERT_TRUE(visitor.isVisited<T>()); } -void Test::requireThatAllNodesCanBeVisited() { +TEST("requireThatAllNodesCanBeVisited") { checkVisit<And>(new SimpleAnd); checkVisit<AndNot>(new SimpleAndNot); checkVisit<Near>(new SimpleNear(0)); @@ -85,9 +66,9 @@ void Test::requireThatAllNodesCanBeVisited() { checkVisit<Or>(new SimpleOr); checkVisit<Phrase>(new SimplePhrase("field", 0, Weight(42))); checkVisit<SameElement>(new SimpleSameElement("field")); - checkVisit<WeightedSetTerm>(new SimpleWeightedSetTerm("field", 0, Weight(42))); - checkVisit<DotProduct>(new SimpleDotProduct("field", 0, Weight(42))); - checkVisit<WandTerm>(new SimpleWandTerm("field", 0, Weight(42), 57, 67, 77.7)); + checkVisit<WeightedSetTerm>(new SimpleWeightedSetTerm(0, "field", 0, Weight(42))); + checkVisit<DotProduct>(new SimpleDotProduct(0, "field", 0, Weight(42))); + checkVisit<WandTerm>(new SimpleWandTerm(0, "field", 0, Weight(42), 57, 67, 77.7)); checkVisit<Rank>(new SimpleRank); checkVisit<NumberTerm>(new SimpleNumberTerm("0.42", "field", 0, Weight(0))); const Location 
location(Point{10, 10}, 20, 0); @@ -104,4 +85,4 @@ void Test::requireThatAllNodesCanBeVisited() { } // namespace -TEST_APPHOOK(Test); +TEST_MAIN() { TEST_RUN_ALL(); } diff --git a/searchlib/src/tests/query/querybuilder_test.cpp b/searchlib/src/tests/query/querybuilder_test.cpp index 055d245e420..67b45d43406 100644 --- a/searchlib/src/tests/query/querybuilder_test.cpp +++ b/searchlib/src/tests/query/querybuilder_test.cpp @@ -90,18 +90,17 @@ Node::UP createQueryTree() { builder.addStringTerm(str[4], view[4], id[4], weight[4]); builder.addStringTerm(str[5], view[5], id[5], weight[5]); } - builder.addPredicateQuery(getPredicateQueryTerm(), - view[3], id[3], weight[3]); - builder.addDotProduct(3, view[2], id[2], weight[2]); + builder.addPredicateQuery(getPredicateQueryTerm(), view[3], id[3], weight[3]); { - builder.addStringTerm(str[3], view[3], id[3], weight[3]); - builder.addStringTerm(str[4], view[4], id[4], weight[4]); - builder.addStringTerm(str[5], view[5], id[5], weight[5]); + auto & n = builder.addDotProduct(3, view[2], id[2], weight[2]); + n.addTerm(str[3], weight[3]); + n.addTerm(str[4], weight[4]); + n.addTerm(str[5], weight[5]); } - builder.addWandTerm(2, view[0], id[0], weight[0], 57, 67, 77.7); { - builder.addStringTerm(str[1], view[1], id[1], weight[1]); - builder.addStringTerm(str[2], view[2], id[2], weight[2]); + auto & n = builder.addWandTerm(2, view[0], id[0], weight[0], 57, 67, 77.7); + n.addTerm(str[1], weight[1]); + n.addTerm(str[2], weight[2]); } builder.addRegExpTerm(str[5], view[5], id[5], weight[5]); builder.addSameElement(3, view[4]); @@ -246,23 +245,33 @@ void checkQueryTreeTypes(Node *node) { EXPECT_TRUE(checkTerm(predicateQuery, getPredicateQueryTerm(), view[3], id[3], weight[3])); auto* dotProduct = as_node<DotProduct>(and_node->getChildren()[6]); - EXPECT_EQUAL(3u, dotProduct->getChildren().size()); - string_term = as_node<StringTerm>(dotProduct->getChildren()[0]); - EXPECT_TRUE(checkTerm(string_term, str[3], view[3], id[3], weight[3])); 
- string_term = as_node<StringTerm>(dotProduct->getChildren()[1]); - EXPECT_TRUE(checkTerm(string_term, str[4], view[4], id[4], weight[4])); - string_term = as_node<StringTerm>(dotProduct->getChildren()[2]); - EXPECT_TRUE(checkTerm(string_term, str[5], view[5], id[5], weight[5])); + EXPECT_EQUAL(3u, dotProduct->getNumTerms()); + + { + const auto &w1 = dotProduct->getAsString(0); + EXPECT_EQUAL(w1.first, str[3]); + EXPECT_TRUE(w1.second == weight[3]); + const auto &w2 = dotProduct->getAsString(1); + EXPECT_EQUAL(w2.first, str[4]); + EXPECT_TRUE(w2.second == weight[4]); + const auto &w3 = dotProduct->getAsString(2); + EXPECT_EQUAL(w3.first, str[5]); + EXPECT_TRUE(w3.second == weight[5]); + } auto* wandTerm = as_node<WandTerm>(and_node->getChildren()[7]); EXPECT_EQUAL(57u, wandTerm->getTargetNumHits()); EXPECT_EQUAL(67, wandTerm->getScoreThreshold()); EXPECT_EQUAL(77.7, wandTerm->getThresholdBoostFactor()); - EXPECT_EQUAL(2u, wandTerm->getChildren().size()); - string_term = as_node<StringTerm>(wandTerm->getChildren()[0]); - EXPECT_TRUE(checkTerm(string_term, str[1], view[1], id[1], weight[1])); - string_term = as_node<StringTerm>(wandTerm->getChildren()[1]); - EXPECT_TRUE(checkTerm(string_term, str[2], view[2], id[2], weight[2])); + EXPECT_EQUAL(2u, wandTerm->getNumTerms()); + { + const auto &w1 = wandTerm->getAsString(0); + EXPECT_EQUAL(w1.first, str[1]); + EXPECT_TRUE(w1.second == weight[1]); + const auto &w2 = wandTerm->getAsString(1); + EXPECT_EQUAL(w2.first, str[2]); + EXPECT_TRUE(w2.second == weight[2]); + } auto* regexp_term = as_node<RegExpTerm>(and_node->getChildren()[8]); EXPECT_TRUE(checkTerm(regexp_term, str[5], view[5], id[5], weight[5])); @@ -336,15 +345,15 @@ struct MyPhrase : Phrase { MyPhrase(const string &f, int32_t i, Weight w) : Phra struct MySameElement : SameElement { MySameElement(const string &f) : SameElement(f) {}}; struct MyWeightedSetTerm : WeightedSetTerm { - MyWeightedSetTerm(const string &f, int32_t i, Weight w) : WeightedSetTerm(f, i, 
w) {} + MyWeightedSetTerm(uint32_t n, const string &f, int32_t i, Weight w) : WeightedSetTerm(n, f, i, w) {} }; struct MyDotProduct : DotProduct { - MyDotProduct(const string &f, int32_t i, Weight w) : DotProduct(f, i, w) {} + MyDotProduct(uint32_t n, const string &f, int32_t i, Weight w) : DotProduct(n, f, i, w) {} }; struct MyWandTerm : WandTerm { - MyWandTerm(const string &f, int32_t i, Weight w, uint32_t targetNumHits, + MyWandTerm(uint32_t n, const string &f, int32_t i, Weight w, uint32_t targetNumHits, int64_t scoreThreshold, double thresholdBoostFactor) - : WandTerm(f, i, w, targetNumHits, scoreThreshold, thresholdBoostFactor) {} + : WandTerm(n, f, i, w, targetNumHits, scoreThreshold, thresholdBoostFactor) {} }; struct MyRank : Rank {}; struct MyNumberTerm : NumberTerm { @@ -593,7 +602,7 @@ TEST("require that empty intermediate node can be added") { } TEST("control size of SimpleQueryStackDumpIterator") { - EXPECT_EQUAL(144u, sizeof(SimpleQueryStackDumpIterator)); + EXPECT_EQUAL(128u, sizeof(SimpleQueryStackDumpIterator)); } TEST("test query parsing error") { diff --git a/searchlib/src/tests/queryeval/dot_product/dot_product_test.cpp b/searchlib/src/tests/queryeval/dot_product/dot_product_test.cpp index 7414e8b10f2..a05dcc4c6ea 100644 --- a/searchlib/src/tests/queryeval/dot_product/dot_product_test.cpp +++ b/searchlib/src/tests/queryeval/dot_product/dot_product_test.cpp @@ -45,9 +45,9 @@ struct DP { } Node::UP createNode() const { - SimpleDotProduct *node = new SimpleDotProduct("view", 0, Weight(0)); + SimpleDotProduct *node = new SimpleDotProduct(tokens.size(), "view", 0, Weight(0)); for (size_t i = 0; i < tokens.size(); ++i) { - node->append(Node::UP(new SimpleStringTerm(tokens[i].first, "view", 0, Weight(tokens[i].second)))); + node->addTerm(tokens[i].first, Weight(tokens[i].second)); } return Node::UP(node); } diff --git a/searchlib/src/tests/queryeval/fake_searchable/fake_searchable_test.cpp 
b/searchlib/src/tests/queryeval/fake_searchable/fake_searchable_test.cpp index cbad6de25bb..167fc706f4d 100644 --- a/searchlib/src/tests/queryeval/fake_searchable/fake_searchable_test.cpp +++ b/searchlib/src/tests/queryeval/fake_searchable/fake_searchable_test.cpp @@ -156,9 +156,9 @@ TEST_F(FakeSearchableTest, require_that_weigheted_set_search_works) { source.addResult("fieldfoo", "friend3", FakeResult().doc(5)); - SimpleWeightedSetTerm weightedSet("fieldfoo", 1, w); - weightedSet.append(Node::UP(new SimpleStringTerm("friend1", "fieldfoo", 2, Weight(1)))); - weightedSet.append(Node::UP(new SimpleStringTerm("friend2", "fieldfoo", 3, Weight(2)))); + SimpleWeightedSetTerm weightedSet(2, "fieldfoo", 1, w); + weightedSet.addTerm("friend1", Weight(1)); + weightedSet.addTerm("friend2", Weight(2)); FieldSpecList fields; fields.add(FieldSpec("fieldfoo", 1, 1)); diff --git a/searchlib/src/tests/queryeval/getnodeweight/getnodeweight_test.cpp b/searchlib/src/tests/queryeval/getnodeweight/getnodeweight_test.cpp index 359a7f9c074..24253469dfc 100644 --- a/searchlib/src/tests/queryeval/getnodeweight/getnodeweight_test.cpp +++ b/searchlib/src/tests/queryeval/getnodeweight/getnodeweight_test.cpp @@ -34,9 +34,9 @@ TEST("test variations of getWeight") EXPECT_EQUAL(42, getWeight(SimpleStringTerm("foo", "bar", 1, Weight(42)))); EXPECT_EQUAL(42, getWeight(SimpleSubstringTerm("foo", "bar", 1, Weight(42)))); EXPECT_EQUAL(42, getWeight(SimpleSuffixTerm("foo", "bar", 1, Weight(42)))); - EXPECT_EQUAL(42, getWeight(SimpleWeightedSetTerm("bar", 1, Weight(42)))); - EXPECT_EQUAL(42, getWeight(SimpleDotProduct("bar", 1, Weight(42)))); - EXPECT_EQUAL(42, getWeight(SimpleWandTerm("bar", 1, Weight(42), 57, 67, 77.7))); + EXPECT_EQUAL(42, getWeight(SimpleWeightedSetTerm(0, "bar", 1, Weight(42)))); + EXPECT_EQUAL(42, getWeight(SimpleDotProduct(0, "bar", 1, Weight(42)))); + EXPECT_EQUAL(42, getWeight(SimpleWandTerm(0, "bar", 1, Weight(42), 57, 67, 77.7))); } TEST_MAIN() { TEST_RUN_ALL(); } diff --git 
a/searchlib/src/tests/queryeval/parallel_weak_and/parallel_weak_and_test.cpp b/searchlib/src/tests/queryeval/parallel_weak_and/parallel_weak_and_test.cpp index f2c02d02080..b820a96fab6 100644 --- a/searchlib/src/tests/queryeval/parallel_weak_and/parallel_weak_and_test.cpp +++ b/searchlib/src/tests/queryeval/parallel_weak_and/parallel_weak_and_test.cpp @@ -158,11 +158,10 @@ struct WandBlueprintSpec Node::UP createNode(uint32_t scoresToTrack = 100, score_t scoreThreshold = 0, double thresholdBoostFactor = 1) const { - SimpleWandTerm *node = new SimpleWandTerm("view", 0, Weight(0), + SimpleWandTerm *node = new SimpleWandTerm(tokens.size(), "view", 0, Weight(0), scoresToTrack, scoreThreshold, thresholdBoostFactor); for (size_t i = 0; i < tokens.size(); ++i) { - node->append(Node::UP(new SimpleStringTerm(tokens[i].first, "view", 0, - Weight(tokens[i].second)))); + node->addTerm(tokens[i].first, Weight(tokens[i].second)); } return Node::UP(node); } diff --git a/searchlib/src/tests/queryeval/weighted_set_term/weighted_set_term_test.cpp b/searchlib/src/tests/queryeval/weighted_set_term/weighted_set_term_test.cpp index 8514a221230..95553f68cbc 100644 --- a/searchlib/src/tests/queryeval/weighted_set_term/weighted_set_term_test.cpp +++ b/searchlib/src/tests/queryeval/weighted_set_term/weighted_set_term_test.cpp @@ -53,9 +53,9 @@ struct WS { } Node::UP createNode() const { - SimpleWeightedSetTerm *node = new SimpleWeightedSetTerm("view", 0, Weight(0)); + SimpleWeightedSetTerm *node = new SimpleWeightedSetTerm(tokens.size(), "view", 0, Weight(0)); for (size_t i = 0; i < tokens.size(); ++i) { - node->append(Node::UP(new SimpleStringTerm(tokens[i].first, "view", 0, Weight(tokens[i].second)))); + node->addTerm(tokens[i].first,Weight(tokens[i].second)); } return Node::UP(node); } diff --git a/searchlib/src/vespa/searchlib/attribute/attribute_blueprint_factory.cpp b/searchlib/src/vespa/searchlib/attribute/attribute_blueprint_factory.cpp index 0c4c5271763..1e96b9be1d5 100644 --- 
a/searchlib/src/vespa/searchlib/attribute/attribute_blueprint_factory.cpp +++ b/searchlib/src/vespa/searchlib/attribute/attribute_blueprint_factory.cpp @@ -621,10 +621,10 @@ public: void visit(RegExpTerm & n) override { visitTerm(n); } template <typename WS> - void createDirectWeightedSet(WS *bp, search::query::Intermediate &n); + void createDirectWeightedSet(WS *bp, search::query::MultiTerm &n); template <typename WS> - void createShallowWeightedSet(WS *bp, search::query::Intermediate &n, const FieldSpec &fs, bool isInteger); + void createShallowWeightedSet(WS *bp, search::query::MultiTerm &n, const FieldSpec &fs, bool isInteger); static QueryTermSimple::UP extractTerm(const query::Node &node, bool isInteger) { @@ -634,6 +634,13 @@ public: } return std::make_unique<QueryTermUCS4>(term, QueryTermSimple::Type::WORD); } + static QueryTermSimple::UP + extractTerm(vespalib::stringref term, bool isInteger) { + if (isInteger) { + return std::make_unique<QueryTermSimple>(term, QueryTermSimple::Type::WORD); + } + return std::make_unique<QueryTermUCS4>(term, QueryTermSimple::Type::WORD); + } void visit(query::WeightedSetTerm &n) override { bool isSingleValue = !_attr.hasMultiValue(); @@ -641,15 +648,14 @@ public: bool isInteger = _attr.isIntegerType(); if (isSingleValue && (isString || isInteger)) { auto ws = std::make_unique<AttributeWeightedSetBlueprint>(_field, _attr); - for (size_t i = 0; i < n.getChildren().size(); ++i) { - const query::Node &node = *n.getChildren()[i]; - uint32_t weight = queryeval::getWeightFromNode(node).percent(); - ws->addToken(_attr.createSearchContext(extractTerm(node, isInteger), attribute::SearchContextParams()), weight); + for (size_t i = 0; i < n.getNumTerms(); ++i) { + auto term = n.getAsString(i); + ws->addToken(_attr.createSearchContext(extractTerm(term.first, isInteger), attribute::SearchContextParams()), term.second.percent()); } setResult(std::move(ws)); } else { if (_dwa != nullptr) { - auto *bp = new 
DirectWeightedSetBlueprint<queryeval::WeightedSetTermSearch>(_field, _attr, *_dwa, n.getChildren().size()); + auto *bp = new DirectWeightedSetBlueprint<queryeval::WeightedSetTermSearch>(_field, _attr, *_dwa, n.getNumTerms()); createDirectWeightedSet(bp, n); } else { auto *bp = new WeightedSetTermBlueprint(_field); @@ -660,7 +666,7 @@ public: void visit(query::DotProduct &n) override { if (_dwa != nullptr) { - auto *bp = new DirectWeightedSetBlueprint<queryeval::DotProductSearch>(_field, _attr, *_dwa, n.getChildren().size()); + auto *bp = new DirectWeightedSetBlueprint<queryeval::DotProductSearch>(_field, _attr, *_dwa, n.getNumTerms()); createDirectWeightedSet(bp, n); } else { auto *bp = new DotProductBlueprint(_field); @@ -672,7 +678,7 @@ public: if (_dwa != nullptr) { auto *bp = new DirectWandBlueprint(_field, *_dwa, n.getTargetNumHits(), n.getScoreThreshold(), n.getThresholdBoostFactor(), - n.getChildren().size()); + n.getNumTerms()); createDirectWeightedSet(bp, n); } else { auto *bp = new ParallelWeakAndBlueprint(_field, @@ -725,24 +731,23 @@ public: template <typename WS> void -CreateBlueprintVisitor::createDirectWeightedSet(WS *bp, search::query::Intermediate &n) { +CreateBlueprintVisitor::createDirectWeightedSet(WS *bp, search::query::MultiTerm &n) { Blueprint::UP result(bp); - for (const Node * node : n.getChildren()) { - vespalib::string term = queryeval::termAsString(*node); - uint32_t weight = queryeval::getWeightFromNode(*node).percent(); - bp->addTerm(term, weight); + for (uint32_t i(0); i < n.getNumTerms(); i++) { + auto term = n.getAsString(i); + bp->addTerm(term.first, term.second.percent()); } setResult(std::move(result)); } template <typename WS> void -CreateBlueprintVisitor::createShallowWeightedSet(WS *bp, search::query::Intermediate &n, const FieldSpec &fs, bool isInteger) { +CreateBlueprintVisitor::createShallowWeightedSet(WS *bp, search::query::MultiTerm &n, const FieldSpec &fs, bool isInteger) { Blueprint::UP result(bp); - for (const Node * 
node : n.getChildren()) { - uint32_t weight = queryeval::getWeightFromNode(*node).percent(); + for (uint32_t i(0); i < n.getNumTerms(); i++) { FieldSpec childfs = bp->getNextChildField(fs); - bp->addTerm(std::make_unique<AttributeFieldBlueprint>(childfs, _attr, extractTerm(*node, isInteger)), weight); + auto term = n.getAsString(i); + bp->addTerm(std::make_unique<AttributeFieldBlueprint>(childfs, _attr, extractTerm(term.first, isInteger)), term.second.percent()); } setResult(std::move(result)); } diff --git a/searchlib/src/vespa/searchlib/parsequery/stackdumpiterator.cpp b/searchlib/src/vespa/searchlib/parsequery/stackdumpiterator.cpp index d8be4a45af4..c37dba762ef 100644 --- a/searchlib/src/vespa/searchlib/parsequery/stackdumpiterator.cpp +++ b/searchlib/src/vespa/searchlib/parsequery/stackdumpiterator.cpp @@ -22,7 +22,7 @@ SimpleQueryStackDumpIterator::SimpleQueryStackDumpIterator(vespalib::stringref b _currArity(0), _curr_index_name(), _curr_term(), - _scratch(), + _curr_integer_term(0), _extraIntArg1(0), _extraIntArg2(0), _extraIntArg3(0), @@ -152,9 +152,7 @@ bool SimpleQueryStackDumpIterator::readNext() { case ParseItem::ITEM_PURE_WEIGHTED_LONG: { if (p + sizeof(int64_t) > _bufEnd) return false; - int64_t value = vespalib::nbo::n2h(*reinterpret_cast<const int64_t *>(p)); - auto res = std::to_chars(_scratch, _scratch + sizeof(_scratch), value, 10); - _curr_term = vespalib::stringref(_scratch, res.ptr - _scratch); + _curr_integer_term = vespalib::nbo::n2h(*reinterpret_cast<const int64_t *>(p)); p += sizeof(int64_t); _currArity = 0; } diff --git a/searchlib/src/vespa/searchlib/parsequery/stackdumpiterator.h b/searchlib/src/vespa/searchlib/parsequery/stackdumpiterator.h index 9d076e81e37..8a9a28ebacb 100644 --- a/searchlib/src/vespa/searchlib/parsequery/stackdumpiterator.h +++ b/searchlib/src/vespa/searchlib/parsequery/stackdumpiterator.h @@ -36,7 +36,7 @@ private: vespalib::stringref _curr_index_name; /** The term in the current item */ vespalib::stringref 
_curr_term; - char _scratch[24]; + int64_t _curr_integer_term; /* extra arguments */ uint32_t _extraIntArg1; @@ -119,6 +119,7 @@ public: vespalib::stringref getIndexName() const { return _curr_index_name; } vespalib::stringref getTerm() const { return _curr_term; } + int64_t getIntergerTerm() const { return _curr_integer_term; } }; } diff --git a/searchlib/src/vespa/searchlib/query/streaming/querynode.cpp b/searchlib/src/vespa/searchlib/query/streaming/querynode.cpp index ec1b26ec143..cabc9b6dae4 100644 --- a/searchlib/src/vespa/searchlib/query/streaming/querynode.cpp +++ b/searchlib/src/vespa/searchlib/query/streaming/querynode.cpp @@ -2,7 +2,7 @@ #include "query.h" #include <vespa/searchlib/parsequery/stackdumpiterator.h> - +#include <charconv> #include <vespa/log/log.h> LOG_SETUP(".vsm.querynode"); @@ -90,7 +90,6 @@ QueryNode::Build(const QueryNode * parent, const QueryNodeResultFactory & factor if (dynamic_cast<const SameElementQueryNode *>(parent) != nullptr) { index = parent->getIndex() + "." + index; } - vespalib::stringref term = queryRep.getTerm(); using TermType = QueryTerm::Type; TermType sTerm(TermType::WORD); switch (type) { @@ -112,12 +111,19 @@ QueryNode::Build(const QueryNode * parent, const QueryNodeResultFactory & factor default: break; } - QueryTerm::string ssTerm(term); + QueryTerm::string ssTerm; + if (type == ParseItem::ITEM_PURE_WEIGHTED_LONG) { + char buf[24]; + auto res = std::to_chars(buf, buf + sizeof(buf), queryRep.getIntergerTerm(), 10); + ssTerm.assign(buf, res.ptr - buf); + } else { + ssTerm = queryRep.getTerm(); + } QueryTerm::string ssIndex(index); if (ssIndex == "sddocname") { // This is suboptimal as the term should be checked too. // But it will do for now as only correct sddocname queries are sent down. 
- qn.reset(new TrueNode()); + qn = std::make_unique<TrueNode>(); } else { auto qt = std::make_unique<QueryTerm>(factory.create(), ssTerm, ssIndex, sTerm); qt->setWeight(queryRep.GetWeight()); @@ -131,7 +137,7 @@ QueryNode::Build(const QueryNode * parent, const QueryNodeResultFactory & factor auto orqn = std::make_unique<EquivQueryNode>(); orqn->push_back(std::move(qt)); orqn->push_back(std::move(phrase)); - qn.reset(orqn.release()); + qn = std::move(orqn); } } } diff --git a/searchlib/src/vespa/searchlib/query/tree/intermediatenodes.cpp b/searchlib/src/vespa/searchlib/query/tree/intermediatenodes.cpp index 4a4b606ef8f..ceeacb759b2 100644 --- a/searchlib/src/vespa/searchlib/query/tree/intermediatenodes.cpp +++ b/searchlib/src/vespa/searchlib/query/tree/intermediatenodes.cpp @@ -13,8 +13,5 @@ Near::~Near() = default; ONear::~ONear() = default; Phrase::~Phrase() = default; SameElement::~SameElement() = default; -WeightedSetTerm::~WeightedSetTerm() = default; -DotProduct::~DotProduct() = default; -WandTerm::~WandTerm() = default; } diff --git a/searchlib/src/vespa/searchlib/query/tree/intermediatenodes.h b/searchlib/src/vespa/searchlib/query/tree/intermediatenodes.h index 0ff0b212dfd..06475c0cc63 100644 --- a/searchlib/src/vespa/searchlib/query/tree/intermediatenodes.h +++ b/searchlib/src/vespa/searchlib/query/tree/intermediatenodes.h @@ -122,36 +122,4 @@ private: bool _expensive; }; -class WeightedSetTerm : public QueryNodeMixin<WeightedSetTerm, Intermediate>, public Term { -public: - WeightedSetTerm(const vespalib::string &view, int32_t id, Weight weight) - : Term(view, id, weight) {} - virtual ~WeightedSetTerm() = 0; -}; - -class DotProduct : public QueryNodeMixin<DotProduct, Intermediate>, public Term { -public: - DotProduct(const vespalib::string &view, int32_t id, Weight weight) - : Term(view, id, weight) {} - virtual ~DotProduct() = 0; -}; - -class WandTerm : public QueryNodeMixin<WandTerm, Intermediate>, public Term { -private: - uint32_t _targetNumHits; - 
int64_t _scoreThreshold; - double _thresholdBoostFactor; -public: - WandTerm(const vespalib::string &view, int32_t id, Weight weight, - uint32_t targetNumHits, int64_t scoreThreshold, double thresholdBoostFactor) - : Term(view, id, weight), - _targetNumHits(targetNumHits), - _scoreThreshold(scoreThreshold), - _thresholdBoostFactor(thresholdBoostFactor) {} - virtual ~WandTerm() = 0; - uint32_t getTargetNumHits() const { return _targetNumHits; } - int64_t getScoreThreshold() const { return _scoreThreshold; } - double getThresholdBoostFactor() const { return _thresholdBoostFactor; } -}; - } diff --git a/searchlib/src/vespa/searchlib/query/tree/querybuilder.h b/searchlib/src/vespa/searchlib/query/tree/querybuilder.h index 8392730cd29..70cf2382523 100644 --- a/searchlib/src/vespa/searchlib/query/tree/querybuilder.h +++ b/searchlib/src/vespa/searchlib/query/tree/querybuilder.h @@ -50,9 +50,6 @@ class QueryBuilderBase std::stack<NodeInfo> _nodes; vespalib::string _error_msg; - void reportError(const vespalib::string &msg); - void reportError(const vespalib::string &msg, const Node & incomming, const Node & root); - protected: QueryBuilderBase(); ~QueryBuilderBase(); @@ -91,6 +88,9 @@ public: * build a new query tree with the same builder. 
*/ void reset(); + + void reportError(const vespalib::string &msg); + void reportError(const vespalib::string &msg, const Node & incomming, const Node & root); }; @@ -126,17 +126,17 @@ typename NodeTypes::SameElement *createSameElement(vespalib::stringref view) { return new typename NodeTypes::SameElement(view); } template <class NodeTypes> -typename NodeTypes::WeightedSetTerm *createWeightedSetTerm(vespalib::stringref view, int32_t id, Weight weight) { - return new typename NodeTypes::WeightedSetTerm(view, id, weight); +typename NodeTypes::WeightedSetTerm *createWeightedSetTerm(uint32_t num_terms, vespalib::stringref view, int32_t id, Weight weight) { + return new typename NodeTypes::WeightedSetTerm(num_terms, view, id, weight); } template <class NodeTypes> -typename NodeTypes::DotProduct *createDotProduct(vespalib::stringref view, int32_t id, Weight weight) { - return new typename NodeTypes::DotProduct(view, id, weight); +typename NodeTypes::DotProduct *createDotProduct(uint32_t num_terms, vespalib::stringref view, int32_t id, Weight weight) { + return new typename NodeTypes::DotProduct(num_terms, view, id, weight); } template <class NodeTypes> typename NodeTypes::WandTerm * -createWandTerm(vespalib::stringref view, int32_t id, Weight weight, uint32_t targetNumHits, int64_t scoreThreshold, double thresholdBoostFactor) { - return new typename NodeTypes::WandTerm(view, id, weight, targetNumHits, scoreThreshold, thresholdBoostFactor); +createWandTerm(uint32_t num_terms, vespalib::stringref view, int32_t id, Weight weight, uint32_t targetNumHits, int64_t scoreThreshold, double thresholdBoostFactor) { + return new typename NodeTypes::WandTerm(num_terms, view, id, weight, targetNumHits, scoreThreshold, thresholdBoostFactor); } template <class NodeTypes> typename NodeTypes::Rank *createRank() { @@ -262,12 +262,12 @@ public: } typename NodeTypes::WeightedSetTerm &addWeightedSetTerm( int child_count, stringref view, int32_t id, Weight weight) { adjustWeight(weight); - 
typename NodeTypes::WeightedSetTerm &node = addIntermediate(createWeightedSetTerm<NodeTypes>(view, id, weight), child_count); + typename NodeTypes::WeightedSetTerm &node = addTerm(createWeightedSetTerm<NodeTypes>(child_count, view, id, weight)); return node; } typename NodeTypes::DotProduct &addDotProduct( int child_count, stringref view, int32_t id, Weight weight) { adjustWeight(weight); - typename NodeTypes::DotProduct &node = addIntermediate( createDotProduct<NodeTypes>(view, id, weight), child_count); + typename NodeTypes::DotProduct &node = addTerm( createDotProduct<NodeTypes>(child_count, view, id, weight)); return node; } typename NodeTypes::WandTerm &addWandTerm( @@ -276,9 +276,8 @@ public: int64_t scoreThreshold, double thresholdBoostFactor) { adjustWeight(weight); - typename NodeTypes::WandTerm &node = addIntermediate( - createWandTerm<NodeTypes>(view, id, weight, targetNumHits, scoreThreshold, thresholdBoostFactor), - child_count); + typename NodeTypes::WandTerm &node = addTerm( + createWandTerm<NodeTypes>(child_count, view, id, weight, targetNumHits, scoreThreshold, thresholdBoostFactor)); return node; } typename NodeTypes::Rank &addRank(int child_count) { diff --git a/searchlib/src/vespa/searchlib/query/tree/querynodemixin.h b/searchlib/src/vespa/searchlib/query/tree/querynodemixin.h index 9e8c97cff94..dd398e11844 100644 --- a/searchlib/src/vespa/searchlib/query/tree/querynodemixin.h +++ b/searchlib/src/vespa/searchlib/query/tree/querynodemixin.h @@ -8,7 +8,7 @@ namespace search::query { template <typename T, typename Base> struct QueryNodeMixin : Base { - typedef QueryNodeMixin<T, Base> QueryNodeMixinType; + using QueryNodeMixinType = QueryNodeMixin<T, Base>; ~QueryNodeMixin() = 0; void accept(QueryVisitor &visitor) override { diff --git a/searchlib/src/vespa/searchlib/query/tree/queryreplicator.h b/searchlib/src/vespa/searchlib/query/tree/queryreplicator.h index 4b9226f6112..24afe3d83c8 100644 --- 
a/searchlib/src/vespa/searchlib/query/tree/queryreplicator.h +++ b/searchlib/src/vespa/searchlib/query/tree/queryreplicator.h @@ -78,26 +78,32 @@ private: visitNodes(node.getChildren()); } + void replicateMultiTerm(const MultiTerm &original, MultiTerm & replica) { + for (uint32_t i(0); i < original.getNumTerms(); i++) { + auto v = original.getAsString(i); + replica.addTerm(v.first, v.second); + } + } + void visit(WeightedSetTerm &node) override { - replicate(node, _builder.addWeightedSetTerm(node.getChildren().size(), node.getView(), - node.getId(), node.getWeight())); - visitNodes(node.getChildren()); + auto & replica = _builder.addWeightedSetTerm(node.getNumTerms(), node.getView(), node.getId(), node.getWeight()); + replicate(node, replica); + replicateMultiTerm(node, replica); } void visit(DotProduct &node) override { - replicate(node, _builder.addDotProduct(node.getChildren().size(), node.getView(), - node.getId(), node.getWeight())); - visitNodes(node.getChildren()); + auto & replica = _builder.addDotProduct(node.getNumTerms(), node.getView(), node.getId(), node.getWeight()); + replicate(node, replica); + replicateMultiTerm(node, replica); } void visit(WandTerm &node) override { - replicate(node, _builder.addWandTerm(node.getChildren().size(), - node.getView(), - node.getId(), node.getWeight(), - node.getTargetNumHits(), - node.getScoreThreshold(), - node.getThresholdBoostFactor())); - visitNodes(node.getChildren()); + auto & replica = _builder.addWandTerm(node.getNumTerms(), node.getView(), node.getId(), node.getWeight(), + node.getTargetNumHits(), + node.getScoreThreshold(), + node.getThresholdBoostFactor()); + replicate(node, replica); + replicateMultiTerm(node, replica); } void visit(Rank &node) override { diff --git a/searchlib/src/vespa/searchlib/query/tree/simplequery.h b/searchlib/src/vespa/searchlib/query/tree/simplequery.h index db517edc348..bdf1141fde5 100644 --- a/searchlib/src/vespa/searchlib/query/tree/simplequery.h +++ 
b/searchlib/src/vespa/searchlib/query/tree/simplequery.h @@ -35,17 +35,17 @@ struct SimpleSameElement : SameElement { SimpleSameElement(vespalib::stringref view) : SameElement(view) {} }; struct SimpleWeightedSetTerm : WeightedSetTerm { - SimpleWeightedSetTerm(vespalib::stringref view, int32_t id, Weight weight) - : WeightedSetTerm(view, id, weight) {} + SimpleWeightedSetTerm(uint32_t num_terms, vespalib::stringref view, int32_t id, Weight weight) + : WeightedSetTerm(num_terms, view, id, weight) {} }; struct SimpleDotProduct : DotProduct { - SimpleDotProduct(vespalib::stringref view, int32_t id, Weight weight) - : DotProduct(view, id, weight) {} + SimpleDotProduct(uint32_t num_terms, vespalib::stringref view, int32_t id, Weight weight) + : DotProduct(num_terms, view, id, weight) {} }; struct SimpleWandTerm : WandTerm { - SimpleWandTerm(vespalib::stringref view, int32_t id, Weight weight, + SimpleWandTerm(uint32_t num_terms, vespalib::stringref view, int32_t id, Weight weight, uint32_t targetNumHits, int64_t scoreThreshold, double thresholdBoostFactor) - : WandTerm(view, id, weight, targetNumHits, scoreThreshold, thresholdBoostFactor) {} + : WandTerm(num_terms, view, id, weight, targetNumHits, scoreThreshold, thresholdBoostFactor) {} }; struct SimpleRank : Rank {}; struct SimpleNumberTerm : NumberTerm { diff --git a/searchlib/src/vespa/searchlib/query/tree/stackdumpcreator.cpp b/searchlib/src/vespa/searchlib/query/tree/stackdumpcreator.cpp index 13756af3c27..5ad5fa80da4 100644 --- a/searchlib/src/vespa/searchlib/query/tree/stackdumpcreator.cpp +++ b/searchlib/src/vespa/searchlib/query/tree/stackdumpcreator.cpp @@ -169,18 +169,28 @@ class QueryNodeConverter : public QueryVisitor { if (typefield & ParseItem::IF_FLAGS) { appendByte(flags); } - appendCompressedPositiveNumber(node.getChildren().size()); + appendCompressedPositiveNumber(node.getNumTerms()); appendString(node.getView()); } + void createMultiTermNodes(const MultiTerm & mt) { + for (size_t i = 0; i < 
mt.getNumTerms(); ++i) { + auto term = mt.getAsString(i); + uint8_t typeField = static_cast<uint8_t>(ParseItem::ITEM_PURE_WEIGHTED_STRING) | static_cast<uint8_t>(ParseItem::IF_WEIGHT); + appendByte(typeField); + appendCompressedNumber(term.second.percent()); + appendString(term.first); + } + } + void visit(WeightedSetTerm &node) override { createWeightedSet(node, static_cast<uint8_t>(ParseItem::ITEM_WEIGHTED_SET) | static_cast<uint8_t>(ParseItem::IF_WEIGHT)); - visitNodes(node.getChildren()); + createMultiTermNodes(node); } void visit(DotProduct &node) override { createWeightedSet(node, static_cast<uint8_t>(ParseItem::ITEM_DOT_PRODUCT) | static_cast<uint8_t>(ParseItem::IF_WEIGHT)); - visitNodes(node.getChildren()); + createMultiTermNodes(node); } void visit(WandTerm &node) override { @@ -188,7 +198,7 @@ class QueryNodeConverter : public QueryVisitor { appendCompressedPositiveNumber(node.getTargetNumHits()); appendDouble(node.getScoreThreshold()); appendDouble(node.getThresholdBoostFactor()); - visitNodes(node.getChildren()); + createMultiTermNodes(node); } void visit(Rank &node) override { diff --git a/searchlib/src/vespa/searchlib/query/tree/stackdumpquerycreator.h b/searchlib/src/vespa/searchlib/query/tree/stackdumpquerycreator.h index eab397d321c..b22f7d47615 100644 --- a/searchlib/src/vespa/searchlib/query/tree/stackdumpquerycreator.h +++ b/searchlib/src/vespa/searchlib/query/tree/stackdumpquerycreator.h @@ -8,6 +8,8 @@ #include <vespa/searchlib/parsequery/stackdumpiterator.h> #include <vespa/searchlib/common/geo_location_parser.h> #include <vespa/vespalib/objects/hexdump.h> +#include <vespa/vespalib/util/stringfmt.h> +#include <charconv> namespace search::query { @@ -45,8 +47,26 @@ public: return builder.build(); } -private: - static Term * createQueryTerm(search::SimpleQueryStackDumpIterator &queryStack, QueryBuilder<NodeTypes> & builder, vespalib::stringref & pureTermView) { +private: + static void populateMultiTerm(search::SimpleQueryStackDumpIterator 
&queryStack, QueryBuilderBase & builder, MultiTerm & mt) { + for (uint32_t i(0); i < mt.getNumTerms(); i++) { + queryStack.next(); + ParseItem::ItemType type = queryStack.getType(); + switch (type) { + case ParseItem::ITEM_PURE_WEIGHTED_LONG: + mt.addTerm(queryStack.getIntergerTerm(), queryStack.GetWeight()); + break; + case ParseItem::ITEM_PURE_WEIGHTED_STRING: + mt.addTerm(queryStack.getTerm(), queryStack.GetWeight()); + break; + default: + builder.reportError(vespalib::make_string("Got unexpected node %d for multiterm node at child term %d", type, i)); + return; + } + } + } + static Term * + createQueryTerm(search::SimpleQueryStackDumpIterator &queryStack, QueryBuilder<NodeTypes> & builder, vespalib::stringref & pureTermView) { uint32_t arity = queryStack.getArity(); ParseItem::ItemType type = queryStack.getType(); Node::UP node; @@ -92,14 +112,18 @@ private: vespalib::stringref view = queryStack.getIndexName(); int32_t id = queryStack.getUniqueId(); Weight weight = queryStack.GetWeight(); - t = &builder.addWeightedSetTerm(arity, view, id, weight); + auto & ws = builder.addWeightedSetTerm(arity, view, id, weight); pureTermView = vespalib::stringref(); + populateMultiTerm(queryStack, builder, ws); + t = &ws; } else if (type == ParseItem::ITEM_DOT_PRODUCT) { vespalib::stringref view = queryStack.getIndexName(); int32_t id = queryStack.getUniqueId(); Weight weight = queryStack.GetWeight(); - t = &builder.addDotProduct(arity, view, id, weight); + auto & dotProduct = builder.addDotProduct(arity, view, id, weight); pureTermView = vespalib::stringref(); + populateMultiTerm(queryStack, builder, dotProduct); + t = &dotProduct; } else if (type == ParseItem::ITEM_WAND) { vespalib::stringref view = queryStack.getIndexName(); int32_t id = queryStack.getUniqueId(); @@ -107,9 +131,10 @@ private: uint32_t targetNumHits = queryStack.getTargetNumHits(); double scoreThreshold = queryStack.getScoreThreshold(); double thresholdBoostFactor = queryStack.getThresholdBoostFactor(); - t 
= &builder.addWandTerm(arity, view, id, weight, - targetNumHits, scoreThreshold, thresholdBoostFactor); + auto & wand = builder.addWandTerm(arity, view, id, weight, targetNumHits, scoreThreshold, thresholdBoostFactor); pureTermView = vespalib::stringref(); + populateMultiTerm(queryStack, builder, wand); + t = & wand; } else if (type == ParseItem::ITEM_NOT) { builder.addAndNot(arity); } else if (type == ParseItem::ITEM_NEAREST_NEIGHBOR) { @@ -135,7 +160,9 @@ private: } else if (type == ParseItem::ITEM_PURE_WEIGHTED_STRING) { t = &builder.addStringTerm(term, pureTermView, id, weight); } else if (type == ParseItem::ITEM_PURE_WEIGHTED_LONG) { - t = &builder.addNumberTerm(term, pureTermView, id, weight); + char buf[24]; + auto res = std::to_chars(buf, buf + sizeof(buf), queryStack.getIntergerTerm(), 10); + t = &builder.addNumberTerm(vespalib::stringref(buf, res.ptr - buf), pureTermView, id, weight); } else if (type == ParseItem::ITEM_PREFIXTERM) { t = &builder.addPrefixTerm(term, view, id, weight); } else if (type == ParseItem::ITEM_SUBSTRINGTERM) { diff --git a/searchlib/src/vespa/searchlib/query/tree/termnodes.cpp b/searchlib/src/vespa/searchlib/query/tree/termnodes.cpp index b9a9687c85b..ab8431668d8 100644 --- a/searchlib/src/vespa/searchlib/query/tree/termnodes.cpp +++ b/searchlib/src/vespa/searchlib/query/tree/termnodes.cpp @@ -1,26 +1,105 @@ // Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
#include "termnodes.h" +#include <vespa/vespalib/util/exceptions.h> +#include <charconv> +using vespalib::IllegalArgumentException; +using vespalib::stringref; +using vespalib::make_string_short::fmt; namespace search::query { -NumberTerm::~NumberTerm() = default; - -PrefixTerm::~PrefixTerm() = default; - -RangeTerm::~RangeTerm() = default; - StringTerm::StringTerm(const Type &term, vespalib::stringref view, int32_t id, Weight weight) : QueryNodeMixinType(term, view, id, weight) {} -StringTerm::~StringTerm() = default; +NumberTerm::~NumberTerm() = default; +PrefixTerm::~PrefixTerm() = default; +RangeTerm::~RangeTerm() = default; +StringTerm::~StringTerm() = default; SubstringTerm::~SubstringTerm() = default; - SuffixTerm::~SuffixTerm() = default; - LocationTerm::~LocationTerm() = default; - RegExpTerm::~RegExpTerm() = default; +WeightedSetTerm::~WeightedSetTerm() = default; +DotProduct::~DotProduct() = default; +WandTerm::~WandTerm() = default; + +namespace { + +void badType(const char *expected, const char *got) __attribute__((noinline)); +void badType(const char *expected, const char *got) { + throw IllegalArgumentException(fmt("Expected '%s' type, got '%s'", expected, got), VESPA_STRLOC); +} + +class StringTermVector final : public MultiTerm::TermVector { +public: + StringTermVector(uint32_t sz) : _terms() { _terms.reserve(sz); } + void addTerm(stringref term, Weight weight) override { + _terms.emplace_back(term, weight); + } + void addTerm(int64_t, Weight) override { + badType("string", "int64_t"); + } + StringAndWeight getAsString(uint32_t index) const override { + const auto & v = _terms[index]; + return StringAndWeight(v.first, v.second); + } + IntegerAndWeight getAsInteger(uint32_t index) const override { + const auto & v = _terms[index]; + int64_t value(0); + std::from_chars(v.first.c_str(), v.first.c_str() + v.first.size(), value); + return IntegerAndWeight(value, v.second); + } +private: + std::vector<std::pair<vespalib::string, Weight>> _terms; +}; + 
+class IntegerTermVector final : public MultiTerm::TermVector { +public: + IntegerTermVector(uint32_t sz) : _terms() { _terms.reserve(sz); } + void addTerm(stringref, Weight) override { + badType("int64_t", "string"); + } + void addTerm(int64_t term, Weight weight) override { + _terms.emplace_back(term, weight); + } + StringAndWeight getAsString(uint32_t index) const override { + const auto & v = _terms[index]; + auto res = std::to_chars(_scratchPad, _scratchPad + sizeof(_scratchPad), v.first, 10); + return StringAndWeight(stringref(_scratchPad, res.ptr - _scratchPad), v.second); + } + IntegerAndWeight getAsInteger(uint32_t index) const override { + return _terms[index]; + } +private: + std::vector<IntegerAndWeight> _terms; + mutable char _scratchPad[24]; +}; + +} + +MultiTerm::MultiTerm(uint32_t num_terms) + : _terms(), + _num_terms(num_terms) +{} + +MultiTerm::~MultiTerm() = default; + +void +MultiTerm::addTerm(vespalib::stringref term, Weight weight) { + if ( ! _terms) { + _terms = std::make_unique<StringTermVector>(_num_terms); + } + _terms->addTerm(term, weight); +} + +void +MultiTerm::addTerm(int64_t term, Weight weight) { + if ( ! 
_terms) { + _terms = std::make_unique<IntegerTermVector>(_num_terms); + } + _terms->addTerm(term, weight); +} } diff --git a/searchlib/src/vespa/searchlib/query/tree/termnodes.h b/searchlib/src/vespa/searchlib/query/tree/termnodes.h index 3eda0732470..0524405f7f5 100644 --- a/searchlib/src/vespa/searchlib/query/tree/termnodes.h +++ b/searchlib/src/vespa/searchlib/query/tree/termnodes.h @@ -153,5 +153,67 @@ public: double get_distance_threshold() const { return _distance_threshold; } }; +class MultiTerm : public Node { +public: + using StringAndWeight = std::pair<vespalib::stringref, Weight>; + using IntegerAndWeight = std::pair<int64_t, Weight>; + struct TermVector { + using StringAndWeight = MultiTerm::StringAndWeight; + using IntegerAndWeight = MultiTerm::IntegerAndWeight; + virtual ~TermVector() = default; + virtual void addTerm(vespalib::stringref term, Weight weight) = 0; + virtual void addTerm(int64_t term, Weight weight) = 0; + virtual StringAndWeight getAsString(uint32_t index) const = 0; + virtual IntegerAndWeight getAsInteger(uint32_t index) const = 0; + }; + ~MultiTerm() override; + void addTerm(vespalib::stringref term, Weight weight); + void addTerm(int64_t term, Weight weight); + StringAndWeight getAsString(uint32_t index) const { return _terms->getAsString(index); } + IntegerAndWeight getAsInteger(uint32_t index) const { return _terms->getAsInteger(index); } + uint32_t getNumTerms() const { return _num_terms; } +protected: + MultiTerm(uint32_t num_terms); +private: + std::unique_ptr<TermVector> _terms; + uint32_t _num_terms; +}; + +class WeightedSetTerm : public QueryNodeMixin<WeightedSetTerm, MultiTerm>, public Term { +public: + WeightedSetTerm(uint32_t num_terms, const vespalib::string &view, int32_t id, Weight weight) + : QueryNodeMixinType(num_terms), + Term(view, id, weight) + {} + virtual ~WeightedSetTerm() = 0; +}; + +class DotProduct : public QueryNodeMixin<DotProduct, MultiTerm>, public Term { +public: + DotProduct(uint32_t num_terms, const 
vespalib::string &view, int32_t id, Weight weight) + : QueryNodeMixinType(num_terms), + Term(view, id, weight) + {} + virtual ~DotProduct() = 0; +}; + +class WandTerm : public QueryNodeMixin<WandTerm, MultiTerm>, public Term { +private: + uint32_t _targetNumHits; + int64_t _scoreThreshold; + double _thresholdBoostFactor; +public: + WandTerm(uint32_t num_terms, const vespalib::string &view, int32_t id, Weight weight, + uint32_t targetNumHits, int64_t scoreThreshold, double thresholdBoostFactor) + : QueryNodeMixinType(num_terms), + Term(view, id, weight), + _targetNumHits(targetNumHits), + _scoreThreshold(scoreThreshold), + _thresholdBoostFactor(thresholdBoostFactor) {} + virtual ~WandTerm() = 0; + uint32_t getTargetNumHits() const { return _targetNumHits; } + int64_t getScoreThreshold() const { return _scoreThreshold; } + double getThresholdBoostFactor() const { return _thresholdBoostFactor; } +}; } diff --git a/searchlib/src/vespa/searchlib/queryeval/create_blueprint_visitor_helper.cpp b/searchlib/src/vespa/searchlib/queryeval/create_blueprint_visitor_helper.cpp index 3731b2ff6a8..3ad0a8a7829 100644 --- a/searchlib/src/vespa/searchlib/queryeval/create_blueprint_visitor_helper.cpp +++ b/searchlib/src/vespa/searchlib/queryeval/create_blueprint_visitor_helper.cpp @@ -66,12 +66,12 @@ template <typename WS, typename NODE> void CreateBlueprintVisitorHelper::createWeightedSet(std::unique_ptr<WS> bp, NODE &n) { FieldSpecList fields; - for (size_t i = 0; i < n.getChildren().size(); ++i) { + for (size_t i = 0; i < n.getNumTerms(); ++i) { fields.clear(); fields.add(bp->getNextChildField(_field)); - const query::Node &node = *n.getChildren()[i]; - uint32_t weight = getWeightFromNode(node).percent(); - bp->addTerm(_searchable.createBlueprint(_requestContext, fields, node), weight); + const auto & term = n.getAsString(i); + query::SimpleStringTerm node(term.first, n.getView(), 0, term.second); // TODO Temporary + bp->addTerm(_searchable.createBlueprint(_requestContext, fields, 
node), term.second.percent()); } setResult(std::move(bp)); } |