diff options
author | Henning Baldersheim <balder@yahoo-inc.com> | 2021-03-30 17:04:14 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2021-03-30 17:04:14 +0200 |
commit | 262607d6fcb6a5bdbdb96c8da0e5988c39b2c546 (patch) | |
tree | 3c3563ccfd8f4c487da6cf16a74ce7624c012967 /searchlib | |
parent | 1e5e5325e02f79a9875b83fb1e8edd9919844f3a (diff) | |
parent | d40cbb5e16fe71f516280793d12d9e83a1dc1bda (diff) |
Merge pull request #17226 from vespa-engine/balder/dotproduct-as-term
- Let DotProduct,Wand and WeightedSet be Term nodes in the query tree…
Diffstat (limited to 'searchlib')
34 files changed, 645 insertions, 370 deletions
diff --git a/searchlib/src/tests/attribute/bitvector/bitvector_test.cpp b/searchlib/src/tests/attribute/bitvector/bitvector_test.cpp index 3bf16aa3e7e..d965f555b79 100644 --- a/searchlib/src/tests/attribute/bitvector/bitvector_test.cpp +++ b/searchlib/src/tests/attribute/bitvector/bitvector_test.cpp @@ -474,20 +474,16 @@ BitVectorTest::test(BasicType bt, bool filter) { Config cfg(bt, ct); - AttributePtr v = make(cfg, pref, fastSearch, - enableBitVectors, enableOnlyBitVector, filter); + AttributePtr v = make(cfg, pref, fastSearch, enableBitVectors, enableOnlyBitVector, filter); addDocs(v, 1024); VectorType &tv = as<VectorType>(v); populate(tv, 2, 1023, true); SearchContextPtr sc = getSearch<VectorType>(tv, true); - checkSearch(v, std::move(sc), 2, 1022, 205, !enableBitVectors && !filter, - true); + checkSearch(v, std::move(sc), 2, 1022, 205, !enableBitVectors && !filter, true); sc = getSearch<VectorType>(tv, false); - checkSearch(v, std::move(sc), 2, 1022, 205, !enableOnlyBitVector && - !filter, true); - const search::IDocumentWeightAttribute *dwa = - v->asDocumentWeightAttribute(); + checkSearch(v, std::move(sc), 2, 1022, 205, !enableOnlyBitVector && !filter, true); + const search::IDocumentWeightAttribute *dwa = v->asDocumentWeightAttribute(); if (dwa != nullptr) { search::IDocumentWeightAttribute::LookupResult lres = dwa->lookup(getSearchStr<VectorType>(), dwa->get_dictionary_snapshot()); @@ -504,21 +500,16 @@ BitVectorTest::test(BasicType bt, } populate(tv, 2, 973, false); sc = getSearch<VectorType>(tv, true); - checkSearch(v, std::move(sc), 977, 1022, 10, !enableOnlyBitVector && - !filter, true); + checkSearch(v, std::move(sc), 977, 1022, 10, !enableOnlyBitVector &&!filter, true); populate(tv, 2, 973, true); sc = getSearch<VectorType>(tv, true); - checkSearch(v, std::move(sc), 2, 1022, 205, !enableBitVectors && !filter, - true); + checkSearch(v, std::move(sc), 2, 1022, 205, !enableBitVectors && !filter, true); addDocs(v, 15000); sc = getSearch<VectorType>(tv, 
true); - checkSearch(v, std::move(sc), 2, 1022, 205, !enableOnlyBitVector && - !filter, true); + checkSearch(v, std::move(sc), 2, 1022, 205, !enableOnlyBitVector && !filter, true); populateAll(tv, 10, 15000, true); sc = getSearch<VectorType>(tv, true); - checkSearch(v, std::move(sc), 2, 14999, 14992, - !enableBitVectors && !filter, - false); + checkSearch(v, std::move(sc), 2, 14999, 14992, !enableBitVectors && !filter, false); } diff --git a/searchlib/src/tests/attribute/searchable/attribute_searchable_adapter_test.cpp b/searchlib/src/tests/attribute/searchable/attribute_searchable_adapter_test.cpp index 04b16a73029..29ef4e9e6ef 100644 --- a/searchlib/src/tests/attribute/searchable/attribute_searchable_adapter_test.cpp +++ b/searchlib/src/tests/attribute/searchable/attribute_searchable_adapter_test.cpp @@ -425,11 +425,11 @@ TEST("require that attribute dot product works") { bool fast_search = ((i & 0x1) != 0); bool strict = ((i & 0x2) != 0); MyAttributeManager attribute_manager = make_weighted_string_attribute_manager(fast_search); - SimpleDotProduct node(field, 0, Weight(1)); - node.append(Node::UP(new SimpleStringTerm("foo", "", 0, Weight(1)))); - node.append(Node::UP(new SimpleStringTerm("bar", "", 0, Weight(1)))); - node.append(Node::UP(new SimpleStringTerm("baz", "", 0, Weight(1)))); - node.append(Node::UP(new SimpleStringTerm("fox", "", 0, Weight(1)))); + SimpleDotProduct node(4, field, 0, Weight(1)); + node.addTerm("foo", Weight(1)); + node.addTerm("bar", Weight(1)); + node.addTerm("baz", Weight(1)); + node.addTerm("fox", Weight(1)); Result result = do_search(attribute_manager, node, strict); ASSERT_EQUAL(5u, result.hits.size()); if (fast_search) { @@ -457,11 +457,11 @@ TEST("require that attribute dot product can produce no hits") { bool fast_search = ((i & 0x1) != 0); bool strict = ((i & 0x2) != 0); MyAttributeManager attribute_manager = make_weighted_string_attribute_manager(fast_search); - SimpleDotProduct node(field, 0, Weight(1)); - 
node.append(Node::UP(new SimpleStringTerm("notfoo", "", 0, Weight(1)))); - node.append(Node::UP(new SimpleStringTerm("notbar", "", 0, Weight(1)))); - node.append(Node::UP(new SimpleStringTerm("notbaz", "", 0, Weight(1)))); - node.append(Node::UP(new SimpleStringTerm("notfox", "", 0, Weight(1)))); + SimpleDotProduct node(4, field, 0, Weight(1)); + node.addTerm("notfoo", Weight(1)); + node.addTerm("notbar", Weight(1)); + node.addTerm("notbaz", Weight(1)); + node.addTerm("notfox", Weight(1)); Result result = do_search(attribute_manager, node, strict); ASSERT_EQUAL(0u, result.hits.size()); EXPECT_EQUAL(0u, result.est_hits); @@ -525,11 +525,11 @@ TEST("require that attribute parallel wand works") { bool fast_search = ((i & 0x1) != 0); bool strict = ((i & 0x2) != 0); MyAttributeManager attribute_manager = make_weighted_string_attribute_manager(fast_search); - SimpleWandTerm node(field, 0, Weight(1), 10, 500, 1.5); - node.append(Node::UP(new SimpleStringTerm("foo", "", 0, Weight(1)))); - node.append(Node::UP(new SimpleStringTerm("bar", "", 0, Weight(1)))); - node.append(Node::UP(new SimpleStringTerm("baz", "", 0, Weight(1)))); - node.append(Node::UP(new SimpleStringTerm("fox", "", 0, Weight(1)))); + SimpleWandTerm node(4, field, 0, Weight(1), 10, 500, 1.5); + node.addTerm("foo", Weight(1)); + node.addTerm("bar", Weight(1)); + node.addTerm("baz", Weight(1)); + node.addTerm("fox", Weight(1)); Result result = do_search(attribute_manager, node, strict); EXPECT_FALSE(result.est_empty); if (fast_search) { @@ -561,11 +561,11 @@ TEST("require that attribute weighted set term works") { bool fast_search = ((i & 0x1) != 0); bool strict = ((i & 0x2) != 0); MyAttributeManager attribute_manager = make_weighted_string_attribute_manager(fast_search); - SimpleWeightedSetTerm node(field, 0, Weight(1)); - node.append(Node::UP(new SimpleStringTerm("foo", "", 0, Weight(10)))); - node.append(Node::UP(new SimpleStringTerm("bar", "", 0, Weight(20)))); - node.append(Node::UP(new 
SimpleStringTerm("baz", "", 0, Weight(30)))); - node.append(Node::UP(new SimpleStringTerm("fox", "", 0, Weight(40)))); + SimpleWeightedSetTerm node(4, field, 0, Weight(1)); + node.addTerm("foo", Weight(10)); + node.addTerm("bar", Weight(20)); + node.addTerm("baz", Weight(30)); + node.addTerm("fox", Weight(40)); Result result = do_search(attribute_manager, node, strict); EXPECT_FALSE(result.est_empty); ASSERT_EQUAL(5u, result.hits.size()); diff --git a/searchlib/src/tests/attribute/searchable/attribute_weighted_set_blueprint_test.cpp b/searchlib/src/tests/attribute/searchable/attribute_weighted_set_blueprint_test.cpp index fe5014b6607..328cdcf663f 100644 --- a/searchlib/src/tests/attribute/searchable/attribute_weighted_set_blueprint_test.cpp +++ b/searchlib/src/tests/attribute/searchable/attribute_weighted_set_blueprint_test.cpp @@ -89,9 +89,9 @@ struct WS { } Node::UP createNode() const { - SimpleWeightedSetTerm *node = new SimpleWeightedSetTerm("view", 0, Weight(0)); + SimpleWeightedSetTerm *node = new SimpleWeightedSetTerm(tokens.size(), "view", 0, Weight(0)); for (size_t i = 0; i < tokens.size(); ++i) { - node->append(Node::UP(new SimpleStringTerm(tokens[i].first, "view", 0, Weight(tokens[i].second)))); + node->addTerm(tokens[i].first, Weight(tokens[i].second)); } return Node::UP(node); } @@ -138,42 +138,30 @@ struct WS { } // namespace <unnamed> -class Test : public vespalib::TestApp -{ -public: - int Main() override; -}; - -int -Test::Main() -{ - TEST_INIT("attribute_weighted_set_test"); - { - MockAttributeManager manager; - setupAttributeManager(manager); - AttributeBlueprintFactory adapter; - - FakeResult expect = FakeResult() - .doc(3).elem(0).weight(30).pos(0) - .doc(5).elem(0).weight(50).pos(0) - .doc(7).elem(0).weight(70).pos(0); - WS ws = WS(manager).add("7", 70).add("5", 50).add("3", 30); - - EXPECT_TRUE(ws.isGenericSearch(adapter, "integer", true)); - EXPECT_TRUE(!ws.isGenericSearch(adapter, "integer", false)); - 
EXPECT_TRUE(ws.isGenericSearch(adapter, "string", true)); - EXPECT_TRUE(!ws.isGenericSearch(adapter, "string", false)); - EXPECT_TRUE(ws.isGenericSearch(adapter, "multi", true)); - EXPECT_TRUE(ws.isGenericSearch(adapter, "multi", false)); - - EXPECT_EQUAL(expect, ws.search(adapter, "integer", true)); - EXPECT_EQUAL(expect, ws.search(adapter, "integer", false)); - EXPECT_EQUAL(expect, ws.search(adapter, "string", true)); - EXPECT_EQUAL(expect, ws.search(adapter, "string", false)); - EXPECT_EQUAL(expect, ws.search(adapter, "multi", true)); - EXPECT_EQUAL(expect, ws.search(adapter, "multi", false)); - } - TEST_DONE(); +TEST("attribute_weighted_set_test") { + MockAttributeManager manager; + setupAttributeManager(manager); + AttributeBlueprintFactory adapter; + + FakeResult expect = FakeResult() + .doc(3).elem(0).weight(30).pos(0) + .doc(5).elem(0).weight(50).pos(0) + .doc(7).elem(0).weight(70).pos(0); + WS ws = WS(manager).add("7", 70).add("5", 50).add("3", 30); + + EXPECT_TRUE(ws.isGenericSearch(adapter, "integer", true)); + EXPECT_TRUE(!ws.isGenericSearch(adapter, "integer", false)); + EXPECT_TRUE(ws.isGenericSearch(adapter, "string", true)); + EXPECT_TRUE(!ws.isGenericSearch(adapter, "string", false)); + EXPECT_TRUE(ws.isGenericSearch(adapter, "multi", true)); + EXPECT_TRUE(ws.isGenericSearch(adapter, "multi", false)); + + EXPECT_EQUAL(expect, ws.search(adapter, "integer", true)); + EXPECT_EQUAL(expect, ws.search(adapter, "integer", false)); + EXPECT_EQUAL(expect, ws.search(adapter, "string", true)); + EXPECT_EQUAL(expect, ws.search(adapter, "string", false)); + EXPECT_EQUAL(expect, ws.search(adapter, "multi", true)); + EXPECT_EQUAL(expect, ws.search(adapter, "multi", false)); } -TEST_APPHOOK(Test); +TEST_MAIN() { TEST_RUN_ALL(); } diff --git a/searchlib/src/tests/query/customtypevisitor_test.cpp b/searchlib/src/tests/query/customtypevisitor_test.cpp index 3f7d57b7aa4..d0812a00ebf 100644 --- a/searchlib/src/tests/query/customtypevisitor_test.cpp +++ 
b/searchlib/src/tests/query/customtypevisitor_test.cpp @@ -15,18 +15,6 @@ using namespace search::query; namespace { -class Test : public vespalib::TestApp { - const char *current_state; - virtual void DumpState(bool) { - fprintf(stderr, "%s: ERROR: in %s\n", GetName(), current_state); - } - - template <class T> void requireThatNodeIsVisited(); - -public: - int Main() override; -}; - template <class Base> struct InitTerm : Base { InitTerm() : Base(typename Base::Type(), "view", 0, Weight(0)) {} @@ -49,9 +37,9 @@ struct MyStringTerm : InitTerm<StringTerm> {}; struct MySubstrTerm : InitTerm<SubstringTerm> {}; struct MySuffixTerm : InitTerm<SuffixTerm> {}; struct MyWeakAnd : WeakAnd { MyWeakAnd() : WeakAnd(1234, "view") {} }; -struct MyWeightedSetTerm : WeightedSetTerm { MyWeightedSetTerm() : WeightedSetTerm("view", 0, Weight(42)) {} }; -struct MyDotProduct : DotProduct { MyDotProduct() : DotProduct("view", 0, Weight(42)) {} }; -struct MyWandTerm : WandTerm { MyWandTerm() : WandTerm("view", 0, Weight(42), 57, 67, 77.7) {} }; +struct MyWeightedSetTerm : WeightedSetTerm { MyWeightedSetTerm() : WeightedSetTerm(0, "view", 0, Weight(42)) {} }; +struct MyDotProduct : DotProduct { MyDotProduct() : DotProduct(0, "view", 0, Weight(42)) {} }; +struct MyWandTerm : WandTerm { MyWandTerm() : WandTerm(0, "view", 0, Weight(42), 57, 67, 77.7) {} }; struct MyPredicateQuery : InitTerm<PredicateQuery> {}; struct MyRegExpTerm : InitTerm<RegExpTerm> {}; struct MyNearestNeighborTerm : NearestNeighborTerm {}; @@ -119,7 +107,7 @@ public: }; template <class T> -void Test::requireThatNodeIsVisited() { +void requireThatNodeIsVisited() { MyCustomVisitor visitor; Node::UP query(new T); visitor.isVisited<T>() = false; @@ -127,37 +115,28 @@ void Test::requireThatNodeIsVisited() { ASSERT_TRUE(visitor.isVisited<T>()); } -#define TEST_CALL(func) \ - current_state = #func; \ - func(); - -int -Test::Main() -{ - TEST_INIT("customtypevisitor_test"); - - TEST_CALL(requireThatNodeIsVisited<MyAnd>); - 
TEST_CALL(requireThatNodeIsVisited<MyAndNot>); - TEST_CALL(requireThatNodeIsVisited<MyNear>); - TEST_CALL(requireThatNodeIsVisited<MyONear>); - TEST_CALL(requireThatNodeIsVisited<MyOr>); - TEST_CALL(requireThatNodeIsVisited<MyPhrase>); - TEST_CALL(requireThatNodeIsVisited<MySameElement>); - TEST_CALL(requireThatNodeIsVisited<MyRangeTerm>); - TEST_CALL(requireThatNodeIsVisited<MyRank>); - TEST_CALL(requireThatNodeIsVisited<MyNumberTerm>); - TEST_CALL(requireThatNodeIsVisited<MyPrefixTerm>); - TEST_CALL(requireThatNodeIsVisited<MyStringTerm>); - TEST_CALL(requireThatNodeIsVisited<MySubstrTerm>); - TEST_CALL(requireThatNodeIsVisited<MySuffixTerm>); - TEST_CALL(requireThatNodeIsVisited<MyWeightedSetTerm>); - TEST_CALL(requireThatNodeIsVisited<MyDotProduct>); - TEST_CALL(requireThatNodeIsVisited<MyWandTerm>); - TEST_CALL(requireThatNodeIsVisited<MyPredicateQuery>); - TEST_CALL(requireThatNodeIsVisited<MyRegExpTerm>); - - TEST_DONE(); +TEST("customtypevisitor_test") { + + requireThatNodeIsVisited<MyAnd>(); + requireThatNodeIsVisited<MyAndNot>(); + requireThatNodeIsVisited<MyNear>(); + requireThatNodeIsVisited<MyONear>(); + requireThatNodeIsVisited<MyOr>(); + requireThatNodeIsVisited<MyPhrase>(); + requireThatNodeIsVisited<MySameElement>(); + requireThatNodeIsVisited<MyRangeTerm>(); + requireThatNodeIsVisited<MyRank>(); + requireThatNodeIsVisited<MyNumberTerm>(); + requireThatNodeIsVisited<MyPrefixTerm>(); + requireThatNodeIsVisited<MyStringTerm>(); + requireThatNodeIsVisited<MySubstrTerm>(); + requireThatNodeIsVisited<MySuffixTerm>(); + requireThatNodeIsVisited<MyWeightedSetTerm>(); + requireThatNodeIsVisited<MyDotProduct>(); + requireThatNodeIsVisited<MyWandTerm>(); + requireThatNodeIsVisited<MyPredicateQuery>(); + requireThatNodeIsVisited<MyRegExpTerm>(); } } // namespace -TEST_APPHOOK(Test); +TEST_MAIN() { TEST_RUN_ALL(); }
\ No newline at end of file diff --git a/searchlib/src/tests/query/query_visitor_test.cpp b/searchlib/src/tests/query/query_visitor_test.cpp index 946ad17352d..ef255ad6878 100644 --- a/searchlib/src/tests/query/query_visitor_test.cpp +++ b/searchlib/src/tests/query/query_visitor_test.cpp @@ -15,25 +15,6 @@ using namespace search::query; namespace { -class Test : public vespalib::TestApp { - void requireThatAllNodesCanBeVisited(); - - template <class T> void checkVisit(T *node); - -public: - int Main() override; -}; - -int -Test::Main() -{ - TEST_INIT("query_visitor_test"); - - TEST_DO(requireThatAllNodesCanBeVisited()); - - TEST_DONE(); -} - class MyVisitor : public QueryVisitor { public: @@ -69,7 +50,7 @@ public: }; template <class T> -void Test::checkVisit(T *node) { +void checkVisit(T *node) { Node::UP query(node); MyVisitor visitor; visitor.isVisited<T>() = false; @@ -77,7 +58,7 @@ void Test::checkVisit(T *node) { ASSERT_TRUE(visitor.isVisited<T>()); } -void Test::requireThatAllNodesCanBeVisited() { +TEST("requireThatAllNodesCanBeVisited") { checkVisit<And>(new SimpleAnd); checkVisit<AndNot>(new SimpleAndNot); checkVisit<Near>(new SimpleNear(0)); @@ -85,9 +66,9 @@ void Test::requireThatAllNodesCanBeVisited() { checkVisit<Or>(new SimpleOr); checkVisit<Phrase>(new SimplePhrase("field", 0, Weight(42))); checkVisit<SameElement>(new SimpleSameElement("field")); - checkVisit<WeightedSetTerm>(new SimpleWeightedSetTerm("field", 0, Weight(42))); - checkVisit<DotProduct>(new SimpleDotProduct("field", 0, Weight(42))); - checkVisit<WandTerm>(new SimpleWandTerm("field", 0, Weight(42), 57, 67, 77.7)); + checkVisit<WeightedSetTerm>(new SimpleWeightedSetTerm(0, "field", 0, Weight(42))); + checkVisit<DotProduct>(new SimpleDotProduct(0, "field", 0, Weight(42))); + checkVisit<WandTerm>(new SimpleWandTerm(0, "field", 0, Weight(42), 57, 67, 77.7)); checkVisit<Rank>(new SimpleRank); checkVisit<NumberTerm>(new SimpleNumberTerm("0.42", "field", 0, Weight(0))); const Location 
location(Point{10, 10}, 20, 0); @@ -104,4 +85,4 @@ void Test::requireThatAllNodesCanBeVisited() { } // namespace -TEST_APPHOOK(Test); +TEST_MAIN() { TEST_RUN_ALL(); } diff --git a/searchlib/src/tests/query/querybuilder_test.cpp b/searchlib/src/tests/query/querybuilder_test.cpp index 055d245e420..47dccc76603 100644 --- a/searchlib/src/tests/query/querybuilder_test.cpp +++ b/searchlib/src/tests/query/querybuilder_test.cpp @@ -90,18 +90,17 @@ Node::UP createQueryTree() { builder.addStringTerm(str[4], view[4], id[4], weight[4]); builder.addStringTerm(str[5], view[5], id[5], weight[5]); } - builder.addPredicateQuery(getPredicateQueryTerm(), - view[3], id[3], weight[3]); - builder.addDotProduct(3, view[2], id[2], weight[2]); + builder.addPredicateQuery(getPredicateQueryTerm(), view[3], id[3], weight[3]); { - builder.addStringTerm(str[3], view[3], id[3], weight[3]); - builder.addStringTerm(str[4], view[4], id[4], weight[4]); - builder.addStringTerm(str[5], view[5], id[5], weight[5]); + auto & n = builder.addDotProduct(3, view[2], id[2], weight[2]); + n.addTerm(str[3], weight[3]); + n.addTerm(str[4], weight[4]); + n.addTerm(str[5], weight[5]); } - builder.addWandTerm(2, view[0], id[0], weight[0], 57, 67, 77.7); { - builder.addStringTerm(str[1], view[1], id[1], weight[1]); - builder.addStringTerm(str[2], view[2], id[2], weight[2]); + auto & n = builder.addWandTerm(2, view[0], id[0], weight[0], 57, 67, 77.7); + n.addTerm(str[1], weight[1]); + n.addTerm(str[2], weight[2]); } builder.addRegExpTerm(str[5], view[5], id[5], weight[5]); builder.addSameElement(3, view[4]); @@ -246,23 +245,33 @@ void checkQueryTreeTypes(Node *node) { EXPECT_TRUE(checkTerm(predicateQuery, getPredicateQueryTerm(), view[3], id[3], weight[3])); auto* dotProduct = as_node<DotProduct>(and_node->getChildren()[6]); - EXPECT_EQUAL(3u, dotProduct->getChildren().size()); - string_term = as_node<StringTerm>(dotProduct->getChildren()[0]); - EXPECT_TRUE(checkTerm(string_term, str[3], view[3], id[3], weight[3])); 
- string_term = as_node<StringTerm>(dotProduct->getChildren()[1]); - EXPECT_TRUE(checkTerm(string_term, str[4], view[4], id[4], weight[4])); - string_term = as_node<StringTerm>(dotProduct->getChildren()[2]); - EXPECT_TRUE(checkTerm(string_term, str[5], view[5], id[5], weight[5])); + EXPECT_EQUAL(3u, dotProduct->getNumTerms()); + + { + const auto &w1 = dotProduct->getAsString(0); + EXPECT_EQUAL(w1.first, str[3]); + EXPECT_TRUE(w1.second == weight[3]); + const auto &w2 = dotProduct->getAsString(1); + EXPECT_EQUAL(w2.first, str[4]); + EXPECT_TRUE(w2.second == weight[4]); + const auto &w3 = dotProduct->getAsString(2); + EXPECT_EQUAL(w3.first, str[5]); + EXPECT_TRUE(w3.second == weight[5]); + } auto* wandTerm = as_node<WandTerm>(and_node->getChildren()[7]); EXPECT_EQUAL(57u, wandTerm->getTargetNumHits()); EXPECT_EQUAL(67, wandTerm->getScoreThreshold()); EXPECT_EQUAL(77.7, wandTerm->getThresholdBoostFactor()); - EXPECT_EQUAL(2u, wandTerm->getChildren().size()); - string_term = as_node<StringTerm>(wandTerm->getChildren()[0]); - EXPECT_TRUE(checkTerm(string_term, str[1], view[1], id[1], weight[1])); - string_term = as_node<StringTerm>(wandTerm->getChildren()[1]); - EXPECT_TRUE(checkTerm(string_term, str[2], view[2], id[2], weight[2])); + EXPECT_EQUAL(2u, wandTerm->getNumTerms()); + { + const auto &w1 = wandTerm->getAsString(0); + EXPECT_EQUAL(w1.first, str[1]); + EXPECT_TRUE(w1.second == weight[1]); + const auto &w2 = wandTerm->getAsString(1); + EXPECT_EQUAL(w2.first, str[2]); + EXPECT_TRUE(w2.second == weight[2]); + } auto* regexp_term = as_node<RegExpTerm>(and_node->getChildren()[8]); EXPECT_TRUE(checkTerm(regexp_term, str[5], view[5], id[5], weight[5])); @@ -336,15 +345,15 @@ struct MyPhrase : Phrase { MyPhrase(const string &f, int32_t i, Weight w) : Phra struct MySameElement : SameElement { MySameElement(const string &f) : SameElement(f) {}}; struct MyWeightedSetTerm : WeightedSetTerm { - MyWeightedSetTerm(const string &f, int32_t i, Weight w) : WeightedSetTerm(f, i, 
w) {} + MyWeightedSetTerm(uint32_t n, const string &f, int32_t i, Weight w) : WeightedSetTerm(n, f, i, w) {} }; struct MyDotProduct : DotProduct { - MyDotProduct(const string &f, int32_t i, Weight w) : DotProduct(f, i, w) {} + MyDotProduct(uint32_t n, const string &f, int32_t i, Weight w) : DotProduct(n, f, i, w) {} }; struct MyWandTerm : WandTerm { - MyWandTerm(const string &f, int32_t i, Weight w, uint32_t targetNumHits, + MyWandTerm(uint32_t n, const string &f, int32_t i, Weight w, uint32_t targetNumHits, int64_t scoreThreshold, double thresholdBoostFactor) - : WandTerm(f, i, w, targetNumHits, scoreThreshold, thresholdBoostFactor) {} + : WandTerm(n, f, i, w, targetNumHits, scoreThreshold, thresholdBoostFactor) {} }; struct MyRank : Rank {}; struct MyNumberTerm : NumberTerm { @@ -593,7 +602,7 @@ TEST("require that empty intermediate node can be added") { } TEST("control size of SimpleQueryStackDumpIterator") { - EXPECT_EQUAL(144u, sizeof(SimpleQueryStackDumpIterator)); + EXPECT_EQUAL(128u, sizeof(SimpleQueryStackDumpIterator)); } TEST("test query parsing error") { @@ -633,6 +642,77 @@ TEST("test query parsing error") { EXPECT_FALSE(new_node); } +class SimpleMultiTerm : public MultiTerm { +public: + SimpleMultiTerm(size_t numTerms) : MultiTerm(numTerms) {} + void accept(QueryVisitor & ) override { } +}; + +TEST("initial state of MultiTerm") { + SimpleMultiTerm mt(7); + EXPECT_EQUAL(7u, mt.getNumTerms()); + EXPECT_TRUE(MultiTerm::Type::UNKNOWN == mt.getType()); +} + +void +verify_multiterm_get(const MultiTerm & mt) { + EXPECT_EQUAL(7u, mt.getNumTerms()); + for (int64_t i(0); i < mt.getNumTerms(); i++) { + auto v = mt.getAsInteger(i); + EXPECT_EQUAL(v.first, i-3); + EXPECT_EQUAL(v.second.percent(), i-4); + } + for (int64_t i(0); i < mt.getNumTerms(); i++) { + auto v = mt.getAsString(i); + char buf[24]; + auto res = std::to_chars(buf, buf + sizeof(buf), i-3); + EXPECT_EQUAL(v.first, vespalib::stringref(buf, res.ptr - buf)); + EXPECT_EQUAL(v.second.percent(), i-4); + 
} +} + +TEST("add and get of integer MultiTerm") { + SimpleMultiTerm mt(7); + for (int64_t i(0); i < mt.getNumTerms(); i++) { + mt.addTerm(i-3, Weight(i-4)); + } + EXPECT_TRUE(MultiTerm::Type::INTEGER == mt.getType()); + verify_multiterm_get(mt); +} + +TEST("add and get of string MultiTerm") { + SimpleMultiTerm mt(7); + for (int64_t i(0); i < mt.getNumTerms(); i++) { + char buf[24]; + auto res = std::to_chars(buf, buf + sizeof(buf), i-3); + mt.addTerm(vespalib::stringref(buf, res.ptr - buf), Weight(i-4)); + } + EXPECT_TRUE(MultiTerm::Type::STRING == mt.getType()); + verify_multiterm_get(mt); +} + +TEST("first string then integer MultiTerm") { + SimpleMultiTerm mt(7); + mt.addTerm("-3", Weight(-4)); + for (int64_t i(1); i < mt.getNumTerms(); i++) { + mt.addTerm(i-3, Weight(i-4)); + } + EXPECT_TRUE(MultiTerm::Type::STRING == mt.getType()); + verify_multiterm_get(mt); +} + +TEST("first integer then string MultiTerm") { + SimpleMultiTerm mt(7); + mt.addTerm(-3, Weight(-4)); + for (int64_t i(1); i < mt.getNumTerms(); i++) { + char buf[24]; + auto res = std::to_chars(buf, buf + sizeof(buf), i-3); + mt.addTerm(vespalib::stringref(buf, res.ptr - buf), Weight(i-4)); + } + EXPECT_TRUE(MultiTerm::Type::INTEGER == mt.getType()); + verify_multiterm_get(mt); +} + } // namespace TEST_MAIN() { TEST_RUN_ALL(); } diff --git a/searchlib/src/tests/queryeval/dot_product/dot_product_test.cpp b/searchlib/src/tests/queryeval/dot_product/dot_product_test.cpp index 7414e8b10f2..a05dcc4c6ea 100644 --- a/searchlib/src/tests/queryeval/dot_product/dot_product_test.cpp +++ b/searchlib/src/tests/queryeval/dot_product/dot_product_test.cpp @@ -45,9 +45,9 @@ struct DP { } Node::UP createNode() const { - SimpleDotProduct *node = new SimpleDotProduct("view", 0, Weight(0)); + SimpleDotProduct *node = new SimpleDotProduct(tokens.size(), "view", 0, Weight(0)); for (size_t i = 0; i < tokens.size(); ++i) { - node->append(Node::UP(new SimpleStringTerm(tokens[i].first, "view", 0, Weight(tokens[i].second)))); 
+ node->addTerm(tokens[i].first, Weight(tokens[i].second)); } return Node::UP(node); } diff --git a/searchlib/src/tests/queryeval/fake_searchable/fake_searchable_test.cpp b/searchlib/src/tests/queryeval/fake_searchable/fake_searchable_test.cpp index cbad6de25bb..167fc706f4d 100644 --- a/searchlib/src/tests/queryeval/fake_searchable/fake_searchable_test.cpp +++ b/searchlib/src/tests/queryeval/fake_searchable/fake_searchable_test.cpp @@ -156,9 +156,9 @@ TEST_F(FakeSearchableTest, require_that_weigheted_set_search_works) { source.addResult("fieldfoo", "friend3", FakeResult().doc(5)); - SimpleWeightedSetTerm weightedSet("fieldfoo", 1, w); - weightedSet.append(Node::UP(new SimpleStringTerm("friend1", "fieldfoo", 2, Weight(1)))); - weightedSet.append(Node::UP(new SimpleStringTerm("friend2", "fieldfoo", 3, Weight(2)))); + SimpleWeightedSetTerm weightedSet(2, "fieldfoo", 1, w); + weightedSet.addTerm("friend1", Weight(1)); + weightedSet.addTerm("friend2", Weight(2)); FieldSpecList fields; fields.add(FieldSpec("fieldfoo", 1, 1)); diff --git a/searchlib/src/tests/queryeval/getnodeweight/getnodeweight_test.cpp b/searchlib/src/tests/queryeval/getnodeweight/getnodeweight_test.cpp index 359a7f9c074..24253469dfc 100644 --- a/searchlib/src/tests/queryeval/getnodeweight/getnodeweight_test.cpp +++ b/searchlib/src/tests/queryeval/getnodeweight/getnodeweight_test.cpp @@ -34,9 +34,9 @@ TEST("test variations of getWeight") EXPECT_EQUAL(42, getWeight(SimpleStringTerm("foo", "bar", 1, Weight(42)))); EXPECT_EQUAL(42, getWeight(SimpleSubstringTerm("foo", "bar", 1, Weight(42)))); EXPECT_EQUAL(42, getWeight(SimpleSuffixTerm("foo", "bar", 1, Weight(42)))); - EXPECT_EQUAL(42, getWeight(SimpleWeightedSetTerm("bar", 1, Weight(42)))); - EXPECT_EQUAL(42, getWeight(SimpleDotProduct("bar", 1, Weight(42)))); - EXPECT_EQUAL(42, getWeight(SimpleWandTerm("bar", 1, Weight(42), 57, 67, 77.7))); + EXPECT_EQUAL(42, getWeight(SimpleWeightedSetTerm(0, "bar", 1, Weight(42)))); + EXPECT_EQUAL(42, 
getWeight(SimpleDotProduct(0, "bar", 1, Weight(42)))); + EXPECT_EQUAL(42, getWeight(SimpleWandTerm(0, "bar", 1, Weight(42), 57, 67, 77.7))); } TEST_MAIN() { TEST_RUN_ALL(); } diff --git a/searchlib/src/tests/queryeval/parallel_weak_and/parallel_weak_and_test.cpp b/searchlib/src/tests/queryeval/parallel_weak_and/parallel_weak_and_test.cpp index f2c02d02080..b820a96fab6 100644 --- a/searchlib/src/tests/queryeval/parallel_weak_and/parallel_weak_and_test.cpp +++ b/searchlib/src/tests/queryeval/parallel_weak_and/parallel_weak_and_test.cpp @@ -158,11 +158,10 @@ struct WandBlueprintSpec Node::UP createNode(uint32_t scoresToTrack = 100, score_t scoreThreshold = 0, double thresholdBoostFactor = 1) const { - SimpleWandTerm *node = new SimpleWandTerm("view", 0, Weight(0), + SimpleWandTerm *node = new SimpleWandTerm(tokens.size(), "view", 0, Weight(0), scoresToTrack, scoreThreshold, thresholdBoostFactor); for (size_t i = 0; i < tokens.size(); ++i) { - node->append(Node::UP(new SimpleStringTerm(tokens[i].first, "view", 0, - Weight(tokens[i].second)))); + node->addTerm(tokens[i].first, Weight(tokens[i].second)); } return Node::UP(node); } diff --git a/searchlib/src/tests/queryeval/weighted_set_term/weighted_set_term_test.cpp b/searchlib/src/tests/queryeval/weighted_set_term/weighted_set_term_test.cpp index 8514a221230..95553f68cbc 100644 --- a/searchlib/src/tests/queryeval/weighted_set_term/weighted_set_term_test.cpp +++ b/searchlib/src/tests/queryeval/weighted_set_term/weighted_set_term_test.cpp @@ -53,9 +53,9 @@ struct WS { } Node::UP createNode() const { - SimpleWeightedSetTerm *node = new SimpleWeightedSetTerm("view", 0, Weight(0)); + SimpleWeightedSetTerm *node = new SimpleWeightedSetTerm(tokens.size(), "view", 0, Weight(0)); for (size_t i = 0; i < tokens.size(); ++i) { - node->append(Node::UP(new SimpleStringTerm(tokens[i].first, "view", 0, Weight(tokens[i].second)))); + node->addTerm(tokens[i].first,Weight(tokens[i].second)); } return Node::UP(node); } diff --git 
a/searchlib/src/vespa/searchlib/attribute/attribute_blueprint_factory.cpp b/searchlib/src/vespa/searchlib/attribute/attribute_blueprint_factory.cpp index 0c4c5271763..1edcd67f5cb 100644 --- a/searchlib/src/vespa/searchlib/attribute/attribute_blueprint_factory.cpp +++ b/searchlib/src/vespa/searchlib/attribute/attribute_blueprint_factory.cpp @@ -37,6 +37,7 @@ #include <vespa/vespalib/util/regexp.h> #include <vespa/vespalib/util/stringfmt.h> #include <sstream> +#include <charconv> #include <vespa/log/log.h> LOG_SETUP(".searchlib.attribute.attribute_blueprint_factory"); @@ -58,6 +59,7 @@ using search::query::StackDumpCreator; using search::query::StringTerm; using search::query::SubstringTerm; using search::query::SuffixTerm; +using search::query::MultiTerm; using search::queryeval::AndBlueprint; using search::queryeval::AndSearchStrict; using search::queryeval::Blueprint; @@ -81,10 +83,26 @@ using search::tensor::DenseTensorAttribute; using vespalib::geo::ZCurve; using vespalib::make_string; using vespalib::string; +using vespalib::stringref; namespace search { namespace { +class NodeAsKey final : public IDocumentWeightAttribute::LookupKey { +public: + NodeAsKey(const Node & node, vespalib::string & scratchPad) + : _node(node), + _scratchPad(scratchPad) + { } + + stringref asString() const override { + return queryeval::termAsString(_node, _scratchPad); + } + +private: + const Node & _node; + vespalib::string & _scratchPad; +}; //----------------------------------------------------------------------------- /** @@ -312,6 +330,24 @@ make_location_blueprint(const FieldSpec &field, const IAttributeVector &attribut return root; } +class LookupKey : public IDocumentWeightAttribute::LookupKey { +public: + LookupKey(MultiTerm & terms, uint32_t index) : _terms(terms), _index(index) {} + + stringref asString() const override { + return _terms.getAsString(_index).first; + } + + bool asInteger(int64_t &value) const override { + value = _terms.getAsInteger(_index).first; + return 
true; + } + +private: + const MultiTerm & _terms; + uint32_t _index; +}; + //----------------------------------------------------------------------------- template <typename SearchType> @@ -342,8 +378,8 @@ public: _terms.reserve(size_hint); } - void addTerm(const vespalib::string &term, int32_t weight) { - IDocumentWeightAttribute::LookupResult result = _attr.lookup(term, _dictionary_snapshot); + void addTerm(const IDocumentWeightAttribute::LookupKey & key, int32_t weight) { + IDocumentWeightAttribute::LookupResult result = _attr.lookup(key, _dictionary_snapshot); HitEstimate childEst(result.posting_size, (result.posting_size == 0)); if (!childEst.empty) { if (_estimate.empty) { @@ -422,14 +458,13 @@ public: _terms(), _attr(attr), _dictionary_snapshot(_attr.get_dictionary_snapshot()) - { _weights.reserve(size_hint); _terms.reserve(size_hint); } - void addTerm(const vespalib::string &term, int32_t weight) { - IDocumentWeightAttribute::LookupResult result = _attr.lookup(term, _dictionary_snapshot); + void addTerm(const IDocumentWeightAttribute::LookupKey & key, int32_t weight) { + IDocumentWeightAttribute::LookupResult result = _attr.lookup(key, _dictionary_snapshot); HitEstimate childEst(result.posting_size, (result.posting_size == 0)); if (!childEst.empty) { if (_estimate.empty) { @@ -487,13 +522,14 @@ private: public: DirectAttributeBlueprint(const FieldSpec &field, const vespalib::string & name, const IAttributeVector &iattr, - const IDocumentWeightAttribute &attr, const vespalib::string &term) + const IDocumentWeightAttribute &attr, + const IDocumentWeightAttribute::LookupKey & key) : SimpleLeafBlueprint(field), _attrName(name), _iattr(iattr), _attr(attr), _dictionary_snapshot(_attr.get_dictionary_snapshot()), - _dict_entry(_attr.lookup(term, _dictionary_snapshot)) + _dict_entry(_attr.lookup(key, _dictionary_snapshot)) { setEstimate(HitEstimate(_dict_entry.posting_size, (_dict_entry.posting_size == 0))); } @@ -547,6 +583,7 @@ private: const FieldSpec &_field; 
const IAttributeVector &_attr; const IDocumentWeightAttribute *_dwa; + vespalib::string _scratchPad; public: CreateBlueprintVisitor(Searchable &searchable, const IRequestContext &requestContext, @@ -554,15 +591,17 @@ public: : CreateBlueprintVisitorHelper(searchable, field, requestContext), _field(field), _attr(attr), - _dwa(attr.asDocumentWeightAttribute()) + _dwa(attr.asDocumentWeightAttribute()), + _scratchPad() { } + ~CreateBlueprintVisitor() override; template <class TermNode> void visitTerm(TermNode &n, bool simple = false) { if (simple && (_dwa != nullptr) && !_field.isFilter() && n.isRanked()) { - vespalib::string term = queryeval::termAsString(n); - setResult(std::make_unique<DirectAttributeBlueprint>(_field, _attr.getName(), _attr, *_dwa, term)); + NodeAsKey key(n, _scratchPad); + setResult(std::make_unique<DirectAttributeBlueprint>(_field, _attr.getName(), _attr, *_dwa, key)); } else { const string stack = StackDumpCreator::create(n); setResult(std::make_unique<AttributeFieldBlueprint>(_field, _attr, stack)); @@ -621,14 +660,13 @@ public: void visit(RegExpTerm & n) override { visitTerm(n); } template <typename WS> - void createDirectWeightedSet(WS *bp, search::query::Intermediate &n); + void createDirectWeightedSet(WS *bp, MultiTerm &n); template <typename WS> - void createShallowWeightedSet(WS *bp, search::query::Intermediate &n, const FieldSpec &fs, bool isInteger); + void createShallowWeightedSet(WS *bp, MultiTerm &n, const FieldSpec &fs, bool isInteger); static QueryTermSimple::UP - extractTerm(const query::Node &node, bool isInteger) { - vespalib::string term = queryeval::termAsString(node); + extractTerm(vespalib::stringref term, bool isInteger) { if (isInteger) { return std::make_unique<QueryTermSimple>(term, QueryTermSimple::Type::WORD); } @@ -641,15 +679,14 @@ public: bool isInteger = _attr.isIntegerType(); if (isSingleValue && (isString || isInteger)) { auto ws = std::make_unique<AttributeWeightedSetBlueprint>(_field, _attr); - for (size_t i = 
0; i < n.getChildren().size(); ++i) { - const query::Node &node = *n.getChildren()[i]; - uint32_t weight = queryeval::getWeightFromNode(node).percent(); - ws->addToken(_attr.createSearchContext(extractTerm(node, isInteger), attribute::SearchContextParams()), weight); + for (size_t i = 0; i < n.getNumTerms(); ++i) { + auto term = n.getAsString(i); + ws->addToken(_attr.createSearchContext(extractTerm(term.first, isInteger), attribute::SearchContextParams()), term.second.percent()); } setResult(std::move(ws)); } else { if (_dwa != nullptr) { - auto *bp = new DirectWeightedSetBlueprint<queryeval::WeightedSetTermSearch>(_field, _attr, *_dwa, n.getChildren().size()); + auto *bp = new DirectWeightedSetBlueprint<queryeval::WeightedSetTermSearch>(_field, _attr, *_dwa, n.getNumTerms()); createDirectWeightedSet(bp, n); } else { auto *bp = new WeightedSetTermBlueprint(_field); @@ -660,7 +697,7 @@ public: void visit(query::DotProduct &n) override { if (_dwa != nullptr) { - auto *bp = new DirectWeightedSetBlueprint<queryeval::DotProductSearch>(_field, _attr, *_dwa, n.getChildren().size()); + auto *bp = new DirectWeightedSetBlueprint<queryeval::DotProductSearch>(_field, _attr, *_dwa, n.getNumTerms()); createDirectWeightedSet(bp, n); } else { auto *bp = new DotProductBlueprint(_field); @@ -672,7 +709,7 @@ public: if (_dwa != nullptr) { auto *bp = new DirectWandBlueprint(_field, *_dwa, n.getTargetNumHits(), n.getScoreThreshold(), n.getThresholdBoostFactor(), - n.getChildren().size()); + n.getNumTerms()); createDirectWeightedSet(bp, n); } else { auto *bp = new ParallelWeakAndBlueprint(_field, @@ -725,28 +762,28 @@ public: template <typename WS> void -CreateBlueprintVisitor::createDirectWeightedSet(WS *bp, search::query::Intermediate &n) { +CreateBlueprintVisitor::createDirectWeightedSet(WS *bp, MultiTerm &n) { Blueprint::UP result(bp); - for (const Node * node : n.getChildren()) { - vespalib::string term = queryeval::termAsString(*node); - uint32_t weight = 
queryeval::getWeightFromNode(*node).percent(); - bp->addTerm(term, weight); + for (uint32_t i(0); i < n.getNumTerms(); i++) { + bp->addTerm(LookupKey(n, i), n.weight(i).percent()); } setResult(std::move(result)); } template <typename WS> void -CreateBlueprintVisitor::createShallowWeightedSet(WS *bp, search::query::Intermediate &n, const FieldSpec &fs, bool isInteger) { +CreateBlueprintVisitor::createShallowWeightedSet(WS *bp, MultiTerm &n, const FieldSpec &fs, bool isInteger) { Blueprint::UP result(bp); - for (const Node * node : n.getChildren()) { - uint32_t weight = queryeval::getWeightFromNode(*node).percent(); + for (uint32_t i(0); i < n.getNumTerms(); i++) { FieldSpec childfs = bp->getNextChildField(fs); - bp->addTerm(std::make_unique<AttributeFieldBlueprint>(childfs, _attr, extractTerm(*node, isInteger)), weight); + auto term = n.getAsString(i); + bp->addTerm(std::make_unique<AttributeFieldBlueprint>(childfs, _attr, extractTerm(term.first, isInteger)), term.second.percent()); } setResult(std::move(result)); } +CreateBlueprintVisitor::~CreateBlueprintVisitor() = default; + } // namespace //----------------------------------------------------------------------------- diff --git a/searchlib/src/vespa/searchlib/attribute/i_document_weight_attribute.cpp b/searchlib/src/vespa/searchlib/attribute/i_document_weight_attribute.cpp index 12ce1a22209..965ff7b065a 100644 --- a/searchlib/src/vespa/searchlib/attribute/i_document_weight_attribute.cpp +++ b/searchlib/src/vespa/searchlib/attribute/i_document_weight_attribute.cpp @@ -1,3 +1,32 @@ // Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
#include "i_document_weight_attribute.h"
#include <charconv>
#include <system_error>

namespace search {
namespace {

/**
 * Adapter that presents a plain string reference as a LookupKey.
 *
 * Only a non-owning view is stored; the referenced characters must stay
 * alive for as long as this key is in use.
 */
class StringAsKey final : public IDocumentWeightAttribute::LookupKey {
public:
    explicit StringAsKey(vespalib::stringref key)
        : _key(key)
    { }

    vespalib::stringref asString() const override { return _key; }
private:
    vespalib::stringref _key;
};

}

/**
 * Default integer conversion: parse the string form of the key as a
 * base-10 signed 64-bit integer.
 *
 * @param value receives the parsed number; written only on success.
 * @return true only if the whole string was consumed and the parse
 *         succeeded. An empty string, trailing garbage, or a number that
 *         does not fit in int64_t all yield false.
 */
bool
IDocumentWeightAttribute::LookupKey::asInteger(int64_t &value) const {
    vespalib::stringref str = asString();
    const char *begin = str.data();
    const char *end = begin + str.size();
    int64_t parsed = 0;
    auto res = std::from_chars(begin, end, parsed);
    // Checking res.ptr alone is not enough: for an empty input, and for an
    // out-of-range number, from_chars fails while res.ptr still equals end.
    // The error code must be checked as well, otherwise the caller would go
    // on to use a value that was never written.
    if ((res.ec != std::errc()) || (res.ptr != end)) {
        return false;
    }
    value = parsed;
    return true;
}

/**
 * Convenience overload: wraps the term in a temporary key and forwards to
 * the LookupKey based lookup.
 */
IDocumentWeightAttribute::LookupResult
IDocumentWeightAttribute::lookup(vespalib::stringref term, vespalib::datastore::EntryRef dictionary_snapshot) const {
    return lookup(StringAsKey(term), dictionary_snapshot);
}
}
\ No newline at end of file diff --git a/searchlib/src/vespa/searchlib/attribute/i_document_weight_attribute.h b/searchlib/src/vespa/searchlib/attribute/i_document_weight_attribute.h index e76fbcb0378..a3b045fdd3f 100644 --- a/searchlib/src/vespa/searchlib/attribute/i_document_weight_attribute.h +++ b/searchlib/src/vespa/searchlib/attribute/i_document_weight_attribute.h @@ -3,17 +3,20 @@ #pragma once #include "postinglisttraits.h" - #include <functional> namespace search { -namespace query { class Node; } - using DocumentWeightIterator = attribute::PostingListTraits<int32_t>::const_iterator; struct IDocumentWeightAttribute { + struct LookupKey { + virtual ~LookupKey() = default; + virtual vespalib::stringref asString() const = 0; + virtual bool asInteger(int64_t &value) const; + }; + struct LookupResult { const vespalib::datastore::EntryRef posting_idx; const uint32_t posting_size; @@ -25,7 +28,8 @@ struct IDocumentWeightAttribute : posting_idx(posting_idx_in), posting_size(posting_size_in), min_weight(min_weight_in), max_weight(max_weight_in), enum_idx(enum_idx_in) {} }; virtual vespalib::datastore::EntryRef get_dictionary_snapshot() const = 0; - virtual LookupResult lookup(const vespalib::string &term, vespalib::datastore::EntryRef dictionary_snapshot) const = 0; + virtual LookupResult lookup(const LookupKey & key, vespalib::datastore::EntryRef dictionary_snapshot) const = 0; + LookupResult lookup(vespalib::stringref term, vespalib::datastore::EntryRef dictionary_snapshot) const; /* * Collect enum indexes (via callback) where folded * (e.g. lowercased) value equals the folded value for enum_idx. 
diff --git a/searchlib/src/vespa/searchlib/attribute/multinumericpostattribute.h b/searchlib/src/vespa/searchlib/attribute/multinumericpostattribute.h index c09366cdaea..d8d7e7f902c 100644 --- a/searchlib/src/vespa/searchlib/attribute/multinumericpostattribute.h +++ b/searchlib/src/vespa/searchlib/attribute/multinumericpostattribute.h @@ -36,7 +36,7 @@ private: const MultiValueNumericPostingAttribute &self; DocumentWeightAttributeAdapter(const MultiValueNumericPostingAttribute &self_in) : self(self_in) {} vespalib::datastore::EntryRef get_dictionary_snapshot() const override; - LookupResult lookup(const vespalib::string &term, vespalib::datastore::EntryRef dictionary_snapshot) const override; + LookupResult lookup(const LookupKey & key, vespalib::datastore::EntryRef dictionary_snapshot) const override; void collect_folded(vespalib::datastore::EntryRef enum_idx, vespalib::datastore::EntryRef dictionary_snapshot, const std::function<void(vespalib::datastore::EntryRef)>& callback) const override; void create(vespalib::datastore::EntryRef idx, std::vector<DocumentWeightIterator> &dst) const override; DocumentWeightIterator create(vespalib::datastore::EntryRef idx) const override; diff --git a/searchlib/src/vespa/searchlib/attribute/multinumericpostattribute.hpp b/searchlib/src/vespa/searchlib/attribute/multinumericpostattribute.hpp index f91dac630d3..5f0b06a1294 100644 --- a/searchlib/src/vespa/searchlib/attribute/multinumericpostattribute.hpp +++ b/searchlib/src/vespa/searchlib/attribute/multinumericpostattribute.hpp @@ -3,6 +3,7 @@ #pragma once #include "multinumericpostattribute.h" +#include <charconv> namespace search { @@ -92,12 +93,11 @@ MultiValueNumericPostingAttribute<B, M>::DocumentWeightAttributeAdapter::get_dic template <typename B, typename M> IDocumentWeightAttribute::LookupResult -MultiValueNumericPostingAttribute<B, M>::DocumentWeightAttributeAdapter::lookup(const vespalib::string &term, vespalib::datastore::EntryRef dictionary_snapshot) const 
+MultiValueNumericPostingAttribute<B, M>::DocumentWeightAttributeAdapter::lookup(const LookupKey & key, vespalib::datastore::EntryRef dictionary_snapshot) const { const IEnumStoreDictionary& dictionary = self._enumStore.get_dictionary(); - char *end = nullptr; - int64_t int_term = strtoll(term.c_str(), &end, 10); - if (*end == '\0') { + int64_t int_term; + if (key.asInteger(int_term)) { auto comp = self._enumStore.make_comparator(int_term); auto find_result = dictionary.find_posting_list(comp, dictionary_snapshot); if (find_result.first.valid()) { diff --git a/searchlib/src/vespa/searchlib/attribute/multistringpostattribute.h b/searchlib/src/vespa/searchlib/attribute/multistringpostattribute.h index c324ecdf125..f80d85dadee 100644 --- a/searchlib/src/vespa/searchlib/attribute/multistringpostattribute.h +++ b/searchlib/src/vespa/searchlib/attribute/multistringpostattribute.h @@ -34,7 +34,7 @@ private: const MultiValueStringPostingAttributeT &self; DocumentWeightAttributeAdapter(const MultiValueStringPostingAttributeT &self_in) : self(self_in) {} vespalib::datastore::EntryRef get_dictionary_snapshot() const override; - LookupResult lookup(const vespalib::string &term, vespalib::datastore::EntryRef dictionary_snapshot) const override; + LookupResult lookup(const LookupKey & key, vespalib::datastore::EntryRef dictionary_snapshot) const override; void collect_folded(vespalib::datastore::EntryRef enum_idx, vespalib::datastore::EntryRef dictionary_snapshot, const std::function<void(vespalib::datastore::EntryRef)>& callback) const override; void create(vespalib::datastore::EntryRef idx, std::vector<DocumentWeightIterator> &dst) const override; DocumentWeightIterator create(vespalib::datastore::EntryRef idx) const override; diff --git a/searchlib/src/vespa/searchlib/attribute/multistringpostattribute.hpp b/searchlib/src/vespa/searchlib/attribute/multistringpostattribute.hpp index 7dea95b2e55..817f7630a3a 100644 --- 
a/searchlib/src/vespa/searchlib/attribute/multistringpostattribute.hpp +++ b/searchlib/src/vespa/searchlib/attribute/multistringpostattribute.hpp @@ -108,10 +108,10 @@ MultiValueStringPostingAttributeT<B, T>::DocumentWeightAttributeAdapter::get_dic template <typename B, typename T> IDocumentWeightAttribute::LookupResult -MultiValueStringPostingAttributeT<B, T>::DocumentWeightAttributeAdapter::lookup(const vespalib::string &term, vespalib::datastore::EntryRef dictionary_snapshot) const +MultiValueStringPostingAttributeT<B, T>::DocumentWeightAttributeAdapter::lookup(const LookupKey & key, vespalib::datastore::EntryRef dictionary_snapshot) const { const IEnumStoreDictionary& dictionary = self._enumStore.get_dictionary(); - auto comp = self._enumStore.make_folded_comparator(term.c_str()); + auto comp = self._enumStore.make_folded_comparator(key.asString().data()); auto find_result = dictionary.find_posting_list(comp, dictionary_snapshot); if (find_result.first.valid()) { auto pidx = find_result.second; diff --git a/searchlib/src/vespa/searchlib/parsequery/stackdumpiterator.cpp b/searchlib/src/vespa/searchlib/parsequery/stackdumpiterator.cpp index d8be4a45af4..c37dba762ef 100644 --- a/searchlib/src/vespa/searchlib/parsequery/stackdumpiterator.cpp +++ b/searchlib/src/vespa/searchlib/parsequery/stackdumpiterator.cpp @@ -22,7 +22,7 @@ SimpleQueryStackDumpIterator::SimpleQueryStackDumpIterator(vespalib::stringref b _currArity(0), _curr_index_name(), _curr_term(), - _scratch(), + _curr_integer_term(0), _extraIntArg1(0), _extraIntArg2(0), _extraIntArg3(0), @@ -152,9 +152,7 @@ bool SimpleQueryStackDumpIterator::readNext() { case ParseItem::ITEM_PURE_WEIGHTED_LONG: { if (p + sizeof(int64_t) > _bufEnd) return false; - int64_t value = vespalib::nbo::n2h(*reinterpret_cast<const int64_t *>(p)); - auto res = std::to_chars(_scratch, _scratch + sizeof(_scratch), value, 10); - _curr_term = vespalib::stringref(_scratch, res.ptr - _scratch); + _curr_integer_term = 
vespalib::nbo::n2h(*reinterpret_cast<const int64_t *>(p)); p += sizeof(int64_t); _currArity = 0; } diff --git a/searchlib/src/vespa/searchlib/parsequery/stackdumpiterator.h b/searchlib/src/vespa/searchlib/parsequery/stackdumpiterator.h index 9d076e81e37..8a9a28ebacb 100644 --- a/searchlib/src/vespa/searchlib/parsequery/stackdumpiterator.h +++ b/searchlib/src/vespa/searchlib/parsequery/stackdumpiterator.h @@ -36,7 +36,7 @@ private: vespalib::stringref _curr_index_name; /** The term in the current item */ vespalib::stringref _curr_term; - char _scratch[24]; + int64_t _curr_integer_term; /* extra arguments */ uint32_t _extraIntArg1; @@ -119,6 +119,7 @@ public: vespalib::stringref getIndexName() const { return _curr_index_name; } vespalib::stringref getTerm() const { return _curr_term; } + int64_t getIntergerTerm() const { return _curr_integer_term; } }; } diff --git a/searchlib/src/vespa/searchlib/query/streaming/querynode.cpp b/searchlib/src/vespa/searchlib/query/streaming/querynode.cpp index ec1b26ec143..cabc9b6dae4 100644 --- a/searchlib/src/vespa/searchlib/query/streaming/querynode.cpp +++ b/searchlib/src/vespa/searchlib/query/streaming/querynode.cpp @@ -2,7 +2,7 @@ #include "query.h" #include <vespa/searchlib/parsequery/stackdumpiterator.h> - +#include <charconv> #include <vespa/log/log.h> LOG_SETUP(".vsm.querynode"); @@ -90,7 +90,6 @@ QueryNode::Build(const QueryNode * parent, const QueryNodeResultFactory & factor if (dynamic_cast<const SameElementQueryNode *>(parent) != nullptr) { index = parent->getIndex() + "." 
+ index; } - vespalib::stringref term = queryRep.getTerm(); using TermType = QueryTerm::Type; TermType sTerm(TermType::WORD); switch (type) { @@ -112,12 +111,19 @@ QueryNode::Build(const QueryNode * parent, const QueryNodeResultFactory & factor default: break; } - QueryTerm::string ssTerm(term); + QueryTerm::string ssTerm; + if (type == ParseItem::ITEM_PURE_WEIGHTED_LONG) { + char buf[24]; + auto res = std::to_chars(buf, buf + sizeof(buf), queryRep.getIntergerTerm(), 10); + ssTerm.assign(buf, res.ptr - buf); + } else { + ssTerm = queryRep.getTerm(); + } QueryTerm::string ssIndex(index); if (ssIndex == "sddocname") { // This is suboptimal as the term should be checked too. // But it will do for now as only correct sddocname queries are sent down. - qn.reset(new TrueNode()); + qn = std::make_unique<TrueNode>(); } else { auto qt = std::make_unique<QueryTerm>(factory.create(), ssTerm, ssIndex, sTerm); qt->setWeight(queryRep.GetWeight()); @@ -131,7 +137,7 @@ QueryNode::Build(const QueryNode * parent, const QueryNodeResultFactory & factor auto orqn = std::make_unique<EquivQueryNode>(); orqn->push_back(std::move(qt)); orqn->push_back(std::move(phrase)); - qn.reset(orqn.release()); + qn = std::move(orqn); } } } diff --git a/searchlib/src/vespa/searchlib/query/tree/intermediatenodes.cpp b/searchlib/src/vespa/searchlib/query/tree/intermediatenodes.cpp index 4a4b606ef8f..ceeacb759b2 100644 --- a/searchlib/src/vespa/searchlib/query/tree/intermediatenodes.cpp +++ b/searchlib/src/vespa/searchlib/query/tree/intermediatenodes.cpp @@ -13,8 +13,5 @@ Near::~Near() = default; ONear::~ONear() = default; Phrase::~Phrase() = default; SameElement::~SameElement() = default; -WeightedSetTerm::~WeightedSetTerm() = default; -DotProduct::~DotProduct() = default; -WandTerm::~WandTerm() = default; } diff --git a/searchlib/src/vespa/searchlib/query/tree/intermediatenodes.h b/searchlib/src/vespa/searchlib/query/tree/intermediatenodes.h index 0ff0b212dfd..06475c0cc63 100644 --- 
a/searchlib/src/vespa/searchlib/query/tree/intermediatenodes.h +++ b/searchlib/src/vespa/searchlib/query/tree/intermediatenodes.h @@ -122,36 +122,4 @@ private: bool _expensive; }; -class WeightedSetTerm : public QueryNodeMixin<WeightedSetTerm, Intermediate>, public Term { -public: - WeightedSetTerm(const vespalib::string &view, int32_t id, Weight weight) - : Term(view, id, weight) {} - virtual ~WeightedSetTerm() = 0; -}; - -class DotProduct : public QueryNodeMixin<DotProduct, Intermediate>, public Term { -public: - DotProduct(const vespalib::string &view, int32_t id, Weight weight) - : Term(view, id, weight) {} - virtual ~DotProduct() = 0; -}; - -class WandTerm : public QueryNodeMixin<WandTerm, Intermediate>, public Term { -private: - uint32_t _targetNumHits; - int64_t _scoreThreshold; - double _thresholdBoostFactor; -public: - WandTerm(const vespalib::string &view, int32_t id, Weight weight, - uint32_t targetNumHits, int64_t scoreThreshold, double thresholdBoostFactor) - : Term(view, id, weight), - _targetNumHits(targetNumHits), - _scoreThreshold(scoreThreshold), - _thresholdBoostFactor(thresholdBoostFactor) {} - virtual ~WandTerm() = 0; - uint32_t getTargetNumHits() const { return _targetNumHits; } - int64_t getScoreThreshold() const { return _scoreThreshold; } - double getThresholdBoostFactor() const { return _thresholdBoostFactor; } -}; - } diff --git a/searchlib/src/vespa/searchlib/query/tree/querybuilder.h b/searchlib/src/vespa/searchlib/query/tree/querybuilder.h index 8392730cd29..70cf2382523 100644 --- a/searchlib/src/vespa/searchlib/query/tree/querybuilder.h +++ b/searchlib/src/vespa/searchlib/query/tree/querybuilder.h @@ -50,9 +50,6 @@ class QueryBuilderBase std::stack<NodeInfo> _nodes; vespalib::string _error_msg; - void reportError(const vespalib::string &msg); - void reportError(const vespalib::string &msg, const Node & incomming, const Node & root); - protected: QueryBuilderBase(); ~QueryBuilderBase(); @@ -91,6 +88,9 @@ public: * build a new query 
tree with the same builder. */ void reset(); + + void reportError(const vespalib::string &msg); + void reportError(const vespalib::string &msg, const Node & incomming, const Node & root); }; @@ -126,17 +126,17 @@ typename NodeTypes::SameElement *createSameElement(vespalib::stringref view) { return new typename NodeTypes::SameElement(view); } template <class NodeTypes> -typename NodeTypes::WeightedSetTerm *createWeightedSetTerm(vespalib::stringref view, int32_t id, Weight weight) { - return new typename NodeTypes::WeightedSetTerm(view, id, weight); +typename NodeTypes::WeightedSetTerm *createWeightedSetTerm(uint32_t num_terms, vespalib::stringref view, int32_t id, Weight weight) { + return new typename NodeTypes::WeightedSetTerm(num_terms, view, id, weight); } template <class NodeTypes> -typename NodeTypes::DotProduct *createDotProduct(vespalib::stringref view, int32_t id, Weight weight) { - return new typename NodeTypes::DotProduct(view, id, weight); +typename NodeTypes::DotProduct *createDotProduct(uint32_t num_terms, vespalib::stringref view, int32_t id, Weight weight) { + return new typename NodeTypes::DotProduct(num_terms, view, id, weight); } template <class NodeTypes> typename NodeTypes::WandTerm * -createWandTerm(vespalib::stringref view, int32_t id, Weight weight, uint32_t targetNumHits, int64_t scoreThreshold, double thresholdBoostFactor) { - return new typename NodeTypes::WandTerm(view, id, weight, targetNumHits, scoreThreshold, thresholdBoostFactor); +createWandTerm(uint32_t num_terms, vespalib::stringref view, int32_t id, Weight weight, uint32_t targetNumHits, int64_t scoreThreshold, double thresholdBoostFactor) { + return new typename NodeTypes::WandTerm(num_terms, view, id, weight, targetNumHits, scoreThreshold, thresholdBoostFactor); } template <class NodeTypes> typename NodeTypes::Rank *createRank() { @@ -262,12 +262,12 @@ public: } typename NodeTypes::WeightedSetTerm &addWeightedSetTerm( int child_count, stringref view, int32_t id, Weight weight) { 
adjustWeight(weight); - typename NodeTypes::WeightedSetTerm &node = addIntermediate(createWeightedSetTerm<NodeTypes>(view, id, weight), child_count); + typename NodeTypes::WeightedSetTerm &node = addTerm(createWeightedSetTerm<NodeTypes>(child_count, view, id, weight)); return node; } typename NodeTypes::DotProduct &addDotProduct( int child_count, stringref view, int32_t id, Weight weight) { adjustWeight(weight); - typename NodeTypes::DotProduct &node = addIntermediate( createDotProduct<NodeTypes>(view, id, weight), child_count); + typename NodeTypes::DotProduct &node = addTerm( createDotProduct<NodeTypes>(child_count, view, id, weight)); return node; } typename NodeTypes::WandTerm &addWandTerm( @@ -276,9 +276,8 @@ public: int64_t scoreThreshold, double thresholdBoostFactor) { adjustWeight(weight); - typename NodeTypes::WandTerm &node = addIntermediate( - createWandTerm<NodeTypes>(view, id, weight, targetNumHits, scoreThreshold, thresholdBoostFactor), - child_count); + typename NodeTypes::WandTerm &node = addTerm( + createWandTerm<NodeTypes>(child_count, view, id, weight, targetNumHits, scoreThreshold, thresholdBoostFactor)); return node; } typename NodeTypes::Rank &addRank(int child_count) { diff --git a/searchlib/src/vespa/searchlib/query/tree/querynodemixin.h b/searchlib/src/vespa/searchlib/query/tree/querynodemixin.h index 9e8c97cff94..dd398e11844 100644 --- a/searchlib/src/vespa/searchlib/query/tree/querynodemixin.h +++ b/searchlib/src/vespa/searchlib/query/tree/querynodemixin.h @@ -8,7 +8,7 @@ namespace search::query { template <typename T, typename Base> struct QueryNodeMixin : Base { - typedef QueryNodeMixin<T, Base> QueryNodeMixinType; + using QueryNodeMixinType = QueryNodeMixin<T, Base>; ~QueryNodeMixin() = 0; void accept(QueryVisitor &visitor) override { diff --git a/searchlib/src/vespa/searchlib/query/tree/queryreplicator.h b/searchlib/src/vespa/searchlib/query/tree/queryreplicator.h index 4b9226f6112..43364dc8575 100644 --- 
a/searchlib/src/vespa/searchlib/query/tree/queryreplicator.h +++ b/searchlib/src/vespa/searchlib/query/tree/queryreplicator.h @@ -78,26 +78,42 @@ private: visitNodes(node.getChildren()); } + void replicateMultiTerm(const MultiTerm &original, MultiTerm & replica) { + if (original.getType() == MultiTerm::Type::STRING) { + for (uint32_t i(0); i < original.getNumTerms(); i++) { + auto v = original.getAsString(i); + replica.addTerm(v.first, v.second); + } + } else if (original.getType() == MultiTerm::Type::INTEGER) { + for (uint32_t i(0); i < original.getNumTerms(); i++) { + auto v = original.getAsInteger(i); + replica.addTerm(v.first, v.second); + } + } else { + assert (original.getType() == MultiTerm::Type::UNKNOWN); + assert (original.getNumTerms() == 0); + } + } + void visit(WeightedSetTerm &node) override { - replicate(node, _builder.addWeightedSetTerm(node.getChildren().size(), node.getView(), - node.getId(), node.getWeight())); - visitNodes(node.getChildren()); + auto & replica = _builder.addWeightedSetTerm(node.getNumTerms(), node.getView(), node.getId(), node.getWeight()); + replicate(node, replica); + replicateMultiTerm(node, replica); } void visit(DotProduct &node) override { - replicate(node, _builder.addDotProduct(node.getChildren().size(), node.getView(), - node.getId(), node.getWeight())); - visitNodes(node.getChildren()); + auto & replica = _builder.addDotProduct(node.getNumTerms(), node.getView(), node.getId(), node.getWeight()); + replicate(node, replica); + replicateMultiTerm(node, replica); } void visit(WandTerm &node) override { - replicate(node, _builder.addWandTerm(node.getChildren().size(), - node.getView(), - node.getId(), node.getWeight(), - node.getTargetNumHits(), - node.getScoreThreshold(), - node.getThresholdBoostFactor())); - visitNodes(node.getChildren()); + auto & replica = _builder.addWandTerm(node.getNumTerms(), node.getView(), node.getId(), node.getWeight(), + node.getTargetNumHits(), + node.getScoreThreshold(), + 
node.getThresholdBoostFactor()); + replicate(node, replica); + replicateMultiTerm(node, replica); } void visit(Rank &node) override { diff --git a/searchlib/src/vespa/searchlib/query/tree/simplequery.h b/searchlib/src/vespa/searchlib/query/tree/simplequery.h index db517edc348..bdf1141fde5 100644 --- a/searchlib/src/vespa/searchlib/query/tree/simplequery.h +++ b/searchlib/src/vespa/searchlib/query/tree/simplequery.h @@ -35,17 +35,17 @@ struct SimpleSameElement : SameElement { SimpleSameElement(vespalib::stringref view) : SameElement(view) {} }; struct SimpleWeightedSetTerm : WeightedSetTerm { - SimpleWeightedSetTerm(vespalib::stringref view, int32_t id, Weight weight) - : WeightedSetTerm(view, id, weight) {} + SimpleWeightedSetTerm(uint32_t num_terms, vespalib::stringref view, int32_t id, Weight weight) + : WeightedSetTerm(num_terms, view, id, weight) {} }; struct SimpleDotProduct : DotProduct { - SimpleDotProduct(vespalib::stringref view, int32_t id, Weight weight) - : DotProduct(view, id, weight) {} + SimpleDotProduct(uint32_t num_terms, vespalib::stringref view, int32_t id, Weight weight) + : DotProduct(num_terms, view, id, weight) {} }; struct SimpleWandTerm : WandTerm { - SimpleWandTerm(vespalib::stringref view, int32_t id, Weight weight, + SimpleWandTerm(uint32_t num_terms, vespalib::stringref view, int32_t id, Weight weight, uint32_t targetNumHits, int64_t scoreThreshold, double thresholdBoostFactor) - : WandTerm(view, id, weight, targetNumHits, scoreThreshold, thresholdBoostFactor) {} + : WandTerm(num_terms, view, id, weight, targetNumHits, scoreThreshold, thresholdBoostFactor) {} }; struct SimpleRank : Rank {}; struct SimpleNumberTerm : NumberTerm { diff --git a/searchlib/src/vespa/searchlib/query/tree/stackdumpcreator.cpp b/searchlib/src/vespa/searchlib/query/tree/stackdumpcreator.cpp index 13756af3c27..5ad5fa80da4 100644 --- a/searchlib/src/vespa/searchlib/query/tree/stackdumpcreator.cpp +++ b/searchlib/src/vespa/searchlib/query/tree/stackdumpcreator.cpp 
@@ -169,18 +169,28 @@ class QueryNodeConverter : public QueryVisitor { if (typefield & ParseItem::IF_FLAGS) { appendByte(flags); } - appendCompressedPositiveNumber(node.getChildren().size()); + appendCompressedPositiveNumber(node.getNumTerms()); appendString(node.getView()); } + void createMultiTermNodes(const MultiTerm & mt) { + for (size_t i = 0; i < mt.getNumTerms(); ++i) { + auto term = mt.getAsString(i); + uint8_t typeField = static_cast<uint8_t>(ParseItem::ITEM_PURE_WEIGHTED_STRING) | static_cast<uint8_t>(ParseItem::IF_WEIGHT); + appendByte(typeField); + appendCompressedNumber(term.second.percent()); + appendString(term.first); + } + } + void visit(WeightedSetTerm &node) override { createWeightedSet(node, static_cast<uint8_t>(ParseItem::ITEM_WEIGHTED_SET) | static_cast<uint8_t>(ParseItem::IF_WEIGHT)); - visitNodes(node.getChildren()); + createMultiTermNodes(node); } void visit(DotProduct &node) override { createWeightedSet(node, static_cast<uint8_t>(ParseItem::ITEM_DOT_PRODUCT) | static_cast<uint8_t>(ParseItem::IF_WEIGHT)); - visitNodes(node.getChildren()); + createMultiTermNodes(node); } void visit(WandTerm &node) override { @@ -188,7 +198,7 @@ class QueryNodeConverter : public QueryVisitor { appendCompressedPositiveNumber(node.getTargetNumHits()); appendDouble(node.getScoreThreshold()); appendDouble(node.getThresholdBoostFactor()); - visitNodes(node.getChildren()); + createMultiTermNodes(node); } void visit(Rank &node) override { diff --git a/searchlib/src/vespa/searchlib/query/tree/stackdumpquerycreator.h b/searchlib/src/vespa/searchlib/query/tree/stackdumpquerycreator.h index eab397d321c..27df4f6b6e5 100644 --- a/searchlib/src/vespa/searchlib/query/tree/stackdumpquerycreator.h +++ b/searchlib/src/vespa/searchlib/query/tree/stackdumpquerycreator.h @@ -8,6 +8,8 @@ #include <vespa/searchlib/parsequery/stackdumpiterator.h> #include <vespa/searchlib/common/geo_location_parser.h> #include <vespa/vespalib/objects/hexdump.h> +#include 
<vespa/vespalib/util/stringfmt.h> +#include <charconv> namespace search::query { @@ -45,8 +47,29 @@ public: return builder.build(); } -private: - static Term * createQueryTerm(search::SimpleQueryStackDumpIterator &queryStack, QueryBuilder<NodeTypes> & builder, vespalib::stringref & pureTermView) { +private: + static void populateMultiTerm(search::SimpleQueryStackDumpIterator &queryStack, QueryBuilderBase & builder, MultiTerm & mt) { + uint32_t added(0); + for (added = 0; (added < mt.getNumTerms()) && queryStack.next(); added++) { + ParseItem::ItemType type = queryStack.getType(); + switch (type) { + case ParseItem::ITEM_PURE_WEIGHTED_LONG: + mt.addTerm(queryStack.getIntergerTerm(), queryStack.GetWeight()); + break; + case ParseItem::ITEM_PURE_WEIGHTED_STRING: + mt.addTerm(queryStack.getTerm(), queryStack.GetWeight()); + break; + default: + builder.reportError(vespalib::make_string("Got unexpected node %d for multiterm node at child term %d", type, added)); + return; + } + } + if (added < mt.getNumTerms()) { + builder.reportError(vespalib::make_string("Too few nodes(%d) for multiterm(%d)", added, mt.getNumTerms())); + } + } + static Term * + createQueryTerm(search::SimpleQueryStackDumpIterator &queryStack, QueryBuilder<NodeTypes> & builder, vespalib::stringref & pureTermView) { uint32_t arity = queryStack.getArity(); ParseItem::ItemType type = queryStack.getType(); Node::UP node; @@ -92,14 +115,18 @@ private: vespalib::stringref view = queryStack.getIndexName(); int32_t id = queryStack.getUniqueId(); Weight weight = queryStack.GetWeight(); - t = &builder.addWeightedSetTerm(arity, view, id, weight); + auto & ws = builder.addWeightedSetTerm(arity, view, id, weight); pureTermView = vespalib::stringref(); + populateMultiTerm(queryStack, builder, ws); + t = &ws; } else if (type == ParseItem::ITEM_DOT_PRODUCT) { vespalib::stringref view = queryStack.getIndexName(); int32_t id = queryStack.getUniqueId(); Weight weight = queryStack.GetWeight(); - t = 
&builder.addDotProduct(arity, view, id, weight); + auto & dotProduct = builder.addDotProduct(arity, view, id, weight); pureTermView = vespalib::stringref(); + populateMultiTerm(queryStack, builder, dotProduct); + t = &dotProduct; } else if (type == ParseItem::ITEM_WAND) { vespalib::stringref view = queryStack.getIndexName(); int32_t id = queryStack.getUniqueId(); @@ -107,9 +134,10 @@ private: uint32_t targetNumHits = queryStack.getTargetNumHits(); double scoreThreshold = queryStack.getScoreThreshold(); double thresholdBoostFactor = queryStack.getThresholdBoostFactor(); - t = &builder.addWandTerm(arity, view, id, weight, - targetNumHits, scoreThreshold, thresholdBoostFactor); + auto & wand = builder.addWandTerm(arity, view, id, weight, targetNumHits, scoreThreshold, thresholdBoostFactor); pureTermView = vespalib::stringref(); + populateMultiTerm(queryStack, builder, wand); + t = & wand; } else if (type == ParseItem::ITEM_NOT) { builder.addAndNot(arity); } else if (type == ParseItem::ITEM_NEAREST_NEIGHBOR) { @@ -135,7 +163,9 @@ private: } else if (type == ParseItem::ITEM_PURE_WEIGHTED_STRING) { t = &builder.addStringTerm(term, pureTermView, id, weight); } else if (type == ParseItem::ITEM_PURE_WEIGHTED_LONG) { - t = &builder.addNumberTerm(term, pureTermView, id, weight); + char buf[24]; + auto res = std::to_chars(buf, buf + sizeof(buf), queryStack.getIntergerTerm(), 10); + t = &builder.addNumberTerm(vespalib::stringref(buf, res.ptr - buf), pureTermView, id, weight); } else if (type == ParseItem::ITEM_PREFIXTERM) { t = &builder.addPrefixTerm(term, view, id, weight); } else if (type == ParseItem::ITEM_SUBSTRINGTERM) { diff --git a/searchlib/src/vespa/searchlib/query/tree/termnodes.cpp b/searchlib/src/vespa/searchlib/query/tree/termnodes.cpp index b9a9687c85b..4caf3c2ff89 100644 --- a/searchlib/src/vespa/searchlib/query/tree/termnodes.cpp +++ b/searchlib/src/vespa/searchlib/query/tree/termnodes.cpp @@ -1,26 +1,114 @@ // Copyright 2017 Yahoo Holdings. 
Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. #include "termnodes.h" +#include <vespa/vespalib/util/exceptions.h> +#include <charconv> +using vespalib::IllegalArgumentException; +using vespalib::stringref; +using vespalib::make_string_short::fmt; namespace search::query { -NumberTerm::~NumberTerm() = default; - -PrefixTerm::~PrefixTerm() = default; - -RangeTerm::~RangeTerm() = default; - StringTerm::StringTerm(const Type &term, vespalib::stringref view, int32_t id, Weight weight) : QueryNodeMixinType(term, view, id, weight) {} -StringTerm::~StringTerm() = default; +NumberTerm::~NumberTerm() = default; +PrefixTerm::~PrefixTerm() = default; +RangeTerm::~RangeTerm() = default; +StringTerm::~StringTerm() = default; SubstringTerm::~SubstringTerm() = default; - SuffixTerm::~SuffixTerm() = default; - LocationTerm::~LocationTerm() = default; - RegExpTerm::~RegExpTerm() = default; +WeightedSetTerm::~WeightedSetTerm() = default; +DotProduct::~DotProduct() = default; +WandTerm::~WandTerm() = default; + +namespace { + +class StringTermVector final : public MultiTerm::TermVector { +public: + StringTermVector(uint32_t sz) : _terms() { _terms.reserve(sz); } + void addTerm(stringref term, Weight weight) override { + _terms.emplace_back(term, weight); + } + void addTerm(int64_t value, Weight weight) override { + char buf[24]; + auto res = std::to_chars(buf, buf + sizeof(buf), value, 10); + addTerm(stringref(buf, res.ptr - buf), weight); + } + StringAndWeight getAsString(uint32_t index) const override { + const auto & v = _terms[index]; + return StringAndWeight(v.first, v.second); + } + IntegerAndWeight getAsInteger(uint32_t index) const override { + const auto & v = _terms[index]; + int64_t value(0); + std::from_chars(v.first.c_str(), v.first.c_str() + v.first.size(), value); + return IntegerAndWeight(value, v.second); + } + Weight getWeight(uint32_t index) const override { + return _terms[index].second; + } +private: + 
std::vector<std::pair<vespalib::string, Weight>> _terms; +}; + +class IntegerTermVector final : public MultiTerm::TermVector { +public: + IntegerTermVector(uint32_t sz) : _terms() { _terms.reserve(sz); } + void addTerm(stringref valueS, Weight weight) override { + int64_t value; + std::from_chars(valueS.data(), valueS.data() + valueS.size(), value); + addTerm(value, weight); + } + void addTerm(int64_t term, Weight weight) override { + _terms.emplace_back(term, weight); + } + StringAndWeight getAsString(uint32_t index) const override { + const auto & v = _terms[index]; + auto res = std::to_chars(_scratchPad, _scratchPad + sizeof(_scratchPad)-1, v.first, 10); + res.ptr[0] = '\0'; + return StringAndWeight(stringref(_scratchPad, res.ptr - _scratchPad), v.second); + } + IntegerAndWeight getAsInteger(uint32_t index) const override { + return _terms[index]; + } + Weight getWeight(uint32_t index) const override { + return _terms[index].second; + } +private: + std::vector<IntegerAndWeight> _terms; + mutable char _scratchPad[24]; +}; + +} + +MultiTerm::MultiTerm(uint32_t num_terms) + : _terms(), + _num_terms(num_terms), + _type(Type::UNKNOWN) +{} + +MultiTerm::~MultiTerm() = default; + +void +MultiTerm::addTerm(vespalib::stringref term, Weight weight) { + if ( ! _terms) { + _terms = std::make_unique<StringTermVector>(_num_terms); + _type = Type::STRING; + } + _terms->addTerm(term, weight); +} + +void +MultiTerm::addTerm(int64_t term, Weight weight) { + if ( ! 
_terms) { + _terms = std::make_unique<IntegerTermVector>(_num_terms); + _type = Type::INTEGER; + } + _terms->addTerm(term, weight); +} } diff --git a/searchlib/src/vespa/searchlib/query/tree/termnodes.h b/searchlib/src/vespa/searchlib/query/tree/termnodes.h index 3eda0732470..8fbaacdd20d 100644 --- a/searchlib/src/vespa/searchlib/query/tree/termnodes.h +++ b/searchlib/src/vespa/searchlib/query/tree/termnodes.h @@ -153,5 +153,75 @@ public: double get_distance_threshold() const { return _distance_threshold; } }; +class MultiTerm : public Node { +public: + enum class Type {STRING, INTEGER, UNKNOWN}; + using StringAndWeight = std::pair<vespalib::stringref, Weight>; + using IntegerAndWeight = std::pair<int64_t, Weight>; + struct TermVector { + using StringAndWeight = MultiTerm::StringAndWeight; + using IntegerAndWeight = MultiTerm::IntegerAndWeight; + virtual ~TermVector() = default; + virtual void addTerm(vespalib::stringref term, Weight weight) = 0; + virtual void addTerm(int64_t term, Weight weight) = 0; + virtual StringAndWeight getAsString(uint32_t index) const = 0; + virtual IntegerAndWeight getAsInteger(uint32_t index) const = 0; + virtual Weight getWeight(uint32_t index) const = 0; + }; + ~MultiTerm() override; + void addTerm(vespalib::stringref term, Weight weight); + void addTerm(int64_t term, Weight weight); + // Note that the first refers to a zero terminated string. + // That is required as the comparator for the enum store requires it. 
+ StringAndWeight getAsString(uint32_t index) const { return _terms->getAsString(index); } + IntegerAndWeight getAsInteger(uint32_t index) const { return _terms->getAsInteger(index); } + Weight weight(uint32_t index) const { return _terms->getWeight(index); } + uint32_t getNumTerms() const { return _num_terms; } + Type getType() const { return _type; } +protected: + MultiTerm(uint32_t num_terms); +private: + std::unique_ptr<TermVector> _terms; + uint32_t _num_terms; + Type _type; +}; + +class WeightedSetTerm : public QueryNodeMixin<WeightedSetTerm, MultiTerm>, public Term { +public: + WeightedSetTerm(uint32_t num_terms, const vespalib::string &view, int32_t id, Weight weight) + : QueryNodeMixinType(num_terms), + Term(view, id, weight) + {} + virtual ~WeightedSetTerm() = 0; +}; + +class DotProduct : public QueryNodeMixin<DotProduct, MultiTerm>, public Term { +public: + DotProduct(uint32_t num_terms, const vespalib::string &view, int32_t id, Weight weight) + : QueryNodeMixinType(num_terms), + Term(view, id, weight) + {} + virtual ~DotProduct() = 0; +}; + +class WandTerm : public QueryNodeMixin<WandTerm, MultiTerm>, public Term { +private: + uint32_t _targetNumHits; + int64_t _scoreThreshold; + double _thresholdBoostFactor; +public: + WandTerm(uint32_t num_terms, const vespalib::string &view, int32_t id, Weight weight, + uint32_t targetNumHits, int64_t scoreThreshold, double thresholdBoostFactor) + : QueryNodeMixinType(num_terms), + Term(view, id, weight), + _targetNumHits(targetNumHits), + _scoreThreshold(scoreThreshold), + _thresholdBoostFactor(thresholdBoostFactor) + {} + virtual ~WandTerm() = 0; + uint32_t getTargetNumHits() const { return _targetNumHits; } + int64_t getScoreThreshold() const { return _scoreThreshold; } + double getThresholdBoostFactor() const { return _thresholdBoostFactor; } +}; } diff --git a/searchlib/src/vespa/searchlib/queryeval/create_blueprint_visitor_helper.cpp b/searchlib/src/vespa/searchlib/queryeval/create_blueprint_visitor_helper.cpp 
index 3731b2ff6a8..a642010139b 100644 --- a/searchlib/src/vespa/searchlib/queryeval/create_blueprint_visitor_helper.cpp +++ b/searchlib/src/vespa/searchlib/queryeval/create_blueprint_visitor_helper.cpp @@ -66,12 +66,12 @@ template <typename WS, typename NODE> void CreateBlueprintVisitorHelper::createWeightedSet(std::unique_ptr<WS> bp, NODE &n) { FieldSpecList fields; - for (size_t i = 0; i < n.getChildren().size(); ++i) { + for (size_t i = 0; i < n.getNumTerms(); ++i) { fields.clear(); fields.add(bp->getNextChildField(_field)); - const query::Node &node = *n.getChildren()[i]; - uint32_t weight = getWeightFromNode(node).percent(); - bp->addTerm(_searchable.createBlueprint(_requestContext, fields, node), weight); + auto term = n.getAsString(i); + query::SimpleStringTerm node(term.first, n.getView(), 0, term.second); // TODO Temporary + bp->addTerm(_searchable.createBlueprint(_requestContext, fields, node), term.second.percent()); } setResult(std::move(bp)); } diff --git a/searchlib/src/vespa/searchlib/queryeval/termasstring.cpp b/searchlib/src/vespa/searchlib/queryeval/termasstring.cpp index 7a97110713d..0c881bf32f3 100644 --- a/searchlib/src/vespa/searchlib/queryeval/termasstring.cpp +++ b/searchlib/src/vespa/searchlib/queryeval/termasstring.cpp @@ -38,44 +38,46 @@ using search::query::WandTerm; using search::query::WeakAnd; using search::query::WeightedSetTerm; using vespalib::string; +using vespalib::stringref; namespace search::queryeval { -vespalib::string termAsString(double float_term) { - vespalib::asciistream os; - return (os << float_term).str(); -} +namespace { -vespalib::string termAsString(int64_t int_term) { +stringref +termAsString(const search::query::Range &term, string & scratchPad) { vespalib::asciistream os; - return (os << int_term).str(); + scratchPad = (os << term).str(); + return scratchPad; } -vespalib::string termAsString(const search::query::Range &term) { +stringref +termAsString(const search::query::Location &term, string & scratchPad) { 
vespalib::asciistream os; - return (os << term).str(); + scratchPad = (os << term).str(); + return scratchPad; } -vespalib::string termAsString(const search::query::Location &term) { - vespalib::asciistream os; - return (os << term).str(); +stringref +termAsString(const string &term, string &) { + return term; } -namespace { struct TermAsStringVisitor : public QueryVisitor { - string term; - bool isSet; + string & _scratchPad; + stringref term; + bool isSet; - TermAsStringVisitor() : term(), isSet(false) {} + TermAsStringVisitor(string & scratchPad) : _scratchPad(scratchPad), term(), isSet(false) {} template <class TermNode> void visitTerm(TermNode &n) { - term = termAsString(n.getTerm()); + term = termAsString(n.getTerm(), _scratchPad); isSet = true; } void illegalVisit() { - term.clear(); + term = stringref(); isSet = false; } @@ -104,15 +106,30 @@ struct TermAsStringVisitor : public QueryVisitor { void visit(PredicateQuery &) override {illegalVisit(); } void visit(NearestNeighborTerm &) override { illegalVisit(); } }; + +void throwFailure(const search::query::Node &term_node) __attribute((noinline)); + +void +throwFailure(const search::query::Node &term_node) { + string err(vespalib::make_string("Trying to convert a non-term node ('%s') to a term string.", typeid(term_node).name())); + LOG(warning, "%s", err.c_str()); + throw vespalib::IllegalArgumentException(err, VESPA_STRLOC); +} + } // namespace -string termAsString(const Node &term_node) { - TermAsStringVisitor visitor; +string +termAsString(const Node &term_node) { + string scratchPad; + return termAsString(term_node, scratchPad); +} + +stringref +termAsString(const search::query::Node &term_node, string & scratchPad) { + TermAsStringVisitor visitor(scratchPad); const_cast<Node &>(term_node).accept(visitor); if (!visitor.isSet) { - vespalib::string err(vespalib::make_string("Trying to convert a non-term node ('%s') to a term string.", typeid(term_node).name())); - LOG(warning, "%s", err.c_str()); - throw 
vespalib::IllegalArgumentException(err, VESPA_STRLOC); + throwFailure(term_node); } return visitor.term; } diff --git a/searchlib/src/vespa/searchlib/queryeval/termasstring.h b/searchlib/src/vespa/searchlib/queryeval/termasstring.h index c40050f0e2b..8b05bf9eddd 100644 --- a/searchlib/src/vespa/searchlib/queryeval/termasstring.h +++ b/searchlib/src/vespa/searchlib/queryeval/termasstring.h @@ -2,26 +2,13 @@ #pragma once -#include <vespa/searchlib/query/tree/location.h> -#include <vespa/searchlib/query/tree/range.h> -#include <string> +#include <vespa/vespalib/stllike/string.h> namespace search::query { class Node; } namespace search::queryeval { -inline const vespalib::string &termAsString(const vespalib::string &term) { - return term; -} - -vespalib::string termAsString(double float_term); - -vespalib::string termAsString(int64_t int_term); - -vespalib::string termAsString(const search::query::Range &term); - -vespalib::string termAsString(const search::query::Location &term); - vespalib::string termAsString(const search::query::Node &term_node); +vespalib::stringref termAsString(const search::query::Node &term_node, vespalib::string & scratchPad); } |