diff options
author | Geir Storli <geirst@yahooinc.com> | 2023-01-12 14:40:39 +0000 |
---|---|---|
committer | Geir Storli <geirst@yahooinc.com> | 2023-01-12 14:40:39 +0000 |
commit | ca18a63e3b492b218026ca012970d4bb7cecc992 (patch) | |
tree | 169dd9600b7501d3dfd58c73c2e54c1de99d6a42 /searchcore | |
parent | 6520197c31113ba7cb173138f2431d3a481ab494 (diff) |
Expose SameElement query terms to ranking.
A TermFieldMatchData is allocated per SameElement term,
and this is used to signal matching docids in doUnpack() on the SameElement search iterator.
This allows using the matches() rank feature on a field (virtual) that is searched using a SameElement term.
Diffstat (limited to 'searchcore')
15 files changed, 64 insertions, 38 deletions
diff --git a/searchcore/src/tests/proton/matching/matching_test.cpp b/searchcore/src/tests/proton/matching/matching_test.cpp index 70f6cf73a72..ca689d4c4c6 100644 --- a/searchcore/src/tests/proton/matching/matching_test.cpp +++ b/searchcore/src/tests/proton/matching/matching_test.cpp @@ -99,9 +99,9 @@ vespalib::string make_simple_stack_dump(const vespalib::string &field, const ves vespalib::string make_same_element_stack_dump(const vespalib::string &a1_term, const vespalib::string &f1_term) { QueryBuilder<ProtonNodeTypes> builder; - builder.addSameElement(2, "my"); - builder.addStringTerm(a1_term, "a1", 1, search::query::Weight(1)); - builder.addStringTerm(f1_term, "f1", 2, search::query::Weight(1)); + builder.addSameElement(2, "my", 0, Weight(1)); + builder.addStringTerm(a1_term, "a1", 1, Weight(1)); + builder.addStringTerm(f1_term, "f1", 2, Weight(1)); return StackDumpCreator::create(*builder.build()); } diff --git a/searchcore/src/tests/proton/matching/query_test.cpp b/searchcore/src/tests/proton/matching/query_test.cpp index 4c81aa7e347..575a52d01fb 100644 --- a/searchcore/src/tests/proton/matching/query_test.cpp +++ b/searchcore/src/tests/proton/matching/query_test.cpp @@ -119,7 +119,7 @@ class Test : public vespalib::TestApp { void requireThatRankBlueprintStaysOnTopAfterWhiteListing(); void requireThatAndNotBlueprintStaysOnTopAfterWhiteListing(); void requireThatSameElementTermsAreProperlyPrefixed(); - void requireThatSameElementDoesNotAllocateMatchData(); + void requireThatSameElementAllocatesMatchData(); void requireThatSameElementIteratorsCanBeBuilt(); void requireThatConstBoolBlueprintsAreCreatedCorrectly(); void global_filter_is_calculated_and_handled(); @@ -225,7 +225,7 @@ Node::UP buildSameElementQueryTree(const ViewResolver &resolver, const search::fef::IIndexEnvironment &idxEnv) { QueryBuilder<ProtonNodeTypes> query_builder; - query_builder.addSameElement(2, field); + query_builder.addSameElement(2, field, 2, Weight(0)); query_builder.addStringTerm(string_term, field, 0, Weight(0)); query_builder.addStringTerm(prefix_term, field, 1, Weight(0)); Node::UP node = query_builder.build(); @@ -1026,9 +1026,9 @@ search::query::Node::UP make_same_element_stack_dump(const vespalib::string &prefix, const vespalib::string &term_prefix) { QueryBuilder<ProtonNodeTypes> builder; - builder.addSameElement(2, prefix); - builder.addStringTerm("xyz", term_prefix + "f1", 1, search::query::Weight(1)); - builder.addStringTerm("abc", term_prefix + "f2", 2, search::query::Weight(1)); + builder.addSameElement(2, prefix, 0, Weight(1)); + builder.addStringTerm("xyz", term_prefix + "f1", 1, Weight(1)); + builder.addStringTerm("abc", term_prefix + "f2", 2, Weight(1)); vespalib::string stack = StackDumpCreator::create(*builder.build()); search::SimpleQueryStackDumpIterator stack_dump_iterator(stack); SameElementModifier sem; @@ -1070,14 +1070,14 @@ Test::requireThatSameElementTermsAreProperlyPrefixed() } void -Test::requireThatSameElementDoesNotAllocateMatchData() +Test::requireThatSameElementAllocatesMatchData() { Node::UP node = buildSameElementQueryTree(ViewResolver(), plain_index_env); MatchDataLayout mdl; MatchDataReserveVisitor visitor(mdl); node->accept(visitor); MatchData::UP match_data = mdl.createMatchData(); - EXPECT_EQUAL(0u, match_data->getNumTermFields()); + EXPECT_EQUAL(1u, match_data->getNumTermFields()); } void @@ -1215,7 +1215,7 @@ Test::Main() TEST_CALL(requireThatRankBlueprintStaysOnTopAfterWhiteListing); TEST_CALL(requireThatAndNotBlueprintStaysOnTopAfterWhiteListing); TEST_CALL(requireThatSameElementTermsAreProperlyPrefixed); - TEST_CALL(requireThatSameElementDoesNotAllocateMatchData); + TEST_CALL(requireThatSameElementAllocatesMatchData); TEST_CALL(requireThatSameElementIteratorsCanBeBuilt); TEST_CALL(requireThatConstBoolBlueprintsAreCreatedCorrectly); TEST_CALL(global_filter_is_calculated_and_handled); diff --git a/searchcore/src/tests/proton/matching/querynodes_test.cpp b/searchcore/src/tests/proton/matching/querynodes_test.cpp index c73483efed5..15fcc8a3fd7 100644 --- a/searchcore/src/tests/proton/matching/querynodes_test.cpp +++ b/searchcore/src/tests/proton/matching/querynodes_test.cpp @@ -214,7 +214,7 @@ public: using QB = QueryBuilder<ProtonNodeTypes>; struct Phrase { void addToBuilder(QB& b) { b.addPhrase(2, view, id, weight); }}; -struct SameElement { void addToBuilder(QB& b) { b.addSameElement(2, view); }}; +struct SameElement { void addToBuilder(QB& b) { b.addSameElement(2, view, id, weight); }}; struct Near { void addToBuilder(QB& b) { b.addNear(2, distance); } }; struct ONear { void addToBuilder(QB& b) { b.addONear(2, distance); } }; struct Or { void addToBuilder(QB& b) { b.addOr(2); } }; @@ -301,7 +301,7 @@ SearchIterator *getParent<SameElement>(SearchIterator *a, SearchIterator *b) { // we only check how many term/field combinations // are below the SameElement parent: // two terms searching in one index field - return new SameElementSearch(nullptr, std::move(children), true); + return new SameElementSearch(tmd, nullptr, std::move(children), true); } template <> diff --git a/searchcore/src/tests/proton/matching/resolveviewvisitor_test.cpp b/searchcore/src/tests/proton/matching/resolveviewvisitor_test.cpp index c9216c85af2..a47531c5575 100644 --- a/searchcore/src/tests/proton/matching/resolveviewvisitor_test.cpp +++ b/searchcore/src/tests/proton/matching/resolveviewvisitor_test.cpp @@ -136,12 +136,13 @@ TEST_F("require that equiv nodes resolve view from children", Fixture) { EXPECT_EQUAL(field2, base.field(1).field_name); } -TEST_F("require that view is resolved for SameElement children", Fixture) { +TEST_F("require that view is resolved for SameElement and its children", Fixture) { ViewResolver resolver; resolver.add(view, field1); + resolver.add("view2", field2); QueryBuilder<ProtonNodeTypes> builder; - builder.addSameElement(2, ""); + ProtonSameElement &same_elem = builder.addSameElement(2, "view2", 13, weight); ProtonStringTerm &my_term = builder.addStringTerm(term, view, 42, weight); builder.addStringTerm(term, field2, 43, weight); Node::UP node = builder.build(); @@ -149,6 +150,9 @@ TEST_F("require that view is resolved for SameElement children", Fixture) { ResolveViewVisitor visitor(resolver, f.index_environment); node->accept(visitor); + ASSERT_EQUAL(1u, same_elem.numFields()); + EXPECT_EQUAL(field2, same_elem.field(0).field_name); + ASSERT_EQUAL(1u, my_term.numFields()); EXPECT_EQUAL(field1, my_term.field(0).field_name); } diff --git a/searchcore/src/tests/proton/matching/same_element_builder/same_element_builder_test.cpp b/searchcore/src/tests/proton/matching/same_element_builder/same_element_builder_test.cpp index cb2fa52767a..e52995c1bd2 100644 --- a/searchcore/src/tests/proton/matching/same_element_builder/same_element_builder_test.cpp +++ b/searchcore/src/tests/proton/matching/same_element_builder/same_element_builder_test.cpp @@ -35,6 +35,7 @@ using search::queryeval::Blueprint; using search::queryeval::EmptyBlueprint; using search::queryeval::FakeBlueprint; using search::queryeval::FakeRequestContext; +using search::queryeval::FieldSpec; using search::queryeval::IntermediateBlueprint; using search::queryeval::SameElementBlueprint; @@ -89,8 +90,9 @@ struct FakeTerms { struct BuilderFixture { FakeRequestContext req_ctx; FakeSearchContext ctx; + FieldSpec field; SameElementBuilder builder; - BuilderFixture() : req_ctx(), ctx(), builder(req_ctx, ctx, "foo", false) { + BuilderFixture() : req_ctx(), ctx(), field("foo", 3, 5), builder(req_ctx, ctx, field, false) { ctx.attr().tag("attr"); ctx.addIdx(0).idx(0).getFake().tag("idx"); } @@ -104,9 +106,14 @@ const FakeBlueprint *as_fake(const Blueprint *bp) { return dynamic_cast<const FakeBlueprint*>(bp); } -void verify_children(Blueprint *bp, std::initializer_list<const char *> tags) { +void verify_blueprint(Blueprint *bp, std::initializer_list<const char *> tags) { SameElementBlueprint *se = dynamic_cast<SameElementBlueprint*>(bp); ASSERT_TRUE(se != nullptr); + ASSERT_EQUAL(1u, se->getState().numFields()); + const auto &field = se->getState().field(0); + EXPECT_EQUAL(3u, field.getFieldId()); + EXPECT_EQUAL(5u, field.getHandle()); + EXPECT_FALSE(field.isFilter()); ASSERT_EQUAL(se->terms().size(), tags.size()); size_t idx = 0; for (const char *tag: tags) { @@ -120,7 +127,7 @@ TEST_FF("require that same element blueprint can be built", BuilderFixture(), Fa f1.builder.add_child(f2.idx_string_term); f1.builder.add_child(f2.attr_string_term); Blueprint::UP result = f1.builder.build(); - TEST_DO(verify_children(result.get(), {"idx", "attr"})); + TEST_DO(verify_blueprint(result.get(), {"idx", "attr"})); } TEST_FF("require that terms searching multiple fields are ignored", BuilderFixture(), FakeTerms()) { @@ -128,7 +135,7 @@ TEST_FF("require that terms searching multiple fields are ignored", BuilderFixtu f1.builder.add_child(f2.attr_string_term); f1.builder.add_child(f2.both_string_term); // ignored Blueprint::UP result = f1.builder.build(); - TEST_DO(verify_children(result.get(), {"idx", "attr"})); + TEST_DO(verify_blueprint(result.get(), {"idx", "attr"})); } TEST_FF("require that all relevant term types can be used", BuilderFixture(), FakeTerms()) { @@ -141,7 +148,7 @@ TEST_FF("require that all relevant term types can be used", BuilderFixture(), Fa f1.builder.add_child(f2.attr_suffix_term); f1.builder.add_child(f2.attr_regexp_term); Blueprint::UP result = f1.builder.build(); - TEST_DO(verify_children(result.get(), {"idx", "idx", "idx", "idx", "attr", "attr", "attr", "attr"})); + TEST_DO(verify_blueprint(result.get(), {"idx", "idx", "idx", "idx", "attr", "attr", "attr", "attr"})); } TEST_F("require that building same element with no children gives EmptyBlueprint", BuilderFixture()) { diff --git a/searchcore/src/tests/proton/matching/termdataextractor_test.cpp b/searchcore/src/tests/proton/matching/termdataextractor_test.cpp index 443cc1686b6..1f96eb5d9d6 100644 --- a/searchcore/src/tests/proton/matching/termdataextractor_test.cpp +++ b/searchcore/src/tests/proton/matching/termdataextractor_test.cpp @@ -130,11 +130,11 @@ TEST("requireThatNegativeTermsAreSkipped") { EXPECT_EQUAL(id[1], term_data[1]->getUniqueId()); } -TEST("requireThatSameElementIsSkipped") +TEST("requireThatSameElementIsExtractedAsOneTerm") { QueryBuilder<ProtonNodeTypes> query_builder; query_builder.addAnd(2); - query_builder.addSameElement(2, field); + query_builder.addSameElement(2, field, id[3], Weight(7)); query_builder.addStringTerm("term1", field, id[0], Weight(1)); query_builder.addStringTerm("term2", field, id[1], Weight(1)); query_builder.addStringTerm("term3", field, id[2], Weight(1)); @@ -142,9 +142,9 @@ TEST("requireThatSameElementIsSkipped") vector<const ITermData *> term_data; TermDataExtractor::extractTerms(*node, term_data); - EXPECT_EQUAL(1u, term_data.size()); - ASSERT_TRUE(term_data.size() >= 1); - EXPECT_EQUAL(id[2], term_data[0]->getUniqueId()); + ASSERT_EQUAL(2u, term_data.size()); + EXPECT_EQUAL(id[3], term_data[0]->getUniqueId()); + EXPECT_EQUAL(id[2], term_data[1]->getUniqueId()); } } // namespace diff --git a/searchcore/src/tests/proton/matching/unpacking_iterators_optimizer/unpacking_iterators_optimizer_test.cpp b/searchcore/src/tests/proton/matching/unpacking_iterators_optimizer/unpacking_iterators_optimizer_test.cpp index 279db26f946..1bd7a9bec9f 100644 --- a/searchcore/src/tests/proton/matching/unpacking_iterators_optimizer/unpacking_iterators_optimizer_test.cpp +++ b/searchcore/src/tests/proton/matching/unpacking_iterators_optimizer/unpacking_iterators_optimizer_test.cpp @@ -99,7 +99,7 @@ void add_phrase(QueryBuilder<ProtonNodeTypes> &builder) { } void add_same_element(QueryBuilder<ProtonNodeTypes> &builder) { - builder.addSameElement(2, view); + builder.addSameElement(2, view, id, weight); { builder.addStringTerm("x", view, id, weight); builder.addStringTerm("y", view, id, weight); diff --git a/searchcore/src/vespa/searchcore/proton/matching/blueprintbuilder.cpp b/searchcore/src/vespa/searchcore/proton/matching/blueprintbuilder.cpp index 7573137d98c..2d3323949d1 100644 --- a/searchcore/src/vespa/searchcore/proton/matching/blueprintbuilder.cpp +++ b/searchcore/src/vespa/searchcore/proton/matching/blueprintbuilder.cpp @@ -10,6 +10,7 @@ #include <vespa/searchlib/queryeval/intermediate_blueprints.h> #include <vespa/searchlib/queryeval/equiv_blueprint.h> #include <vespa/searchlib/queryeval/get_weight_from_node.h> +#include <vespa/vespalib/util/issue.h> using namespace search::queryeval; @@ -102,11 +103,16 @@ private: } void buildSameElement(ProtonSameElement &n) { - SameElementBuilder builder(_requestContext, _context, n.getView(), n.is_expensive()); - for (search::query::Node *node : n.getChildren()) { - builder.add_child(*node); + if (n.numFields() == 1) { + SameElementBuilder builder(_requestContext, _context, n.field(0).fieldSpec(), n.is_expensive()); + for (search::query::Node *node: n.getChildren()) { + builder.add_child(*node); + } + _result = builder.build(); + } else { + vespalib::Issue::report("SameElement operator searches in unexpected number of fields. Expected 1 but was %zu", n.numFields()); + _result = std::make_unique<EmptyBlueprint>(); } - _result = builder.build(); } template <typename NodeType> diff --git a/searchcore/src/vespa/searchcore/proton/matching/docsum_matcher.cpp b/searchcore/src/vespa/searchcore/proton/matching/docsum_matcher.cpp index 032991cbf6a..fd5c3782b9a 100644 --- a/searchcore/src/vespa/searchcore/proton/matching/docsum_matcher.cpp +++ b/searchcore/src/vespa/searchcore/proton/matching/docsum_matcher.cpp @@ -59,7 +59,8 @@ template<typename T> const T *as(const Blueprint &bp) { return dynamic_cast<const T *>(&bp); } void find_matching_elements(const std::vector<uint32_t> &docs, const SameElementBlueprint &same_element, MatchingElements &result) { - auto search = same_element.create_same_element_search(false); + search::fef::TermFieldMatchData dummy_tfmd; + auto search = same_element.create_same_element_search(dummy_tfmd, false); search->initRange(docs.front(), docs.back()+1); std::vector<uint32_t> matches; for (uint32_t i = 0; i < docs.size(); ++i) { diff --git a/searchcore/src/vespa/searchcore/proton/matching/matchdatareservevisitor.h b/searchcore/src/vespa/searchcore/proton/matching/matchdatareservevisitor.h index ee0b4fba183..bf6e6346e24 100644 --- a/searchcore/src/vespa/searchcore/proton/matching/matchdatareservevisitor.h +++ b/searchcore/src/vespa/searchcore/proton/matching/matchdatareservevisitor.h @@ -27,7 +27,6 @@ public: } n.allocateTerms(_mdl); } - void visit(ProtonNodeTypes::SameElement &) override { } MatchDataReserveVisitor(search::fef::MatchDataLayout &mdl) : _mdl(mdl) {} }; diff --git a/searchcore/src/vespa/searchcore/proton/matching/querynodes.h b/searchcore/src/vespa/searchcore/proton/matching/querynodes.h index 20d602b7814..fc3f12af4f7 100644 --- a/searchcore/src/vespa/searchcore/proton/matching/querynodes.h +++ b/searchcore/src/vespa/searchcore/proton/matching/querynodes.h @@ -114,7 +114,6 @@ using ProtonONear = search::query::SimpleONear; using ProtonOr = search::query::SimpleOr; using ProtonRank = search::query::SimpleRank; using ProtonWeakAnd = search::query::SimpleWeakAnd; -using ProtonSameElement = search::query::SimpleSameElement; using ProtonTrue = search::query::SimpleTrue; using ProtonFalse = search::query::SimpleFalse; @@ -123,6 +122,10 @@ struct ProtonEquiv final : public ProtonTermBase<search::query::Equiv> { using ProtonTermBase::ProtonTermBase; }; +struct ProtonSameElement final : public ProtonTermBase<search::query::SameElement> { + using ProtonTermBase::ProtonTermBase; +}; + struct ProtonNearestNeighborTerm : public ProtonTermBase<search::query::NearestNeighborTerm> { using ProtonTermBase::ProtonTermBase; std::optional<vespalib::string> query_tensor_name() const override { diff --git a/searchcore/src/vespa/searchcore/proton/matching/resolveviewvisitor.h b/searchcore/src/vespa/searchcore/proton/matching/resolveviewvisitor.h index a24dcc96b8e..3f778cc05f0 100644 --- a/searchcore/src/vespa/searchcore/proton/matching/resolveviewvisitor.h +++ b/searchcore/src/vespa/searchcore/proton/matching/resolveviewvisitor.h @@ -28,6 +28,11 @@ public: visitChildren(n); n.resolveFromChildren(n.getChildren()); } + + void visit(ProtonNodeTypes::SameElement &n) override { + visitChildren(n); + visitTerm(n); + } }; } diff --git a/searchcore/src/vespa/searchcore/proton/matching/same_element_builder.cpp b/searchcore/src/vespa/searchcore/proton/matching/same_element_builder.cpp index 574df384a63..5f77cfefcdf 100644 --- a/searchcore/src/vespa/searchcore/proton/matching/same_element_builder.cpp +++ b/searchcore/src/vespa/searchcore/proton/matching/same_element_builder.cpp @@ -75,10 +75,10 @@ public: } // namespace proton::matching::<unnamed> SameElementBuilder::SameElementBuilder(const search::queryeval::IRequestContext &requestContext, ISearchContext &context, - const vespalib::string &struct_field_name, bool expensive) + const search::queryeval::FieldSpec &field, bool expensive) : _requestContext(requestContext), _context(context), - _result(std::make_unique<SameElementBlueprint>(struct_field_name, expensive)) + _result(std::make_unique<SameElementBlueprint>(field, expensive)) { } @@ -92,7 +92,7 @@ SameElementBuilder::add_child(search::query::Node &node) Blueprint::UP SameElementBuilder::build() { - if (!_result || _result->terms().empty()) { + if (_result->terms().empty()) { return std::make_unique<EmptyBlueprint>(); } return std::move(_result); diff --git a/searchcore/src/vespa/searchcore/proton/matching/same_element_builder.h b/searchcore/src/vespa/searchcore/proton/matching/same_element_builder.h index ece0006049e..dd89e952a34 100644 --- a/searchcore/src/vespa/searchcore/proton/matching/same_element_builder.h +++ b/searchcore/src/vespa/searchcore/proton/matching/same_element_builder.h @@ -7,6 +7,8 @@ #include <vespa/searchlib/queryeval/blueprint.h> #include <vespa/searchlib/queryeval/same_element_blueprint.h> +namespace search::queryeval { class FieldSpec; } + namespace proton::matching { class SameElementBuilder @@ -17,7 +19,7 @@ private: std::unique_ptr<search::queryeval::SameElementBlueprint> _result; public: SameElementBuilder(const search::queryeval::IRequestContext &requestContext, ISearchContext &context, - const vespalib::string &struct_field_name, bool expensive); + const search::queryeval::FieldSpec &field, bool expensive); void add_child(search::query::Node &node); search::queryeval::Blueprint::UP build(); }; diff --git a/searchcore/src/vespa/searchcore/proton/matching/termdataextractor.cpp b/searchcore/src/vespa/searchcore/proton/matching/termdataextractor.cpp index efddd24c964..4252f5385cb 100644 --- a/searchcore/src/vespa/searchcore/proton/matching/termdataextractor.cpp +++ b/searchcore/src/vespa/searchcore/proton/matching/termdataextractor.cpp @@ -39,7 +39,6 @@ public: _term_data.push_back(&n); } - virtual void visit(ProtonNodeTypes::SameElement &) override {} }; } // namespace |