diff options
author | Tor Egge <Tor.Egge@online.no> | 2024-03-18 14:15:16 +0100 |
---|---|---|
committer | Tor Egge <Tor.Egge@online.no> | 2024-03-18 14:15:16 +0100 |
commit | a91ea35083d79d63e925e3d5fe8f4735b90a17ce (patch) | |
tree | 06616a20ff3a0f90e0753dbc5a7f2d14a398c767 /searchlib | |
parent | b199f8bf2bd09f0a2fcff3dfa85861e24f69d647 (diff) |
Change parent class of search::streaming::SameElementQueryNode from
search::streaming::AndQueryNode to search::streaming::MultiTerm.
Diffstat (limited to 'searchlib')
10 files changed, 113 insertions, 36 deletions
diff --git a/searchlib/src/tests/query/streaming/same_element_query_node_test.cpp b/searchlib/src/tests/query/streaming/same_element_query_node_test.cpp index ece6dc551b2..db1e5a1ef5d 100644 --- a/searchlib/src/tests/query/streaming/same_element_query_node_test.cpp +++ b/searchlib/src/tests/query/streaming/same_element_query_node_test.cpp @@ -1,12 +1,20 @@ // Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. #include <vespa/searchlib/query/streaming/same_element_query_node.h> +#include <vespa/searchlib/fef/matchdata.h> +#include <vespa/searchlib/fef/simpletermdata.h> +#include <vespa/searchlib/fef/test/indexenvironment.h> +#include <vespa/searchlib/query/streaming/query.h> #include <vespa/searchlib/query/streaming/queryterm.h> #include <vespa/searchlib/query/tree/querybuilder.h> #include <vespa/searchlib/query/tree/simplequery.h> #include <vespa/searchlib/query/tree/stackdumpcreator.h> #include <vespa/vespalib/gtest/gtest.h> +using search::fef::MatchData; +using search::fef::SimpleTermData; +using search::fef::TermFieldHandle; +using search::fef::test::IndexEnvironment; using search::query::QueryBuilder; using search::query::Node; using search::query::SimpleQueryNodeTypes; @@ -44,11 +52,11 @@ TEST(SameElementQueryNodeTest, a_unhandled_sameElement_stack) const QueryNode & root = q.getRoot(); auto sameElement = dynamic_cast<const SameElementQueryNode *>(&root); EXPECT_TRUE(sameElement != nullptr); - EXPECT_EQ(2u, sameElement->size()); + EXPECT_EQ(2u, sameElement->get_terms().size()); EXPECT_EQ("xyz_abcdefghij_xyzxyzx", sameElement->getIndex()); - auto term0 = dynamic_cast<const QueryTerm *>((*sameElement)[0].get()); + auto term0 = sameElement->get_terms()[0].get(); EXPECT_TRUE(term0 != nullptr); - auto term1 = dynamic_cast<const QueryTerm *>((*sameElement)[1].get()); + auto term1 = sameElement->get_terms()[1].get(); EXPECT_TRUE(term1 != nullptr); } @@ -75,15 +83,17 @@ TEST(SameElementQueryNodeTest, 
test_same_element_evaluate) auto * sameElem = dynamic_cast<SameElementQueryNode *>(&q.getRoot()); EXPECT_TRUE(sameElem != nullptr); EXPECT_EQ("field", sameElem->getIndex()); - EXPECT_EQ(3u, sameElem->size()); - verifyQueryTermNode("field.f1", (*sameElem)[0].get()); - verifyQueryTermNode("field.f2", (*sameElem)[1].get()); - verifyQueryTermNode("field.f3", (*sameElem)[2].get()); - - QueryTermList terms; - q.getLeaves(terms); + EXPECT_EQ(3u, sameElem->get_terms().size()); + verifyQueryTermNode("field.f1", sameElem->get_terms()[0].get()); + verifyQueryTermNode("field.f2", sameElem->get_terms()[1].get()); + verifyQueryTermNode("field.f3", sameElem->get_terms()[2].get()); + + QueryTermList leaves; + q.getLeaves(leaves); + EXPECT_EQ(1u, leaves.size()); + auto& terms = sameElem->get_terms(); EXPECT_EQ(3u, terms.size()); - for (QueryTerm * qt : terms) { + for (auto& qt : terms) { qt->resizeFieldId(3); } @@ -130,6 +140,19 @@ TEST(SameElementQueryNodeTest, test_same_element_evaluate) EXPECT_EQ(160, hits[3].element_weight()); EXPECT_EQ(0u, hits[3].position()); EXPECT_TRUE(sameElem->evaluate()); + + SimpleTermData td; + constexpr TermFieldHandle handle0 = 27; + constexpr TermFieldHandle handle_max = handle0; + td.addField(0).setHandle(handle0); + auto md = MatchData::makeTestInstance(handle_max + 1, handle_max + 1); + auto tfmd0 = md->resolveTermField(handle0); + tfmd0->setNeedInterleavedFeatures(true); + IndexEnvironment ie; + sameElem->unpack_match_data(2, td, *md, ie); + EXPECT_EQ(2, tfmd0->getDocId()); + EXPECT_EQ(0, tfmd0->getNumOccs()); + EXPECT_EQ(0, tfmd0->end() - tfmd0->begin()); } GTEST_MAIN_RUN_ALL_TESTS() diff --git a/searchlib/src/vespa/searchlib/query/streaming/equiv_query_node.cpp b/searchlib/src/vespa/searchlib/query/streaming/equiv_query_node.cpp index 3fcf983901d..4367dc1cd69 100644 --- a/searchlib/src/vespa/searchlib/query/streaming/equiv_query_node.cpp +++ b/searchlib/src/vespa/searchlib/query/streaming/equiv_query_node.cpp @@ -81,8 +81,8 @@ 
EquivQueryNode::unpack_match_data(uint32_t docid, const fef::ITermData& td, fef: unpack_match_data_helper(docid, td, match_data, hit_list, *this, is_filter(), index_env); } -EquivQueryNode* -EquivQueryNode::as_equiv_query_node() noexcept +const MultiTerm* +EquivQueryNode::as_multi_index_multi_term() const noexcept { return this; } diff --git a/searchlib/src/vespa/searchlib/query/streaming/equiv_query_node.h b/searchlib/src/vespa/searchlib/query/streaming/equiv_query_node.h index a0485954675..ac87e14150b 100644 --- a/searchlib/src/vespa/searchlib/query/streaming/equiv_query_node.h +++ b/searchlib/src/vespa/searchlib/query/streaming/equiv_query_node.h @@ -17,7 +17,7 @@ public: bool evaluate() const override; const HitList & evaluateHits(HitList & hl) const override; void unpack_match_data(uint32_t docid, const fef::ITermData& td, fef::MatchData& match_data, const fef::IIndexEnvironment& index_env) override; - EquivQueryNode* as_equiv_query_node() noexcept override; + const MultiTerm* as_multi_index_multi_term() const noexcept override; const EquivQueryNode* as_equiv_query_node() const noexcept override; std::vector<std::unique_ptr<QueryTerm>> steal_terms(); }; diff --git a/searchlib/src/vespa/searchlib/query/streaming/query.cpp b/searchlib/src/vespa/searchlib/query/streaming/query.cpp index 94d9acd02cd..25fcb8c123e 100644 --- a/searchlib/src/vespa/searchlib/query/streaming/query.cpp +++ b/searchlib/src/vespa/searchlib/query/streaming/query.cpp @@ -95,7 +95,6 @@ QueryConnector::create(ParseItem::ItemType type) case search::ParseItem::ITEM_OR: case search::ParseItem::ITEM_WEAK_AND: return std::make_unique<OrQueryNode>(); case search::ParseItem::ITEM_NOT: return std::make_unique<AndNotQueryNode>(); - case search::ParseItem::ITEM_SAME_ELEMENT: return std::make_unique<SameElementQueryNode>(); case search::ParseItem::ITEM_NEAR: return std::make_unique<NearQueryNode>(); case search::ParseItem::ITEM_ONEAR: return std::make_unique<ONearQueryNode>(); case 
search::ParseItem::ITEM_RANK: return std::make_unique<RankWithQueryNode>(); diff --git a/searchlib/src/vespa/searchlib/query/streaming/querynode.cpp b/searchlib/src/vespa/searchlib/query/streaming/querynode.cpp index 611e8d67d76..37f3b07058b 100644 --- a/searchlib/src/vespa/searchlib/query/streaming/querynode.cpp +++ b/searchlib/src/vespa/searchlib/query/streaming/querynode.cpp @@ -11,6 +11,7 @@ #include <vespa/searchlib/query/streaming/dot_product_term.h> #include <vespa/searchlib/query/streaming/equiv_query_node.h> #include <vespa/searchlib/query/streaming/in_term.h> +#include <vespa/searchlib/query/streaming/same_element_query_node.h> #include <vespa/searchlib/query/streaming/wand_term.h> #include <vespa/searchlib/query/streaming/weighted_set_term.h> #include <vespa/searchlib/query/tree/term_vector.h> @@ -49,7 +50,6 @@ QueryNode::Build(const QueryNode * parent, const QueryNodeResultFactory & factor case ParseItem::ITEM_OR: case ParseItem::ITEM_WEAK_AND: case ParseItem::ITEM_NOT: - case ParseItem::ITEM_SAME_ELEMENT: case ParseItem::ITEM_NEAR: case ParseItem::ITEM_ONEAR: case ParseItem::ITEM_RANK: @@ -61,9 +61,7 @@ QueryNode::Build(const QueryNode * parent, const QueryNodeResultFactory & factor if (nqn) { nqn->distance(queryRep.getNearDistance()); } - if ((type == ParseItem::ITEM_WEAK_AND) || - (type == ParseItem::ITEM_SAME_ELEMENT)) - { + if (type == ParseItem::ITEM_WEAK_AND) { qn->setIndex(queryRep.getIndexName()); } for (size_t i=0; i < arity; i++) { @@ -197,6 +195,9 @@ QueryNode::Build(const QueryNode * parent, const QueryNodeResultFactory & factor case ParseItem::ITEM_EQUIV: qn = build_equiv_term(factory, queryRep, allowRewrite); break; + case ParseItem::ITEM_SAME_ELEMENT: + qn = build_same_element_term(factory, queryRep, allowRewrite); + break; default: skip_unknown(queryRep); break; @@ -335,6 +336,25 @@ QueryNode::build_equiv_term(const QueryNodeResultFactory& factory, SimpleQuerySt return eqn; } +std::unique_ptr<QueryNode> 
+QueryNode::build_same_element_term(const QueryNodeResultFactory& factory, SimpleQueryStackDumpIterator& queryRep, bool allow_rewrite) +{ + auto sen = std::make_unique<SameElementQueryNode>(factory.create(), queryRep.getIndexName(), queryRep.getArity()); + auto arity = queryRep.getArity(); + sen->setWeight(queryRep.GetWeight()); + sen->setUniqueId(queryRep.getUniqueId()); + for (size_t i = 0; i < arity; ++i) { + queryRep.next(); + auto qn = Build(sen.get(), factory, queryRep, allow_rewrite); + auto qtp = dynamic_cast<QueryTerm*>(qn.get()); + assert(qtp != nullptr); + qn.release(); + std::unique_ptr<QueryTerm> qt(qtp); + sen->add_term(std::move(qt)); + } + return sen; +} + void QueryNode::skip_unknown(SimpleQueryStackDumpIterator& queryRep) { diff --git a/searchlib/src/vespa/searchlib/query/streaming/querynode.h b/searchlib/src/vespa/searchlib/query/streaming/querynode.h index fff3bb15d10..c891cd44363 100644 --- a/searchlib/src/vespa/searchlib/query/streaming/querynode.h +++ b/searchlib/src/vespa/searchlib/query/streaming/querynode.h @@ -35,6 +35,7 @@ class QueryNode static std::unique_ptr<QueryNode> build_weighted_set_term(const QueryNodeResultFactory& factory, SimpleQueryStackDumpIterator& queryRep); static std::unique_ptr<QueryNode> build_phrase_term(const QueryNodeResultFactory& factory, SimpleQueryStackDumpIterator& queryRep); static std::unique_ptr<QueryNode> build_equiv_term(const QueryNodeResultFactory& factory, SimpleQueryStackDumpIterator& queryRep, bool allow_rewrite); + static std::unique_ptr<QueryNode> build_same_element_term(const QueryNodeResultFactory& factory, SimpleQueryStackDumpIterator& queryRep, bool allow_rewrite); static void skip_unknown(SimpleQueryStackDumpIterator& queryRep); public: using UP = std::unique_ptr<QueryNode>; diff --git a/searchlib/src/vespa/searchlib/query/streaming/queryterm.cpp b/searchlib/src/vespa/searchlib/query/streaming/queryterm.cpp index 728b9a2ab76..8a1fe2004d0 100644 --- 
a/searchlib/src/vespa/searchlib/query/streaming/queryterm.cpp +++ b/searchlib/src/vespa/searchlib/query/streaming/queryterm.cpp @@ -147,8 +147,8 @@ QueryTerm::as_fuzzy_term() noexcept return nullptr; } -EquivQueryNode* -QueryTerm::as_equiv_query_node() noexcept +const MultiTerm* +QueryTerm::as_multi_index_multi_term() const noexcept { return nullptr; } diff --git a/searchlib/src/vespa/searchlib/query/streaming/queryterm.h b/searchlib/src/vespa/searchlib/query/streaming/queryterm.h index e6b063231d6..4a108d880e9 100644 --- a/searchlib/src/vespa/searchlib/query/streaming/queryterm.h +++ b/searchlib/src/vespa/searchlib/query/streaming/queryterm.h @@ -106,7 +106,7 @@ public: virtual MultiTerm* as_multi_term() noexcept; virtual RegexpTerm* as_regexp_term() noexcept; virtual FuzzyTerm* as_fuzzy_term() noexcept; - virtual EquivQueryNode* as_equiv_query_node() noexcept; + virtual const MultiTerm* as_multi_index_multi_term() const noexcept; virtual const EquivQueryNode* as_equiv_query_node() const noexcept; virtual void unpack_match_data(uint32_t docid, const fef::ITermData& td, fef::MatchData& match_data, const fef::IIndexEnvironment& index_env); protected: diff --git a/searchlib/src/vespa/searchlib/query/streaming/same_element_query_node.cpp b/searchlib/src/vespa/searchlib/query/streaming/same_element_query_node.cpp index 49d5fb0f9fb..ae1ae8d5230 100644 --- a/searchlib/src/vespa/searchlib/query/streaming/same_element_query_node.cpp +++ b/searchlib/src/vespa/searchlib/query/streaming/same_element_query_node.cpp @@ -1,30 +1,36 @@ // Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
#include "same_element_query_node.h" +#include <vespa/searchlib/fef/itermdata.h> +#include <vespa/searchlib/fef/matchdata.h> #include <cassert> namespace search::streaming { +SameElementQueryNode::SameElementQueryNode(std::unique_ptr<QueryNodeResultBase> result_base, const string& index, uint32_t num_terms) noexcept + : MultiTerm(std::move(result_base), index, num_terms) +{ +} + +SameElementQueryNode::~SameElementQueryNode() = default; + bool SameElementQueryNode::evaluate() const { HitList hl; return ! evaluateHits(hl).empty(); } -void -SameElementQueryNode::addChild(QueryNode::UP child) { - assert(dynamic_cast<const QueryTerm *>(child.get()) != nullptr); - AndQueryNode::addChild(std::move(child)); -} - const HitList & SameElementQueryNode::evaluateHits(HitList & hl) const { hl.clear(); - if ( !AndQueryNode::evaluate()) return hl; - + const auto & children = get_terms(); + for (auto& child : children) { + if ( ! child->evaluate() ) { + return hl; + } + } HitList tmpHL; - const auto & children = getChildren(); unsigned int numFields = children.size(); unsigned int currMatchCount = 0; std::vector<unsigned int> indexVector(numFields, 0); @@ -62,4 +68,31 @@ SameElementQueryNode::evaluateHits(HitList & hl) const return hl; } +void +SameElementQueryNode::unpack_match_data(uint32_t docid, const fef::ITermData& td, fef::MatchData& match_data, const fef::IIndexEnvironment&) +{ + HitList list; + const HitList & hit_list = evaluateHits(list); + if (!hit_list.empty()) { + auto num_fields = td.numFields(); + /* + * Currently reports hit for all fields for query node instead of + * just the fields where the related subfields had matches. 
+ */ + for (size_t field_idx = 0; field_idx < num_fields; ++field_idx) { + auto& tfd = td.field(field_idx); + auto field_id = tfd.getFieldId(); + auto tmd = match_data.resolveTermField(tfd.getHandle()); + tmd->setFieldId(field_id); + tmd->reset(docid); + } + } +} + +const MultiTerm* +SameElementQueryNode::as_multi_index_multi_term() const noexcept +{ + return this; +} + } diff --git a/searchlib/src/vespa/searchlib/query/streaming/same_element_query_node.h b/searchlib/src/vespa/searchlib/query/streaming/same_element_query_node.h index 8e675feb569..8955071ba4a 100644 --- a/searchlib/src/vespa/searchlib/query/streaming/same_element_query_node.h +++ b/searchlib/src/vespa/searchlib/query/streaming/same_element_query_node.h @@ -2,21 +2,22 @@ #pragma once -#include "query.h" +#include "multi_term.h" namespace search::streaming { /** N-ary Same element operator. All terms must be within the same element. */ -class SameElementQueryNode : public AndQueryNode +class SameElementQueryNode : public MultiTerm { public: - SameElementQueryNode() noexcept : AndQueryNode("SAME_ELEMENT") { } + SameElementQueryNode(std::unique_ptr<QueryNodeResultBase> result_base, const string& index, uint32_t num_terms) noexcept; + ~SameElementQueryNode() override; bool evaluate() const override; const HitList & evaluateHits(HitList & hl) const override; - bool isFlattenable(ParseItem::ItemType) const override { return false; } - void addChild(QueryNode::UP child) override; + void unpack_match_data(uint32_t docid, const fef::ITermData& td, fef::MatchData& match_data, const fef::IIndexEnvironment& index_env) override; + const MultiTerm* as_multi_index_multi_term() const noexcept override; }; } |