From e7f299d95f8fe6d0392e367598a3449d762f1604 Mon Sep 17 00:00:00 2001 From: Tor Egge Date: Fri, 15 Mar 2024 13:19:05 +0100 Subject: Use filter settings from rank profiles and query terms in streaming search. --- .../src/tests/query/streaming/equiv_query_node_test.cpp | 5 ++++- searchlib/src/tests/query/streaming_query_test.cpp | 14 ++++++++++---- .../vespa/searchlib/query/streaming/dot_product_term.cpp | 3 ++- .../vespa/searchlib/query/streaming/dot_product_term.h | 2 +- .../vespa/searchlib/query/streaming/equiv_query_node.cpp | 4 ++-- .../vespa/searchlib/query/streaming/equiv_query_node.h | 2 +- .../src/vespa/searchlib/query/streaming/in_term.cpp | 3 ++- searchlib/src/vespa/searchlib/query/streaming/in_term.h | 2 +- .../query/streaming/nearest_neighbor_query_node.cpp | 3 ++- .../query/streaming/nearest_neighbor_query_node.h | 2 +- .../searchlib/query/streaming/phrase_query_node.cpp | 4 ++-- .../vespa/searchlib/query/streaming/phrase_query_node.h | 2 +- .../src/vespa/searchlib/query/streaming/querynode.cpp | 1 + .../src/vespa/searchlib/query/streaming/queryterm.cpp | 5 +++-- .../src/vespa/searchlib/query/streaming/queryterm.h | 8 ++++++-- .../src/vespa/searchlib/query/streaming/queryterm.hpp | 8 ++++++-- .../src/vespa/searchlib/query/streaming/wand_term.cpp | 3 ++- .../src/vespa/searchlib/query/streaming/wand_term.h | 2 +- .../searchlib/query/streaming/weighted_set_term.cpp | 3 ++- .../vespa/searchlib/query/streaming/weighted_set_term.h | 2 +- .../src/tests/rank_processor/rank_processor_test.cpp | 16 ++++++++++------ .../src/vespa/searchvisitor/indexenvironment.cpp | 11 +++++++++++ .../src/vespa/searchvisitor/indexenvironment.h | 2 ++ .../src/vespa/searchvisitor/rankmanager.cpp | 1 + .../src/vespa/searchvisitor/rankprocessor.cpp | 6 +++--- .../src/vespa/searchvisitor/rankprocessor.h | 2 +- 26 files changed, 79 insertions(+), 37 deletions(-) diff --git a/searchlib/src/tests/query/streaming/equiv_query_node_test.cpp b/searchlib/src/tests/query/streaming/equiv_query_node_test.cpp index 72378385c78..3879b6f0d1d 100644 --- a/searchlib/src/tests/query/streaming/equiv_query_node_test.cpp +++ b/searchlib/src/tests/query/streaming/equiv_query_node_test.cpp @@ -3,6 +3,7 @@ #include #include #include +#include #include #include #include @@ -15,6 +16,7 @@ using search::fef::MatchData; using search::fef::SimpleTermData; using search::fef::TermFieldHandle; using search::fef::TermFieldMatchDataPosition; +using search::fef::test::IndexEnvironment; using search::query::QueryBuilder; using search::query::Node; using search::query::SimpleQueryNodeTypes; @@ -167,7 +169,8 @@ TEST_F(EquivQueryNodeTest, test_equiv_evaluate_and_unpack) auto tfmd1 = md->resolveTermField(handle1); tfmd0->setNeedInterleavedFeatures(true); tfmd1->setNeedInterleavedFeatures(true); - eqn.unpack_match_data(2, td, *md); + IndexEnvironment ie; + eqn.unpack_match_data(2, td, *md, ie); EXPECT_EQ(2, tfmd0->getDocId()); EXPECT_EQ(3, tfmd0->getNumOccs()); EXPECT_EQ(3, tfmd0->end() - tfmd0->begin()); diff --git a/searchlib/src/tests/query/streaming_query_test.cpp b/searchlib/src/tests/query/streaming_query_test.cpp index 2129cb6805a..1aa359c9910 100644 --- a/searchlib/src/tests/query/streaming_query_test.cpp +++ b/searchlib/src/tests/query/streaming_query_test.cpp @@ -2,6 +2,7 @@ #include #include +#include #include #include #include @@ -24,6 +25,7 @@ using namespace search::streaming; using TermType = QueryTerm::Type; using search::fef::SimpleTermData; using search::fef::MatchData; +using search::fef::test::IndexEnvironment; void assertHit(const Hit & h, uint32_t exp_field_id, uint32_t exp_element_id, int32_t exp_element_weight, size_t exp_position) { EXPECT_EQ(h.field_id(), exp_field_id); @@ -774,7 +776,8 @@ TEST(StreamingQueryTest, test_in_term) q.add(12, 0, 1, 0); EXPECT_TRUE(term.evaluate()); MatchData md(MatchData::params().numTermFields(2)); - term.unpack_match_data(23, td, md); + IndexEnvironment ie; + term.unpack_match_data(23, td, md, ie); auto tmd0 = md.resolveTermField(0); EXPECT_NE(23, tmd0->getDocId()); auto tmd2 = md.resolveTermField(1); @@ -804,7 +807,8 @@ TEST(StreamingQueryTest, dot_product_term) q1.add(12, 0, 9, 0); EXPECT_TRUE(term.evaluate()); MatchData md(MatchData::params().numTermFields(2)); - term.unpack_match_data(23, td, md); + IndexEnvironment ie; + term.unpack_match_data(23, td, md, ie); auto tmd0 = md.resolveTermField(0); EXPECT_NE(23, tmd0->getDocId()); auto tmd1 = md.resolveTermField(1); @@ -849,7 +853,8 @@ check_wand_term(double limit, const vespalib::string& label) q1.add(12, 0, 4, 0); EXPECT_EQ(limit < exp_wand_score_field_11, term.evaluate()); MatchData md(MatchData::params().numTermFields(2)); - term.unpack_match_data(23, td, md); + IndexEnvironment ie; + term.unpack_match_data(23, td, md, ie); auto tmd0 = md.resolveTermField(0); EXPECT_NE(23, tmd0->getDocId()); auto tmd1 = md.resolveTermField(1); @@ -903,7 +908,8 @@ TEST(StreamingQueryTest, weighted_set_term) q1.add(12, 0, 10, 0); EXPECT_TRUE(term.evaluate()); MatchData md(MatchData::params().numTermFields(2)); - term.unpack_match_data(23, td, md); + IndexEnvironment ie; + term.unpack_match_data(23, td, md, ie); auto tmd0 = md.resolveTermField(0); EXPECT_NE(23, tmd0->getDocId()); auto tmd1 = md.resolveTermField(1); diff --git a/searchlib/src/vespa/searchlib/query/streaming/dot_product_term.cpp b/searchlib/src/vespa/searchlib/query/streaming/dot_product_term.cpp index 09840d9a126..c267cec5e3c 100644 --- a/searchlib/src/vespa/searchlib/query/streaming/dot_product_term.cpp +++ b/searchlib/src/vespa/searchlib/query/streaming/dot_product_term.cpp @@ -51,8 +51,9 @@ DotProductTerm::unpack_scores(Scores& scores, std::optional score_thresh } void -DotProductTerm::unpack_match_data(uint32_t docid, const ITermData& td, MatchData& match_data) +DotProductTerm::unpack_match_data(uint32_t docid, const ITermData& td, MatchData& match_data, const fef::IIndexEnvironment& index_env) { + (void) index_env; Scores scores; build_scores(scores); unpack_scores(scores, std::nullopt, docid, td, match_data); diff --git a/searchlib/src/vespa/searchlib/query/streaming/dot_product_term.h b/searchlib/src/vespa/searchlib/query/streaming/dot_product_term.h index 3702bd4721c..28400b4b283 100644 --- a/searchlib/src/vespa/searchlib/query/streaming/dot_product_term.h +++ b/searchlib/src/vespa/searchlib/query/streaming/dot_product_term.h @@ -19,7 +19,7 @@ protected: public: DotProductTerm(std::unique_ptr result_base, const string& index, uint32_t num_terms); ~DotProductTerm() override; - void unpack_match_data(uint32_t docid, const fef::ITermData& td, fef::MatchData& match_data) override; + void unpack_match_data(uint32_t docid, const fef::ITermData& td, fef::MatchData& match_data, const fef::IIndexEnvironment& index_env) override; }; } diff --git a/searchlib/src/vespa/searchlib/query/streaming/equiv_query_node.cpp b/searchlib/src/vespa/searchlib/query/streaming/equiv_query_node.cpp index 939afec0463..3fcf983901d 100644 --- a/searchlib/src/vespa/searchlib/query/streaming/equiv_query_node.cpp +++ b/searchlib/src/vespa/searchlib/query/streaming/equiv_query_node.cpp @@ -74,11 +74,11 @@ EquivQueryNode::evaluateHits(HitList & hl) const } void -EquivQueryNode::unpack_match_data(uint32_t docid, const fef::ITermData& td, fef::MatchData& match_data) +EquivQueryNode::unpack_match_data(uint32_t docid, const fef::ITermData& td, fef::MatchData& match_data, const fef::IIndexEnvironment& index_env) { std::vector hit_list; merge_hits_from_children(hit_list, *this); - unpack_match_data_helper(docid, td, match_data, hit_list, *this); + unpack_match_data_helper(docid, td, match_data, hit_list, *this, is_filter(), index_env); } EquivQueryNode* diff --git a/searchlib/src/vespa/searchlib/query/streaming/equiv_query_node.h b/searchlib/src/vespa/searchlib/query/streaming/equiv_query_node.h index b5cdb31274f..a0485954675 100644 --- a/searchlib/src/vespa/searchlib/query/streaming/equiv_query_node.h +++ b/searchlib/src/vespa/searchlib/query/streaming/equiv_query_node.h @@ -16,7 +16,7 @@ public: ~EquivQueryNode() override; bool evaluate() const override; const HitList & evaluateHits(HitList & hl) const override; - void unpack_match_data(uint32_t docid, const fef::ITermData& td, fef::MatchData& match_data) override; + void unpack_match_data(uint32_t docid, const fef::ITermData& td, fef::MatchData& match_data, const fef::IIndexEnvironment& index_env) override; EquivQueryNode* as_equiv_query_node() noexcept override; const EquivQueryNode* as_equiv_query_node() const noexcept override; std::vector> steal_terms(); diff --git a/searchlib/src/vespa/searchlib/query/streaming/in_term.cpp b/searchlib/src/vespa/searchlib/query/streaming/in_term.cpp index c164db69ba1..38a13c55730 100644 --- a/searchlib/src/vespa/searchlib/query/streaming/in_term.cpp +++ b/searchlib/src/vespa/searchlib/query/streaming/in_term.cpp @@ -21,8 +21,9 @@ InTerm::InTerm(std::unique_ptr result_base, const string & InTerm::~InTerm() = default; void -InTerm::unpack_match_data(uint32_t docid, const ITermData& td, MatchData& match_data) +InTerm::unpack_match_data(uint32_t docid, const ITermData& td, MatchData& match_data, const fef::IIndexEnvironment& index_env) { + (void) index_env; vespalib::hash_set matching_field_ids; HitList hl_store; std::optional prev_field_id; diff --git a/searchlib/src/vespa/searchlib/query/streaming/in_term.h b/searchlib/src/vespa/searchlib/query/streaming/in_term.h index 7b388b3f6e6..8ceb8edf689 100644 --- a/searchlib/src/vespa/searchlib/query/streaming/in_term.h +++ b/searchlib/src/vespa/searchlib/query/streaming/in_term.h @@ -14,7 +14,7 @@ public: InTerm(std::unique_ptr result_base, const string& index, std::unique_ptr terms, Normalizing normalize_mode); ~InTerm() override; - void unpack_match_data(uint32_t docid, const fef::ITermData& td, fef::MatchData& match_data) override; + void unpack_match_data(uint32_t docid, const fef::ITermData& td, fef::MatchData& match_data, const fef::IIndexEnvironment& index_env) override; }; } diff --git a/searchlib/src/vespa/searchlib/query/streaming/nearest_neighbor_query_node.cpp b/searchlib/src/vespa/searchlib/query/streaming/nearest_neighbor_query_node.cpp index 07d16c7bddc..d7b1c6dc379 100644 --- a/searchlib/src/vespa/searchlib/query/streaming/nearest_neighbor_query_node.cpp +++ b/searchlib/src/vespa/searchlib/query/streaming/nearest_neighbor_query_node.cpp @@ -52,8 +52,9 @@ NearestNeighborQueryNode::get_raw_score() const } void -NearestNeighborQueryNode::unpack_match_data(uint32_t docid, const fef::ITermData& td, fef::MatchData& match_data) +NearestNeighborQueryNode::unpack_match_data(uint32_t docid, const fef::ITermData& td, fef::MatchData& match_data, const fef::IIndexEnvironment& index_env) { + (void) index_env; auto raw_score = get_raw_score(); if (raw_score.has_value()) { if (td.numFields() == 1u) { diff --git a/searchlib/src/vespa/searchlib/query/streaming/nearest_neighbor_query_node.h b/searchlib/src/vespa/searchlib/query/streaming/nearest_neighbor_query_node.h index 277956e27a4..7e7e513bc2e 100644 --- a/searchlib/src/vespa/searchlib/query/streaming/nearest_neighbor_query_node.h +++ b/searchlib/src/vespa/searchlib/query/streaming/nearest_neighbor_query_node.h @@ -52,7 +52,7 @@ public: const std::optional& get_distance() const { return _distance; } // This is used during unpacking, and also signals to the RawScoreCalculator that the entire document was a match. std::optional get_raw_score() const; - void unpack_match_data(uint32_t docid, const fef::ITermData& td, fef::MatchData& match_data) override; + void unpack_match_data(uint32_t docid, const fef::ITermData& td, fef::MatchData& match_data, const fef::IIndexEnvironment& index_env) override; }; } diff --git a/searchlib/src/vespa/searchlib/query/streaming/phrase_query_node.cpp b/searchlib/src/vespa/searchlib/query/streaming/phrase_query_node.cpp index b090ca13225..eef9e2e5567 100644 --- a/searchlib/src/vespa/searchlib/query/streaming/phrase_query_node.cpp +++ b/searchlib/src/vespa/searchlib/query/streaming/phrase_query_node.cpp @@ -67,11 +67,11 @@ PhraseQueryNode::evaluateHits(HitList & hl) const } void -PhraseQueryNode::unpack_match_data(uint32_t docid, const fef::ITermData& td, fef::MatchData& match_data) +PhraseQueryNode::unpack_match_data(uint32_t docid, const fef::ITermData& td, fef::MatchData& match_data, const fef::IIndexEnvironment& index_env) { HitList list; const HitList & hit_list = evaluateHits(list); - unpack_match_data_helper(docid, td, match_data, hit_list, *get_terms().front()); + unpack_match_data_helper(docid, td, match_data, hit_list, *get_terms().front(), is_filter(), index_env); } } diff --git a/searchlib/src/vespa/searchlib/query/streaming/phrase_query_node.h b/searchlib/src/vespa/searchlib/query/streaming/phrase_query_node.h index 0d443ac4527..615d32324be 100644 --- a/searchlib/src/vespa/searchlib/query/streaming/phrase_query_node.h +++ b/searchlib/src/vespa/searchlib/query/streaming/phrase_query_node.h @@ -17,7 +17,7 @@ public: ~PhraseQueryNode() override; bool evaluate() const override; const HitList & evaluateHits(HitList & hl) const override; - void unpack_match_data(uint32_t docid, const fef::ITermData& td, fef::MatchData& match_data) override; + void unpack_match_data(uint32_t docid, const fef::ITermData& td, fef::MatchData& match_data, const fef::IIndexEnvironment& index_env) override; size_t width() const override; }; diff --git a/searchlib/src/vespa/searchlib/query/streaming/querynode.cpp b/searchlib/src/vespa/searchlib/query/streaming/querynode.cpp index 55301132a18..611e8d67d76 100644 --- a/searchlib/src/vespa/searchlib/query/streaming/querynode.cpp +++ b/searchlib/src/vespa/searchlib/query/streaming/querynode.cpp @@ -141,6 +141,7 @@ QueryNode::Build(const QueryNode * parent, const QueryNodeResultFactory & factor qt->setWeight(queryRep.GetWeight()); qt->setUniqueId(queryRep.getUniqueId()); qt->setRanked( ! queryRep.hasNoRankFlag()); + qt->set_filter(queryRep.hasNoPositionDataFlag()); if (allowRewrite && possibleFloat(*qt, ssTerm) && factory.allow_float_terms_rewrite(ssIndex)) { /* * Tokenize number term and make add alternative diff --git a/searchlib/src/vespa/searchlib/query/streaming/queryterm.cpp b/searchlib/src/vespa/searchlib/query/streaming/queryterm.cpp index a9516d8aee4..728b9a2ab76 100644 --- a/searchlib/src/vespa/searchlib/query/streaming/queryterm.cpp +++ b/searchlib/src/vespa/searchlib/query/streaming/queryterm.cpp @@ -74,6 +74,7 @@ QueryTerm::QueryTerm(std::unique_ptr org, stringref termS, _result(org.release()), _encoding(0x01), _isRanked(true), + _filter(false), _weight(100), _uniqueId(0), _fieldInfo() @@ -115,11 +116,11 @@ QueryTerm::set_element_length(uint32_t hitlist_idx, uint32_t element_length) } void -QueryTerm::unpack_match_data(uint32_t docid, const fef::ITermData& td, fef::MatchData& match_data) +QueryTerm::unpack_match_data(uint32_t docid, const fef::ITermData& td, fef::MatchData& match_data, const fef::IIndexEnvironment& index_env) { HitList list; const HitList & hit_list = evaluateHits(list); - unpack_match_data_helper(docid, td, match_data, hit_list, *this); + unpack_match_data_helper(docid, td, match_data, hit_list, *this, is_filter(), index_env); } NearestNeighborQueryNode* diff --git a/searchlib/src/vespa/searchlib/query/streaming/queryterm.h b/searchlib/src/vespa/searchlib/query/streaming/queryterm.h index 1d0454f3b63..e6b063231d6 100644 --- a/searchlib/src/vespa/searchlib/query/streaming/queryterm.h +++ b/searchlib/src/vespa/searchlib/query/streaming/queryterm.h @@ -11,6 +11,7 @@ namespace search::fef { +class IIndexEnvironment; class ITermData; class MatchData; @@ -85,6 +86,8 @@ public: void setWeight(query::Weight v) { _weight = v; } void setRanked(bool ranked) { _isRanked = ranked; } bool isRanked() const { return _isRanked; } + void set_filter(bool v) noexcept { _filter = v; } + bool is_filter() const noexcept { return _filter; } void setUniqueId(uint32_t u) { _uniqueId = u; } query::Weight weight() const { return _weight; } uint32_t uniqueId() const { return _uniqueId; } @@ -105,10 +108,10 @@ public: virtual FuzzyTerm* as_fuzzy_term() noexcept; virtual EquivQueryNode* as_equiv_query_node() noexcept; virtual const EquivQueryNode* as_equiv_query_node() const noexcept; - virtual void unpack_match_data(uint32_t docid, const fef::ITermData& td, fef::MatchData& match_data); + virtual void unpack_match_data(uint32_t docid, const fef::ITermData& td, fef::MatchData& match_data, const fef::IIndexEnvironment& index_env); protected: template - static void unpack_match_data_helper(uint32_t docid, const fef::ITermData& td, fef::MatchData& match_data, const HitListType& hit_list, const QueryTerm& fl_term); + static void unpack_match_data_helper(uint32_t docid, const fef::ITermData& td, fef::MatchData& match_data, const HitListType& hit_list, const QueryTerm& fl_term, bool term_filter, const fef::IIndexEnvironment& index_env); using QueryNodeResultBaseContainer = std::unique_ptr; HitList _hitList; private: @@ -116,6 +119,7 @@ private: QueryNodeResultBaseContainer _result; EncodingBitMap _encoding; bool _isRanked; + bool _filter; query::Weight _weight; uint32_t _uniqueId; std::vector _fieldInfo; diff --git a/searchlib/src/vespa/searchlib/query/streaming/queryterm.hpp b/searchlib/src/vespa/searchlib/query/streaming/queryterm.hpp index dd6eff1f22b..bf10d02e6bc 100644 --- a/searchlib/src/vespa/searchlib/query/streaming/queryterm.hpp +++ b/searchlib/src/vespa/searchlib/query/streaming/queryterm.hpp @@ -3,6 +3,7 @@ #pragma once #include "queryterm.h" +#include #include #include #include @@ -36,7 +37,7 @@ set_interleaved_features(search::fef::TermFieldMatchData& tmd, uint32_t field_le template void -QueryTerm::unpack_match_data_helper(uint32_t docid, const fef::ITermData& td, fef::MatchData& match_data, const HitListType& hit_list, const QueryTerm& fl_term) +QueryTerm::unpack_match_data_helper(uint32_t docid, const fef::ITermData& td, fef::MatchData& match_data, const HitListType& hit_list, const QueryTerm& fl_term, bool term_filter, const fef::IIndexEnvironment& index_env) { (void) fl_term; if (!hit_list.empty()) { // only unpack if we have a hit @@ -45,6 +46,7 @@ QueryTerm::unpack_match_data_helper(uint32_t docid, const fef::ITermData& td, fe uint32_t last_field_length = 0; search::fef::TermFieldMatchData *tmd = nullptr; uint32_t num_occs = 0; + bool filter = false; // optimize for hitlist giving all hits for a single field in one chunk for (const auto& hit : hit_list) { @@ -58,6 +60,8 @@ QueryTerm::unpack_match_data_helper(uint32_t docid, const fef::ITermData& td, fe tmd = nullptr; } num_occs = 0; + auto field = index_env.getField(field_id); + filter = term_filter || (field != nullptr && field->isFilter()); // setup for new field that had a hit const search::fef::ITermFieldData *tfd = td.lookupField(field_id); @@ -77,7 +81,7 @@ QueryTerm::unpack_match_data_helper(uint32_t docid, const fef::ITermData& td, fe } } ++num_occs; - if (tmd != nullptr) { + if (tmd != nullptr && !filter) { search::fef::TermFieldMatchDataPosition pos(hit.element_id(), hit.position(), hit.element_weight(), hit.element_length()); tmd->appendPosition(pos); diff --git a/searchlib/src/vespa/searchlib/query/streaming/wand_term.cpp b/searchlib/src/vespa/searchlib/query/streaming/wand_term.cpp index a561adf5b42..f2b11f3c5b5 100644 --- a/searchlib/src/vespa/searchlib/query/streaming/wand_term.cpp +++ b/searchlib/src/vespa/searchlib/query/streaming/wand_term.cpp @@ -34,8 +34,9 @@ WandTerm::evaluate() const } void -WandTerm::unpack_match_data(uint32_t docid, const ITermData& td, MatchData& match_data) +WandTerm::unpack_match_data(uint32_t docid, const ITermData& td, MatchData& match_data, const fef::IIndexEnvironment& index_env) { + (void) index_env; Scores scores; build_scores(scores); unpack_scores(scores, _score_threshold, docid, td, match_data); diff --git a/searchlib/src/vespa/searchlib/query/streaming/wand_term.h b/searchlib/src/vespa/searchlib/query/streaming/wand_term.h index 1b342834216..77041dbb256 100644 --- a/searchlib/src/vespa/searchlib/query/streaming/wand_term.h +++ b/searchlib/src/vespa/searchlib/query/streaming/wand_term.h @@ -16,7 +16,7 @@ public: ~WandTerm() override; void set_score_threshold(double value) { _score_threshold = value; } bool evaluate() const override; - void unpack_match_data(uint32_t docid, const fef::ITermData& td, fef::MatchData& match_data) override; + void unpack_match_data(uint32_t docid, const fef::ITermData& td, fef::MatchData& match_data, const fef::IIndexEnvironment& index_env) override; }; } diff --git a/searchlib/src/vespa/searchlib/query/streaming/weighted_set_term.cpp b/searchlib/src/vespa/searchlib/query/streaming/weighted_set_term.cpp index d2d706eef3d..496ea381555 100644 --- a/searchlib/src/vespa/searchlib/query/streaming/weighted_set_term.cpp +++ b/searchlib/src/vespa/searchlib/query/streaming/weighted_set_term.cpp @@ -18,8 +18,9 @@ WeightedSetTerm::WeightedSetTerm(std::unique_ptr result_bas WeightedSetTerm::~WeightedSetTerm() = default; void -WeightedSetTerm::unpack_match_data(uint32_t docid, const ITermData& td, MatchData& match_data) +WeightedSetTerm::unpack_match_data(uint32_t docid, const ITermData& td, MatchData& match_data, const fef::IIndexEnvironment& index_env) { + (void) index_env; vespalib::hash_map> scores; HitList hl_store; for (const auto& term : _terms) { diff --git a/searchlib/src/vespa/searchlib/query/streaming/weighted_set_term.h b/searchlib/src/vespa/searchlib/query/streaming/weighted_set_term.h index 3d8a5fba843..f90cdb20e05 100644 --- a/searchlib/src/vespa/searchlib/query/streaming/weighted_set_term.h +++ b/searchlib/src/vespa/searchlib/query/streaming/weighted_set_term.h @@ -13,7 +13,7 @@ class WeightedSetTerm : public MultiTerm { public: WeightedSetTerm(std::unique_ptr result_base, const string& index, uint32_t num_terms); ~WeightedSetTerm() override; - void unpack_match_data(uint32_t docid, const fef::ITermData& td, fef::MatchData& match_data) override; + void unpack_match_data(uint32_t docid, const fef::ITermData& td, fef::MatchData& match_data, const fef::IIndexEnvironment& index_env) override; }; } diff --git a/streamingvisitors/src/tests/rank_processor/rank_processor_test.cpp b/streamingvisitors/src/tests/rank_processor/rank_processor_test.cpp index a4a4b4e696f..ee9a08f3ecd 100644 --- a/streamingvisitors/src/tests/rank_processor/rank_processor_test.cpp +++ b/streamingvisitors/src/tests/rank_processor/rank_processor_test.cpp @@ -1,6 +1,7 @@ // Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. #include +#include #include #include #include @@ -12,6 +13,7 @@ using search::fef::MatchData; using search::fef::TermFieldHandle; using search::fef::TermFieldMatchData; +using search::fef::test::IndexEnvironment; using search::query::Weight; using search::query::QueryBuilder; using search::query::SimpleQueryNodeTypes; @@ -82,13 +84,14 @@ RankProcessorTest::test_unpack_match_data_for_term_node(bool interleaved_feature tfmd->setNeedInterleavedFeatures(interleaved_features); auto invalid_id = TermFieldMatchData::invalidId(); EXPECT_EQ(invalid_id, tfmd->getDocId()); - RankProcessor::unpack_match_data(1, *md, *_query_wrapper); + IndexEnvironment ie; + RankProcessor::unpack_match_data(1, *md, *_query_wrapper, ie); EXPECT_EQ(invalid_id, tfmd->getDocId()); node->add(field_id, 0, 1, 0); node->add(field_id, 0, 1, 1); auto& field_info = node->getFieldInfo(field_id); field_info.setFieldLength(mock_field_length); - RankProcessor::unpack_match_data(2, *md, *_query_wrapper); + RankProcessor::unpack_match_data(2, *md, *_query_wrapper, ie); EXPECT_EQ(2, tfmd->getDocId()); if (interleaved_features) { EXPECT_EQ(mock_num_occs, tfmd->getNumOccs()); @@ -99,7 +102,7 @@ RankProcessorTest::test_unpack_match_data_for_term_node(bool interleaved_feature } EXPECT_EQ(2, tfmd->size()); node->reset(); - RankProcessor::unpack_match_data(3, *md, *_query_wrapper); + RankProcessor::unpack_match_data(3, *md, *_query_wrapper, ie); EXPECT_EQ(2, tfmd->getDocId()); } @@ -145,15 +148,16 @@ TEST_F(RankProcessorTest, unpack_match_data_for_nearest_neighbor_query_node) auto tfmd = md->resolveTermField(handle); auto invalid_id = TermFieldMatchData::invalidId(); EXPECT_EQ(invalid_id, tfmd->getDocId()); - RankProcessor::unpack_match_data(1, *md, *_query_wrapper); + IndexEnvironment ie; + RankProcessor::unpack_match_data(1, *md, *_query_wrapper, ie); EXPECT_EQ(invalid_id, tfmd->getDocId()); constexpr double distance = 1.5; node->set_distance(distance); - RankProcessor::unpack_match_data(2, *md, *_query_wrapper); + RankProcessor::unpack_match_data(2, *md, *_query_wrapper, ie); EXPECT_EQ(2, tfmd->getDocId()); EXPECT_EQ(distance * 2, tfmd->getRawScore()); node->reset(); - RankProcessor::unpack_match_data(3, *md, *_query_wrapper); + RankProcessor::unpack_match_data(3, *md, *_query_wrapper, ie); EXPECT_EQ(2, tfmd->getDocId()); } diff --git a/streamingvisitors/src/vespa/searchvisitor/indexenvironment.cpp b/streamingvisitors/src/vespa/searchvisitor/indexenvironment.cpp index 25c8d982f0f..726afcc959b 100644 --- a/streamingvisitors/src/vespa/searchvisitor/indexenvironment.cpp +++ b/streamingvisitors/src/vespa/searchvisitor/indexenvironment.cpp @@ -2,6 +2,7 @@ #include "indexenvironment.h" #include +#include using namespace search::fef; @@ -38,6 +39,16 @@ IndexEnvironment::addField(const vespalib::string& name, return true; } +void +IndexEnvironment::fixup_fields() +{ + for (auto& field : _fields) { + if (indexproperties::IsFilterField::check(_properties, field.name())) { + field.setFilter(true); + } + } +} + void IndexEnvironment::set_ranking_assets_repo(std::shared_ptr ranking_assets_repo) { diff --git a/streamingvisitors/src/vespa/searchvisitor/indexenvironment.h b/streamingvisitors/src/vespa/searchvisitor/indexenvironment.h index eed38b3c922..50e6898262d 100644 --- a/streamingvisitors/src/vespa/searchvisitor/indexenvironment.h +++ b/streamingvisitors/src/vespa/searchvisitor/indexenvironment.h @@ -78,6 +78,8 @@ public: bool isAttribute, search::fef::FieldInfo::DataType data_type); + void fixup_fields(); + search::fef::Properties & getProperties() { return _properties; } void set_ranking_assets_repo(std::shared_ptr ranking_assets_repo); diff --git a/streamingvisitors/src/vespa/searchvisitor/rankmanager.cpp b/streamingvisitors/src/vespa/searchvisitor/rankmanager.cpp index 7b248faaa51..3efeb8ef168 100644 --- a/streamingvisitors/src/vespa/searchvisitor/rankmanager.cpp +++ b/streamingvisitors/src/vespa/searchvisitor/rankmanager.cpp @@ -135,6 +135,7 @@ RankManager::Snapshot::initRankSetup(const BlueprintFactory & factory) _indexEnv.push_back(_protoEnv.current()); IndexEnvironment & ie = _indexEnv.back(); ie.getProperties().import(_properties[i].second); + ie.fixup_fields(); } // set up individual rank setups per rank profile diff --git a/streamingvisitors/src/vespa/searchvisitor/rankprocessor.cpp b/streamingvisitors/src/vespa/searchvisitor/rankprocessor.cpp index 825630f057b..2a95bf8251b 100644 --- a/streamingvisitors/src/vespa/searchvisitor/rankprocessor.cpp +++ b/streamingvisitors/src/vespa/searchvisitor/rankprocessor.cpp @@ -292,16 +292,16 @@ void RankProcessor::unpackMatchData(uint32_t docId) { _docId = docId; - unpack_match_data(docId, *_match_data, _query); + unpack_match_data(docId, *_match_data, _query, _queryEnv.getIndexEnvironment()); } void -RankProcessor::unpack_match_data(uint32_t docid, MatchData &matchData, QueryWrapper& query) +RankProcessor::unpack_match_data(uint32_t docid, MatchData &matchData, QueryWrapper& query, const search::fef::IIndexEnvironment& index_env) { for (auto& term : query.getTermList()) { auto & qtd = static_cast(term->getQueryItem()); const ITermData &td = qtd.getTermData(); - term->unpack_match_data(docid, td, matchData); + term->unpack_match_data(docid, td, matchData, index_env); } } diff --git a/streamingvisitors/src/vespa/searchvisitor/rankprocessor.h b/streamingvisitors/src/vespa/searchvisitor/rankprocessor.h index 476ed013d23..f384f7d7acf 100644 --- a/streamingvisitors/src/vespa/searchvisitor/rankprocessor.h +++ b/streamingvisitors/src/vespa/searchvisitor/rankprocessor.h @@ -74,7 +74,7 @@ public: void initForRanking(size_t wantedHitCount, bool use_sort_blob); void initForDumping(size_t wantedHitCount, bool use_sort_blob); void unpackMatchData(uint32_t docId); - static void unpack_match_data(uint32_t docid, search::fef::MatchData& matchData, QueryWrapper& query); + static void unpack_match_data(uint32_t docid, search::fef::MatchData& matchData, QueryWrapper& query, const search::fef::IIndexEnvironment& index_env); void runRankProgram(uint32_t docId); vespalib::FeatureSet::SP calculateFeatureSet(); vespalib::FeatureSet::SP calculateFeatureSet(search::DocumentIdT docId); -- cgit v1.2.3 From 1536e13b30358d63b8722ead277d8629376957f0 Mon Sep 17 00:00:00 2001 From: Tor Egge Date: Fri, 15 Mar 2024 13:41:21 +0100 Subject: Style fixes. --- searchlib/src/vespa/searchlib/query/streaming/dot_product_term.cpp | 3 +-- searchlib/src/vespa/searchlib/query/streaming/in_term.cpp | 3 +-- searchlib/src/vespa/searchlib/query/streaming/wand_term.cpp | 3 +-- searchlib/src/vespa/searchlib/query/streaming/weighted_set_term.cpp | 3 +-- 4 files changed, 4 insertions(+), 8 deletions(-) diff --git a/searchlib/src/vespa/searchlib/query/streaming/dot_product_term.cpp b/searchlib/src/vespa/searchlib/query/streaming/dot_product_term.cpp index c267cec5e3c..bfc66d14e34 100644 --- a/searchlib/src/vespa/searchlib/query/streaming/dot_product_term.cpp +++ b/searchlib/src/vespa/searchlib/query/streaming/dot_product_term.cpp @@ -51,9 +51,8 @@ DotProductTerm::unpack_scores(Scores& scores, std::optional score_thresh } void -DotProductTerm::unpack_match_data(uint32_t docid, const ITermData& td, MatchData& match_data, const fef::IIndexEnvironment& index_env) +DotProductTerm::unpack_match_data(uint32_t docid, const ITermData& td, MatchData& match_data, const fef::IIndexEnvironment&) { - (void) index_env; Scores scores; build_scores(scores); unpack_scores(scores, std::nullopt, docid, td, match_data); diff --git a/searchlib/src/vespa/searchlib/query/streaming/in_term.cpp b/searchlib/src/vespa/searchlib/query/streaming/in_term.cpp index 38a13c55730..c856d0dd466 100644 --- a/searchlib/src/vespa/searchlib/query/streaming/in_term.cpp +++ b/searchlib/src/vespa/searchlib/query/streaming/in_term.cpp @@ -21,9 +21,8 @@ InTerm::InTerm(std::unique_ptr result_base, const string & InTerm::~InTerm() = default; void -InTerm::unpack_match_data(uint32_t docid, const ITermData& td, MatchData& match_data, const fef::IIndexEnvironment& index_env) +InTerm::unpack_match_data(uint32_t docid, const ITermData& td, MatchData& match_data, const fef::IIndexEnvironment&) { - (void) index_env; vespalib::hash_set matching_field_ids; HitList hl_store; std::optional prev_field_id; diff --git a/searchlib/src/vespa/searchlib/query/streaming/wand_term.cpp b/searchlib/src/vespa/searchlib/query/streaming/wand_term.cpp index f2b11f3c5b5..a089e17adb3 100644 --- a/searchlib/src/vespa/searchlib/query/streaming/wand_term.cpp +++ b/searchlib/src/vespa/searchlib/query/streaming/wand_term.cpp @@ -34,9 +34,8 @@ WandTerm::evaluate() const } void -WandTerm::unpack_match_data(uint32_t docid, const ITermData& td, MatchData& match_data, const fef::IIndexEnvironment& index_env) +WandTerm::unpack_match_data(uint32_t docid, const ITermData& td, MatchData& match_data, const fef::IIndexEnvironment&) { - (void) index_env; Scores scores; build_scores(scores); unpack_scores(scores, _score_threshold, docid, td, match_data); diff --git a/searchlib/src/vespa/searchlib/query/streaming/weighted_set_term.cpp b/searchlib/src/vespa/searchlib/query/streaming/weighted_set_term.cpp index 496ea381555..ec652e89682 100644 --- a/searchlib/src/vespa/searchlib/query/streaming/weighted_set_term.cpp +++ b/searchlib/src/vespa/searchlib/query/streaming/weighted_set_term.cpp @@ -18,9 +18,8 @@ WeightedSetTerm::WeightedSetTerm(std::unique_ptr result_bas WeightedSetTerm::~WeightedSetTerm() = default; void -WeightedSetTerm::unpack_match_data(uint32_t docid, const ITermData& td, MatchData& match_data, const fef::IIndexEnvironment& index_env) +WeightedSetTerm::unpack_match_data(uint32_t docid, const ITermData& td, MatchData& match_data, const fef::IIndexEnvironment&) { - (void) index_env; vespalib::hash_map> scores; HitList hl_store; for (const auto& term : _terms) { -- cgit v1.2.3