diff options
author | Tor Egge <Tor.Egge@online.no> | 2024-02-09 17:01:32 +0100 |
---|---|---|
committer | Tor Egge <Tor.Egge@online.no> | 2024-02-09 17:01:32 +0100 |
commit | e8d5226c0e499db017eef46cfeea20bbe8b11133 (patch) | |
tree | c89c14215e504395c9bcd25b43852b1e4bca4b73 /searchlib | |
parent | 332bdd44a075c16418b49ddfe66965e5a46e2e8c (diff) |
Reduce code duplication.
Diffstat (limited to 'searchlib')
5 files changed, 107 insertions, 154 deletions
diff --git a/searchlib/src/vespa/searchlib/query/streaming/equiv_query_node.cpp b/searchlib/src/vespa/searchlib/query/streaming/equiv_query_node.cpp index 8a39830799b..939afec0463 100644 --- a/searchlib/src/vespa/searchlib/query/streaming/equiv_query_node.cpp +++ b/searchlib/src/vespa/searchlib/query/streaming/equiv_query_node.cpp @@ -2,10 +2,7 @@ #include "equiv_query_node.h" #include "phrase_query_node.h" -#include <vespa/searchlib/fef/itermdata.h> -#include <vespa/searchlib/fef/matchdata.h> -#include <algorithm> -#include <cassert> +#include "queryterm.hpp" using search::fef::TermFieldMatchData; using search::fef::TermFieldMatchDataPosition; @@ -27,25 +24,6 @@ public: uint32_t get_field_length() const noexcept { return _field_length; } }; -uint16_t -cap_16_bits(uint32_t value) -{ - return std::min(value, static_cast<uint32_t>(std::numeric_limits<uint16_t>::max())); -} - -uint32_t -extract_field_length(const QueryTerm& term, uint32_t field_id) -{ - return (field_id < term.getFieldInfoSize()) ? term.getFieldInfo(field_id).getFieldLength() : search::fef::FieldPositionsIterator::UNKNOWN_LENGTH; -} - -void -set_interleaved_features(TermFieldMatchData& tmd, uint32_t field_length, uint32_t num_occs) -{ - tmd.setFieldLength(cap_16_bits(field_length)); - tmd.setNumOccs(cap_16_bits(num_occs)); -} - template <typename HitType> void merge_hits_from_children(std::vector<HitType>& hl, const MultiTerm& mt) { @@ -98,54 +76,9 @@ EquivQueryNode::evaluateHits(HitList & hl) const void EquivQueryNode::unpack_match_data(uint32_t docid, const fef::ITermData& td, fef::MatchData& match_data) { - std::vector<HitWithFieldLength> hitList; - merge_hits_from_children(hitList, *this); - - if (!hitList.empty()) { // only unpack if we have a hit - uint32_t lastFieldId = -1; - uint32_t last_field_length = 0; - TermFieldMatchData *tmd = nullptr; - uint32_t num_occs = 0; - - // optimize for hitlist giving all hits for a single field in one chunk - for (auto& hit : hitList) { - uint32_t fieldId = hit.field_id(); - if (fieldId != lastFieldId) { - if (tmd != nullptr) { - if (tmd->needs_interleaved_features()) { - set_interleaved_features(*tmd, last_field_length, num_occs); - } - // reset to notfound/unknown values - tmd = nullptr; - } - num_occs = 0; - - // setup for new field that had a hit - const ITermFieldData *tfd = td.lookupField(fieldId); - if (tfd != nullptr) { - tmd = match_data.resolveTermField(tfd->getHandle()); - tmd->setFieldId(fieldId); - // reset field match data, but only once per docId - if (tmd->getDocId() != docid) { - tmd->reset(docid); - } - } - lastFieldId = fieldId; - last_field_length = hit.get_field_length(); - } - ++num_occs; - if (tmd != nullptr) { - TermFieldMatchDataPosition pos(hit.element_id(), hit.position(), - hit.element_weight(), hit.element_length()); - tmd->appendPosition(pos); - } - } - if (tmd != nullptr) { - if (tmd->needs_interleaved_features()) { - set_interleaved_features(*tmd, last_field_length, num_occs); - } - } - } + std::vector<HitWithFieldLength> hit_list; + merge_hits_from_children(hit_list, *this); + unpack_match_data_helper(docid, td, match_data, hit_list, *this); } EquivQueryNode* diff --git a/searchlib/src/vespa/searchlib/query/streaming/phrase_query_node.cpp b/searchlib/src/vespa/searchlib/query/streaming/phrase_query_node.cpp index 9cd8d41d33d..b090ca13225 100644 --- a/searchlib/src/vespa/searchlib/query/streaming/phrase_query_node.cpp +++ b/searchlib/src/vespa/searchlib/query/streaming/phrase_query_node.cpp @@ -69,7 +69,9 @@ PhraseQueryNode::evaluateHits(HitList & hl) const void PhraseQueryNode::unpack_match_data(uint32_t docid, const fef::ITermData& td, fef::MatchData& match_data) { - unpack_match_data_helper(docid, td, match_data, *get_terms().front()); + HitList list; + const HitList & hit_list = evaluateHits(list); + unpack_match_data_helper(docid, td, match_data, hit_list, *get_terms().front()); } } diff --git a/searchlib/src/vespa/searchlib/query/streaming/queryterm.cpp b/searchlib/src/vespa/searchlib/query/streaming/queryterm.cpp index 920d4bc59d7..0d0f5a7c4ad 100644 --- a/searchlib/src/vespa/searchlib/query/streaming/queryterm.cpp +++ b/searchlib/src/vespa/searchlib/query/streaming/queryterm.cpp @@ -1,6 +1,6 @@ // Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -#include "queryterm.h" +#include "queryterm.hpp" #include <vespa/searchlib/fef/itermdata.h> #include <vespa/searchlib/fef/matchdata.h> #include <vespa/vespalib/objects/visit.h> @@ -113,89 +113,12 @@ QueryTerm::set_element_length(uint32_t hitlist_idx, uint32_t element_length) _hitList[hitlist_idx].set_element_length(element_length); } -namespace { - -uint16_t -cap_16_bits(uint32_t value) -{ - return std::min(value, static_cast<uint32_t>(std::numeric_limits<uint16_t>::max())); -} - -uint32_t -extract_field_length(const QueryTerm& term, uint32_t field_id) -{ - return (field_id < term.getFieldInfoSize()) ? term.getFieldInfo(field_id).getFieldLength() : search::fef::FieldPositionsIterator::UNKNOWN_LENGTH; -} - -void -set_interleaved_features(TermFieldMatchData& tmd, uint32_t field_length, uint32_t num_occs) -{ - tmd.setFieldLength(cap_16_bits(field_length)); - tmd.setNumOccs(cap_16_bits(num_occs)); -} - -} - -void -QueryTerm::unpack_match_data_helper(uint32_t docid, const fef::ITermData& td, fef::MatchData& match_data, const QueryTerm& fl_term) const -{ - HitList list; - const HitList & hitList = evaluateHits(list); - - if (!hitList.empty()) { // only unpack if we have a hit - LOG(debug, "Unpack match data for query term '%s:%s'", - index().c_str(), getTerm()); - - uint32_t lastFieldId = -1; - TermFieldMatchData *tmd = nullptr; - uint32_t num_occs = 0; - - // optimize for hitlist giving all hits for a single field in one chunk - for (const Hit & hit : hitList) { - uint32_t fieldId = hit.field_id(); - if (fieldId != lastFieldId) { - if (tmd != nullptr) { - if (tmd->needs_interleaved_features()) { - set_interleaved_features(*tmd, extract_field_length(fl_term, lastFieldId), num_occs); - } - // reset to notfound/unknown values - tmd = nullptr; - } - num_occs = 0; - - // setup for new field that had a hit - const ITermFieldData *tfd = td.lookupField(fieldId); - if (tfd != nullptr) { - tmd = match_data.resolveTermField(tfd->getHandle()); - tmd->setFieldId(fieldId); - // reset field match data, but only once per docId - if (tmd->getDocId() != docid) { - tmd->reset(docid); - } - } - lastFieldId = fieldId; - } - ++num_occs; - if (tmd != nullptr) { - TermFieldMatchDataPosition pos(hit.element_id(), hit.position(), - hit.element_weight(), hit.element_length()); - tmd->appendPosition(pos); - LOG(debug, "Append elemId(%u),position(%u), weight(%d), tfmd.weight(%d)", - pos.getElementId(), pos.getPosition(), pos.getElementWeight(), tmd->getWeight()); - } - } - if (tmd != nullptr) { - if (tmd->needs_interleaved_features()) { - set_interleaved_features(*tmd, extract_field_length(fl_term, lastFieldId), num_occs); - } - } - } -} - void QueryTerm::unpack_match_data(uint32_t docid, const fef::ITermData& td, fef::MatchData& match_data) { - unpack_match_data_helper(docid, td, match_data, *this); + HitList list; + const HitList & hit_list = evaluateHits(list); + unpack_match_data_helper(docid, td, match_data, hit_list, *this); } NearestNeighborQueryNode* diff --git a/searchlib/src/vespa/searchlib/query/streaming/queryterm.h b/searchlib/src/vespa/searchlib/query/streaming/queryterm.h index 3f45a99e805..2cb4f2d2ebb 100644 --- a/searchlib/src/vespa/searchlib/query/streaming/queryterm.h +++ b/searchlib/src/vespa/searchlib/query/streaming/queryterm.h @@ -105,7 +105,8 @@ public: virtual const EquivQueryNode* as_equiv_query_node() const noexcept; virtual void unpack_match_data(uint32_t docid, const fef::ITermData& td, fef::MatchData& match_data); protected: - void unpack_match_data_helper(uint32_t docid, const fef::ITermData& td, fef::MatchData& match_data, const QueryTerm& fl_term) const; + template <typename HitListType> + static void unpack_match_data_helper(uint32_t docid, const fef::ITermData& td, fef::MatchData& match_data, const HitListType& hit_list, const QueryTerm& fl_term); using QueryNodeResultBaseContainer = std::unique_ptr<QueryNodeResultBase>; string _index; EncodingBitMap _encoding; diff --git a/searchlib/src/vespa/searchlib/query/streaming/queryterm.hpp b/searchlib/src/vespa/searchlib/query/streaming/queryterm.hpp new file mode 100644 index 00000000000..dd6eff1f22b --- /dev/null +++ b/searchlib/src/vespa/searchlib/query/streaming/queryterm.hpp @@ -0,0 +1,94 @@ +// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include "queryterm.h" +#include <vespa/searchlib/fef/itermdata.h> +#include <vespa/searchlib/fef/matchdata.h> +#include <algorithm> +#include <limits> + + +namespace search::streaming { + +namespace { + +uint16_t +cap_16_bits(uint32_t value) +{ + return std::min(value, static_cast<uint32_t>(std::numeric_limits<uint16_t>::max())); +} + +uint32_t +extract_field_length(const QueryTerm& term, uint32_t field_id) +{ + return (field_id < term.getFieldInfoSize()) ? term.getFieldInfo(field_id).getFieldLength() : search::fef::FieldPositionsIterator::UNKNOWN_LENGTH; +} + +void +set_interleaved_features(search::fef::TermFieldMatchData& tmd, uint32_t field_length, uint32_t num_occs) +{ + tmd.setFieldLength(cap_16_bits(field_length)); + tmd.setNumOccs(cap_16_bits(num_occs)); +} + +} + +template <typename HitListType> +void +QueryTerm::unpack_match_data_helper(uint32_t docid, const fef::ITermData& td, fef::MatchData& match_data, const HitListType& hit_list, const QueryTerm& fl_term) +{ + (void) fl_term; + if (!hit_list.empty()) { // only unpack if we have a hit + + uint32_t last_field_id = -1; + uint32_t last_field_length = 0; + search::fef::TermFieldMatchData *tmd = nullptr; + uint32_t num_occs = 0; + + // optimize for hitlist giving all hits for a single field in one chunk + for (const auto& hit : hit_list) { + uint32_t field_id = hit.field_id(); + if (field_id != last_field_id) { + if (tmd != nullptr) { + if (tmd->needs_interleaved_features()) { + set_interleaved_features(*tmd, last_field_length, num_occs); + } + // reset to notfound/unknown values + tmd = nullptr; + } + num_occs = 0; + + // setup for new field that had a hit + const search::fef::ITermFieldData *tfd = td.lookupField(field_id); + if (tfd != nullptr) { + tmd = match_data.resolveTermField(tfd->getHandle()); + tmd->setFieldId(field_id); + // reset field match data, but only once per docId + if (tmd->getDocId() != docid) { + tmd->reset(docid); + } + } + last_field_id = field_id; + if constexpr (std::is_same_v<HitList, HitListType>) { + last_field_length = extract_field_length(fl_term, field_id); + } else { + last_field_length = hit.get_field_length(); + } + } + ++num_occs; + if (tmd != nullptr) { + search::fef::TermFieldMatchDataPosition pos(hit.element_id(), hit.position(), + hit.element_weight(), hit.element_length()); + tmd->appendPosition(pos); + } + } + if (tmd != nullptr) { + if (tmd->needs_interleaved_features()) { + set_interleaved_features(*tmd, last_field_length, num_occs); + } + } + } +} + +} |