summary | refs | log | tree | commit | diff | stats
path: root/searchlib
diff options
context:
space:
mode:
author: Tor Egge <Tor.Egge@online.no>, 2024-02-09 17:01:32 +0100
committer: Tor Egge <Tor.Egge@online.no>, 2024-02-09 17:01:32 +0100
commit: e8d5226c0e499db017eef46cfeea20bbe8b11133 (patch)
tree: c89c14215e504395c9bcd25b43852b1e4bca4b73 /searchlib
parent: 332bdd44a075c16418b49ddfe66965e5a46e2e8c (diff)
Reduce code duplication.
Diffstat (limited to 'searchlib')
-rw-r--r-- searchlib/src/vespa/searchlib/query/streaming/equiv_query_node.cpp | 75
-rw-r--r-- searchlib/src/vespa/searchlib/query/streaming/phrase_query_node.cpp | 4
-rw-r--r-- searchlib/src/vespa/searchlib/query/streaming/queryterm.cpp | 85
-rw-r--r-- searchlib/src/vespa/searchlib/query/streaming/queryterm.h | 3
-rw-r--r-- searchlib/src/vespa/searchlib/query/streaming/queryterm.hpp | 94
5 files changed, 107 insertions, 154 deletions
diff --git a/searchlib/src/vespa/searchlib/query/streaming/equiv_query_node.cpp b/searchlib/src/vespa/searchlib/query/streaming/equiv_query_node.cpp
index 8a39830799b..939afec0463 100644
--- a/searchlib/src/vespa/searchlib/query/streaming/equiv_query_node.cpp
+++ b/searchlib/src/vespa/searchlib/query/streaming/equiv_query_node.cpp
@@ -2,10 +2,7 @@
#include "equiv_query_node.h"
#include "phrase_query_node.h"
-#include <vespa/searchlib/fef/itermdata.h>
-#include <vespa/searchlib/fef/matchdata.h>
-#include <algorithm>
-#include <cassert>
+#include "queryterm.hpp"
using search::fef::TermFieldMatchData;
using search::fef::TermFieldMatchDataPosition;
@@ -27,25 +24,6 @@ public:
uint32_t get_field_length() const noexcept { return _field_length; }
};
-uint16_t
-cap_16_bits(uint32_t value)
-{
- return std::min(value, static_cast<uint32_t>(std::numeric_limits<uint16_t>::max()));
-}
-
-uint32_t
-extract_field_length(const QueryTerm& term, uint32_t field_id)
-{
- return (field_id < term.getFieldInfoSize()) ? term.getFieldInfo(field_id).getFieldLength() : search::fef::FieldPositionsIterator::UNKNOWN_LENGTH;
-}
-
-void
-set_interleaved_features(TermFieldMatchData& tmd, uint32_t field_length, uint32_t num_occs)
-{
- tmd.setFieldLength(cap_16_bits(field_length));
- tmd.setNumOccs(cap_16_bits(num_occs));
-}
-
template <typename HitType>
void merge_hits_from_children(std::vector<HitType>& hl, const MultiTerm& mt)
{
@@ -98,54 +76,9 @@ EquivQueryNode::evaluateHits(HitList & hl) const
void
EquivQueryNode::unpack_match_data(uint32_t docid, const fef::ITermData& td, fef::MatchData& match_data)
{
- std::vector<HitWithFieldLength> hitList;
- merge_hits_from_children(hitList, *this);
-
- if (!hitList.empty()) { // only unpack if we have a hit
- uint32_t lastFieldId = -1;
- uint32_t last_field_length = 0;
- TermFieldMatchData *tmd = nullptr;
- uint32_t num_occs = 0;
-
- // optimize for hitlist giving all hits for a single field in one chunk
- for (auto& hit : hitList) {
- uint32_t fieldId = hit.field_id();
- if (fieldId != lastFieldId) {
- if (tmd != nullptr) {
- if (tmd->needs_interleaved_features()) {
- set_interleaved_features(*tmd, last_field_length, num_occs);
- }
- // reset to notfound/unknown values
- tmd = nullptr;
- }
- num_occs = 0;
-
- // setup for new field that had a hit
- const ITermFieldData *tfd = td.lookupField(fieldId);
- if (tfd != nullptr) {
- tmd = match_data.resolveTermField(tfd->getHandle());
- tmd->setFieldId(fieldId);
- // reset field match data, but only once per docId
- if (tmd->getDocId() != docid) {
- tmd->reset(docid);
- }
- }
- lastFieldId = fieldId;
- last_field_length = hit.get_field_length();
- }
- ++num_occs;
- if (tmd != nullptr) {
- TermFieldMatchDataPosition pos(hit.element_id(), hit.position(),
- hit.element_weight(), hit.element_length());
- tmd->appendPosition(pos);
- }
- }
- if (tmd != nullptr) {
- if (tmd->needs_interleaved_features()) {
- set_interleaved_features(*tmd, last_field_length, num_occs);
- }
- }
- }
+ std::vector<HitWithFieldLength> hit_list; // filled by merge_hits_from_children() on the next line
+ merge_hits_from_children(hit_list, *this); // this node is passed as the MultiTerm whose children supply the hits
+ unpack_match_data_helper(docid, td, match_data, hit_list, *this); // shared unpack loop; presumably takes the hit.get_field_length() branch since this list type is not HitList - confirm
}
EquivQueryNode*
diff --git a/searchlib/src/vespa/searchlib/query/streaming/phrase_query_node.cpp b/searchlib/src/vespa/searchlib/query/streaming/phrase_query_node.cpp
index 9cd8d41d33d..b090ca13225 100644
--- a/searchlib/src/vespa/searchlib/query/streaming/phrase_query_node.cpp
+++ b/searchlib/src/vespa/searchlib/query/streaming/phrase_query_node.cpp
@@ -69,7 +69,9 @@ PhraseQueryNode::evaluateHits(HitList & hl) const
void
PhraseQueryNode::unpack_match_data(uint32_t docid, const fef::ITermData& td, fef::MatchData& match_data)
{
- unpack_match_data_helper(docid, td, match_data, *get_terms().front());
+ HitList list; // local list handed to evaluateHits(); the reference it returns is what gets unpacked
+ const HitList & hit_list = evaluateHits(list); // hit evaluation now happens here, before calling the helper
+ unpack_match_data_helper(docid, td, match_data, hit_list, *get_terms().front()); // the first term of the phrase is passed as the helper's last argument
}
}
diff --git a/searchlib/src/vespa/searchlib/query/streaming/queryterm.cpp b/searchlib/src/vespa/searchlib/query/streaming/queryterm.cpp
index 920d4bc59d7..0d0f5a7c4ad 100644
--- a/searchlib/src/vespa/searchlib/query/streaming/queryterm.cpp
+++ b/searchlib/src/vespa/searchlib/query/streaming/queryterm.cpp
@@ -1,6 +1,6 @@
// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
-#include "queryterm.h"
+#include "queryterm.hpp"
#include <vespa/searchlib/fef/itermdata.h>
#include <vespa/searchlib/fef/matchdata.h>
#include <vespa/vespalib/objects/visit.h>
@@ -113,89 +113,12 @@ QueryTerm::set_element_length(uint32_t hitlist_idx, uint32_t element_length)
_hitList[hitlist_idx].set_element_length(element_length);
}
-namespace {
-
-uint16_t
-cap_16_bits(uint32_t value)
-{
- return std::min(value, static_cast<uint32_t>(std::numeric_limits<uint16_t>::max()));
-}
-
-uint32_t
-extract_field_length(const QueryTerm& term, uint32_t field_id)
-{
- return (field_id < term.getFieldInfoSize()) ? term.getFieldInfo(field_id).getFieldLength() : search::fef::FieldPositionsIterator::UNKNOWN_LENGTH;
-}
-
-void
-set_interleaved_features(TermFieldMatchData& tmd, uint32_t field_length, uint32_t num_occs)
-{
- tmd.setFieldLength(cap_16_bits(field_length));
- tmd.setNumOccs(cap_16_bits(num_occs));
-}
-
-}
-
-void
-QueryTerm::unpack_match_data_helper(uint32_t docid, const fef::ITermData& td, fef::MatchData& match_data, const QueryTerm& fl_term) const
-{
- HitList list;
- const HitList & hitList = evaluateHits(list);
-
- if (!hitList.empty()) { // only unpack if we have a hit
- LOG(debug, "Unpack match data for query term '%s:%s'",
- index().c_str(), getTerm());
-
- uint32_t lastFieldId = -1;
- TermFieldMatchData *tmd = nullptr;
- uint32_t num_occs = 0;
-
- // optimize for hitlist giving all hits for a single field in one chunk
- for (const Hit & hit : hitList) {
- uint32_t fieldId = hit.field_id();
- if (fieldId != lastFieldId) {
- if (tmd != nullptr) {
- if (tmd->needs_interleaved_features()) {
- set_interleaved_features(*tmd, extract_field_length(fl_term, lastFieldId), num_occs);
- }
- // reset to notfound/unknown values
- tmd = nullptr;
- }
- num_occs = 0;
-
- // setup for new field that had a hit
- const ITermFieldData *tfd = td.lookupField(fieldId);
- if (tfd != nullptr) {
- tmd = match_data.resolveTermField(tfd->getHandle());
- tmd->setFieldId(fieldId);
- // reset field match data, but only once per docId
- if (tmd->getDocId() != docid) {
- tmd->reset(docid);
- }
- }
- lastFieldId = fieldId;
- }
- ++num_occs;
- if (tmd != nullptr) {
- TermFieldMatchDataPosition pos(hit.element_id(), hit.position(),
- hit.element_weight(), hit.element_length());
- tmd->appendPosition(pos);
- LOG(debug, "Append elemId(%u),position(%u), weight(%d), tfmd.weight(%d)",
- pos.getElementId(), pos.getPosition(), pos.getElementWeight(), tmd->getWeight());
- }
- }
- if (tmd != nullptr) {
- if (tmd->needs_interleaved_features()) {
- set_interleaved_features(*tmd, extract_field_length(fl_term, lastFieldId), num_occs);
- }
- }
- }
-}
-
void
QueryTerm::unpack_match_data(uint32_t docid, const fef::ITermData& td, fef::MatchData& match_data)
{
- unpack_match_data_helper(docid, td, match_data, *this);
+ HitList list; // local list handed to evaluateHits(); the reference it returns is what gets unpacked
+ const HitList & hit_list = evaluateHits(list); // hit evaluation now happens here, before calling the helper
+ unpack_match_data_helper(docid, td, match_data, hit_list, *this); // this term itself is passed as the helper's last argument
}
NearestNeighborQueryNode*
diff --git a/searchlib/src/vespa/searchlib/query/streaming/queryterm.h b/searchlib/src/vespa/searchlib/query/streaming/queryterm.h
index 3f45a99e805..2cb4f2d2ebb 100644
--- a/searchlib/src/vespa/searchlib/query/streaming/queryterm.h
+++ b/searchlib/src/vespa/searchlib/query/streaming/queryterm.h
@@ -105,7 +105,8 @@ public:
virtual const EquivQueryNode* as_equiv_query_node() const noexcept;
virtual void unpack_match_data(uint32_t docid, const fef::ITermData& td, fef::MatchData& match_data);
protected:
- void unpack_match_data_helper(uint32_t docid, const fef::ITermData& td, fef::MatchData& match_data, const QueryTerm& fl_term) const;
+ template <typename HitListType>
+ static void unpack_match_data_helper(uint32_t docid, const fef::ITermData& td, fef::MatchData& match_data, const HitListType& hit_list, const QueryTerm& fl_term);
using QueryNodeResultBaseContainer = std::unique_ptr<QueryNodeResultBase>;
string _index;
EncodingBitMap _encoding;
diff --git a/searchlib/src/vespa/searchlib/query/streaming/queryterm.hpp b/searchlib/src/vespa/searchlib/query/streaming/queryterm.hpp
new file mode 100644
index 00000000000..dd6eff1f22b
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/query/streaming/queryterm.hpp
@@ -0,0 +1,94 @@
+// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include "queryterm.h"
+#include <vespa/searchlib/fef/itermdata.h>
+#include <vespa/searchlib/fef/matchdata.h>
+#include <algorithm>
+#include <limits>
+
+
+namespace search::streaming {
+
+namespace {
+
+uint16_t // Saturating narrowing: returns value unchanged when it fits in 16 bits, otherwise the largest uint16_t value.
+cap_16_bits(uint32_t value)
+{
+ return std::min(value, static_cast<uint32_t>(std::numeric_limits<uint16_t>::max())); // clamp first so the conversion to the uint16_t return type cannot truncate
+}
+
+uint32_t // Returns the field length stored in term's field info for field_id, or FieldPositionsIterator::UNKNOWN_LENGTH when field_id is not below term.getFieldInfoSize().
+extract_field_length(const QueryTerm& term, uint32_t field_id)
+{
+ return (field_id < term.getFieldInfoSize()) ? term.getFieldInfo(field_id).getFieldLength() : search::fef::FieldPositionsIterator::UNKNOWN_LENGTH; // getFieldInfo() is only called for ids below getFieldInfoSize()
+}
+
+void // Stores field_length and num_occs on tmd, each saturated to 16 bits by cap_16_bits().
+set_interleaved_features(search::fef::TermFieldMatchData& tmd, uint32_t field_length, uint32_t num_occs)
+{
+ tmd.setFieldLength(cap_16_bits(field_length));
+ tmd.setNumOccs(cap_16_bits(num_occs));
+}
+
+}
+
+template <typename HitListType> // HitListType: container usable with empty() and range-for whose hits provide field_id(), element_id(), position(), element_weight(), element_length(); unless it is HitList the hits must also provide get_field_length()
+void // Unpacks the hits in hit_list into the TermFieldMatchData objects that td and match_data resolve for each hit's field, tagging them with docid.
+QueryTerm::unpack_match_data_helper(uint32_t docid, const fef::ITermData& td, fef::MatchData& match_data, const HitListType& hit_list, const QueryTerm& fl_term)
+{
+ (void) fl_term; // fl_term is only read in the HitList branch of the if constexpr below; presumably this keeps other instantiations free of unused-parameter warnings
+ if (!hit_list.empty()) { // only unpack if we have a hit
+
+ uint32_t last_field_id = -1; // -1 converts to the largest uint32_t; serves as the "no field seen yet" start value
+ uint32_t last_field_length = 0; // field length to report for the field run currently being unpacked
+ search::fef::TermFieldMatchData *tmd = nullptr; // match data of the current field; stays nullptr when td has no entry for that field
+ uint32_t num_occs = 0; // number of hits seen in the current field run
+
+ // Hits are handled in list order and a new run starts whenever the field id changes, so this is tuned for lists where all hits of a field are adjacent; if a field is split over several runs its interleaved features are overwritten by the last run.
+ for (const auto& hit : hit_list) {
+ uint32_t field_id = hit.field_id();
+ if (field_id != last_field_id) { // first hit of a new field run
+ if (tmd != nullptr) {
+ if (tmd->needs_interleaved_features()) {
+ set_interleaved_features(*tmd, last_field_length, num_occs); // finish the previous run before switching fields
+ }
+ // forget the previous field's match data until the lookup below succeeds
+ tmd = nullptr;
+ }
+ num_occs = 0;
+
+ // look up the match data for the field of this hit; tmd stays nullptr if td does not know the field
+ const search::fef::ITermFieldData *tfd = td.lookupField(field_id);
+ if (tfd != nullptr) {
+ tmd = match_data.resolveTermField(tfd->getHandle());
+ tmd->setFieldId(field_id);
+ // reset only when the match data is not already tagged with this docid, so earlier content for the same document is kept
+ if (tmd->getDocId() != docid) {
+ tmd->reset(docid);
+ }
+ }
+ last_field_id = field_id;
+ if constexpr (std::is_same_v<HitList, HitListType>) { // plain HitList: field length is looked up on fl_term. NOTE(review): std::is_same_v lives in <type_traits>, which this header does not include directly
+ last_field_length = extract_field_length(fl_term, field_id);
+ } else { // any other hit list type: field length is read from the first hit of the run
+ last_field_length = hit.get_field_length();
+ }
+ }
+ ++num_occs; // counted even when tmd is nullptr
+ if (tmd != nullptr) {
+ search::fef::TermFieldMatchDataPosition pos(hit.element_id(), hit.position(),
+ hit.element_weight(), hit.element_length());
+ tmd->appendPosition(pos);
+ }
+ }
+ if (tmd != nullptr) { // the loop only finishes a run when the field changes, so finish the final run here
+ if (tmd->needs_interleaved_features()) {
+ set_interleaved_features(*tmd, last_field_length, num_occs);
+ }
+ }
+ }
+}
+
+}