about summary refs log tree commit diff stats
path: root/streamingvisitors
diff options
context:
space:
mode:
author Tor Egge <Tor.Egge@online.no> 2024-02-05 13:42:12 +0100
committer Tor Egge <Tor.Egge@online.no> 2024-02-05 13:42:12 +0100
commit c490ded9e1d40b68b2b167896d754459e5d9d7de (patch)
tree f5e888483539d4a4298177c8d2e06f261878a6cb /streamingvisitors
parent d45399e3ab1d07781f71473e4a8fe2b67b197941 (diff)
Add unpack_match_data member function to search::streaming::QueryTerm.
Diffstat (limited to 'streamingvisitors')
-rw-r--r-- streamingvisitors/src/tests/querywrapper/querywrapper_test.cpp 6
-rw-r--r-- streamingvisitors/src/tests/rank_processor/rank_processor_test.cpp 4
-rw-r--r-- streamingvisitors/src/vespa/searchvisitor/querywrapper.h 1
-rw-r--r-- streamingvisitors/src/vespa/searchvisitor/rankprocessor.cpp 71
4 files changed, 6 insertions, 76 deletions
diff --git a/streamingvisitors/src/tests/querywrapper/querywrapper_test.cpp b/streamingvisitors/src/tests/querywrapper/querywrapper_test.cpp
index dc3dbfca7ca..70b863e540b 100644
--- a/streamingvisitors/src/tests/querywrapper/querywrapper_test.cpp
+++ b/streamingvisitors/src/tests/querywrapper/querywrapper_test.cpp
@@ -84,12 +84,6 @@ QueryWrapperTest::testQueryWrapper()
EXPECT_TRUE( tl[2].isPhraseTerm());
EXPECT_TRUE( tl[3].isPhraseTerm());
EXPECT_TRUE(!tl[4].isPhraseTerm());
-
- EXPECT_EQUAL(tl[0].getPosAdjust(), 0u);
- EXPECT_EQUAL(tl[1].getPosAdjust(), 2u);
- EXPECT_EQUAL(tl[2].getPosAdjust(), 2u);
- EXPECT_EQUAL(tl[3].getPosAdjust(), 2u);
- EXPECT_EQUAL(tl[4].getPosAdjust(), 0u);
}
}
diff --git a/streamingvisitors/src/tests/rank_processor/rank_processor_test.cpp b/streamingvisitors/src/tests/rank_processor/rank_processor_test.cpp
index c9518b29884..0abff37d622 100644
--- a/streamingvisitors/src/tests/rank_processor/rank_processor_test.cpp
+++ b/streamingvisitors/src/tests/rank_processor/rank_processor_test.cpp
@@ -85,8 +85,8 @@ RankProcessorTest::test_unpack_match_data_for_term_node(bool interleaved_feature
RankProcessor::unpack_match_data(1, *md, *_query_wrapper);
EXPECT_EQ(invalid_id, tfmd->getDocId());
node->add(field_id, 0, 1, 0);
+ node->add(field_id, 0, 1, 1);
auto& field_info = node->getFieldInfo(field_id);
- field_info.setHitCount(mock_num_occs);
field_info.setFieldLength(mock_field_length);
RankProcessor::unpack_match_data(2, *md, *_query_wrapper);
EXPECT_EQ(2, tfmd->getDocId());
@@ -97,7 +97,7 @@ RankProcessorTest::test_unpack_match_data_for_term_node(bool interleaved_feature
EXPECT_EQ(0, tfmd->getNumOccs());
EXPECT_EQ(0, tfmd->getFieldLength());
}
- EXPECT_EQ(1, tfmd->size());
+ EXPECT_EQ(2, tfmd->size());
node->reset();
RankProcessor::unpack_match_data(3, *md, *_query_wrapper);
EXPECT_EQ(2, tfmd->getDocId());
diff --git a/streamingvisitors/src/vespa/searchvisitor/querywrapper.h b/streamingvisitors/src/vespa/searchvisitor/querywrapper.h
index b24f695196e..420ff215833 100644
--- a/streamingvisitors/src/vespa/searchvisitor/querywrapper.h
+++ b/streamingvisitors/src/vespa/searchvisitor/querywrapper.h
@@ -47,7 +47,6 @@ public:
size_t getIndex() const { return _index; }
bool isPhraseTerm() const { return _parent != nullptr; }
bool isFirstPhraseTerm() const { return isPhraseTerm() && getIndex() == 0; }
- size_t getPosAdjust() const { return _parent != nullptr ? _parent->width() - 1 : 0; }
bool isGeoPosTerm() const { return (_term != nullptr) && _term->isGeoLoc(); }
};
diff --git a/streamingvisitors/src/vespa/searchvisitor/rankprocessor.cpp b/streamingvisitors/src/vespa/searchvisitor/rankprocessor.cpp
index 96e8ca89a04..09699f79427 100644
--- a/streamingvisitors/src/vespa/searchvisitor/rankprocessor.cpp
+++ b/streamingvisitors/src/vespa/searchvisitor/rankprocessor.cpp
@@ -53,11 +53,6 @@ getFeature(const RankProgram &rankProgram) {
return resolver.resolve(0);
}
-uint16_t
-cap_16_bits(uint32_t value) {
- return std::min(value, static_cast<uint32_t>(std::numeric_limits<uint16_t>::max()));
-}
-
}
void
@@ -285,68 +280,10 @@ RankProcessor::unpack_match_data(uint32_t docid, MatchData &matchData, QueryWrap
QueryTermData & qtd = static_cast<QueryTermData &>(term.getTerm()->getQueryItem());
const ITermData &td = qtd.getTermData();
- HitList list;
- const HitList & hitList = isPhrase
- ? term.getParent()->evaluateHits(list)
- : term.getTerm()->evaluateHits(list);
-
- if (hitList.size() > 0) { // only unpack if we have a hit
- LOG(debug, "Unpack match data for query term '%s:%s' (%s)",
- term.getTerm()->index().c_str(), term.getTerm()->getTerm(), isPhrase ? "phrase" : "term");
-
- uint32_t lastFieldId = -1;
- TermFieldMatchData *tmd = nullptr;
- uint32_t fieldLen = search::fef::FieldPositionsIterator::UNKNOWN_LENGTH;
- uint32_t num_occs = 0;
-
- // optimize for hitlist giving all hits for a single field in one chunk
- for (const Hit & hit : hitList) {
- uint32_t fieldId = hit.field_id();
- if (fieldId != lastFieldId) {
- // reset to notfound/unknown values
- tmd = nullptr;
- fieldLen = search::fef::FieldPositionsIterator::UNKNOWN_LENGTH;
- num_occs = 0;
-
- // setup for new field that had a hit
- const ITermFieldData *tfd = td.lookupField(fieldId);
- if (tfd != nullptr) {
- tmd = matchData.resolveTermField(tfd->getHandle());
- tmd->setFieldId(fieldId);
- // reset field match data, but only once per docId
- if (tmd->getDocId() != docid) {
- tmd->reset(docid);
- }
- }
- // find fieldLen for new field
- if (isPhrase) {
- if (fieldId < term.getParent()->getFieldInfoSize()) {
- auto& field_info = term.getParent()->getFieldInfo(fieldId);
- fieldLen = field_info.getFieldLength();
- num_occs = field_info.getHitCount();
- }
- } else {
- if (fieldId < term.getTerm()->getFieldInfoSize()) {
- auto& field_info = term.getTerm()->getFieldInfo(fieldId);
- fieldLen = field_info.getFieldLength();
- num_occs = field_info.getHitCount();
- }
- }
- lastFieldId = fieldId;
- }
- if (tmd != nullptr) {
- // adjust so that the position for phrase terms equals the match for the first term
- TermFieldMatchDataPosition pos(hit.element_id(), hit.position() - term.getPosAdjust(),
- hit.element_weight(), hit.element_length());
- tmd->appendPosition(pos);
- LOG(debug, "Append elemId(%u),position(%u), weight(%d), tfmd.weight(%d)",
- pos.getElementId(), pos.getPosition(), pos.getElementWeight(), tmd->getWeight());
- if (tmd->needs_interleaved_features()) {
- tmd->setFieldLength(cap_16_bits(fieldLen));
- tmd->setNumOccs(cap_16_bits(num_occs));
- }
- }
- }
+ if (isPhrase) {
+ term.getParent()->unpack_match_data(docid, td, matchData);
+ } else {
+ term.getTerm()->unpack_match_data(docid, td, matchData);
}
}
}