diff options
author | Tor Egge <Tor.Egge@online.no> | 2024-02-05 13:42:12 +0100 |
---|---|---|
committer | Tor Egge <Tor.Egge@online.no> | 2024-02-05 13:42:12 +0100 |
commit | c490ded9e1d40b68b2b167896d754459e5d9d7de (patch) | |
tree | f5e888483539d4a4298177c8d2e06f261878a6cb /streamingvisitors | |
parent | d45399e3ab1d07781f71473e4a8fe2b67b197941 (diff) |
Add unpack_match_data member function to search::streaming::QueryTerm.
Diffstat (limited to 'streamingvisitors')
4 files changed, 6 insertions, 76 deletions
diff --git a/streamingvisitors/src/tests/querywrapper/querywrapper_test.cpp b/streamingvisitors/src/tests/querywrapper/querywrapper_test.cpp index dc3dbfca7ca..70b863e540b 100644 --- a/streamingvisitors/src/tests/querywrapper/querywrapper_test.cpp +++ b/streamingvisitors/src/tests/querywrapper/querywrapper_test.cpp @@ -84,12 +84,6 @@ QueryWrapperTest::testQueryWrapper() EXPECT_TRUE( tl[2].isPhraseTerm()); EXPECT_TRUE( tl[3].isPhraseTerm()); EXPECT_TRUE(!tl[4].isPhraseTerm()); - - EXPECT_EQUAL(tl[0].getPosAdjust(), 0u); - EXPECT_EQUAL(tl[1].getPosAdjust(), 2u); - EXPECT_EQUAL(tl[2].getPosAdjust(), 2u); - EXPECT_EQUAL(tl[3].getPosAdjust(), 2u); - EXPECT_EQUAL(tl[4].getPosAdjust(), 0u); } } diff --git a/streamingvisitors/src/tests/rank_processor/rank_processor_test.cpp b/streamingvisitors/src/tests/rank_processor/rank_processor_test.cpp index c9518b29884..0abff37d622 100644 --- a/streamingvisitors/src/tests/rank_processor/rank_processor_test.cpp +++ b/streamingvisitors/src/tests/rank_processor/rank_processor_test.cpp @@ -85,8 +85,8 @@ RankProcessorTest::test_unpack_match_data_for_term_node(bool interleaved_feature RankProcessor::unpack_match_data(1, *md, *_query_wrapper); EXPECT_EQ(invalid_id, tfmd->getDocId()); node->add(field_id, 0, 1, 0); + node->add(field_id, 0, 1, 1); auto& field_info = node->getFieldInfo(field_id); - field_info.setHitCount(mock_num_occs); field_info.setFieldLength(mock_field_length); RankProcessor::unpack_match_data(2, *md, *_query_wrapper); EXPECT_EQ(2, tfmd->getDocId()); @@ -97,7 +97,7 @@ RankProcessorTest::test_unpack_match_data_for_term_node(bool interleaved_feature EXPECT_EQ(0, tfmd->getNumOccs()); EXPECT_EQ(0, tfmd->getFieldLength()); } - EXPECT_EQ(1, tfmd->size()); + EXPECT_EQ(2, tfmd->size()); node->reset(); RankProcessor::unpack_match_data(3, *md, *_query_wrapper); EXPECT_EQ(2, tfmd->getDocId()); diff --git a/streamingvisitors/src/vespa/searchvisitor/querywrapper.h b/streamingvisitors/src/vespa/searchvisitor/querywrapper.h index b24f695196e..420ff215833 100644 --- a/streamingvisitors/src/vespa/searchvisitor/querywrapper.h +++ b/streamingvisitors/src/vespa/searchvisitor/querywrapper.h @@ -47,7 +47,6 @@ public: size_t getIndex() const { return _index; } bool isPhraseTerm() const { return _parent != nullptr; } bool isFirstPhraseTerm() const { return isPhraseTerm() && getIndex() == 0; } - size_t getPosAdjust() const { return _parent != nullptr ? _parent->width() - 1 : 0; } bool isGeoPosTerm() const { return (_term != nullptr) && _term->isGeoLoc(); } }; diff --git a/streamingvisitors/src/vespa/searchvisitor/rankprocessor.cpp b/streamingvisitors/src/vespa/searchvisitor/rankprocessor.cpp index 96e8ca89a04..09699f79427 100644 --- a/streamingvisitors/src/vespa/searchvisitor/rankprocessor.cpp +++ b/streamingvisitors/src/vespa/searchvisitor/rankprocessor.cpp @@ -53,11 +53,6 @@ getFeature(const RankProgram &rankProgram) { return resolver.resolve(0); } -uint16_t -cap_16_bits(uint32_t value) { - return std::min(value, static_cast<uint32_t>(std::numeric_limits<uint16_t>::max())); -} - } void @@ -285,68 +280,10 @@ RankProcessor::unpack_match_data(uint32_t docid, MatchData &matchData, QueryWrap QueryTermData & qtd = static_cast<QueryTermData &>(term.getTerm()->getQueryItem()); const ITermData &td = qtd.getTermData(); - HitList list; - const HitList & hitList = isPhrase - ? term.getParent()->evaluateHits(list) - : term.getTerm()->evaluateHits(list); - - if (hitList.size() > 0) { // only unpack if we have a hit - LOG(debug, "Unpack match data for query term '%s:%s' (%s)", - term.getTerm()->index().c_str(), term.getTerm()->getTerm(), isPhrase ? "phrase" : "term"); - - uint32_t lastFieldId = -1; - TermFieldMatchData *tmd = nullptr; - uint32_t fieldLen = search::fef::FieldPositionsIterator::UNKNOWN_LENGTH; - uint32_t num_occs = 0; - - // optimize for hitlist giving all hits for a single field in one chunk - for (const Hit & hit : hitList) { - uint32_t fieldId = hit.field_id(); - if (fieldId != lastFieldId) { - // reset to notfound/unknown values - tmd = nullptr; - fieldLen = search::fef::FieldPositionsIterator::UNKNOWN_LENGTH; - num_occs = 0; - - // setup for new field that had a hit - const ITermFieldData *tfd = td.lookupField(fieldId); - if (tfd != nullptr) { - tmd = matchData.resolveTermField(tfd->getHandle()); - tmd->setFieldId(fieldId); - // reset field match data, but only once per docId - if (tmd->getDocId() != docid) { - tmd->reset(docid); - } - } - // find fieldLen for new field - if (isPhrase) { - if (fieldId < term.getParent()->getFieldInfoSize()) { - auto& field_info = term.getParent()->getFieldInfo(fieldId); - fieldLen = field_info.getFieldLength(); - num_occs = field_info.getHitCount(); - } - } else { - if (fieldId < term.getTerm()->getFieldInfoSize()) { - auto& field_info = term.getTerm()->getFieldInfo(fieldId); - fieldLen = field_info.getFieldLength(); - num_occs = field_info.getHitCount(); - } - } - lastFieldId = fieldId; - } - if (tmd != nullptr) { - // adjust so that the position for phrase terms equals the match for the first term - TermFieldMatchDataPosition pos(hit.element_id(), hit.position() - term.getPosAdjust(), - hit.element_weight(), hit.element_length()); - tmd->appendPosition(pos); - LOG(debug, "Append elemId(%u),position(%u), weight(%d), tfmd.weight(%d)", - pos.getElementId(), pos.getPosition(), pos.getElementWeight(), tmd->getWeight()); - if (tmd->needs_interleaved_features()) { - tmd->setFieldLength(cap_16_bits(fieldLen)); - tmd->setNumOccs(cap_16_bits(num_occs)); - } - } - } + if (isPhrase) { + term.getParent()->unpack_match_data(docid, td, matchData); + } else { + term.getTerm()->unpack_match_data(docid, td, matchData); } } } |