diff options
11 files changed, 118 insertions, 119 deletions
diff --git a/searchlib/src/tests/query/streaming/phrase_query_node_test.cpp b/searchlib/src/tests/query/streaming/phrase_query_node_test.cpp index ff4734a3846..2459fe2f01c 100644 --- a/searchlib/src/tests/query/streaming/phrase_query_node_test.cpp +++ b/searchlib/src/tests/query/streaming/phrase_query_node_test.cpp @@ -74,22 +74,13 @@ TEST(PhraseQueryNodeTest, test_phrase_evaluate) ASSERT_EQ(3u, hits.size()); EXPECT_EQ(0u, hits[0].field_id()); EXPECT_EQ(0u, hits[0].element_id()); - EXPECT_EQ(2u, hits[0].position()); + EXPECT_EQ(0u, hits[0].position()); EXPECT_EQ(1u, hits[1].field_id()); EXPECT_EQ(0u, hits[1].element_id()); - EXPECT_EQ(6u, hits[1].position()); + EXPECT_EQ(4u, hits[1].position()); EXPECT_EQ(3u, hits[2].field_id()); EXPECT_EQ(0u, hits[2].element_id()); - EXPECT_EQ(2u, hits[2].position()); - ASSERT_EQ(4u, p->getFieldInfoSize()); - EXPECT_EQ(0u, p->getFieldInfo(0).getHitOffset()); - EXPECT_EQ(1u, p->getFieldInfo(0).getHitCount()); - EXPECT_EQ(1u, p->getFieldInfo(1).getHitOffset()); - EXPECT_EQ(1u, p->getFieldInfo(1).getHitCount()); - EXPECT_EQ(0u, p->getFieldInfo(2).getHitOffset()); // invalid, but will never be used - EXPECT_EQ(0u, p->getFieldInfo(2).getHitCount()); - EXPECT_EQ(2u, p->getFieldInfo(3).getHitOffset()); - EXPECT_EQ(1u, p->getFieldInfo(3).getHitCount()); + EXPECT_EQ(0u, hits[2].position()); EXPECT_TRUE(p->evaluate()); } diff --git a/searchlib/src/vespa/searchlib/query/streaming/hit_iterator_pack.h b/searchlib/src/vespa/searchlib/query/streaming/hit_iterator_pack.h index ad9d15f719a..b2cdb422cc3 100644 --- a/searchlib/src/vespa/searchlib/query/streaming/hit_iterator_pack.h +++ b/searchlib/src/vespa/searchlib/query/streaming/hit_iterator_pack.h @@ -23,7 +23,6 @@ public: ~HitIteratorPack(); FieldElement& get_field_element_ref() noexcept { return _field_element; } HitIterator& front() noexcept { return _iterators.front(); } - HitIterator& back() noexcept { return _iterators.back(); } iterator begin() noexcept { return _iterators.begin(); } iterator end() noexcept { return _iterators.end(); } bool all_valid() const noexcept; diff --git a/searchlib/src/vespa/searchlib/query/streaming/multi_term.h b/searchlib/src/vespa/searchlib/query/streaming/multi_term.h index 6f795c31356..9bf5f8de6b2 100644 --- a/searchlib/src/vespa/searchlib/query/streaming/multi_term.h +++ b/searchlib/src/vespa/searchlib/query/streaming/multi_term.h @@ -32,7 +32,6 @@ public: MultiTerm* as_multi_term() noexcept override { return this; } void reset() override; bool evaluate() const override; - virtual void unpack_match_data(uint32_t docid, const fef::ITermData& td, fef::MatchData& match_data) = 0; const std::vector<std::unique_ptr<QueryTerm>>& get_terms() const noexcept { return _terms; } }; diff --git a/searchlib/src/vespa/searchlib/query/streaming/phrase_query_node.cpp b/searchlib/src/vespa/searchlib/query/streaming/phrase_query_node.cpp index 0020089ef62..95781b58019 100644 --- a/searchlib/src/vespa/searchlib/query/streaming/phrase_query_node.cpp +++ b/searchlib/src/vespa/searchlib/query/streaming/phrase_query_node.cpp @@ -7,8 +7,7 @@ namespace search::streaming { PhraseQueryNode::PhraseQueryNode(std::unique_ptr<QueryNodeResultBase> result_base, const string& index, uint32_t num_terms) - : MultiTerm(std::move(result_base), index, num_terms), - _fieldInfo(32) + : MultiTerm(std::move(result_base), index, num_terms) { } @@ -65,13 +64,11 @@ const HitList & PhraseQueryNode::evaluateHits(HitList & hl) const { hl.clear(); - _fieldInfo.clear(); auto& terms = get_terms(); HitIteratorPack itr_pack(terms); if (!itr_pack.all_valid()) { return hl; } - auto& last_child = dynamic_cast<const QueryTerm&>(*terms.back()); while (itr_pack.seek_to_matching_field_element()) { uint32_t first_position = itr_pack.front()->position(); bool retry_element = true; @@ -92,10 +89,8 @@ PhraseQueryNode::evaluateHits(HitList & hl) const ++position_offset; } if (match) { - auto h = *itr_pack.back(); + auto h = *itr_pack.front(); hl.push_back(h); - auto& fi = last_child.getFieldInfo(h.field_id()); - updateFieldInfo(h.field_id(), hl.size() - 1, fi.getFieldLength()); if (!itr_pack.front().step_in_field_element(itr_pack.get_field_element_ref())) { retry_element = false; } @@ -106,25 +101,9 @@ PhraseQueryNode::evaluateHits(HitList & hl) const } void -PhraseQueryNode::updateFieldInfo(size_t fid, size_t offset, size_t fieldLength) const -{ - if (fid >= _fieldInfo.size()) { - _fieldInfo.resize(fid + 1); - // only set hit offset and field length the first time - QueryTerm::FieldInfo & fi = _fieldInfo[fid]; - fi.setHitOffset(offset); - fi.setFieldLength(fieldLength); - } - QueryTerm::FieldInfo & fi = _fieldInfo[fid]; - fi.setHitCount(fi.getHitCount() + 1); -} - -void PhraseQueryNode::unpack_match_data(uint32_t docid, const fef::ITermData& td, fef::MatchData& match_data) { - (void) docid; - (void) td; - (void) match_data; + unpack_match_data_helper(docid, td, match_data, *get_terms().front()); } } diff --git a/searchlib/src/vespa/searchlib/query/streaming/phrase_query_node.h b/searchlib/src/vespa/searchlib/query/streaming/phrase_query_node.h index 594eab3deba..763cb1d6b8f 100644 --- a/searchlib/src/vespa/searchlib/query/streaming/phrase_query_node.h +++ b/searchlib/src/vespa/searchlib/query/streaming/phrase_query_node.h @@ -19,16 +19,11 @@ public: const HitList & evaluateHits(HitList & hl) const override; void getPhrases(QueryNodeRefList & tl) override; void getPhrases(ConstQueryNodeRefList & tl) const override; - const QueryTerm::FieldInfo & getFieldInfo(size_t fid) const { return _fieldInfo[fid]; } - size_t getFieldInfoSize() const { return _fieldInfo.size(); } void unpack_match_data(uint32_t docid, const fef::ITermData& td, fef::MatchData& match_data) override; void getLeaves(QueryTermList & tl) override; void getLeaves(ConstQueryTermList & tl) const override; size_t width() const override; MultiTerm* as_multi_term() noexcept override; -private: - mutable std::vector<QueryTerm::FieldInfo> _fieldInfo; - void updateFieldInfo(size_t fid, size_t offset, size_t fieldLength) const; }; } diff --git a/searchlib/src/vespa/searchlib/query/streaming/queryterm.cpp b/searchlib/src/vespa/searchlib/query/streaming/queryterm.cpp index b7e619cfe4c..e5e1473dd3c 100644 --- a/searchlib/src/vespa/searchlib/query/streaming/queryterm.cpp +++ b/searchlib/src/vespa/searchlib/query/streaming/queryterm.cpp @@ -2,8 +2,21 @@ #include "queryterm.h" #include <vespa/fastlib/text/normwordfolder.h> +#include <vespa/searchlib/fef/itermdata.h> +#include <vespa/searchlib/fef/matchdata.h> #include <vespa/vespalib/objects/visit.h> +#include <algorithm> #include <cmath> +#include <limits> + +#include <vespa/log/log.h> +LOG_SETUP(".searchlib.query.streaming.queryterm"); + +using search::fef::ITermData; +using search::fef::ITermFieldData; +using search::fef::MatchData; +using search::fef::TermFieldMatchData; +using search::fef::TermFieldMatchDataPosition; namespace { @@ -176,6 +189,91 @@ QueryTerm::set_element_length(uint32_t hitlist_idx, uint32_t element_length) _hitList[hitlist_idx].set_element_length(element_length); } +namespace { + +uint16_t +cap_16_bits(uint32_t value) +{ + return std::min(value, static_cast<uint32_t>(std::numeric_limits<uint16_t>::max())); +} + +uint32_t +extract_field_length(const QueryTerm& term, uint32_t field_id) +{ + return (field_id < term.getFieldInfoSize()) ? term.getFieldInfo(field_id).getFieldLength() : search::fef::FieldPositionsIterator::UNKNOWN_LENGTH; +} + +void +set_interleaved_features(TermFieldMatchData& tmd, uint32_t field_length, uint32_t num_occs) +{ + tmd.setFieldLength(cap_16_bits(field_length)); + tmd.setNumOccs(cap_16_bits(num_occs)); +} + +} + +void +QueryTerm::unpack_match_data_helper(uint32_t docid, const fef::ITermData& td, fef::MatchData& match_data, const QueryTerm& fl_term) const +{ + HitList list; + const HitList & hitList = evaluateHits(list); + + if (!hitList.empty()) { // only unpack if we have a hit + LOG(debug, "Unpack match data for query term '%s:%s'", + index().c_str(), getTerm()); + + uint32_t lastFieldId = -1; + TermFieldMatchData *tmd = nullptr; + uint32_t num_occs = 0; + + // optimize for hitlist giving all hits for a single field in one chunk + for (const Hit & hit : hitList) { + uint32_t fieldId = hit.field_id(); + if (fieldId != lastFieldId) { + if (tmd != nullptr) { + if (tmd->needs_interleaved_features()) { + set_interleaved_features(*tmd, extract_field_length(fl_term, lastFieldId), num_occs); + } + // reset to notfound/unknown values + tmd = nullptr; + } + num_occs = 0; + + // setup for new field that had a hit + const ITermFieldData *tfd = td.lookupField(fieldId); + if (tfd != nullptr) { + tmd = match_data.resolveTermField(tfd->getHandle()); + tmd->setFieldId(fieldId); + // reset field match data, but only once per docId + if (tmd->getDocId() != docid) { + tmd->reset(docid); + } + } + lastFieldId = fieldId; + } + ++num_occs; + if (tmd != nullptr) { + TermFieldMatchDataPosition pos(hit.element_id(), hit.position(), + hit.element_weight(), hit.element_length()); + tmd->appendPosition(pos); + LOG(debug, "Append elemId(%u),position(%u), weight(%d), tfmd.weight(%d)", + pos.getElementId(), pos.getPosition(), pos.getElementWeight(), tmd->getWeight()); + } + } + if (tmd != nullptr) { + if (tmd->needs_interleaved_features()) { + set_interleaved_features(*tmd, extract_field_length(fl_term, lastFieldId), num_occs); + } + } + } +} + +void +QueryTerm::unpack_match_data(uint32_t docid, const fef::ITermData& td, fef::MatchData& match_data) +{ + unpack_match_data_helper(docid, td, match_data, *this); +} + NearestNeighborQueryNode* QueryTerm::as_nearest_neighbor_query_node() noexcept { diff --git a/searchlib/src/vespa/searchlib/query/streaming/queryterm.h b/searchlib/src/vespa/searchlib/query/streaming/queryterm.h index 108cc1c148d..de043237cff 100644 --- a/searchlib/src/vespa/searchlib/query/streaming/queryterm.h +++ b/searchlib/src/vespa/searchlib/query/streaming/queryterm.h @@ -9,6 +9,12 @@ #include <vespa/vespalib/objects/objectvisitor.h> #include <vespa/vespalib/stllike/string.h> +namespace search::fef { + +class ITermData; +class MatchData; + +} namespace search::streaming { class FuzzyTerm; @@ -98,7 +104,9 @@ public: virtual MultiTerm* as_multi_term() noexcept; virtual RegexpTerm* as_regexp_term() noexcept; virtual FuzzyTerm* as_fuzzy_term() noexcept; + virtual void unpack_match_data(uint32_t docid, const fef::ITermData& td, fef::MatchData& match_data); protected: + void unpack_match_data_helper(uint32_t docid, const fef::ITermData& td, fef::MatchData& match_data, const QueryTerm& fl_term) const; using QueryNodeResultBaseContainer = std::unique_ptr<QueryNodeResultBase>; string _index; EncodingBitMap _encoding; diff --git a/streamingvisitors/src/tests/querywrapper/querywrapper_test.cpp b/streamingvisitors/src/tests/querywrapper/querywrapper_test.cpp index dc3dbfca7ca..70b863e540b 100644 --- a/streamingvisitors/src/tests/querywrapper/querywrapper_test.cpp +++ b/streamingvisitors/src/tests/querywrapper/querywrapper_test.cpp @@ -84,12 +84,6 @@ QueryWrapperTest::testQueryWrapper() EXPECT_TRUE( tl[2].isPhraseTerm()); EXPECT_TRUE( tl[3].isPhraseTerm()); EXPECT_TRUE(!tl[4].isPhraseTerm()); - - EXPECT_EQUAL(tl[0].getPosAdjust(), 0u); - EXPECT_EQUAL(tl[1].getPosAdjust(), 2u); - EXPECT_EQUAL(tl[2].getPosAdjust(), 2u); - EXPECT_EQUAL(tl[3].getPosAdjust(), 2u); - EXPECT_EQUAL(tl[4].getPosAdjust(), 0u); } } diff --git a/streamingvisitors/src/tests/rank_processor/rank_processor_test.cpp b/streamingvisitors/src/tests/rank_processor/rank_processor_test.cpp index c9518b29884..0abff37d622 100644 --- a/streamingvisitors/src/tests/rank_processor/rank_processor_test.cpp +++ b/streamingvisitors/src/tests/rank_processor/rank_processor_test.cpp @@ -85,8 +85,8 @@ RankProcessorTest::test_unpack_match_data_for_term_node(bool interleaved_feature RankProcessor::unpack_match_data(1, *md, *_query_wrapper); EXPECT_EQ(invalid_id, tfmd->getDocId()); node->add(field_id, 0, 1, 0); + node->add(field_id, 0, 1, 1); auto& field_info = node->getFieldInfo(field_id); - field_info.setHitCount(mock_num_occs); field_info.setFieldLength(mock_field_length); RankProcessor::unpack_match_data(2, *md, *_query_wrapper); EXPECT_EQ(2, tfmd->getDocId()); @@ -97,7 +97,7 @@ RankProcessorTest::test_unpack_match_data_for_term_node(bool interleaved_feature EXPECT_EQ(0, tfmd->getNumOccs()); EXPECT_EQ(0, tfmd->getFieldLength()); } - EXPECT_EQ(1, tfmd->size()); + EXPECT_EQ(2, tfmd->size()); node->reset(); RankProcessor::unpack_match_data(3, *md, *_query_wrapper); EXPECT_EQ(2, tfmd->getDocId()); diff --git a/streamingvisitors/src/vespa/searchvisitor/querywrapper.h b/streamingvisitors/src/vespa/searchvisitor/querywrapper.h index b24f695196e..420ff215833 100644 --- a/streamingvisitors/src/vespa/searchvisitor/querywrapper.h +++ b/streamingvisitors/src/vespa/searchvisitor/querywrapper.h @@ -47,7 +47,6 @@ public: size_t getIndex() const { return _index; } bool isPhraseTerm() const { return _parent != nullptr; } bool isFirstPhraseTerm() const { return isPhraseTerm() && getIndex() == 0; } - size_t getPosAdjust() const { return _parent != nullptr ? _parent->width() - 1 : 0; } bool isGeoPosTerm() const { return (_term != nullptr) && _term->isGeoLoc(); } }; diff --git a/streamingvisitors/src/vespa/searchvisitor/rankprocessor.cpp b/streamingvisitors/src/vespa/searchvisitor/rankprocessor.cpp index 96e8ca89a04..09699f79427 100644 --- a/streamingvisitors/src/vespa/searchvisitor/rankprocessor.cpp +++ b/streamingvisitors/src/vespa/searchvisitor/rankprocessor.cpp @@ -53,11 +53,6 @@ getFeature(const RankProgram &rankProgram) { return resolver.resolve(0); } -uint16_t -cap_16_bits(uint32_t value) { - return std::min(value, static_cast<uint32_t>(std::numeric_limits<uint16_t>::max())); -} - } void @@ -285,68 +280,10 @@ RankProcessor::unpack_match_data(uint32_t docid, MatchData &matchData, QueryWrap QueryTermData & qtd = static_cast<QueryTermData &>(term.getTerm()->getQueryItem()); const ITermData &td = qtd.getTermData(); - HitList list; - const HitList & hitList = isPhrase - ? term.getParent()->evaluateHits(list) - : term.getTerm()->evaluateHits(list); - - if (hitList.size() > 0) { // only unpack if we have a hit - LOG(debug, "Unpack match data for query term '%s:%s' (%s)", - term.getTerm()->index().c_str(), term.getTerm()->getTerm(), isPhrase ? "phrase" : "term"); - - uint32_t lastFieldId = -1; - TermFieldMatchData *tmd = nullptr; - uint32_t fieldLen = search::fef::FieldPositionsIterator::UNKNOWN_LENGTH; - uint32_t num_occs = 0; - - // optimize for hitlist giving all hits for a single field in one chunk - for (const Hit & hit : hitList) { - uint32_t fieldId = hit.field_id(); - if (fieldId != lastFieldId) { - // reset to notfound/unknown values - tmd = nullptr; - fieldLen = search::fef::FieldPositionsIterator::UNKNOWN_LENGTH; - num_occs = 0; - - // setup for new field that had a hit - const ITermFieldData *tfd = td.lookupField(fieldId); - if (tfd != nullptr) { - tmd = matchData.resolveTermField(tfd->getHandle()); - tmd->setFieldId(fieldId); - // reset field match data, but only once per docId - if (tmd->getDocId() != docid) { - tmd->reset(docid); - } - } - // find fieldLen for new field - if (isPhrase) { - if (fieldId < term.getParent()->getFieldInfoSize()) { - auto& field_info = term.getParent()->getFieldInfo(fieldId); - fieldLen = field_info.getFieldLength(); - num_occs = field_info.getHitCount(); - } - } else { - if (fieldId < term.getTerm()->getFieldInfoSize()) { - auto& field_info = term.getTerm()->getFieldInfo(fieldId); - fieldLen = field_info.getFieldLength(); - num_occs = field_info.getHitCount(); - } - } - lastFieldId = fieldId; - } - if (tmd != nullptr) { - // adjust so that the position for phrase terms equals the match for the first term - TermFieldMatchDataPosition pos(hit.element_id(), hit.position() - term.getPosAdjust(), - hit.element_weight(), hit.element_length()); - tmd->appendPosition(pos); - LOG(debug, "Append elemId(%u),position(%u), weight(%d), tfmd.weight(%d)", - pos.getElementId(), pos.getPosition(), pos.getElementWeight(), tmd->getWeight()); - if (tmd->needs_interleaved_features()) { - tmd->setFieldLength(cap_16_bits(fieldLen)); - tmd->setNumOccs(cap_16_bits(num_occs)); - } - } - } + if (isPhrase) { + term.getParent()->unpack_match_data(docid, td, matchData); + } else { + term.getTerm()->unpack_match_data(docid, td, matchData); } } } |