diff options
author | Geir Storli <geirst@verizonmedia.com> | 2020-05-04 11:25:46 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2020-05-04 11:25:46 +0200 |
commit | 44d195021c1742fdbddfcb6f9afac7fdd993a96a (patch) | |
tree | 02a9eafb3b178bcf1aee696c588991812cd09b03 | |
parent | e3f135fc9ccad48719e462397915c6efeaf450b6 (diff) | |
parent | 020de7058e91e2b8e36043e9f46449b0ccf1000a (diff) |
Merge pull request #13136 from vespa-engine/toregge/unpack-interleaved-features-for-simple-phrase-search
Unpack interleaved features if they are needed in simple phrase search.
5 files changed, 75 insertions, 16 deletions
diff --git a/searchlib/src/tests/queryeval/simple_phrase/simple_phrase_test.cpp b/searchlib/src/tests/queryeval/simple_phrase/simple_phrase_test.cpp index abaae7e4333..dfe2e2edbd9 100644 --- a/searchlib/src/tests/queryeval/simple_phrase/simple_phrase_test.cpp +++ b/searchlib/src/tests/queryeval/simple_phrase/simple_phrase_test.cpp @@ -43,7 +43,7 @@ class Test : public vespalib::TestApp { void requireThatIteratorFindsSimplePhrase(bool useBlueprint); void requireThatIteratorFindsLongPhrase(bool useBlueprint); void requireThatStrictIteratorFindsNextMatch(bool useBlueprint); - void requireThatPhrasesAreUnpacked(bool useBlueprint); + void requireThatPhrasesAreUnpacked(bool useBlueprint, bool unpack_normal_features, bool unpack_interleaved_features); void requireThatTermsCanBeEvaluatedInPriorityOrder(); void requireThatBlueprintExposesFieldWithEstimate(); void requireThatBlueprintForcesPositionDataOnChildren(); @@ -63,13 +63,19 @@ Test::Main() TEST_DO(requireThatIteratorFindsSimplePhrase(false)); TEST_DO(requireThatIteratorFindsLongPhrase(false)); TEST_DO(requireThatStrictIteratorFindsNextMatch(false)); - TEST_DO(requireThatPhrasesAreUnpacked(false)); + TEST_DO(requireThatPhrasesAreUnpacked(false, true, false)); + TEST_DO(requireThatPhrasesAreUnpacked(false, true, true)); + TEST_DO(requireThatPhrasesAreUnpacked(false, false, false)); + TEST_DO(requireThatPhrasesAreUnpacked(false, false, true)); TEST_DO(requireThatTermsCanBeEvaluatedInPriorityOrder()); TEST_DO(requireThatIteratorFindsSimplePhrase(true)); TEST_DO(requireThatIteratorFindsLongPhrase(true)); TEST_DO(requireThatStrictIteratorFindsNextMatch(true)); - TEST_DO(requireThatPhrasesAreUnpacked(true)); + TEST_DO(requireThatPhrasesAreUnpacked(true, true, false)); + TEST_DO(requireThatPhrasesAreUnpacked(true, true, true)); + TEST_DO(requireThatPhrasesAreUnpacked(true, false, false)); + TEST_DO(requireThatPhrasesAreUnpacked(true, false, true)); TEST_DO(requireThatBlueprintExposesFieldWithEstimate()); TEST_DO(requireThatBlueprintForcesPositionDataOnChildren()); TEST_DO(requireThatIteratorHonorsFutureDoom()); @@ -107,6 +113,7 @@ public: void setStrict(bool strict) { _strict = strict; } void setOrder(const vector<uint32_t> &order) { _order = order; } const TermFieldMatchData &tmd() const { return *_md->resolveTermField(phrase_handle); } + TermFieldMatchData &writable_term_field_match_data() { return *_md->resolveTermField(phrase_handle); } PhraseSearchTest &addTerm(const string &term, bool last) { return addTerm(term, FakeResult() @@ -156,7 +163,10 @@ public: } else { search::fef::TermFieldMatchDataArray childMatch; for (size_t i = 0; i < _children.size(); ++i) { - childMatch.add(_md->resolveTermField(childHandle(i))); + auto *child_term_field_match_data = _md->resolveTermField(childHandle(i)); + child_term_field_match_data->setNeedInterleavedFeatures(tmd().needs_interleaved_features()); + child_term_field_match_data->setNeedNormalFeatures(true); + childMatch.add(child_term_field_match_data); } SimplePhraseSearch::Children children; for (size_t i = 0; i < _children.size(); ++i) { @@ -264,21 +274,34 @@ void Test::requireThatStrictIteratorFindsNextMatch(bool useBlueprint) { EXPECT_TRUE(search->isAtEnd()); } -void Test::requireThatPhrasesAreUnpacked(bool useBlueprint) { +void Test::requireThatPhrasesAreUnpacked(bool useBlueprint, bool unpack_normal_features, bool unpack_interleaved_features) { PhraseSearchTest test; test.addTerm("foo", FakeResult() - .doc(doc_match).pos(1).pos(11).pos(21)); + .doc(doc_match).pos(1).pos(11).pos(21).field_length(30).num_occs(3)); test.addTerm("bar", FakeResult() - .doc(doc_match).pos(2).pos(16).pos(22)); + .doc(doc_match).pos(2).pos(16).pos(22).field_length(30).num_occs(3)); + test.writable_term_field_match_data().setNeedNormalFeatures(unpack_normal_features); + test.writable_term_field_match_data().setNeedInterleavedFeatures(unpack_interleaved_features); test.fetchPostings(useBlueprint); unique_ptr<SearchIterator> search(test.createSearch(useBlueprint)); EXPECT_TRUE(search->seek(doc_match)); search->unpack(doc_match); EXPECT_EQUAL(doc_match, test.tmd().getDocId()); - EXPECT_EQUAL(2, std::distance(test.tmd().begin(), test.tmd().end())); - EXPECT_EQUAL(1u, test.tmd().begin()->getPosition()); - EXPECT_EQUAL(21u, (test.tmd().begin() + 1)->getPosition()); + if (unpack_normal_features) { + EXPECT_EQUAL(2, std::distance(test.tmd().begin(), test.tmd().end())); + EXPECT_EQUAL(1u, test.tmd().begin()->getPosition()); + EXPECT_EQUAL(21u, (test.tmd().begin() + 1)->getPosition()); + } else { + EXPECT_EQUAL(0, std::distance(test.tmd().begin(), test.tmd().end())); + } + if (unpack_interleaved_features) { + EXPECT_EQUAL(2u, test.tmd().getNumOccs()); + EXPECT_EQUAL(30u, test.tmd().getFieldLength()); + } else { + EXPECT_EQUAL(0u, test.tmd().getNumOccs()); + EXPECT_EQUAL(0u, test.tmd().getFieldLength()); + } } void Test::requireThatTermsCanBeEvaluatedInPriorityOrder() { diff --git a/searchlib/src/vespa/searchlib/queryeval/fake_result.h b/searchlib/src/vespa/searchlib/queryeval/fake_result.h index bbae5e027cf..1823ebbd616 100644 --- a/searchlib/src/vespa/searchlib/queryeval/fake_result.h +++ b/searchlib/src/vespa/searchlib/queryeval/fake_result.h @@ -33,11 +33,15 @@ public: uint32_t docId; std::vector<Element> elements; feature_t rawScore; - Document(uint32_t id) : docId(id), elements(), rawScore(0) {} + uint32_t field_length; + uint32_t num_occs; + Document(uint32_t id) : docId(id), elements(), rawScore(0), field_length(0), num_occs(0) {} bool operator==(const Document &rhs) const { return (docId == rhs.docId && elements == rhs.elements && - rawScore == rhs.rawScore); + rawScore == rhs.rawScore && + field_length == rhs.field_length && + num_occs == rhs.num_occs); } }; @@ -95,6 +99,16 @@ public: return *this; } + FakeResult &field_length(uint32_t field_length_) { + _documents.back().field_length = field_length_; + return *this; + } + + FakeResult &num_occs(uint32_t num_occs_) { + _documents.back().num_occs = num_occs_; + return *this; + } + bool operator==(const FakeResult &rhs) const { return _documents == rhs._documents; } diff --git a/searchlib/src/vespa/searchlib/queryeval/fake_search.cpp b/searchlib/src/vespa/searchlib/queryeval/fake_search.cpp index 4ee214b0f16..6f2d3cb6b2a 100644 --- a/searchlib/src/vespa/searchlib/queryeval/fake_search.cpp +++ b/searchlib/src/vespa/searchlib/queryeval/fake_search.cpp @@ -47,6 +47,10 @@ FakeSearch::doUnpack(uint32_t docid) if (is_attr()) { _tfmda[0]->appendPosition(PosCtx(0, 0, sum_weight, 1)); } + if (_tfmda[0]->needs_interleaved_features()) { + _tfmda[0]->setNumOccs(doc.num_occs); + _tfmda[0]->setFieldLength(doc.field_length); + } } void diff --git a/searchlib/src/vespa/searchlib/queryeval/simple_phrase_blueprint.cpp b/searchlib/src/vespa/searchlib/queryeval/simple_phrase_blueprint.cpp index 2c63c96695a..c52cf6ddae1 100644 --- a/searchlib/src/vespa/searchlib/queryeval/simple_phrase_blueprint.cpp +++ b/searchlib/src/vespa/searchlib/queryeval/simple_phrase_blueprint.cpp @@ -65,7 +65,10 @@ SimplePhraseBlueprint::createLeafSearch(const fef::TermFieldMatchDataArray &tfmd for (size_t i = 0; i < _terms.size(); ++i) { const State &childState = _terms[i]->getState(); assert(childState.numFields() == 1); - childMatch.add(childState.field(0).resolve(*md)); + auto *child_term_field_match_data = childState.field(0).resolve(*md); + child_term_field_match_data->setNeedInterleavedFeatures(tfmda[0]->needs_interleaved_features()); + child_term_field_match_data->setNeedNormalFeatures(true); + childMatch.add(child_term_field_match_data); children[i] = _terms[i]->createSearch(*md, strict).release(); order_map.insert(std::make_pair(childState.estimate().estHits, i)); } diff --git a/searchlib/src/vespa/searchlib/queryeval/simple_phrase_search.cpp b/searchlib/src/vespa/searchlib/queryeval/simple_phrase_search.cpp index 767cdba7d68..df0dff06582 100644 --- a/searchlib/src/vespa/searchlib/queryeval/simple_phrase_search.cpp +++ b/searchlib/src/vespa/searchlib/queryeval/simple_phrase_search.cpp @@ -102,16 +102,31 @@ public: void fillPositions(TermFieldMatchData &tmd) { if (_tmds.size() == 1) { - for (const fef::TermFieldMatchDataPosition & pos : *_tmds[0]) { - tmd.appendPosition(pos); + if (tmd.needs_normal_features()) { + for (const fef::TermFieldMatchDataPosition & pos : *_tmds[0]) { + tmd.appendPosition(pos); + } + } + if (tmd.needs_interleaved_features()) { + tmd.setNumOccs(_tmds[0]->size()); + tmd.setFieldLength(_tmds[0]->getFieldLength()); } } else { + const bool needs_normal_features = tmd.needs_normal_features(); + uint32_t num_occs = 0; while (iterator(_eval_order[0]) != end(_eval_order[0])) { if (match()) { - tmd.appendPosition(*iterator(0)); + if (needs_normal_features) { + tmd.appendPosition(*iterator(0)); + } + ++num_occs; } ++iterator(_eval_order[0]); } + if (tmd.needs_interleaved_features()) { + tmd.setNumOccs(num_occs); + tmd.setFieldLength(_tmds[0]->getFieldLength()); + } } } }; |