diff options
author | Tor Egge <Tor.Egge@online.no> | 2022-06-01 12:40:01 +0200 |
---|---|---|
committer | Tor Egge <Tor.Egge@online.no> | 2022-06-01 12:54:44 +0200 |
commit | 8f9b40cbbd64740b98b19a12a875c0ddc8b773a5 (patch) | |
tree | 37d24c0551e309c827d1dd3a2dd7e6e01059e166 | |
parent | 49fe9df15b37cbd73964d037a8382cf36ec53b5f (diff) |
Don't calculate score or weights when unpacking for a term in
a filter field or for a term not used by ranking.
11 files changed, 245 insertions, 71 deletions
diff --git a/searchlib/src/tests/queryeval/dot_product/dot_product_test.cpp b/searchlib/src/tests/queryeval/dot_product/dot_product_test.cpp index 624ad331457..77cc7920ea8 100644 --- a/searchlib/src/tests/queryeval/dot_product/dot_product_test.cpp +++ b/searchlib/src/tests/queryeval/dot_product/dot_product_test.cpp @@ -38,11 +38,22 @@ struct DP { static const uint32_t fieldId = 0; static const TermFieldHandle handle = 0; std::vector<std::pair<std::string, uint32_t> > tokens; + bool field_is_filter; + bool term_is_not_needed; + + DP() + : tokens(), + field_is_filter(false), + term_is_not_needed(false) + { + } DP &add(const std::string &token, uint32_t weight) { tokens.push_back(std::make_pair(token, weight)); return *this; } + DP& set_field_is_filter(bool value) { field_is_filter = value; return *this; } + DP& set_term_is_not_needed(bool value) { term_is_not_needed = value; return *this; } Node::UP createNode() const { SimpleDotProduct *node = new SimpleDotProduct(tokens.size(), "view", 0, Weight(0)); @@ -54,9 +65,12 @@ struct DP { FakeResult search(Searchable &searchable, const std::string &field, bool strict) const { MatchData::UP md(MatchData::makeTestInstance(1, 1)); + if (term_is_not_needed) { + md->resolveTermField(handle)->tagAsNotNeeded(); + } FakeRequestContext requestContext; Node::UP node = createNode(); - FieldSpecList fields = FieldSpecList().add(FieldSpec(field, fieldId, handle)); + FieldSpecList fields = FieldSpecList().add(FieldSpec(field, fieldId, handle, field_is_filter)); queryeval::Blueprint::UP bp = searchable.createBlueprint(requestContext, fields, *node); bp->fetchPostings(ExecuteInfo::create(strict)); SearchIterator::UP sb = bp->createSearch(*md, strict); @@ -111,7 +125,7 @@ struct MockFixture { childMatch.push_back(md->resolveTermField(children.size())); children.push_back(mock); weights.push_back(1); - search = DotProductSearch::create(children, tfmd, childMatch, weights, std::move(md)); + search = DotProductSearch::create(children, tfmd, false, childMatch, weights, std::move(md)); } }; @@ -126,16 +140,42 @@ struct MockFixture { } // namespace <unnamed> -TEST("test Simple") { +std::function<int(int)> +make_score_filter(bool field_is_filter, bool term_is_not_needed) +{ + if (field_is_filter || term_is_not_needed) { + return [](int) noexcept { return 0; }; + } else { + return [](int value) noexcept { return value; }; + } +} + +void run_simple(bool field_is_filter, bool term_is_not_needed) +{ + auto score_filter = make_score_filter(field_is_filter, term_is_not_needed); FakeResult expect = FakeResult() - .doc(3).score(30 * 3) - .doc(5).score(50 * 5) - .doc(7).score(70 * 7); - DP ws = DP().add("7", 70).add("5", 50).add("3", 30).add("100", 1000); + .doc(3).score(score_filter(30 * 3)) + .doc(5).score(score_filter(50 * 5)) + .doc(7).score(score_filter(70 * 7)); + DP ws = DP().add("7", 70).add("5", 50).add("3", 30).add("100", 1000) + .set_field_is_filter(field_is_filter) + .set_term_is_not_needed(term_is_not_needed); TEST_DO(verifySimple(expect, ws)); } +TEST("test Simple") { + TEST_DO(run_simple(false, false)); +} + +TEST("test Simple filter field") { + TEST_DO(run_simple(true, false)); +} + +TEST("test Simple unranked") { + TEST_DO(run_simple(false, true)); +} + TEST("test Simple Single") { FakeResult expect = FakeResult() .doc(7).score(70 * 7); @@ -144,21 +184,37 @@ TEST("test Simple Single") { TEST_DO(verifySimple(expect, ws)); } -TEST("test Multi") { +void run_multi(bool field_is_filter, bool term_is_not_needed) +{ + auto score_filter = make_score_filter(field_is_filter, term_is_not_needed); FakeSearchable index; setupFakeSearchable(index); FakeResult expect = FakeResult() - .doc(3).score(30 * 3 + 130 * 2 * 3 + 230 * 3 * 3) - .doc(5).score(50 * 5 + 150 * 2 * 5) - .doc(7).score(70 * 7); + .doc(3).score(score_filter(30 * 3 + 130 * 2 * 3 + 230 * 3 * 3)) + .doc(5).score(score_filter(50 * 5 + 150 * 2 * 5)) + .doc(7).score(score_filter(70 * 7)); DP ws = DP().add("7", 70).add("5", 50).add("3", 30) .add("15", 150).add("13", 130) - .add("23", 230).add("100", 1000); + .add("23", 230).add("100", 1000) + .set_field_is_filter(field_is_filter) + .set_term_is_not_needed(term_is_not_needed); EXPECT_EQUAL(expect, ws.search(index, "multi-field", true)); EXPECT_EQUAL(expect, ws.search(index, "multi-field", false)); } + +TEST("test Multi") { + TEST_DO(run_multi(false, false)); +} + +TEST("test Multi filter field") { + TEST_DO(run_multi(true, false)); +} +TEST("test Multi unranked") { + TEST_DO(run_multi(false, true)); +} + TEST_F("test Eager Empty Child", MockFixture(search::endDocId, {})) { MockSearch *mock = f1.mock; SearchIterator &search = *f1.search; @@ -210,14 +266,14 @@ private: SearchIterator::UP create(const std::vector<SearchIterator*> &children) const override { std::vector<fef::TermFieldMatchData*> no_child_match; MatchData::UP no_match_data; - return DotProductSearch::create(children, _tfmd, no_child_match, _weights, std::move(no_match_data)); + return DotProductSearch::create(children, _tfmd, false, no_child_match, _weights, std::move(no_match_data)); } }; class WeightIteratorChildrenVerifier : public search::test::DwaIteratorChildrenVerifier { private: SearchIterator::UP create(std::vector<DocumentWeightIterator> && children) const override { - return SearchIterator::UP(DotProductSearch::create(_tfmd, _weights, std::move(children))); + return SearchIterator::UP(DotProductSearch::create(_tfmd, false, _weights, std::move(children))); } }; diff --git a/searchlib/src/tests/queryeval/sparse_vector_benchmark/sparse_vector_benchmark_test.cpp b/searchlib/src/tests/queryeval/sparse_vector_benchmark/sparse_vector_benchmark_test.cpp index e83226baf84..df01744d619 100644 --- a/searchlib/src/tests/queryeval/sparse_vector_benchmark/sparse_vector_benchmark_test.cpp +++ b/searchlib/src/tests/queryeval/sparse_vector_benchmark/sparse_vector_benchmark_test.cpp @@ -223,8 +223,18 @@ struct RiseWandFactory : SparseVectorFactory { struct WeightedSetFactory : SparseVectorFactory { mutable TermFieldMatchData tfmd; + bool field_is_filter; + + WeightedSetFactory(bool field_is_filter_, bool term_is_not_needed) + : tfmd(), + field_is_filter(field_is_filter_) + { + if (term_is_not_needed) { + tfmd.tagAsNotNeeded(); + } + } virtual std::string name() const override { - return vespalib::make_string("WeightedSet"); + return vespalib::make_string("WeightedSet%s%s", (field_is_filter ? "-filter" : ""), (tfmd.isNotNeeded() ? "-unranked" : "")); } SearchIterator::UP createSparseVector(ChildFactory &childFactory, uint32_t childCnt, uint32_t limit) const override { std::vector<SearchIterator *> terms; @@ -234,14 +244,24 @@ struct WeightedSetFactory : SparseVectorFactory { terms.push_back(childFactory.createChild(i, limit).release()); weights.push_back(default_weight); } - return WeightedSetTermSearch::create(terms, tfmd, weights, MatchData::UP(nullptr)); + return WeightedSetTermSearch::create(terms, tfmd, field_is_filter, weights, MatchData::UP(nullptr)); } }; struct DotProductFactory : SparseVectorFactory { mutable TermFieldMatchData tfmd; + bool field_is_filter; + + DotProductFactory(bool field_is_filter_, bool term_is_not_needed) + : tfmd(), + field_is_filter(field_is_filter_) + { + if (term_is_not_needed) { + tfmd.tagAsNotNeeded(); + } + } virtual std::string name() const override { - return vespalib::make_string("DotProduct"); + return vespalib::make_string("DotProduct%s%s", (field_is_filter ? "-filter" : ""), (tfmd.isNotNeeded() ? "-unranked" : "")); } SearchIterator::UP createSparseVector(ChildFactory &childFactory, uint32_t childCnt, uint32_t limit) const override { MatchDataLayout layout; @@ -258,7 +278,7 @@ struct DotProductFactory : SparseVectorFactory { childMatch.push_back(md->resolveTermField(handles[i])); weights.push_back(default_weight); } - return DotProductSearch::create(terms, tfmd, childMatch, weights, std::move(md)); + return DotProductSearch::create(terms, tfmd, field_is_filter, childMatch, weights, std::move(md)); } }; @@ -333,6 +353,7 @@ struct Result { Result run_single_benchmark(FilterStrategy &filterStrategy, SparseVectorFactory &vectorFactory, ChildFactory &childFactory, uint32_t childCnt, uint32_t limit) { SearchIterator::UP search(filterStrategy.createRoot(vectorFactory, childFactory, childCnt, limit)); SearchIterator &sb = *search; + sb.initFullRange(); uint32_t num_hits = 0; vespalib::Timer timer; for (sb.seek(1); !sb.isAtEnd(); sb.seek(sb.getDocId() + 1)) { @@ -383,13 +404,21 @@ public: void benchmark_all_operators(Setup &setup, const std::vector<uint32_t> &child_counts) { VespaWandFactory vespaWand256(256); RiseWandFactory riseWand256(256); - WeightedSetFactory weightedSet; - DotProductFactory dotProduct; + WeightedSetFactory weightedSet(false, false); + WeightedSetFactory weightedSet_filter(true, false); + WeightedSetFactory weightedSet_unranked(false, true); + DotProductFactory dotProduct(false, false); + DotProductFactory dotProduct_filter(true, false); + DotProductFactory dotProduct_unranked(false, true); OrFactory plain_or; setup.benchmark(vespaWand256, child_counts); setup.benchmark(riseWand256, child_counts); setup.benchmark(weightedSet, child_counts); + setup.benchmark(weightedSet_filter, child_counts); + setup.benchmark(weightedSet_unranked, child_counts); setup.benchmark(dotProduct, child_counts); + setup.benchmark(dotProduct_filter, child_counts); + setup.benchmark(dotProduct_unranked, child_counts); setup.benchmark(plain_or, child_counts); } diff --git a/searchlib/src/tests/queryeval/weighted_set_term/weighted_set_term_test.cpp b/searchlib/src/tests/queryeval/weighted_set_term/weighted_set_term_test.cpp index 229c8b9501c..ba75d7b0da0 100644 --- a/searchlib/src/tests/queryeval/weighted_set_term/weighted_set_term_test.cpp +++ b/searchlib/src/tests/queryeval/weighted_set_term/weighted_set_term_test.cpp @@ -41,8 +41,16 @@ struct WS { MatchDataLayout layout; TermFieldHandle handle; std::vector<std::pair<std::string, uint32_t> > tokens; + bool field_is_filter; + bool term_is_not_needed; - WS() : layout(), handle(layout.allocTermField(fieldId)), tokens() { + WS() + : layout(), + handle(layout.allocTermField(fieldId)), + tokens(), + field_is_filter(false), + term_is_not_needed(false) + { MatchData::UP tmp = layout.createMatchData(); ASSERT_TRUE(tmp->resolveTermField(handle)->getFieldId() == fieldId); } @@ -51,6 +59,8 @@ struct WS { tokens.push_back(std::make_pair(token, weight)); return *this; } + WS& set_field_is_filter(bool value) { field_is_filter = value; return *this; } + WS& set_term_is_not_needed(bool value) { term_is_not_needed = value; return *this; } Node::UP createNode() const { SimpleWeightedSetTerm *node = new SimpleWeightedSetTerm(tokens.size(), "view", 0, Weight(0)); @@ -74,8 +84,11 @@ struct WS { FakeResult search(Searchable &searchable, const std::string &field, bool strict) const { FakeRequestContext requestContext; MatchData::UP md = layout.createMatchData(); + if (term_is_not_needed) { + md->resolveTermField(handle)->tagAsNotNeeded(); + } Node::UP node = createNode(); - FieldSpecList fields = FieldSpecList().add(FieldSpec(field, fieldId, handle)); + FieldSpecList fields = FieldSpecList().add(FieldSpec(field, fieldId, handle, field_is_filter)); queryeval::Blueprint::UP bp = searchable.createBlueprint(requestContext, fields, *node); bp->fetchPostings(ExecuteInfo::create(strict)); SearchIterator::UP sb = bp->createSearch(*md, strict); @@ -123,20 +136,30 @@ struct MockFixture { mock = new MockSearch(initial); children.push_back(mock); weights.push_back(1); - search = WeightedSetTermSearch::create(children, tfmd, weights, MatchData::UP(nullptr)); + search = WeightedSetTermSearch::create(children, tfmd, false, weights, MatchData::UP(nullptr)); } }; } // namespace <unnamed> -TEST("testSimple") { +void run_simple(bool field_is_filter, bool term_is_not_needed) +{ FakeSearchable index; setupFakeSearchable(index); - FakeResult expect = FakeResult() - .doc(3).elem(0).weight(30).pos(0) - .doc(5).elem(0).weight(50).pos(0) - .doc(7).elem(0).weight(70).pos(0); - WS ws = WS().add("7", 70).add("5", 50).add("3", 30).add("100", 1000); + FakeResult expect; + if (field_is_filter || term_is_not_needed) { + expect.doc(3) + .doc(5) + .doc(7); + } else { + expect.doc(3).elem(0).weight(30).pos(0) + .doc(5).elem(0).weight(50).pos(0) + .doc(7).elem(0).weight(70).pos(0); + } + WS ws = WS().add("7", 70).add("5", 50).add("3", 30).add("100", 1000) + .set_field_is_filter(field_is_filter) + .set_term_is_not_needed(term_is_not_needed); +; EXPECT_TRUE(ws.isGenericSearch(index, "field", true)); EXPECT_TRUE(ws.isGenericSearch(index, "field", false)); EXPECT_TRUE(ws.isGenericSearch(index, "multi-field", true)); @@ -148,16 +171,37 @@ TEST("testSimple") { EXPECT_EQUAL(expect, ws.search(index, "multi-field", false)); } -TEST("testMulti") { +TEST("testSimple") { + TEST_DO(run_simple(false, false)); +} + +TEST("testSimple filter field") { + TEST_DO(run_simple(true, false)); +} + +TEST("testSimple unranked") { + TEST_DO(run_simple(false, true)); +} + +void run_multi(bool field_is_filter, bool term_is_not_needed) +{ FakeSearchable index; setupFakeSearchable(index); - FakeResult expect = FakeResult() - .doc(3).elem(0).weight(230).pos(0).elem(0).weight(130).pos(0).elem(0).weight(30).pos(0) - .doc(5).elem(0).weight(150).pos(0).elem(0).weight(50).pos(0) - .doc(7).elem(0).weight(70).pos(0); + FakeResult expect; + if (field_is_filter || term_is_not_needed) { + expect.doc(3) + .doc(5) + .doc(7); + } else { + expect.doc(3).elem(0).weight(230).pos(0).elem(0).weight(130).pos(0).elem(0).weight(30).pos(0) + .doc(5).elem(0).weight(150).pos(0).elem(0).weight(50).pos(0) + .doc(7).elem(0).weight(70).pos(0); + } WS ws = WS().add("7", 70).add("5", 50).add("3", 30) .add("15", 150).add("13", 130) - .add("23", 230).add("100", 1000); + .add("23", 230).add("100", 1000) + .set_field_is_filter(field_is_filter) + .set_term_is_not_needed(term_is_not_needed); EXPECT_TRUE(ws.isGenericSearch(index, "multi-field", true)); EXPECT_TRUE(ws.isGenericSearch(index, "multi-field", false)); @@ -165,6 +209,18 @@ TEST("testMulti") { EXPECT_EQUAL(expect, ws.search(index, "multi-field", false)); } +TEST("testMulti") { + TEST_DO(run_multi(false, false)); +} + +TEST("testMulti filter field") { + TEST_DO(run_multi(true, false)); +} + +TEST("testMulti unranked") { + TEST_DO(run_multi(false, true)); +} + TEST_F("test Eager Empty Child", MockFixture(search::endDocId)) { MockSearch *mock = f1.mock; SearchIterator &search = *f1.search; @@ -194,14 +250,14 @@ TEST_F("test Eager Matching Child", MockFixture(5)) { class IteratorChildrenVerifier : public search::test::IteratorChildrenVerifier { private: SearchIterator::UP create(const std::vector<SearchIterator*> &children) const override { - return SearchIterator::UP(WeightedSetTermSearch::create(children, _tfmd, _weights, MatchData::UP(nullptr))); + return SearchIterator::UP(WeightedSetTermSearch::create(children, _tfmd, false, _weights, MatchData::UP(nullptr))); } }; class WeightIteratorChildrenVerifier : public search::test::DwaIteratorChildrenVerifier { private: SearchIterator::UP create(std::vector<DocumentWeightIterator> && children) const override { - return SearchIterator::UP(WeightedSetTermSearch::create(_tfmd, _weights, std::move(children))); + return SearchIterator::UP(WeightedSetTermSearch::create(_tfmd, false, _weights, std::move(children))); } }; diff --git a/searchlib/src/vespa/searchlib/attribute/attribute_blueprint_factory.cpp b/searchlib/src/vespa/searchlib/attribute/attribute_blueprint_factory.cpp index 888156ce352..3526a921645 100644 --- a/searchlib/src/vespa/searchlib/attribute/attribute_blueprint_factory.cpp +++ b/searchlib/src/vespa/searchlib/attribute/attribute_blueprint_factory.cpp @@ -409,6 +409,7 @@ public: SearchIterator::UP createLeafSearch(const TermFieldMatchDataArray &tfmda, bool) const override { assert(tfmda.size() == 1); + assert(getState().numFields() == 1); if (_terms.size() == 0) { return std::make_unique<queryeval::EmptySearch>(); } @@ -418,7 +419,8 @@ public: for (const IDocumentWeightAttribute::LookupResult &r : _terms) { _attr.create(r.posting_idx, iterators); } - return SearchType::create(*tfmda[0], _weights, std::move(iterators)); + bool field_is_filter = getState().fields()[0].isFilter(); + return SearchType::create(*tfmda[0], field_is_filter, _weights, std::move(iterators)); } std::unique_ptr<SearchIterator> createFilterSearch(bool strict, FilterConstraint constraint) const override; diff --git a/searchlib/src/vespa/searchlib/attribute/attribute_weighted_set_blueprint.cpp b/searchlib/src/vespa/searchlib/attribute/attribute_weighted_set_blueprint.cpp index 32bd316d58d..c467590fe69 100644 --- a/searchlib/src/vespa/searchlib/attribute/attribute_weighted_set_blueprint.cpp +++ b/searchlib/src/vespa/searchlib/attribute/attribute_weighted_set_blueprint.cpp @@ -156,6 +156,7 @@ queryeval::SearchIterator::UP AttributeWeightedSetBlueprint::createLeafSearch(const fef::TermFieldMatchDataArray &tfmda, bool strict) const { assert(tfmda.size() == 1); + assert(getState().numFields() == 1); fef::TermFieldMatchData &tfmd = *tfmda[0]; if (strict) { // use generic weighted set search fef::MatchDataLayout layout; @@ -167,7 +168,8 @@ AttributeWeightedSetBlueprint::createLeafSearch(const fef::TermFieldMatchDataArr // TODO: pass ownership with unique_ptr children[i] = _contexts[i]->createIterator(child_tfmd, true).release(); } - return queryeval::SearchIterator::UP(queryeval::WeightedSetTermSearch::create(children, tfmd, _weights, std::move(match_data))); + bool field_is_filter = getState().fields()[0].isFilter(); + return queryeval::SearchIterator::UP(queryeval::WeightedSetTermSearch::create(children, tfmd, field_is_filter, _weights, std::move(match_data))); } else { // use attribute filter optimization bool isSingleValue = !_attr.hasMultiValue(); bool isString = (_attr.isStringType() && _attr.hasEnum()); diff --git a/searchlib/src/vespa/searchlib/queryeval/dot_product_blueprint.cpp b/searchlib/src/vespa/searchlib/queryeval/dot_product_blueprint.cpp index 70d07fa3050..1e2b8109778 100644 --- a/searchlib/src/vespa/searchlib/queryeval/dot_product_blueprint.cpp +++ b/searchlib/src/vespa/searchlib/queryeval/dot_product_blueprint.cpp @@ -52,6 +52,7 @@ DotProductBlueprint::createLeafSearch(const search::fef::TermFieldMatchDataArray bool) const { assert(tfmda.size() == 1); + assert(getState().numFields() == 1); fef::MatchData::UP md = _layout.createMatchData(); std::vector<fef::TermFieldMatchData*> childMatch; std::vector<SearchIterator*> children(_terms.size()); @@ -62,7 +63,8 @@ DotProductBlueprint::createLeafSearch(const search::fef::TermFieldMatchDataArray // TODO: pass ownership with unique_ptr children[i] = _terms[i]->createSearch(*md, true).release(); } - return DotProductSearch::create(children, *tfmda[0], childMatch, _weights, std::move(md)); + bool field_is_filter = getState().fields()[0].isFilter(); + return DotProductSearch::create(children, *tfmda[0], field_is_filter, childMatch, _weights, std::move(md)); } SearchIterator::UP diff --git a/searchlib/src/vespa/searchlib/queryeval/dot_product_search.cpp b/searchlib/src/vespa/searchlib/queryeval/dot_product_search.cpp index 78608f2b21c..393140784b0 100644 --- a/searchlib/src/vespa/searchlib/queryeval/dot_product_search.cpp +++ b/searchlib/src/vespa/searchlib/queryeval/dot_product_search.cpp @@ -35,6 +35,7 @@ private: ref_t *_data_stash; ref_t *_data_end; IteratorPack _children; + bool _field_is_filter; void seek_child(ref_t child, uint32_t docId) { _termPos[child] = _children.seek(child, docId); @@ -42,6 +43,7 @@ private: public: DotProductSearchImpl(TermFieldMatchData &tmd, + bool field_is_filter, const std::vector<int32_t> &weights, IteratorPack &&iteratorPack) : _tmd(tmd), @@ -52,7 +54,8 @@ public: _data_begin(nullptr), _data_stash(nullptr), _data_end(nullptr), - _children(std::move(iteratorPack)) + _children(std::move(iteratorPack)), + _field_is_filter(field_is_filter) { HEAP::require_left_heap(); assert(_weights.size() > 0); @@ -63,6 +66,9 @@ public: } _data_begin = &_data_space[0]; _data_end = _data_begin + _data_space.size(); + if (_field_is_filter || _tmd.isNotNeeded()) { + _tmd.setRawScore(TermFieldMatchData::invalidId(), 0.0); + } } void doSeek(uint32_t docId) override { @@ -78,17 +84,21 @@ public: } void doUnpack(uint32_t docId) override { - feature_t score = 0.0; - while ((_data_begin < _data_stash) && - _termPos[HEAP::front(_data_begin, _data_stash)] == docId) - { - HEAP::pop(_data_begin, _data_stash--, _cmpDocId); - const ref_t child = *_data_stash; - double tmp = _weights[child]; - tmp *= _children.get_weight(child, docId); - score += tmp; - }; - _tmd.setRawScore(docId, score); + if (!_field_is_filter && !_tmd.isNotNeeded()) { + feature_t score = 0.0; + while ((_data_begin < _data_stash) && + _termPos[HEAP::front(_data_begin, _data_stash)] == docId) + { + HEAP::pop(_data_begin, _data_stash--, _cmpDocId); + const ref_t child = *_data_stash; + double tmp = _weights[child]; + tmp *= _children.get_weight(child, docId); + score += tmp; + }; + _tmd.setRawScore(docId, score); + } else { + _tmd.resetOnlyDocId(docId); + } } void initRange(uint32_t begin, uint32_t end) override { @@ -146,6 +156,7 @@ private: SearchIterator::UP DotProductSearch::create(const std::vector<SearchIterator*> &children, TermFieldMatchData &tmd, + bool field_is_filter, const std::vector<TermFieldMatchData*> &childMatch, const std::vector<int32_t> &weights, MatchData::UP md) @@ -158,15 +169,16 @@ DotProductSearch::create(const std::vector<SearchIterator*> &children, *childMatch[0], weights[0], std::move(md)); } if (childMatch.size() < 128) { - return SearchIterator::UP(new ArrayHeapImpl(tmd, weights, SearchIteratorPack(children, childMatch, std::move(md)))); + return SearchIterator::UP(new ArrayHeapImpl(tmd, field_is_filter, weights, SearchIteratorPack(children, childMatch, std::move(md)))); } - return SearchIterator::UP(new HeapImpl(tmd, weights, SearchIteratorPack(children, childMatch, std::move(md)))); + return SearchIterator::UP(new HeapImpl(tmd, field_is_filter, weights, SearchIteratorPack(children, childMatch, std::move(md)))); } //----------------------------------------------------------------------------- SearchIterator::UP DotProductSearch::create(TermFieldMatchData &tmd, + bool field_is_filter, const std::vector<int32_t> &weights, std::vector<DocumentWeightIterator> &&iterators) { @@ -174,9 +186,9 @@ DotProductSearch::create(TermFieldMatchData &tmd, typedef DotProductSearchImpl<vespalib::LeftHeap, AttributeIteratorPack> HeapImpl; if (iterators.size() < 128) { - return SearchIterator::UP(new ArrayHeapImpl(tmd, weights, AttributeIteratorPack(std::move(iterators)))); + return SearchIterator::UP(new ArrayHeapImpl(tmd, field_is_filter, weights, AttributeIteratorPack(std::move(iterators)))); } - return SearchIterator::UP(new HeapImpl(tmd, weights, AttributeIteratorPack(std::move(iterators)))); + return SearchIterator::UP(new HeapImpl(tmd, field_is_filter, weights, AttributeIteratorPack(std::move(iterators)))); } //----------------------------------------------------------------------------- diff --git a/searchlib/src/vespa/searchlib/queryeval/dot_product_search.h b/searchlib/src/vespa/searchlib/queryeval/dot_product_search.h index 151c0b9469c..0cb69938d55 100644 --- a/searchlib/src/vespa/searchlib/queryeval/dot_product_search.h +++ b/searchlib/src/vespa/searchlib/queryeval/dot_product_search.h @@ -28,11 +28,13 @@ public: // TODO: use MultiSearch::Children to pass ownership static SearchIterator::UP create(const std::vector<SearchIterator*> &children, search::fef::TermFieldMatchData &tmd, + bool field_is_filter, const std::vector<fef::TermFieldMatchData*> &childMatch, const std::vector<int32_t> &weights, fef::MatchData::UP md); static SearchIterator::UP create(search::fef::TermFieldMatchData &tmd, + bool field_is_filter, const std::vector<int32_t> &weights, std::vector<DocumentWeightIterator> &&iterators); }; diff --git a/searchlib/src/vespa/searchlib/queryeval/weighted_set_term_blueprint.cpp b/searchlib/src/vespa/searchlib/queryeval/weighted_set_term_blueprint.cpp index 1835dddb39e..4862d9a2375 100644 --- a/searchlib/src/vespa/searchlib/queryeval/weighted_set_term_blueprint.cpp +++ b/searchlib/src/vespa/searchlib/queryeval/weighted_set_term_blueprint.cpp @@ -64,7 +64,7 @@ WeightedSetTermBlueprint::WeightedSetTermBlueprint(const FieldSpec &field) : ComplexLeafBlueprint(field), _estimate(), _layout(), - _children_field(field.getName(), field.getFieldId(), _layout.allocTermField(field.getFieldId()), false), + _children_field(field.getName(), field.getFieldId(), _layout.allocTermField(field.getFieldId()), field.isFilter()), _weights(), _terms() { @@ -107,7 +107,7 @@ WeightedSetTermBlueprint::createLeafSearch(const fef::TermFieldMatchDataArray &t // TODO: pass ownership with unique_ptr children[i] = _terms[i]->createSearch(*md, true).release(); } - return SearchIterator::UP(WeightedSetTermSearch::create(children, *tfmda[0], _weights, std::move(md))); + return SearchIterator::UP(WeightedSetTermSearch::create(children, *tfmda[0], _children_field.isFilter(), _weights, std::move(md))); } SearchIterator::UP diff --git a/searchlib/src/vespa/searchlib/queryeval/weighted_set_term_search.cpp b/searchlib/src/vespa/searchlib/queryeval/weighted_set_term_search.cpp index 683a82150f5..0eebfb9f690 100644 --- a/searchlib/src/vespa/searchlib/queryeval/weighted_set_term_search.cpp +++ b/searchlib/src/vespa/searchlib/queryeval/weighted_set_term_search.cpp @@ -45,6 +45,7 @@ private: ref_t *_data_stash; ref_t *_data_end; IteratorPack _children; + bool _field_is_filter; void seek_child(ref_t child, uint32_t docId) { _termPos[child] = _children.seek(child, docId); @@ -61,6 +62,7 @@ private: public: WeightedSetTermSearchImpl(search::fef::TermFieldMatchData &tmd, + bool field_is_filter, const std::vector<int32_t> &weights, IteratorPack &&iteratorPack) : _tmd(tmd), @@ -72,7 +74,8 @@ public: _data_begin(nullptr), _data_stash(nullptr), _data_end(nullptr), - _children(std::move(iteratorPack)) + _children(std::move(iteratorPack)), + _field_is_filter(field_is_filter) { HEAP::require_left_heap(); assert(_children.size() > 0); @@ -83,7 +86,9 @@ public: } _data_begin = &_data_space[0]; _data_end = _data_begin + _data_space.size(); - _tmd.reservePositions(_children.size()); + if (!_field_is_filter && !_tmd.isNotNeeded()) { + _tmd.reservePositions(_children.size()); + } } void doSeek(uint32_t docId) override { @@ -107,13 +112,17 @@ public: } void doUnpack(uint32_t docId) override { - _tmd.reset(docId); - pop_matching_children(docId); - std::sort(_data_stash, _data_end, _cmpWeight); - for (ref_t *ptr = _data_stash; ptr < _data_end; ++ptr) { - fef::TermFieldMatchDataPosition pos; - pos.setElementWeight(_weights[*ptr]); - _tmd.appendPosition(pos); + if (!_field_is_filter && !_tmd.isNotNeeded()) { + _tmd.reset(docId); + pop_matching_children(docId); + std::sort(_data_stash, _data_end, _cmpWeight); + for (ref_t *ptr = _data_stash; ptr < _data_end; ++ptr) { + fef::TermFieldMatchDataPosition pos; + pos.setElementWeight(_weights[*ptr]); + _tmd.appendPosition(pos); + } + } else { + _tmd.resetOnlyDocId(docId); } } @@ -155,6 +164,7 @@ public: SearchIterator::UP WeightedSetTermSearch::create(const std::vector<SearchIterator *> &children, TermFieldMatchData &tmd, + bool field_is_filter, const std::vector<int32_t> &weights, fef::MatchData::UP match_data) { @@ -162,15 +172,16 @@ WeightedSetTermSearch::create(const std::vector<SearchIterator *> &children, typedef WeightedSetTermSearchImpl<vespalib::LeftHeap, SearchIteratorPack> HeapImpl; if (children.size() < 128) { - return SearchIterator::UP(new ArrayHeapImpl(tmd, weights, SearchIteratorPack(children, std::move(match_data)))); + return SearchIterator::UP(new ArrayHeapImpl(tmd, field_is_filter, weights, SearchIteratorPack(children, std::move(match_data)))); } - return SearchIterator::UP(new HeapImpl(tmd, weights, SearchIteratorPack(children, std::move(match_data)))); + return SearchIterator::UP(new HeapImpl(tmd, field_is_filter, weights, SearchIteratorPack(children, std::move(match_data)))); } //----------------------------------------------------------------------------- SearchIterator::UP WeightedSetTermSearch::create(search::fef::TermFieldMatchData &tmd, + bool field_is_filter, const std::vector<int32_t> &weights, std::vector<DocumentWeightIterator> &&iterators) { @@ -178,9 +189,9 @@ WeightedSetTermSearch::create(search::fef::TermFieldMatchData &tmd, typedef WeightedSetTermSearchImpl<vespalib::LeftHeap, AttributeIteratorPack> HeapImpl; if (iterators.size() < 128) { - return SearchIterator::UP(new ArrayHeapImpl(tmd, weights, AttributeIteratorPack(std::move(iterators)))); + return SearchIterator::UP(new ArrayHeapImpl(tmd, field_is_filter, weights, AttributeIteratorPack(std::move(iterators)))); } - return SearchIterator::UP(new HeapImpl(tmd, weights, AttributeIteratorPack(std::move(iterators)))); + return SearchIterator::UP(new HeapImpl(tmd, field_is_filter, weights, AttributeIteratorPack(std::move(iterators)))); } //----------------------------------------------------------------------------- diff --git a/searchlib/src/vespa/searchlib/queryeval/weighted_set_term_search.h b/searchlib/src/vespa/searchlib/queryeval/weighted_set_term_search.h index ac39c26286d..3aaf3477bbd 100644 --- a/searchlib/src/vespa/searchlib/queryeval/weighted_set_term_search.h +++ b/searchlib/src/vespa/searchlib/queryeval/weighted_set_term_search.h @@ -32,10 +32,12 @@ public: // TODO: pass ownership with unique_ptr static SearchIterator::UP create(const std::vector<SearchIterator *> &children, search::fef::TermFieldMatchData &tmd, + bool field_is_filter, const std::vector<int32_t> &weights, fef::MatchData::UP match_data); static SearchIterator::UP create(search::fef::TermFieldMatchData &tmd, + bool field_is_filter, const std::vector<int32_t> &weights, std::vector<DocumentWeightIterator> &&iterators); |