diff options
author | Tor Egge <Tor.Egge@online.no> | 2022-06-01 12:40:01 +0200 |
---|---|---|
committer | Tor Egge <Tor.Egge@online.no> | 2022-06-01 12:54:44 +0200 |
commit | 8f9b40cbbd64740b98b19a12a875c0ddc8b773a5 (patch) | |
tree | 37d24c0551e309c827d1dd3a2dd7e6e01059e166 /searchlib/src/tests | |
parent | 49fe9df15b37cbd73964d037a8382cf36ec53b5f (diff) |
Don't calculate score or weights when unpacking for a term in
a filter field or for a term not used by ranking.
Diffstat (limited to 'searchlib/src/tests')
3 files changed, 178 insertions, 37 deletions
diff --git a/searchlib/src/tests/queryeval/dot_product/dot_product_test.cpp b/searchlib/src/tests/queryeval/dot_product/dot_product_test.cpp index 624ad331457..77cc7920ea8 100644 --- a/searchlib/src/tests/queryeval/dot_product/dot_product_test.cpp +++ b/searchlib/src/tests/queryeval/dot_product/dot_product_test.cpp @@ -38,11 +38,22 @@ struct DP { static const uint32_t fieldId = 0; static const TermFieldHandle handle = 0; std::vector<std::pair<std::string, uint32_t> > tokens; + bool field_is_filter; + bool term_is_not_needed; + + DP() + : tokens(), + field_is_filter(false), + term_is_not_needed(false) + { + } DP &add(const std::string &token, uint32_t weight) { tokens.push_back(std::make_pair(token, weight)); return *this; } + DP& set_field_is_filter(bool value) { field_is_filter = value; return *this; } + DP& set_term_is_not_needed(bool value) { term_is_not_needed = value; return *this; } Node::UP createNode() const { SimpleDotProduct *node = new SimpleDotProduct(tokens.size(), "view", 0, Weight(0)); @@ -54,9 +65,12 @@ struct DP { FakeResult search(Searchable &searchable, const std::string &field, bool strict) const { MatchData::UP md(MatchData::makeTestInstance(1, 1)); + if (term_is_not_needed) { + md->resolveTermField(handle)->tagAsNotNeeded(); + } FakeRequestContext requestContext; Node::UP node = createNode(); - FieldSpecList fields = FieldSpecList().add(FieldSpec(field, fieldId, handle)); + FieldSpecList fields = FieldSpecList().add(FieldSpec(field, fieldId, handle, field_is_filter)); queryeval::Blueprint::UP bp = searchable.createBlueprint(requestContext, fields, *node); bp->fetchPostings(ExecuteInfo::create(strict)); SearchIterator::UP sb = bp->createSearch(*md, strict); @@ -111,7 +125,7 @@ struct MockFixture { childMatch.push_back(md->resolveTermField(children.size())); children.push_back(mock); weights.push_back(1); - search = DotProductSearch::create(children, tfmd, childMatch, weights, std::move(md)); + search = DotProductSearch::create(children, tfmd, false, childMatch, weights, std::move(md)); } }; @@ -126,16 +140,42 @@ struct MockFixture { } // namespace <unnamed> -TEST("test Simple") { +std::function<int(int)> +make_score_filter(bool field_is_filter, bool term_is_not_needed) +{ + if (field_is_filter || term_is_not_needed) { + return [](int) noexcept { return 0; }; + } else { + return [](int value) noexcept { return value; }; + } +} + +void run_simple(bool field_is_filter, bool term_is_not_needed) +{ + auto score_filter = make_score_filter(field_is_filter, term_is_not_needed); FakeResult expect = FakeResult() - .doc(3).score(30 * 3) - .doc(5).score(50 * 5) - .doc(7).score(70 * 7); - DP ws = DP().add("7", 70).add("5", 50).add("3", 30).add("100", 1000); + .doc(3).score(score_filter(30 * 3)) + .doc(5).score(score_filter(50 * 5)) + .doc(7).score(score_filter(70 * 7)); + DP ws = DP().add("7", 70).add("5", 50).add("3", 30).add("100", 1000) + .set_field_is_filter(field_is_filter) + .set_term_is_not_needed(term_is_not_needed); TEST_DO(verifySimple(expect, ws)); } +TEST("test Simple") { + TEST_DO(run_simple(false, false)); +} + +TEST("test Simple filter field") { + TEST_DO(run_simple(true, false)); +} + +TEST("test Simple unranked") { + TEST_DO(run_simple(false, true)); +} + TEST("test Simple Single") { FakeResult expect = FakeResult() .doc(7).score(70 * 7); @@ -144,21 +184,37 @@ TEST("test Simple Single") { TEST_DO(verifySimple(expect, ws)); } -TEST("test Multi") { +void run_multi(bool field_is_filter, bool term_is_not_needed) +{ + auto score_filter = make_score_filter(field_is_filter, term_is_not_needed); FakeSearchable index; setupFakeSearchable(index); FakeResult expect = FakeResult() - .doc(3).score(30 * 3 + 130 * 2 * 3 + 230 * 3 * 3) - .doc(5).score(50 * 5 + 150 * 2 * 5) - .doc(7).score(70 * 7); + .doc(3).score(score_filter(30 * 3 + 130 * 2 * 3 + 230 * 3 * 3)) + .doc(5).score(score_filter(50 * 5 + 150 * 2 * 5)) + .doc(7).score(score_filter(70 * 7)); DP ws = DP().add("7", 70).add("5", 50).add("3", 30) .add("15", 150).add("13", 130) - .add("23", 230).add("100", 1000); + .add("23", 230).add("100", 1000) + .set_field_is_filter(field_is_filter) + .set_term_is_not_needed(term_is_not_needed); EXPECT_EQUAL(expect, ws.search(index, "multi-field", true)); EXPECT_EQUAL(expect, ws.search(index, "multi-field", false)); } + +TEST("test Multi") { + TEST_DO(run_multi(false, false)); +} + +TEST("test Multi filter field") { + TEST_DO(run_multi(true, false)); +} +TEST("test Multi unranked") { + TEST_DO(run_multi(false, true)); +} + TEST_F("test Eager Empty Child", MockFixture(search::endDocId, {})) { MockSearch *mock = f1.mock; SearchIterator &search = *f1.search; @@ -210,14 +266,14 @@ private: SearchIterator::UP create(const std::vector<SearchIterator*> &children) const override { std::vector<fef::TermFieldMatchData*> no_child_match; MatchData::UP no_match_data; - return DotProductSearch::create(children, _tfmd, no_child_match, _weights, std::move(no_match_data)); + return DotProductSearch::create(children, _tfmd, false, no_child_match, _weights, std::move(no_match_data)); } }; class WeightIteratorChildrenVerifier : public search::test::DwaIteratorChildrenVerifier { private: SearchIterator::UP create(std::vector<DocumentWeightIterator> && children) const override { - return SearchIterator::UP(DotProductSearch::create(_tfmd, _weights, std::move(children))); + return SearchIterator::UP(DotProductSearch::create(_tfmd, false, _weights, std::move(children))); } }; diff --git a/searchlib/src/tests/queryeval/sparse_vector_benchmark/sparse_vector_benchmark_test.cpp b/searchlib/src/tests/queryeval/sparse_vector_benchmark/sparse_vector_benchmark_test.cpp index e83226baf84..df01744d619 100644 --- a/searchlib/src/tests/queryeval/sparse_vector_benchmark/sparse_vector_benchmark_test.cpp +++ b/searchlib/src/tests/queryeval/sparse_vector_benchmark/sparse_vector_benchmark_test.cpp @@ -223,8 +223,18 @@ struct RiseWandFactory : SparseVectorFactory { struct WeightedSetFactory : SparseVectorFactory { mutable TermFieldMatchData tfmd; + bool field_is_filter; + + WeightedSetFactory(bool field_is_filter_, bool term_is_not_needed) + : tfmd(), + field_is_filter(field_is_filter_) + { + if (term_is_not_needed) { + tfmd.tagAsNotNeeded(); + } + } virtual std::string name() const override { - return vespalib::make_string("WeightedSet"); + return vespalib::make_string("WeightedSet%s%s", (field_is_filter ? "-filter" : ""), (tfmd.isNotNeeded() ? "-unranked" : "")); } SearchIterator::UP createSparseVector(ChildFactory &childFactory, uint32_t childCnt, uint32_t limit) const override { std::vector<SearchIterator *> terms; @@ -234,14 +244,24 @@ struct WeightedSetFactory : SparseVectorFactory { terms.push_back(childFactory.createChild(i, limit).release()); weights.push_back(default_weight); } - return WeightedSetTermSearch::create(terms, tfmd, weights, MatchData::UP(nullptr)); + return WeightedSetTermSearch::create(terms, tfmd, field_is_filter, weights, MatchData::UP(nullptr)); } }; struct DotProductFactory : SparseVectorFactory { mutable TermFieldMatchData tfmd; + bool field_is_filter; + + DotProductFactory(bool field_is_filter_, bool term_is_not_needed) + : tfmd(), + field_is_filter(field_is_filter_) + { + if (term_is_not_needed) { + tfmd.tagAsNotNeeded(); + } + } virtual std::string name() const override { - return vespalib::make_string("DotProduct"); + return vespalib::make_string("DotProduct%s%s", (field_is_filter ? "-filter" : ""), (tfmd.isNotNeeded() ? "-unranked" : "")); } SearchIterator::UP createSparseVector(ChildFactory &childFactory, uint32_t childCnt, uint32_t limit) const override { MatchDataLayout layout; @@ -258,7 +278,7 @@ struct DotProductFactory : SparseVectorFactory { childMatch.push_back(md->resolveTermField(handles[i])); weights.push_back(default_weight); } - return DotProductSearch::create(terms, tfmd, childMatch, weights, std::move(md)); + return DotProductSearch::create(terms, tfmd, field_is_filter, childMatch, weights, std::move(md)); } }; @@ -333,6 +353,7 @@ struct Result { Result run_single_benchmark(FilterStrategy &filterStrategy, SparseVectorFactory &vectorFactory, ChildFactory &childFactory, uint32_t childCnt, uint32_t limit) { SearchIterator::UP search(filterStrategy.createRoot(vectorFactory, childFactory, childCnt, limit)); SearchIterator &sb = *search; + sb.initFullRange(); uint32_t num_hits = 0; vespalib::Timer timer; for (sb.seek(1); !sb.isAtEnd(); sb.seek(sb.getDocId() + 1)) { @@ -383,13 +404,21 @@ public: void benchmark_all_operators(Setup &setup, const std::vector<uint32_t> &child_counts) { VespaWandFactory vespaWand256(256); RiseWandFactory riseWand256(256); - WeightedSetFactory weightedSet; - DotProductFactory dotProduct; + WeightedSetFactory weightedSet(false, false); + WeightedSetFactory weightedSet_filter(true, false); + WeightedSetFactory weightedSet_unranked(false, true); + DotProductFactory dotProduct(false, false); + DotProductFactory dotProduct_filter(true, false); + DotProductFactory dotProduct_unranked(false, true); OrFactory plain_or; setup.benchmark(vespaWand256, child_counts); setup.benchmark(riseWand256, child_counts); setup.benchmark(weightedSet, child_counts); + setup.benchmark(weightedSet_filter, child_counts); + setup.benchmark(weightedSet_unranked, child_counts); setup.benchmark(dotProduct, child_counts); + setup.benchmark(dotProduct_filter, child_counts); + setup.benchmark(dotProduct_unranked, child_counts); setup.benchmark(plain_or, child_counts); } diff --git a/searchlib/src/tests/queryeval/weighted_set_term/weighted_set_term_test.cpp b/searchlib/src/tests/queryeval/weighted_set_term/weighted_set_term_test.cpp index 229c8b9501c..ba75d7b0da0 100644 --- a/searchlib/src/tests/queryeval/weighted_set_term/weighted_set_term_test.cpp +++ b/searchlib/src/tests/queryeval/weighted_set_term/weighted_set_term_test.cpp @@ -41,8 +41,16 @@ struct WS { MatchDataLayout layout; TermFieldHandle handle; std::vector<std::pair<std::string, uint32_t> > tokens; + bool field_is_filter; + bool term_is_not_needed; - WS() : layout(), handle(layout.allocTermField(fieldId)), tokens() { + WS() + : layout(), + handle(layout.allocTermField(fieldId)), + tokens(), + field_is_filter(false), + term_is_not_needed(false) + { MatchData::UP tmp = layout.createMatchData(); ASSERT_TRUE(tmp->resolveTermField(handle)->getFieldId() == fieldId); } @@ -51,6 +59,8 @@ struct WS { tokens.push_back(std::make_pair(token, weight)); return *this; } + WS& set_field_is_filter(bool value) { field_is_filter = value; return *this; } + WS& set_term_is_not_needed(bool value) { term_is_not_needed = value; return *this; } Node::UP createNode() const { SimpleWeightedSetTerm *node = new SimpleWeightedSetTerm(tokens.size(), "view", 0, Weight(0)); @@ -74,8 +84,11 @@ struct WS { FakeResult search(Searchable &searchable, const std::string &field, bool strict) const { FakeRequestContext requestContext; MatchData::UP md = layout.createMatchData(); + if (term_is_not_needed) { + md->resolveTermField(handle)->tagAsNotNeeded(); + } Node::UP node = createNode(); - FieldSpecList fields = FieldSpecList().add(FieldSpec(field, fieldId, handle)); + FieldSpecList fields = FieldSpecList().add(FieldSpec(field, fieldId, handle, field_is_filter)); queryeval::Blueprint::UP bp = searchable.createBlueprint(requestContext, fields, *node); bp->fetchPostings(ExecuteInfo::create(strict)); SearchIterator::UP sb = bp->createSearch(*md, strict); @@ -123,20 +136,30 @@ struct MockFixture { mock = new MockSearch(initial); children.push_back(mock); weights.push_back(1); - search = WeightedSetTermSearch::create(children, tfmd, weights, MatchData::UP(nullptr)); + search = WeightedSetTermSearch::create(children, tfmd, false, weights, MatchData::UP(nullptr)); } }; } // namespace <unnamed> -TEST("testSimple") { +void run_simple(bool field_is_filter, bool term_is_not_needed) +{ FakeSearchable index; setupFakeSearchable(index); - FakeResult expect = FakeResult() - .doc(3).elem(0).weight(30).pos(0) - .doc(5).elem(0).weight(50).pos(0) - .doc(7).elem(0).weight(70).pos(0); - WS ws = WS().add("7", 70).add("5", 50).add("3", 30).add("100", 1000); + FakeResult expect; + if (field_is_filter || term_is_not_needed) { + expect.doc(3) + .doc(5) + .doc(7); + } else { + expect.doc(3).elem(0).weight(30).pos(0) + .doc(5).elem(0).weight(50).pos(0) + .doc(7).elem(0).weight(70).pos(0); + } + WS ws = WS().add("7", 70).add("5", 50).add("3", 30).add("100", 1000) + .set_field_is_filter(field_is_filter) + .set_term_is_not_needed(term_is_not_needed); +; EXPECT_TRUE(ws.isGenericSearch(index, "field", true)); EXPECT_TRUE(ws.isGenericSearch(index, "field", false)); EXPECT_TRUE(ws.isGenericSearch(index, "multi-field", true)); @@ -148,16 +171,37 @@ TEST("testSimple") { EXPECT_EQUAL(expect, ws.search(index, "multi-field", false)); } -TEST("testMulti") { +TEST("testSimple") { + TEST_DO(run_simple(false, false)); +} + +TEST("testSimple filter field") { + TEST_DO(run_simple(true, false)); +} + +TEST("testSimple unranked") { + TEST_DO(run_simple(false, true)); +} + +void run_multi(bool field_is_filter, bool term_is_not_needed) +{ FakeSearchable index; setupFakeSearchable(index); - FakeResult expect = FakeResult() - .doc(3).elem(0).weight(230).pos(0).elem(0).weight(130).pos(0).elem(0).weight(30).pos(0) - .doc(5).elem(0).weight(150).pos(0).elem(0).weight(50).pos(0) - .doc(7).elem(0).weight(70).pos(0); + FakeResult expect; + if (field_is_filter || term_is_not_needed) { + expect.doc(3) + .doc(5) + .doc(7); + } else { + expect.doc(3).elem(0).weight(230).pos(0).elem(0).weight(130).pos(0).elem(0).weight(30).pos(0) + .doc(5).elem(0).weight(150).pos(0).elem(0).weight(50).pos(0) + .doc(7).elem(0).weight(70).pos(0); + } WS ws = WS().add("7", 70).add("5", 50).add("3", 30) .add("15", 150).add("13", 130) - .add("23", 230).add("100", 1000); + .add("23", 230).add("100", 1000) + .set_field_is_filter(field_is_filter) + .set_term_is_not_needed(term_is_not_needed); EXPECT_TRUE(ws.isGenericSearch(index, "multi-field", true)); EXPECT_TRUE(ws.isGenericSearch(index, "multi-field", false)); @@ -165,6 +209,18 @@ TEST("testMulti") { EXPECT_EQUAL(expect, ws.search(index, "multi-field", false)); } +TEST("testMulti") { + TEST_DO(run_multi(false, false)); +} + +TEST("testMulti filter field") { + TEST_DO(run_multi(true, false)); +} + +TEST("testMulti unranked") { + TEST_DO(run_multi(false, true)); +} + TEST_F("test Eager Empty Child", MockFixture(search::endDocId)) { MockSearch *mock = f1.mock; SearchIterator &search = *f1.search; @@ -194,14 +250,14 @@ TEST_F("test Eager Matching Child", MockFixture(5)) { class IteratorChildrenVerifier : public search::test::IteratorChildrenVerifier { private: SearchIterator::UP create(const std::vector<SearchIterator*> &children) const override { - return SearchIterator::UP(WeightedSetTermSearch::create(children, _tfmd, _weights, MatchData::UP(nullptr))); + return SearchIterator::UP(WeightedSetTermSearch::create(children, _tfmd, false, _weights, MatchData::UP(nullptr))); } }; class WeightIteratorChildrenVerifier : public search::test::DwaIteratorChildrenVerifier { private: SearchIterator::UP create(std::vector<DocumentWeightIterator> && children) const override { - return SearchIterator::UP(WeightedSetTermSearch::create(_tfmd, _weights, std::move(children))); + return SearchIterator::UP(WeightedSetTermSearch::create(_tfmd, false, _weights, std::move(children))); } }; |