diff options
author | Geir Storli <geirst@yahooinc.com> | 2023-11-30 16:33:23 +0100 |
---|---|---|
committer | GitHub <noreply@github.com> | 2023-11-30 16:33:23 +0100 |
commit | f07ab8bb1d20ddb3457768788ee5a71d7d3c3175 (patch) | |
tree | c3b532cfbf8d649112092e7c758cb961a1607908 /searchlib/src/tests | |
parent | 075d484a13f0b7adcb593c6a7c137196505f1430 (diff) | |
parent | 7c1bae375340587aa279998f624b119bc27ad56e (diff) |
Merge pull request #29516 from vespa-engine/geirst/more-flexible-direct-multi-term-blueprint
Handle combination of both weight iterators and bitvectors in DirectMultiTermBlueprint
Diffstat (limited to 'searchlib/src/tests')
3 files changed, 266 insertions, 1 deletion
diff --git a/searchlib/src/tests/attribute/direct_multi_term_blueprint/CMakeLists.txt b/searchlib/src/tests/attribute/direct_multi_term_blueprint/CMakeLists.txt new file mode 100644 index 00000000000..473d977ac7a --- /dev/null +++ b/searchlib/src/tests/attribute/direct_multi_term_blueprint/CMakeLists.txt @@ -0,0 +1,10 @@ +# Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +vespa_add_executable(searchlib_direct_multi_term_blueprint_test_app TEST + SOURCES + direct_multi_term_blueprint_test.cpp + DEPENDS + searchlib + searchlib_test + GTest::GTest +) +vespa_add_test(NAME searchlib_direct_multi_term_blueprint_test_app COMMAND searchlib_direct_multi_term_blueprint_test_app) diff --git a/searchlib/src/tests/attribute/direct_multi_term_blueprint/direct_multi_term_blueprint_test.cpp b/searchlib/src/tests/attribute/direct_multi_term_blueprint/direct_multi_term_blueprint_test.cpp new file mode 100644 index 00000000000..e86e866146a --- /dev/null +++ b/searchlib/src/tests/attribute/direct_multi_term_blueprint/direct_multi_term_blueprint_test.cpp @@ -0,0 +1,255 @@ +// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+ +#include <vespa/searchlib/attribute/direct_multi_term_blueprint.h> +#include <vespa/searchlib/attribute/integerbase.h> +#include <vespa/searchlib/fef/termfieldmatchdata.h> +#include <vespa/searchlib/queryeval/orsearch.h> +#include <vespa/searchlib/queryeval/searchiterator.h> +#include <vespa/searchlib/queryeval/simpleresult.h> +#include <vespa/searchlib/queryeval/weighted_set_term_search.h> +#include <vespa/searchlib/test/attribute_builder.h> +#include <vespa/vespalib/gtest/gtest.h> +#include <gmock/gmock.h> +#include <numeric> + +using namespace search::attribute; +using namespace search::queryeval; +using namespace search; +using testing::StartsWith; + +struct IntegerKey : public IDirectPostingStore::LookupKey { + int64_t _value; + IntegerKey(int64_t value_in) : _value(value_in) {} + vespalib::stringref asString() const override { abort(); } + bool asInteger(int64_t& value) const override { value = _value; return true; } +}; + +const vespalib::string field_name = "test"; +constexpr uint32_t field_id = 3; +uint32_t doc_id_limit = 500; + +using Docids = std::vector<uint32_t>; + +Docids +range(uint32_t begin, uint32_t count) +{ + Docids res(count); + std::iota(res.begin(), res.end(), begin); + return res; +} + +Docids +concat(const Docids& a, const Docids& b) +{ + std::vector<uint32_t> res; + res.insert(res.end(), a.begin(), a.end()); + res.insert(res.end(), b.begin(), b.end()); + std::sort(res.begin(), res.end()); + return res; +} + +std::shared_ptr<AttributeVector> +make_attribute(bool field_is_filter) +{ + Config cfg(BasicType::INT64, CollectionType::WSET); + cfg.setFastSearch(true); + if (field_is_filter) { + cfg.setIsFilter(field_is_filter); + } + uint32_t num_docs = doc_id_limit - 1; + auto attr = test::AttributeBuilder(field_name, cfg).docs(num_docs).get(); + IntegerAttribute& real = dynamic_cast<IntegerAttribute&>(*attr); + + // Values 1 and 3 have btree (short) posting lists with weights. 
+ real.append(10, 1, 1); + real.append(30, 3, 1); + real.append(31, 3, 1); + + // Values 100 and 300 have bitvector posting lists. + // We need at least 128 documents to get bitvector posting list (see PostingStoreBase2::resizeBitVectors()) + for (auto docid : range(100, 128)) { + real.append(docid, 100, 1); + } + for (auto docid : range(300, 128)) { + real.append(docid, 300, 1); + } + attr->commit(true); + return attr; +} + +void +expect_has_weight_iterator(const IDocidWithWeightPostingStore& store, int64_t term_value) +{ + auto snapshot = store.get_dictionary_snapshot(); + auto res = store.lookup(IntegerKey(term_value), snapshot); + EXPECT_TRUE(store.has_weight_iterator(res.posting_idx)); +} + +void +expect_has_bitvector_iterator(const IDocidWithWeightPostingStore& store, int64_t term_value) +{ + auto snapshot = store.get_dictionary_snapshot(); + auto res = store.lookup(IntegerKey(term_value), snapshot); + EXPECT_TRUE(store.has_bitvector(res.posting_idx)); +} + +void +validate_posting_lists(const IDocidWithWeightPostingStore& store) +{ + expect_has_weight_iterator(store, 1); + expect_has_weight_iterator(store, 3); + if (store.has_always_weight_iterator()) { + expect_has_weight_iterator(store, 100); + expect_has_weight_iterator(store, 300); + } + expect_has_bitvector_iterator(store, 100); + expect_has_bitvector_iterator(store, 300); +} + +class DirectMultiTermBlueprintTest : public ::testing::Test { +public: + using BlueprintType = DirectMultiTermBlueprint<WeightedSetTermSearch>; + std::shared_ptr<AttributeVector> attr; + const IDocidWithWeightPostingStore* store; + std::shared_ptr<BlueprintType> blueprint; + Blueprint::HitEstimate estimate; + fef::TermFieldMatchData tfmd; + fef::TermFieldMatchDataArray tfmda; + DirectMultiTermBlueprintTest() + : attr(), + store(), + blueprint(), + tfmd(), + tfmda() + { + tfmda.add(&tfmd); + } + void setup(bool field_is_filter, bool need_term_field_match_data) { + attr = make_attribute(field_is_filter); + store = 
attr->as_docid_with_weight_posting_store(); + ASSERT_TRUE(store); + validate_posting_lists(*store); + blueprint = std::make_shared<BlueprintType>(FieldSpec(field_name, field_id, fef::TermFieldHandle(), field_is_filter), *attr, *store, 2); + blueprint->setDocIdLimit(doc_id_limit); + if (need_term_field_match_data) { + tfmd.needs_normal_features(); + } else { + tfmd.tagAsNotNeeded(); + } + } + void add_term(int64_t term_value) { + blueprint->addTerm(IntegerKey(term_value), 1, estimate); + } + std::unique_ptr<SearchIterator> create_leaf_search() const { + return blueprint->createLeafSearch(tfmda, true); + } +}; + +void +expect_hits(const Docids& exp_docids, SearchIterator& itr) +{ + SimpleResult exp(exp_docids); + SimpleResult act; + act.search(itr); + EXPECT_EQ(exp, act); +} + +void +expect_or_iterator(SearchIterator& itr, size_t exp_children) +{ + auto& real = dynamic_cast<OrSearch&>(itr); + ASSERT_EQ(exp_children, real.getChildren().size()); +} + +void +expect_or_child(SearchIterator& itr, size_t child, const vespalib::string& exp_child_itr) +{ + auto& real = dynamic_cast<OrSearch&>(itr); + EXPECT_THAT(real.getChildren()[child]->asString(), StartsWith(exp_child_itr)); +} + +TEST_F(DirectMultiTermBlueprintTest, weight_iterators_used_for_none_filter_field) +{ + setup(false, true); + add_term(1); + add_term(3); + auto itr = create_leaf_search(); + EXPECT_THAT(itr->asString(), StartsWith("search::queryeval::WeightedSetTermSearchImpl")); + expect_hits({10, 30, 31}, *itr); +} + +TEST_F(DirectMultiTermBlueprintTest, weight_iterators_used_instead_of_bitvectors_for_none_filter_field) +{ + setup(false, true); + add_term(1); + add_term(100); + auto itr = create_leaf_search(); + EXPECT_THAT(itr->asString(), StartsWith("search::queryeval::WeightedSetTermSearchImpl")); + expect_hits(concat({10}, range(100, 128)), *itr); +} + +TEST_F(DirectMultiTermBlueprintTest, bitvectors_and_weight_iterators_used_for_filter_field) +{ + setup(true, true); + add_term(1); + add_term(3); + 
add_term(100); + add_term(300); + auto itr = create_leaf_search(); + expect_or_iterator(*itr, 3); + expect_or_child(*itr, 0, "search::BitVectorIteratorStrictT"); + expect_or_child(*itr, 1, "search::BitVectorIteratorStrictT"); + expect_or_child(*itr, 2, "search::queryeval::WeightedSetTermSearchImpl"); + expect_hits(concat({10, 30, 31}, concat(range(100, 128), range(300, 128))), *itr); +} + +TEST_F(DirectMultiTermBlueprintTest, only_bitvectors_used_for_filter_field) +{ + setup(true, true); + add_term(100); + add_term(300); + auto itr = create_leaf_search(); + expect_or_iterator(*itr, 2); + expect_or_child(*itr, 0, "search::BitVectorIteratorStrictT"); + expect_or_child(*itr, 1, "search::BitVectorIteratorStrictT"); + expect_hits(concat(range(100, 128), range(300, 128)), *itr); +} + +TEST_F(DirectMultiTermBlueprintTest, filter_iterator_used_for_filter_field_and_ranking_not_needed) +{ + setup(true, false); + add_term(1); + add_term(3); + auto itr = create_leaf_search(); + EXPECT_THAT(itr->asString(), StartsWith("search::attribute::DocumentWeightOrFilterSearchImpl")); + expect_hits({10, 30, 31}, *itr); +} + +TEST_F(DirectMultiTermBlueprintTest, bitvectors_and_filter_iterator_used_for_filter_field_and_ranking_not_needed) +{ + setup(true, false); + add_term(1); + add_term(3); + add_term(100); + add_term(300); + auto itr = create_leaf_search(); + expect_or_iterator(*itr, 3); + expect_or_child(*itr, 0, "search::BitVectorIteratorStrictT"); + expect_or_child(*itr, 1, "search::BitVectorIteratorStrictT"); + expect_or_child(*itr, 2, "search::attribute::DocumentWeightOrFilterSearchImpl"); + expect_hits(concat({10, 30, 31}, concat(range(100, 128), range(300, 128))), *itr); +} + +TEST_F(DirectMultiTermBlueprintTest, only_bitvectors_used_for_filter_field_and_ranking_not_needed) +{ + setup(true, false); + add_term(100); + add_term(300); + auto itr = create_leaf_search(); + expect_or_iterator(*itr, 2); + expect_or_child(*itr, 0, "search::BitVectorIteratorStrictT"); + 
expect_or_child(*itr, 1, "search::BitVectorIteratorStrictT"); + expect_hits(concat(range(100, 128), range(300, 128)), *itr); +} + +GTEST_MAIN_RUN_ALL_TESTS() diff --git a/searchlib/src/tests/queryeval/weighted_set_term/weighted_set_term_test.cpp b/searchlib/src/tests/queryeval/weighted_set_term/weighted_set_term_test.cpp index fffa4b3c5ba..5faead1175e 100644 --- a/searchlib/src/tests/queryeval/weighted_set_term/weighted_set_term_test.cpp +++ b/searchlib/src/tests/queryeval/weighted_set_term/weighted_set_term_test.cpp @@ -292,7 +292,7 @@ private: class WeightIteratorChildrenVerifier : public search::test::DwwIteratorChildrenVerifier { private: SearchIterator::UP create(std::vector<DocidWithWeightIterator> && children) const override { - return WeightedSetTermSearch::create(_tfmd, false, _weights, std::move(children)); + return WeightedSetTermSearch::create(_tfmd, false, std::cref(_weights), std::move(children)); } }; |