From adb37ca418c465d07bc0d04be522bf49e4964cbe Mon Sep 17 00:00:00 2001 From: Geir Storli Date: Mon, 18 Dec 2023 16:18:11 +0000 Subject: Rename DocumentWeightOrFilterSearch to MultiTermOrFilterSearch. --- searchlib/CMakeLists.txt | 2 +- .../direct_multi_term_blueprint_test.cpp | 4 +- .../CMakeLists.txt | 10 - .../document_weight_or_filter_search_test.cpp | 260 --------------------- .../multi_term_or_filter_search/CMakeLists.txt | 10 + .../multi_term_or_filter_search_test.cpp | 260 +++++++++++++++++++++ .../src/vespa/searchlib/attribute/CMakeLists.txt | 2 +- .../attribute/attribute_blueprint_factory.cpp | 5 +- .../attribute/direct_multi_term_blueprint.hpp | 4 +- .../attribute/document_weight_or_filter_search.cpp | 130 ----------- .../attribute/document_weight_or_filter_search.h | 24 -- .../attribute/multi_term_or_filter_search.cpp | 130 +++++++++++ .../attribute/multi_term_or_filter_search.h | 26 +++ .../queryeval/weighted_set_term_search.cpp | 4 +- 14 files changed, 436 insertions(+), 435 deletions(-) delete mode 100644 searchlib/src/tests/attribute/document_weight_or_filter_search/CMakeLists.txt delete mode 100644 searchlib/src/tests/attribute/document_weight_or_filter_search/document_weight_or_filter_search_test.cpp create mode 100644 searchlib/src/tests/attribute/multi_term_or_filter_search/CMakeLists.txt create mode 100644 searchlib/src/tests/attribute/multi_term_or_filter_search/multi_term_or_filter_search_test.cpp delete mode 100644 searchlib/src/vespa/searchlib/attribute/document_weight_or_filter_search.cpp delete mode 100644 searchlib/src/vespa/searchlib/attribute/document_weight_or_filter_search.h create mode 100644 searchlib/src/vespa/searchlib/attribute/multi_term_or_filter_search.cpp create mode 100644 searchlib/src/vespa/searchlib/attribute/multi_term_or_filter_search.h diff --git a/searchlib/CMakeLists.txt b/searchlib/CMakeLists.txt index 219439a1224..d46d9b57789 100644 --- a/searchlib/CMakeLists.txt +++ b/searchlib/CMakeLists.txt @@ -78,7 +78,6 @@ vespa_define_module( src/tests/attribute/dfa_fuzzy_matcher src/tests/attribute/direct_multi_term_blueprint src/tests/attribute/document_weight_iterator - src/tests/attribute/document_weight_or_filter_search src/tests/attribute/enum_attribute_compaction src/tests/attribute/enum_comparator src/tests/attribute/enumeratedsave @@ -87,6 +86,7 @@ vespa_define_module( src/tests/attribute/guard src/tests/attribute/imported_attribute_vector src/tests/attribute/imported_search_context + src/tests/attribute/multi_term_or_filter_search src/tests/attribute/multi_value_mapping src/tests/attribute/multi_value_read_view src/tests/attribute/posting_list_merger diff --git a/searchlib/src/tests/attribute/direct_multi_term_blueprint/direct_multi_term_blueprint_test.cpp b/searchlib/src/tests/attribute/direct_multi_term_blueprint/direct_multi_term_blueprint_test.cpp index 0245af828dd..67b73f459c9 100644 --- a/searchlib/src/tests/attribute/direct_multi_term_blueprint/direct_multi_term_blueprint_test.cpp +++ b/searchlib/src/tests/attribute/direct_multi_term_blueprint/direct_multi_term_blueprint_test.cpp @@ -300,7 +300,7 @@ TEST_P(DirectMultiTermBlueprintTest, filter_iterator_used_for_filter_field_and_r add_term(1); add_term(3); auto itr = create_leaf_search(); - EXPECT_THAT(itr->asString(), StartsWith("search::attribute::DocumentWeightOrFilterSearchImpl")); + EXPECT_THAT(itr->asString(), StartsWith("search::attribute::MultiTermOrFilterSearchImpl")); expect_hits({10, 30, 31}, *itr); } @@ -315,7 +315,7 @@ TEST_P(DirectMultiTermBlueprintTest, bitvectors_and_filter_iterator_used_for_fil expect_or_iterator(*itr, 3); expect_or_child(*itr, 0, "search::BitVectorIteratorStrictT"); expect_or_child(*itr, 1, "search::BitVectorIteratorStrictT"); - expect_or_child(*itr, 2, "search::attribute::DocumentWeightOrFilterSearchImpl"); + expect_or_child(*itr, 2, "search::attribute::MultiTermOrFilterSearchImpl"); expect_hits(concat({10, 30, 31}, concat(range(100, 128), range(300, 128))), *itr); } diff --git a/searchlib/src/tests/attribute/document_weight_or_filter_search/CMakeLists.txt b/searchlib/src/tests/attribute/document_weight_or_filter_search/CMakeLists.txt deleted file mode 100644 index b2f86a9ddec..00000000000 --- a/searchlib/src/tests/attribute/document_weight_or_filter_search/CMakeLists.txt +++ /dev/null @@ -1,10 +0,0 @@ -# Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -vespa_add_executable(searchlib_document_weight_or_filter_search_test_app TEST - SOURCES - document_weight_or_filter_search_test.cpp - DEPENDS - searchlib - searchlib_test - GTest::GTest -) -vespa_add_test(NAME searchlib_document_weight_or_filter_search_test_app COMMAND searchlib_document_weight_or_filter_search_test_app) diff --git a/searchlib/src/tests/attribute/document_weight_or_filter_search/document_weight_or_filter_search_test.cpp b/searchlib/src/tests/attribute/document_weight_or_filter_search/document_weight_or_filter_search_test.cpp deleted file mode 100644 index ae4812b5437..00000000000 --- a/searchlib/src/tests/attribute/document_weight_or_filter_search/document_weight_or_filter_search_test.cpp +++ /dev/null @@ -1,260 +0,0 @@ -// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. - -#include -#include -#include -#include -#include -#define ENABLE_GTEST_MIGRATION -#include - -using PostingList = search::attribute::PostingListTraits::PostingStoreBase; -using Iterator = search::attribute::PostingListTraits::const_iterator; -using KeyData = PostingList::KeyDataType; -using search::BitVector; -using search::attribute::DocumentWeightOrFilterSearch; -using search::queryeval::SearchIterator; -using vespalib::datastore::EntryRef; - -class DocumentWeightOrFilterSearchTest : public ::testing::Test { - PostingList _postings; - vespalib::GenerationHandler _gens; - std::vector _trees; - uint32_t _range_start; - uint32_t _range_end; -public: - DocumentWeightOrFilterSearchTest(); - ~DocumentWeightOrFilterSearchTest() override; - void inc_generation(); - size_t num_trees() const { return _trees.size(); } - Iterator get_tree(size_t idx) const { - if (idx < _trees.size()) { - return _postings.beginFrozen(_trees[idx]); - } else { - return {}; - } - } - void ensure_tree(size_t idx) { - if (idx <= _trees.size()) { - _trees.resize(idx + 1); - } - } - void add_tree(size_t idx, const std::vector& keys) { - ensure_tree(idx); - std::vector adds; - std::vector removes; - adds.reserve(keys.size()); - for (auto& key : keys) { - adds.emplace_back(key, 1); - } - _postings.apply(_trees[idx], adds.data(), adds.data() + adds.size(), removes.data(), removes.data() + removes.size()); - } - - void clear_tree(size_t idx) { - if (idx < _trees.size()) { - _postings.clear(_trees[idx]); - _trees[idx] = EntryRef(); - } - } - - std::unique_ptr make_iterator() const { - std::vector iterators; - for (size_t i = 0; i < num_trees(); ++i) { - iterators.emplace_back(get_tree(i)); - } - auto result = DocumentWeightOrFilterSearch::create(std::move(iterators)); - result->initRange(_range_start, _range_end); - return result; - }; - - std::vector eval_daat(SearchIterator &iterator) const { - std::vector result; - uint32_t doc_id = _range_start; - while (doc_id < _range_end) { - if (iterator.seek(doc_id)) { - result.emplace_back(doc_id); - ++doc_id; - } else { - doc_id = std::max(doc_id + 1, iterator.getDocId()); - } - } - return result; - } - - std::vector frombv(const BitVector &bv) const { - std::vector result; - uint32_t doc_id = _range_start; - doc_id = bv.getNextTrueBit(doc_id); - while (doc_id < _range_end) { - result.emplace_back(doc_id); - ++doc_id; - doc_id = bv.getNextTrueBit(doc_id); - } - return result; - } - - std::unique_ptr tobv(const std::vector & values) const { - auto bv = BitVector::create(_range_start, _range_end); - for (auto value : values) { - bv->setBit(value); - } - bv->invalidateCachedCount(); - return bv; - } - - static void expect_result(const std::vector & exp, const std::vector & act) - { - EXPECT_EQ(exp, act); - } - - void make_sample_data() { - add_tree(0, { 10, 11 }); - add_tree(1, { 14, 17, 20 }); - add_tree(2, { 3 }); - add_tree(3, { 17 }); - } - - uint32_t get_range_start() const { return _range_start; } - void set_range(uint32_t start, uint32_t end) { - _range_start = start; - _range_end = end; - } -}; - -DocumentWeightOrFilterSearchTest::DocumentWeightOrFilterSearchTest() - : _postings(true), - _gens(), - _range_start(1), - _range_end(10000) -{ -} - -DocumentWeightOrFilterSearchTest::~DocumentWeightOrFilterSearchTest() -{ - for (auto& tree : _trees) { - _postings.clear(tree); - } - _postings.clearBuilder(); - _postings.reclaim_all_memory(); - inc_generation(); -} - -void -DocumentWeightOrFilterSearchTest::inc_generation() -{ - _postings.freeze(); - _postings.assign_generation(_gens.getCurrentGeneration()); - _gens.incGeneration(); - _postings.reclaim_memory(_gens.get_oldest_used_generation()); -} - -TEST_F(DocumentWeightOrFilterSearchTest, daat_or) -{ - make_sample_data(); - expect_result(eval_daat(*make_iterator()), { 3, 10, 11, 14, 17, 20 }); -} - -TEST_F(DocumentWeightOrFilterSearchTest, taat_get_hits) -{ - make_sample_data(); - expect_result(frombv(*make_iterator()->get_hits(get_range_start())), { 3, 10, 11, 14, 17, 20 }); -} - -TEST_F(DocumentWeightOrFilterSearchTest, taat_or_hits_into) -{ - make_sample_data(); - auto bv = tobv({13, 14}); - make_iterator()->or_hits_into(*bv, get_range_start()); - expect_result(frombv(*bv), { 3, 10, 11, 13, 14, 17, 20 }); -} - -TEST_F(DocumentWeightOrFilterSearchTest, taat_and_hits_into) -{ - make_sample_data(); - auto bv = tobv({13, 14}); - make_iterator()->and_hits_into(*bv, get_range_start()); - expect_result(frombv(*bv), { 14 }); -} - -TEST_F(DocumentWeightOrFilterSearchTest, daat_or_ranged) -{ - make_sample_data(); - set_range(4, 15); - expect_result(eval_daat(*make_iterator()), {10, 11, 14 }); -} - -TEST_F(DocumentWeightOrFilterSearchTest, taat_get_hits_ranged) -{ - make_sample_data(); - set_range(4, 15); - expect_result(frombv(*make_iterator()->get_hits(get_range_start())), { 10, 11, 14 }); -} - -TEST_F(DocumentWeightOrFilterSearchTest, taat_or_hits_into_ranged) -{ - make_sample_data(); - set_range(4, 15); - auto bv = tobv({13, 14}); - make_iterator()->or_hits_into(*bv, get_range_start()); - expect_result(frombv(*bv), { 10, 11, 13, 14 }); -} - -TEST_F(DocumentWeightOrFilterSearchTest, taat_and_hits_into_ranged) -{ - make_sample_data(); - set_range(4, 15); - auto bv = tobv({13, 14}); - make_iterator()->and_hits_into(*bv, get_range_start()); - expect_result(frombv(*bv), { 14 }); -} - -namespace { - -class Verifier : public search::test::SearchIteratorVerifier { - DocumentWeightOrFilterSearchTest &_test; -public: - Verifier(DocumentWeightOrFilterSearchTest &test, int num_trees) - : _test(test) - { - std::vector> trees(num_trees); - uint32_t tree_id = 0; - for (const auto doc_id : getExpectedDocIds()) { - trees[tree_id++ % trees.size()].emplace_back(doc_id); - } - tree_id = 0; - for (const auto &tree : trees) { - _test.add_tree(tree_id++, tree); - } - _test.inc_generation(); - } - ~Verifier() override { - for (uint32_t tree_id = 0; tree_id < _test.num_trees(); ++tree_id) { - _test.clear_tree(tree_id); - } - _test.inc_generation(); - } - std::unique_ptr create(bool) const override { - return _test.make_iterator(); - } - -}; - -TEST_F(DocumentWeightOrFilterSearchTest, iterator_conformance) -{ - { - Verifier verifier(*this, 1); - verifier.verify(); - } - { - Verifier verifier(*this, 2); - verifier.verify(); - } - { - Verifier verifier(*this, 3); - verifier.verify(); - } -} - -} - -GTEST_MAIN_RUN_ALL_TESTS() diff --git a/searchlib/src/tests/attribute/multi_term_or_filter_search/CMakeLists.txt b/searchlib/src/tests/attribute/multi_term_or_filter_search/CMakeLists.txt new file mode 100644 index 00000000000..4ec5d849ad3 --- /dev/null +++ b/searchlib/src/tests/attribute/multi_term_or_filter_search/CMakeLists.txt @@ -0,0 +1,10 @@ +# Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +vespa_add_executable(searchlib_multi_term_or_filter_search_test_app TEST + SOURCES + multi_term_or_filter_search_test.cpp + DEPENDS + searchlib + searchlib_test + GTest::GTest +) +vespa_add_test(NAME searchlib_multi_term_or_filter_search_test_app COMMAND searchlib_multi_term_or_filter_search_test_app) diff --git a/searchlib/src/tests/attribute/multi_term_or_filter_search/multi_term_or_filter_search_test.cpp b/searchlib/src/tests/attribute/multi_term_or_filter_search/multi_term_or_filter_search_test.cpp new file mode 100644 index 00000000000..dea2702ef0d --- /dev/null +++ b/searchlib/src/tests/attribute/multi_term_or_filter_search/multi_term_or_filter_search_test.cpp @@ -0,0 +1,260 @@ +// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include +#include +#include +#include +#include +#define ENABLE_GTEST_MIGRATION +#include + +using PostingList = search::attribute::PostingListTraits::PostingStoreBase; +using Iterator = search::attribute::PostingListTraits::const_iterator; +using KeyData = PostingList::KeyDataType; +using search::BitVector; +using search::attribute::MultiTermOrFilterSearch; +using search::queryeval::SearchIterator; +using vespalib::datastore::EntryRef; + +class MultiTermOrFilterSearchTest : public ::testing::Test { + PostingList _postings; + vespalib::GenerationHandler _gens; + std::vector _trees; + uint32_t _range_start; + uint32_t _range_end; +public: + MultiTermOrFilterSearchTest(); + ~MultiTermOrFilterSearchTest() override; + void inc_generation(); + size_t num_trees() const { return _trees.size(); } + Iterator get_tree(size_t idx) const { + if (idx < _trees.size()) { + return _postings.beginFrozen(_trees[idx]); + } else { + return {}; + } + } + void ensure_tree(size_t idx) { + if (idx <= _trees.size()) { + _trees.resize(idx + 1); + } + } + void add_tree(size_t idx, const std::vector& keys) { + ensure_tree(idx); + std::vector adds; + std::vector removes; + adds.reserve(keys.size()); + for (auto& key : keys) { + adds.emplace_back(key, 1); + } + _postings.apply(_trees[idx], adds.data(), adds.data() + adds.size(), removes.data(), removes.data() + removes.size()); + } + + void clear_tree(size_t idx) { + if (idx < _trees.size()) { + _postings.clear(_trees[idx]); + _trees[idx] = EntryRef(); + } + } + + std::unique_ptr make_iterator() const { + std::vector iterators; + for (size_t i = 0; i < num_trees(); ++i) { + iterators.emplace_back(get_tree(i)); + } + auto result = MultiTermOrFilterSearch::create(std::move(iterators)); + result->initRange(_range_start, _range_end); + return result; + }; + + std::vector eval_daat(SearchIterator &iterator) const { + std::vector result; + uint32_t doc_id = _range_start; + while (doc_id < _range_end) { + if (iterator.seek(doc_id)) { + result.emplace_back(doc_id); + ++doc_id; + } else { + doc_id = std::max(doc_id + 1, iterator.getDocId()); + } + } + return result; + } + + std::vector frombv(const BitVector &bv) const { + std::vector result; + uint32_t doc_id = _range_start; + doc_id = bv.getNextTrueBit(doc_id); + while (doc_id < _range_end) { + result.emplace_back(doc_id); + ++doc_id; + doc_id = bv.getNextTrueBit(doc_id); + } + return result; + } + + std::unique_ptr tobv(const std::vector & values) const { + auto bv = BitVector::create(_range_start, _range_end); + for (auto value : values) { + bv->setBit(value); + } + bv->invalidateCachedCount(); + return bv; + } + + static void expect_result(const std::vector & exp, const std::vector & act) + { + EXPECT_EQ(exp, act); + } + + void make_sample_data() { + add_tree(0, { 10, 11 }); + add_tree(1, { 14, 17, 20 }); + add_tree(2, { 3 }); + add_tree(3, { 17 }); + } + + uint32_t get_range_start() const { return _range_start; } + void set_range(uint32_t start, uint32_t end) { + _range_start = start; + _range_end = end; + } +}; + +MultiTermOrFilterSearchTest::MultiTermOrFilterSearchTest() + : _postings(true), + _gens(), + _range_start(1), + _range_end(10000) +{ +} + +MultiTermOrFilterSearchTest::~MultiTermOrFilterSearchTest() +{ + for (auto& tree : _trees) { + _postings.clear(tree); + } + _postings.clearBuilder(); + _postings.reclaim_all_memory(); + inc_generation(); +} + +void +MultiTermOrFilterSearchTest::inc_generation() +{ + _postings.freeze(); + _postings.assign_generation(_gens.getCurrentGeneration()); + _gens.incGeneration(); + _postings.reclaim_memory(_gens.get_oldest_used_generation()); +} + +TEST_F(MultiTermOrFilterSearchTest, daat_or) +{ + make_sample_data(); + expect_result(eval_daat(*make_iterator()), { 3, 10, 11, 14, 17, 20 }); +} + +TEST_F(MultiTermOrFilterSearchTest, taat_get_hits) +{ + make_sample_data(); + expect_result(frombv(*make_iterator()->get_hits(get_range_start())), { 3, 10, 11, 14, 17, 20 }); +} + +TEST_F(MultiTermOrFilterSearchTest, taat_or_hits_into) +{ + make_sample_data(); + auto bv = tobv({13, 14}); + make_iterator()->or_hits_into(*bv, get_range_start()); + expect_result(frombv(*bv), { 3, 10, 11, 13, 14, 17, 20 }); +} + +TEST_F(MultiTermOrFilterSearchTest, taat_and_hits_into) +{ + make_sample_data(); + auto bv = tobv({13, 14}); + make_iterator()->and_hits_into(*bv, get_range_start()); + expect_result(frombv(*bv), { 14 }); +} + +TEST_F(MultiTermOrFilterSearchTest, daat_or_ranged) +{ + make_sample_data(); + set_range(4, 15); + expect_result(eval_daat(*make_iterator()), {10, 11, 14 }); +} + +TEST_F(MultiTermOrFilterSearchTest, taat_get_hits_ranged) +{ + make_sample_data(); + set_range(4, 15); + expect_result(frombv(*make_iterator()->get_hits(get_range_start())), { 10, 11, 14 }); +} + +TEST_F(MultiTermOrFilterSearchTest, taat_or_hits_into_ranged) +{ + make_sample_data(); + set_range(4, 15); + auto bv = tobv({13, 14}); + make_iterator()->or_hits_into(*bv, get_range_start()); + expect_result(frombv(*bv), { 10, 11, 13, 14 }); +} + +TEST_F(MultiTermOrFilterSearchTest, taat_and_hits_into_ranged) +{ + make_sample_data(); + set_range(4, 15); + auto bv = tobv({13, 14}); + make_iterator()->and_hits_into(*bv, get_range_start()); + expect_result(frombv(*bv), { 14 }); +} + +namespace { + +class Verifier : public search::test::SearchIteratorVerifier { + MultiTermOrFilterSearchTest &_test; +public: + Verifier(MultiTermOrFilterSearchTest &test, int num_trees) + : _test(test) + { + std::vector> trees(num_trees); + uint32_t tree_id = 0; + for (const auto doc_id : getExpectedDocIds()) { + trees[tree_id++ % trees.size()].emplace_back(doc_id); + } + tree_id = 0; + for (const auto &tree : trees) { + _test.add_tree(tree_id++, tree); + } + _test.inc_generation(); + } + ~Verifier() override { + for (uint32_t tree_id = 0; tree_id < _test.num_trees(); ++tree_id) { + _test.clear_tree(tree_id); + } + _test.inc_generation(); + } + std::unique_ptr create(bool) const override { + return _test.make_iterator(); + } + +}; + +TEST_F(MultiTermOrFilterSearchTest, iterator_conformance) +{ + { + Verifier verifier(*this, 1); + verifier.verify(); + } + { + Verifier verifier(*this, 2); + verifier.verify(); + } + { + Verifier verifier(*this, 3); + verifier.verify(); + } +} + +} + +GTEST_MAIN_RUN_ALL_TESTS() diff --git a/searchlib/src/vespa/searchlib/attribute/CMakeLists.txt b/searchlib/src/vespa/searchlib/attribute/CMakeLists.txt index f80e8dbe7be..96bd07bc5a3 100644 --- a/searchlib/src/vespa/searchlib/attribute/CMakeLists.txt +++ b/searchlib/src/vespa/searchlib/attribute/CMakeLists.txt @@ -43,7 +43,6 @@ vespa_add_library(searchlib_attribute OBJECT direct_multi_term_blueprint.cpp distance_metric_utils.cpp diversity.cpp - document_weight_or_filter_search.cpp empty_search_context.cpp enum_store_compaction_spec.cpp enum_store_dictionary.cpp @@ -84,6 +83,7 @@ vespa_add_library(searchlib_attribute OBJECT multi_numeric_search_context.cpp multi_string_enum_hint_search_context.cpp multi_string_enum_search_context.cpp + multi_term_or_filter_search.cpp multi_value_mapping.cpp multi_value_mapping_base.cpp multienumattribute.cpp diff --git a/searchlib/src/vespa/searchlib/attribute/attribute_blueprint_factory.cpp b/searchlib/src/vespa/searchlib/attribute/attribute_blueprint_factory.cpp index 3a5f79ef665..b9adcf3b093 100644 --- a/searchlib/src/vespa/searchlib/attribute/attribute_blueprint_factory.cpp +++ b/searchlib/src/vespa/searchlib/attribute/attribute_blueprint_factory.cpp @@ -5,9 +5,8 @@ #include "attribute_object_visitor.h" #include "attribute_weighted_set_blueprint.h" #include "direct_multi_term_blueprint.h" -#include "document_weight_or_filter_search.h" #include "i_direct_posting_store.h" -#include "posting_iterator_pack.h" +#include "multi_term_or_filter_search.h" #include "predicate_attribute.h" #include #include @@ -468,7 +467,7 @@ DirectWandBlueprint::createFilterSearch(bool, FilterConstraint constraint) const for (const IDirectPostingStore::LookupResult &r : _terms) { _attr.create(r.posting_idx, iterators); } - return attribute::DocumentWeightOrFilterSearch::create(std::move(iterators)); + return attribute::MultiTermOrFilterSearch::create(std::move(iterators)); } else { return std::make_unique(); } diff --git a/searchlib/src/vespa/searchlib/attribute/direct_multi_term_blueprint.hpp b/searchlib/src/vespa/searchlib/attribute/direct_multi_term_blueprint.hpp index 5ca943a356d..bb6804f22f1 100644 --- a/searchlib/src/vespa/searchlib/attribute/direct_multi_term_blueprint.hpp +++ b/searchlib/src/vespa/searchlib/attribute/direct_multi_term_blueprint.hpp @@ -3,7 +3,7 @@ #pragma once #include "direct_multi_term_blueprint.h" -#include "document_weight_or_filter_search.h" +#include "multi_term_or_filter_search.h" #include #include #include @@ -101,7 +101,7 @@ DirectMultiTermBlueprint::create_search_helper(con bool use_bit_vector_when_available = is_filter_search || !_attr.has_always_btree_iterator(); auto weights = create_iterators(btree_iterators, bitvectors, use_bit_vector_when_available, *tfmda[0], strict); if (is_filter_search) { - auto filter = !btree_iterators.empty() ? attribute::DocumentWeightOrFilterSearch::create(std::move(btree_iterators)) : std::unique_ptr(); + auto filter = !btree_iterators.empty() ? attribute::MultiTermOrFilterSearch::create(std::move(btree_iterators)) : std::unique_ptr(); return combine_iterators(std::move(filter), std::move(bitvectors), strict); } bool field_is_filter = getState().fields()[0].isFilter(); diff --git a/searchlib/src/vespa/searchlib/attribute/document_weight_or_filter_search.cpp b/searchlib/src/vespa/searchlib/attribute/document_weight_or_filter_search.cpp deleted file mode 100644 index b910e64b665..00000000000 --- a/searchlib/src/vespa/searchlib/attribute/document_weight_or_filter_search.cpp +++ /dev/null @@ -1,130 +0,0 @@ -// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. - -#include "document_weight_or_filter_search.h" -#include "posting_iterator_pack.h" -#include -#include -#include -#include - -using search::queryeval::SearchIteratorPack; - -namespace search::attribute { - -template -class DocumentWeightOrFilterSearchImpl : public DocumentWeightOrFilterSearch -{ - IteratorPack _children; - void seek_all(uint32_t docId); -public: - explicit DocumentWeightOrFilterSearchImpl(IteratorPack&& children); - ~DocumentWeightOrFilterSearchImpl() override; - - void doSeek(uint32_t docId) override; - - void doUnpack(uint32_t) override { } - - void initRange(uint32_t begin, uint32_t end) override { - SearchIterator::initRange(begin, end); - _children.initRange(begin, end); - } - - void or_hits_into(BitVector &result, uint32_t begin_id) override { - return _children.or_hits_into(result, begin_id); - } - - void and_hits_into(BitVector &result, uint32_t begin_id) override { - return result.andWith(*get_hits(begin_id)); - } - - std::unique_ptr get_hits(uint32_t begin_id) override { - seek_all(getDocId()); - return _children.get_hits(begin_id, getEndId()); - } - - Trinary is_strict() const override { return Trinary::True; } -}; - -template -DocumentWeightOrFilterSearchImpl::DocumentWeightOrFilterSearchImpl(IteratorPack&& children) - : DocumentWeightOrFilterSearch(), - _children(std::move(children)) -{ -} - -template -DocumentWeightOrFilterSearchImpl::~DocumentWeightOrFilterSearchImpl() = default; - -template -void -DocumentWeightOrFilterSearchImpl::seek_all(uint32_t docId) { - for (uint16_t i = 0; i < _children.size(); ++i) { - uint32_t next = _children.get_docid(i); - if (next < docId) { - _children.seek(i, docId); - } - } -} - -template -void -DocumentWeightOrFilterSearchImpl::doSeek(uint32_t docId) -{ - uint32_t min_doc_id = endDocId; - for (uint16_t i = 0; i < _children.size(); ++i) { - uint32_t next = _children.get_docid(i); - if (next < docId) { - next = _children.seek(i, docId); - } - if (next == docId) { - setDocId(next); - return; - } - min_doc_id = std::min(min_doc_id, next); - } - setDocId(min_doc_id); -} - -namespace { - -template -std::unique_ptr -create_helper(std::vector&& children) -{ - if (children.empty()) { - return std::make_unique(); - } else { - std::sort(children.begin(), children.end(), - [](const auto & a, const auto & b) { return a.size() > b.size(); }); - using OrFilter = DocumentWeightOrFilterSearchImpl; - return std::make_unique(IteratorPackType(std::move(children))); - } -} - -} - -std::unique_ptr -DocumentWeightOrFilterSearch::create(std::vector&& children) -{ - return create_helper(std::move(children)); -} - -std::unique_ptr -DocumentWeightOrFilterSearch::create(std::vector&& children) -{ - return create_helper(std::move(children)); -} - -std::unique_ptr -DocumentWeightOrFilterSearch::create(const std::vector& children, - std::unique_ptr md) -{ - if (children.empty()) { - return std::make_unique(); - } else { - using OrFilter = DocumentWeightOrFilterSearchImpl; - return std::make_unique(SearchIteratorPack(children, std::move(md))); - } -} - -} diff --git a/searchlib/src/vespa/searchlib/attribute/document_weight_or_filter_search.h b/searchlib/src/vespa/searchlib/attribute/document_weight_or_filter_search.h deleted file mode 100644 index 5ed0dd16d83..00000000000 --- a/searchlib/src/vespa/searchlib/attribute/document_weight_or_filter_search.h +++ /dev/null @@ -1,24 +0,0 @@ -// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. - -#include "i_direct_posting_store.h" -#include - -namespace search::fef { class MatchData; } -namespace search::attribute { - -/** - * Filter iterator on top of document weight iterators with OR semantics used during - * calculation of global filter for weighted set terms, wand terms and dot product terms. - */ -class DocumentWeightOrFilterSearch : public queryeval::SearchIterator -{ -protected: - DocumentWeightOrFilterSearch() = default; -public: - static std::unique_ptr create(std::vector&& children); - static std::unique_ptr create(std::vector&& children); - static std::unique_ptr create(const std::vector& children, - std::unique_ptr md); -}; - -} diff --git a/searchlib/src/vespa/searchlib/attribute/multi_term_or_filter_search.cpp b/searchlib/src/vespa/searchlib/attribute/multi_term_or_filter_search.cpp new file mode 100644 index 00000000000..19668522e17 --- /dev/null +++ b/searchlib/src/vespa/searchlib/attribute/multi_term_or_filter_search.cpp @@ -0,0 +1,130 @@ +// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include "multi_term_or_filter_search.h" +#include "posting_iterator_pack.h" +#include +#include +#include +#include + +using search::queryeval::SearchIteratorPack; + +namespace search::attribute { + +template +class MultiTermOrFilterSearchImpl : public MultiTermOrFilterSearch +{ + IteratorPack _children; + void seek_all(uint32_t docId); +public: + explicit MultiTermOrFilterSearchImpl(IteratorPack&& children); + ~MultiTermOrFilterSearchImpl() override; + + void doSeek(uint32_t docId) override; + + void doUnpack(uint32_t) override { } + + void initRange(uint32_t begin, uint32_t end) override { + SearchIterator::initRange(begin, end); + _children.initRange(begin, end); + } + + void or_hits_into(BitVector &result, uint32_t begin_id) override { + return _children.or_hits_into(result, begin_id); + } + + void and_hits_into(BitVector &result, uint32_t begin_id) override { + return result.andWith(*get_hits(begin_id)); + } + + std::unique_ptr get_hits(uint32_t begin_id) override { + seek_all(getDocId()); + return _children.get_hits(begin_id, getEndId()); + } + + Trinary is_strict() const override { return Trinary::True; } +}; + +template +MultiTermOrFilterSearchImpl::MultiTermOrFilterSearchImpl(IteratorPack&& children) + : MultiTermOrFilterSearch(), + _children(std::move(children)) +{ +} + +template +MultiTermOrFilterSearchImpl::~MultiTermOrFilterSearchImpl() = default; + +template +void +MultiTermOrFilterSearchImpl::seek_all(uint32_t docId) { + for (uint16_t i = 0; i < _children.size(); ++i) { + uint32_t next = _children.get_docid(i); + if (next < docId) { + _children.seek(i, docId); + } + } +} + +template +void +MultiTermOrFilterSearchImpl::doSeek(uint32_t docId) +{ + uint32_t min_doc_id = endDocId; + for (uint16_t i = 0; i < _children.size(); ++i) { + uint32_t next = _children.get_docid(i); + if (next < docId) { + next = _children.seek(i, docId); + } + if (next == docId) { + setDocId(next); + return; + } + min_doc_id = std::min(min_doc_id, next); + } + setDocId(min_doc_id); +} + +namespace { + +template +std::unique_ptr +create_helper(std::vector&& children) +{ + if (children.empty()) { + return std::make_unique(); + } else { + std::sort(children.begin(), children.end(), + [](const auto & a, const auto & b) { return a.size() > b.size(); }); + using OrFilter = MultiTermOrFilterSearchImpl; + return std::make_unique(IteratorPackType(std::move(children))); + } +} + +} + +std::unique_ptr +MultiTermOrFilterSearch::create(std::vector&& children) +{ + return create_helper(std::move(children)); +} + +std::unique_ptr +MultiTermOrFilterSearch::create(std::vector&& children) +{ + return create_helper(std::move(children)); +} + +std::unique_ptr +MultiTermOrFilterSearch::create(const std::vector& children, + std::unique_ptr md) +{ + if (children.empty()) { + return std::make_unique(); + } else { + using OrFilter = MultiTermOrFilterSearchImpl; + return std::make_unique(SearchIteratorPack(children, std::move(md))); + } +} + +} diff --git a/searchlib/src/vespa/searchlib/attribute/multi_term_or_filter_search.h b/searchlib/src/vespa/searchlib/attribute/multi_term_or_filter_search.h new file mode 100644 index 00000000000..42eb33d2eed --- /dev/null +++ b/searchlib/src/vespa/searchlib/attribute/multi_term_or_filter_search.h @@ -0,0 +1,26 @@ +// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include "i_direct_posting_store.h" +#include + +namespace search::fef { class MatchData; } +namespace search::attribute { + +/** + * Filter iterator on top of low-level posting list iterators or regular search iterators with OR semantics. + * + * Used during calculation of global filter for InTerm, WeightedSetTerm, DotProduct and WandTerm, + * or when ranking is not needed for InTerm and WeightedSetTerm. + */ +class MultiTermOrFilterSearch : public queryeval::SearchIterator +{ +protected: + MultiTermOrFilterSearch() = default; +public: + static std::unique_ptr create(std::vector&& children); + static std::unique_ptr create(std::vector&& children); + static std::unique_ptr create(const std::vector& children, + std::unique_ptr md); +}; + +} diff --git a/searchlib/src/vespa/searchlib/queryeval/weighted_set_term_search.cpp b/searchlib/src/vespa/searchlib/queryeval/weighted_set_term_search.cpp index 1cecbca7660..0929f80a8f0 100644 --- a/searchlib/src/vespa/searchlib/queryeval/weighted_set_term_search.cpp +++ b/searchlib/src/vespa/searchlib/queryeval/weighted_set_term_search.cpp @@ -2,7 +2,7 @@ #include "weighted_set_term_search.h" #include -#include +#include #include #include @@ -175,7 +175,7 @@ WeightedSetTermSearch::create(const std::vector &children, using HeapImpl = WeightedSetTermSearchImpl; if (tmd.isNotNeeded()) { - return attribute::DocumentWeightOrFilterSearch::create(children, std::move(match_data)); + return attribute::MultiTermOrFilterSearch::create(children, std::move(match_data)); } if (children.size() < 128) { -- cgit v1.2.3