diff options
author | Henning Baldersheim <balder@yahoo-inc.com> | 2024-01-15 10:23:18 +0100 |
---|---|---|
committer | Henning Baldersheim <balder@yahoo-inc.com> | 2024-01-15 09:56:25 +0000 |
commit | d4918f2ce956e316b6ce398f144c53f52e3288da (patch) | |
tree | 6289a1ef92d82b9cc274c1fdd7decbec29bf1cd6 /searchlib/src/tests/attribute | |
parent | 48b1bae2a6cdf58a237aa7be59632a06aba86861 (diff) | |
parent | 252fbeed13b8622fbc813620dc3b4e45abc6bbe2 (diff) |
Merge branch 'master' into balder/sliced-parallell-or
Diffstat (limited to 'searchlib/src/tests/attribute')
15 files changed, 527 insertions, 358 deletions
diff --git a/searchlib/src/tests/attribute/bitvector/bitvector_test.cpp b/searchlib/src/tests/attribute/bitvector/bitvector_test.cpp index 181c0fdf110..f612bdda87f 100644 --- a/searchlib/src/tests/attribute/bitvector/bitvector_test.cpp +++ b/searchlib/src/tests/attribute/bitvector/bitvector_test.cpp @@ -7,7 +7,7 @@ #include <vespa/searchlib/common/bitvectoriterator.h> #include <vespa/searchlib/fef/termfieldmatchdata.h> #include <vespa/searchlib/parsequery/parse.h> -#include <vespa/searchlib/queryeval/document_weight_search_iterator.h> +#include <vespa/searchlib/queryeval/docid_with_weight_search_iterator.h> #include <vespa/searchlib/queryeval/executeinfo.h> #include <vespa/searchlib/test/searchiteratorverifier.h> #include <vespa/searchlib/util/randomgenerator.h> @@ -432,7 +432,7 @@ BitVectorTest::test(BasicType bt, CollectionType ct, const vespalib::string &pre const auto* dww = v->as_docid_with_weight_posting_store(); if (dww != nullptr) { auto lres = dww->lookup(getSearchStr<VectorType>(), dww->get_dictionary_snapshot()); - using DWSI = search::queryeval::DocumentWeightSearchIterator; + using DWSI = search::queryeval::DocidWithWeightSearchIterator; TermFieldMatchData md; auto dwsi = std::make_unique<DWSI>(md, *dww, lres); if (!filter) { diff --git a/searchlib/src/tests/attribute/direct_multi_term_blueprint/direct_multi_term_blueprint_test.cpp b/searchlib/src/tests/attribute/direct_multi_term_blueprint/direct_multi_term_blueprint_test.cpp index f2341d0968e..899ddaa3cc0 100644 --- a/searchlib/src/tests/attribute/direct_multi_term_blueprint/direct_multi_term_blueprint_test.cpp +++ b/searchlib/src/tests/attribute/direct_multi_term_blueprint/direct_multi_term_blueprint_test.cpp @@ -3,7 +3,9 @@ #include <vespa/searchlib/attribute/direct_multi_term_blueprint.h> #include <vespa/searchlib/attribute/i_docid_posting_store.h> #include <vespa/searchlib/attribute/i_docid_with_weight_posting_store.h> +#include <vespa/searchlib/attribute/in_term_search.h> #include <vespa/searchlib/attribute/integerbase.h> +#include <vespa/searchlib/attribute/stringbase.h> #include <vespa/searchlib/fef/termfieldmatchdata.h> #include <vespa/searchlib/queryeval/orsearch.h> #include <vespa/searchlib/queryeval/searchiterator.h> @@ -19,13 +21,22 @@ using namespace search::queryeval; using namespace search; using testing::StartsWith; -struct IntegerKey : public IDirectPostingStore::LookupKey { +using LookupKey = IDirectPostingStore::LookupKey; + +struct IntegerKey : public LookupKey { int64_t _value; IntegerKey(int64_t value_in) : _value(value_in) {} vespalib::stringref asString() const override { abort(); } bool asInteger(int64_t& value) const override { value = _value; return true; } }; +struct StringKey : public LookupKey { + vespalib::string _value; + StringKey(int64_t value_in) : _value(std::to_string(value_in)) {} + vespalib::stringref asString() const override { return _value; } + bool asInteger(int64_t&) const override { abort(); } +}; + const vespalib::string field_name = "test"; constexpr uint32_t field_id = 3; uint32_t doc_id_limit = 500; @@ -50,112 +61,153 @@ concat(const Docids& a, const Docids& b) return res; } +template <typename AttributeType, typename DataType> +void +populate_attribute(AttributeType& attr, const std::vector<DataType>& values) +{ + // Values 0 and 1 have btree (short) posting lists. + attr.update(10, values[0]); + attr.update(30, values[1]); + attr.update(31, values[1]); + + // Values 2 and 3 have bitvector posting lists. + // We need at least 128 documents to get bitvector posting list (see PostingStoreBase2::resizeBitVectors()) + for (auto docid : range(100, 128)) { + attr.update(docid, values[2]); + } + for (auto docid : range(300, 128)) { + attr.update(docid, values[3]); + } + attr.commit(true); +} + std::shared_ptr<AttributeVector> -make_attribute(bool field_is_filter, CollectionType col_type) +make_attribute(CollectionType col_type, BasicType type, bool field_is_filter) { - Config cfg(BasicType::INT64, col_type); + Config cfg(type, col_type); cfg.setFastSearch(true); if (field_is_filter) { cfg.setIsFilter(field_is_filter); } uint32_t num_docs = doc_id_limit - 1; auto attr = test::AttributeBuilder(field_name, cfg).docs(num_docs).get(); - IntegerAttribute& real = dynamic_cast<IntegerAttribute&>(*attr); - - // Values 1 and 3 have btree (short) posting lists with weights. - real.update(10, 1); - real.update(30, 3); - real.update(31, 3); - - // Values 100 and 300 have bitvector posting lists. - // We need at least 128 documents to get bitvector posting list (see PostingStoreBase2::resizeBitVectors()) - for (auto docid : range(100, 128)) { - real.update(docid, 100); - } - for (auto docid : range(300, 128)) { - real.update(docid, 300); + if (type == BasicType::STRING) { + populate_attribute<StringAttribute, vespalib::string>(dynamic_cast<StringAttribute&>(*attr), + {"1", "3", "100", "300"}); + } else { + populate_attribute<IntegerAttribute, int64_t>(dynamic_cast<IntegerAttribute&>(*attr), + {1, 3, 100, 300}); } - attr->commit(true); return attr; } void -expect_has_btree_iterator(const IDirectPostingStore& store, int64_t term_value) +expect_has_btree_iterator(const IDirectPostingStore& store, const LookupKey& key) { auto snapshot = store.get_dictionary_snapshot(); - auto res = store.lookup(IntegerKey(term_value), snapshot); + auto res = store.lookup(key, snapshot); EXPECT_TRUE(store.has_btree_iterator(res.posting_idx)); } void -expect_has_bitvector_iterator(const IDirectPostingStore& store, int64_t term_value) +expect_has_bitvector_iterator(const IDirectPostingStore& store, const LookupKey& key) { auto snapshot = store.get_dictionary_snapshot(); - auto res = store.lookup(IntegerKey(term_value), snapshot); + auto res = store.lookup(key, snapshot); EXPECT_TRUE(store.has_bitvector(res.posting_idx)); } +template <typename LookupKeyType> void validate_posting_lists(const IDirectPostingStore& store) { - expect_has_btree_iterator(store, 1); - expect_has_btree_iterator(store, 3); + expect_has_btree_iterator(store, LookupKeyType(1)); + expect_has_btree_iterator(store, LookupKeyType(3)); if (store.has_always_btree_iterator()) { - expect_has_btree_iterator(store, 100); - expect_has_btree_iterator(store, 300); + expect_has_btree_iterator(store, LookupKeyType(100)); + expect_has_btree_iterator(store, LookupKeyType(300)); } - expect_has_bitvector_iterator(store, 100); - expect_has_bitvector_iterator(store, 300); + expect_has_bitvector_iterator(store, LookupKeyType(100)); + expect_has_bitvector_iterator(store, LookupKeyType(300)); } +enum OperatorType { + In, + WSet +}; + struct TestParam { + OperatorType op_type; CollectionType col_type; - TestParam(CollectionType col_type_in) : col_type(col_type_in) {} + BasicType type; + TestParam(OperatorType op_type_in, CollectionType col_type_in, BasicType type_in) + : op_type(op_type_in), col_type(col_type_in), type(type_in) {} ~TestParam() = default; }; std::ostream& operator<<(std::ostream& os, const TestParam& param) { - os << param.col_type.asString(); + os << (param.op_type == OperatorType::In ? "in_" : "wset_") << param.col_type.asString() << "_" << param.type.asString(); return os; } +using SingleInBlueprintType = DirectMultiTermBlueprint<IDocidPostingStore, InTermSearch>; +using MultiInBlueprintType = DirectMultiTermBlueprint<IDocidWithWeightPostingStore, InTermSearch>; +using SingleWSetBlueprintType = DirectMultiTermBlueprint<IDocidPostingStore, WeightedSetTermSearch>; +using MultiWSetBlueprintType = DirectMultiTermBlueprint<IDocidWithWeightPostingStore, WeightedSetTermSearch>; + class DirectMultiTermBlueprintTest : public ::testing::TestWithParam<TestParam> { public: - using SingleValueBlueprintType = DirectMultiTermBlueprint<IDocidPostingStore, WeightedSetTermSearch>; - using MultiValueBlueprintType = DirectMultiTermBlueprint<IDocidWithWeightPostingStore, WeightedSetTermSearch>; std::shared_ptr<AttributeVector> attr; - std::shared_ptr<SingleValueBlueprintType> single_blueprint; - std::shared_ptr<MultiValueBlueprintType> multi_blueprint; - queryeval::ComplexLeafBlueprint* blueprint; + bool in_operator; + bool single_type; + bool integer_type; + std::shared_ptr<ComplexLeafBlueprint> blueprint; Blueprint::HitEstimate estimate; fef::TermFieldMatchData tfmd; fef::TermFieldMatchDataArray tfmda; DirectMultiTermBlueprintTest() : attr(), - single_blueprint(), - multi_blueprint(), + in_operator(true), + single_type(true), + integer_type(true), blueprint(), tfmd(), tfmda() { tfmda.add(&tfmd); } + ~DirectMultiTermBlueprintTest() {} void setup(bool field_is_filter, bool need_term_field_match_data) { - attr = make_attribute(field_is_filter, GetParam().col_type); + attr = make_attribute(GetParam().col_type, GetParam().type, field_is_filter); + in_operator = GetParam().op_type == OperatorType::In; + single_type = GetParam().col_type == CollectionType::SINGLE; + integer_type = GetParam().type != BasicType::STRING; FieldSpec spec(field_name, field_id, fef::TermFieldHandle(), field_is_filter); - if (GetParam().col_type == CollectionType::SINGLE) { - const auto* store = attr->as_docid_posting_store(); - ASSERT_TRUE(store); - validate_posting_lists(*store); - single_blueprint = std::make_shared<SingleValueBlueprintType>(spec, *attr, *store, 2); - blueprint = single_blueprint.get(); + const IDirectPostingStore* store; + if (single_type) { + auto real_store = attr->as_docid_posting_store(); + ASSERT_TRUE(real_store); + if (in_operator) { + blueprint = std::make_shared<SingleInBlueprintType>(spec, *attr, *real_store, 2); + } else { + blueprint = std::make_shared<SingleWSetBlueprintType>(spec, *attr, *real_store, 2); + } + store = real_store; + } else { + auto real_store = attr->as_docid_with_weight_posting_store(); + ASSERT_TRUE(real_store); + if (in_operator) { + blueprint = std::make_shared<MultiInBlueprintType>(spec, *attr, *real_store, 2); + } else { + blueprint = std::make_shared<MultiWSetBlueprintType>(spec, *attr, *real_store, 2); + } + store = real_store; + } + if (integer_type) { + validate_posting_lists<IntegerKey>(*store); } else { - const auto* store = attr->as_docid_with_weight_posting_store(); - ASSERT_TRUE(store); - validate_posting_lists(*store); - multi_blueprint = std::make_shared<MultiValueBlueprintType>(spec, *attr, *store, 2); - blueprint = multi_blueprint.get(); + validate_posting_lists<StringKey>(*store); } blueprint->setDocIdLimit(doc_id_limit); if (need_term_field_match_data) { @@ -164,16 +216,35 @@ public: tfmd.tagAsNotNeeded(); } } + template <typename BlueprintType> + void add_term_helper(BlueprintType& b, int64_t term_value) { + if (integer_type) { + b.addTerm(IntegerKey(term_value), 1, estimate); + } else { + b.addTerm(StringKey(term_value), 1, estimate); + } + } void add_term(int64_t term_value) { - if (single_blueprint) { - single_blueprint->addTerm(IntegerKey(term_value), 1, estimate); + if (single_type) { + if (in_operator) { + add_term_helper(dynamic_cast<SingleInBlueprintType&>(*blueprint), term_value); + } else { + add_term_helper(dynamic_cast<SingleWSetBlueprintType&>(*blueprint), term_value); + } } else { - multi_blueprint->addTerm(IntegerKey(term_value), 1, estimate); + if (in_operator) { + add_term_helper(dynamic_cast<MultiInBlueprintType&>(*blueprint), term_value); + } else { + add_term_helper(dynamic_cast<MultiWSetBlueprintType&>(*blueprint), term_value); + } } } std::unique_ptr<SearchIterator> create_leaf_search() const { return blueprint->createLeafSearch(tfmda, true); } + vespalib::string multi_term_iterator() const { + return in_operator ? "search::attribute::MultiTermOrFilterSearchImpl" : "search::queryeval::WeightedSetTermSearchImpl"; + } }; void @@ -201,30 +272,54 @@ expect_or_child(SearchIterator& itr, size_t child, const vespalib::string& exp_c INSTANTIATE_TEST_SUITE_P(DefaultInstantiation, DirectMultiTermBlueprintTest, - testing::Values(CollectionType::SINGLE, CollectionType::WSET), + testing::Values(TestParam(OperatorType::In, CollectionType::SINGLE, BasicType::INT64), + TestParam(OperatorType::In, CollectionType::SINGLE, BasicType::STRING), + TestParam(OperatorType::In, CollectionType::WSET, BasicType::INT64), + TestParam(OperatorType::In, CollectionType::WSET, BasicType::STRING), + TestParam(OperatorType::WSet, CollectionType::SINGLE, BasicType::INT64), + TestParam(OperatorType::WSet, CollectionType::SINGLE, BasicType::STRING), + TestParam(OperatorType::WSet, CollectionType::WSET, BasicType::INT64), + TestParam(OperatorType::WSet, CollectionType::WSET, BasicType::STRING)), testing::PrintToStringParamName()); -TEST_P(DirectMultiTermBlueprintTest, weight_iterators_used_for_none_filter_field) -{ +TEST_P(DirectMultiTermBlueprintTest, btree_iterators_used_for_none_filter_field) { setup(false, true); add_term(1); add_term(3); auto itr = create_leaf_search(); - EXPECT_THAT(itr->asString(), StartsWith("search::queryeval::WeightedSetTermSearchImpl")); + EXPECT_THAT(itr->asString(), StartsWith(multi_term_iterator())); expect_hits({10, 30, 31}, *itr); } -TEST_P(DirectMultiTermBlueprintTest, weight_iterators_used_instead_of_bitvectors_for_none_filter_field) +TEST_P(DirectMultiTermBlueprintTest, bitvectors_used_instead_of_btree_iterators_for_none_filter_field) +{ + setup(false, true); + if (!in_operator) { + return; + } + add_term(1); + add_term(100); + auto itr = create_leaf_search(); + expect_or_iterator(*itr, 2); + expect_or_child(*itr, 0, "search::BitVectorIteratorStrictT"); + expect_or_child(*itr, 1, multi_term_iterator()); + expect_hits(concat({10}, range(100, 128)), *itr); +} + +TEST_P(DirectMultiTermBlueprintTest, btree_iterators_used_instead_of_bitvectors_for_none_filter_field) { setup(false, true); + if (in_operator) { + return; + } add_term(1); add_term(100); auto itr = create_leaf_search(); - EXPECT_THAT(itr->asString(), StartsWith("search::queryeval::WeightedSetTermSearchImpl")); + EXPECT_THAT(itr->asString(), StartsWith(multi_term_iterator())); expect_hits(concat({10}, range(100, 128)), *itr); } -TEST_P(DirectMultiTermBlueprintTest, bitvectors_and_weight_iterators_used_for_filter_field) +TEST_P(DirectMultiTermBlueprintTest, bitvectors_and_btree_iterators_used_for_filter_field) { setup(true, true); add_term(1); @@ -235,7 +330,7 @@ TEST_P(DirectMultiTermBlueprintTest, bitvectors_and_weight_iterators_used_for_fi expect_or_iterator(*itr, 3); expect_or_child(*itr, 0, "search::BitVectorIteratorStrictT"); expect_or_child(*itr, 1, "search::BitVectorIteratorStrictT"); - expect_or_child(*itr, 2, "search::queryeval::WeightedSetTermSearchImpl"); + expect_or_child(*itr, 2, multi_term_iterator()); expect_hits(concat({10, 30, 31}, concat(range(100, 128), range(300, 128))), *itr); } @@ -251,17 +346,17 @@ TEST_P(DirectMultiTermBlueprintTest, only_bitvectors_used_for_filter_field) expect_hits(concat(range(100, 128), range(300, 128)), *itr); } -TEST_P(DirectMultiTermBlueprintTest, filter_iterator_used_for_filter_field_and_ranking_not_needed) +TEST_P(DirectMultiTermBlueprintTest, or_filter_iterator_used_for_filter_field_when_ranking_not_needed) { setup(true, false); add_term(1); add_term(3); auto itr = create_leaf_search(); - EXPECT_THAT(itr->asString(), StartsWith("search::attribute::DocumentWeightOrFilterSearchImpl")); + EXPECT_THAT(itr->asString(), StartsWith("search::attribute::MultiTermOrFilterSearchImpl")); expect_hits({10, 30, 31}, *itr); } -TEST_P(DirectMultiTermBlueprintTest, bitvectors_and_filter_iterator_used_for_filter_field_and_ranking_not_needed) +TEST_P(DirectMultiTermBlueprintTest, bitvectors_and_or_filter_iterator_used_for_filter_field_when_ranking_not_needed) { setup(true, false); add_term(1); @@ -272,11 +367,11 @@ TEST_P(DirectMultiTermBlueprintTest, bitvectors_and_filter_iterator_used_for_fil expect_or_iterator(*itr, 3); expect_or_child(*itr, 0, "search::BitVectorIteratorStrictT"); expect_or_child(*itr, 1, "search::BitVectorIteratorStrictT"); - expect_or_child(*itr, 2, "search::attribute::DocumentWeightOrFilterSearchImpl"); + expect_or_child(*itr, 2, "search::attribute::MultiTermOrFilterSearchImpl"); expect_hits(concat({10, 30, 31}, concat(range(100, 128), range(300, 128))), *itr); } -TEST_P(DirectMultiTermBlueprintTest, only_bitvectors_used_for_filter_field_and_ranking_not_needed) +TEST_P(DirectMultiTermBlueprintTest, only_bitvectors_used_for_filter_field_when_ranking_not_needed) { setup(true, false); add_term(100); diff --git a/searchlib/src/tests/attribute/direct_posting_store/.gitignore b/searchlib/src/tests/attribute/direct_posting_store/.gitignore new file mode 100644 index 00000000000..5516bc721c7 --- /dev/null +++ b/searchlib/src/tests/attribute/direct_posting_store/.gitignore @@ -0,0 +1 @@ +searchlib_direct_posting_store_test_app diff --git a/searchlib/src/tests/attribute/direct_posting_store/CMakeLists.txt b/searchlib/src/tests/attribute/direct_posting_store/CMakeLists.txt new file mode 100644 index 00000000000..3c8e76bc9b2 --- /dev/null +++ b/searchlib/src/tests/attribute/direct_posting_store/CMakeLists.txt @@ -0,0 +1,10 @@ +# Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +vespa_add_executable(searchlib_direct_posting_store_test_app TEST + SOURCES + direct_posting_store_test.cpp + DEPENDS + searchlib + searchlib_test + GTest::GTest +) +vespa_add_test(NAME searchlib_direct_posting_store_test_app COMMAND searchlib_direct_posting_store_test_app) diff --git a/searchlib/src/tests/attribute/direct_posting_store/direct_posting_store_test.cpp b/searchlib/src/tests/attribute/direct_posting_store/direct_posting_store_test.cpp new file mode 100644 index 00000000000..c1e12580559 --- /dev/null +++ b/searchlib/src/tests/attribute/direct_posting_store/direct_posting_store_test.cpp @@ -0,0 +1,297 @@ +// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include <vespa/searchcommon/attribute/config.h> +#include <vespa/searchlib/attribute/attribute.h> +#include <vespa/searchlib/attribute/attribute_read_guard.h> +#include <vespa/searchlib/attribute/attributefactory.h> +#include <vespa/searchlib/attribute/attributeguard.h> +#include <vespa/searchlib/attribute/attributememorysavetarget.h> +#include <vespa/searchlib/attribute/i_docid_posting_store.h> +#include <vespa/searchlib/attribute/i_docid_with_weight_posting_store.h> +#include <vespa/searchlib/index/dummyfileheadercontext.h> +#include <vespa/searchlib/queryeval/docid_with_weight_search_iterator.h> +#define ENABLE_GTEST_MIGRATION +#include <vespa/searchlib/test/searchiteratorverifier.h> +#include <vespa/searchlib/util/randomgenerator.h> +#include <vespa/vespalib/gtest/gtest.h> +#include <vespa/vespalib/test/insertion_operators.h> + +#include <vespa/log/log.h> +LOG_SETUP("direct_posting_store_test"); + +using namespace search; +using namespace search::attribute; + +AttributeVector::SP make_attribute(BasicType type, CollectionType collection, bool fast_search) { + Config cfg(type, collection); + cfg.setFastSearch(fast_search); + return AttributeFactory::createAttribute("my_attribute", cfg); +} + +void add_docs(AttributeVector::SP attr_ptr, size_t limit = 1000) { + AttributeVector::DocId docid; + for (size_t i = 0; i < limit; ++i) { + attr_ptr->addDoc(docid); + } + attr_ptr->commit(); + ASSERT_EQ((limit - 1), docid); +} + +template <typename ATTR, typename KEY> +void set_doc(ATTR *attr, uint32_t docid, KEY key, int32_t weight) { + attr->clearDoc(docid); + if (attr->getCollectionType() == CollectionType::SINGLE) { + attr->update(docid, key); + } else { + attr->append(docid, key, weight); + } + attr->commit(); +} + +void populate_long(AttributeVector::SP attr_ptr) { + IntegerAttribute *attr = static_cast<IntegerAttribute *>(attr_ptr.get()); + set_doc(attr, 1, int64_t(111), 20); + set_doc(attr, 5, int64_t(111), 5); + set_doc(attr, 7, int64_t(111), 10); +} + +void populate_string(AttributeVector::SP attr_ptr) { + StringAttribute *attr = static_cast<StringAttribute *>(attr_ptr.get()); + set_doc(attr, 1, "foo", 20); + set_doc(attr, 5, "foo", 5); + set_doc(attr, 7, "foo", 10); +} + +struct TestParam { + CollectionType col_type; + BasicType type; + const char* valid_term; + const char* invalid_term; + TestParam(CollectionType col_type_in, BasicType type_in, + const char* valid_term_in, const char* invalid_term_in) + : col_type(col_type_in), type(type_in), valid_term(valid_term_in), invalid_term(invalid_term_in) {} + ~TestParam() {} +}; + +std::ostream& operator<<(std::ostream& os, const TestParam& param) +{ + os << param.col_type.asString() << "_" << param.type.asString(); + return os; +} + +struct DirectPostingStoreTest : public ::testing::TestWithParam<TestParam> { + AttributeVector::SP attr; + bool has_weight; + const IDirectPostingStore* api; + + const IDirectPostingStore* extract_api() { + if (has_weight) { + return attr->as_docid_with_weight_posting_store(); + } else { + return attr->as_docid_posting_store(); + } + } + + DirectPostingStoreTest() + : attr(make_attribute(GetParam().type, GetParam().col_type, true)), + has_weight(GetParam().col_type != CollectionType::SINGLE), + api(extract_api()) + { + assert(api != nullptr); + add_docs(attr); + if (GetParam().type == BasicType::STRING) { + populate_string(attr); + } else { + populate_long(attr); + } + } + ~DirectPostingStoreTest() {} +}; + +void expect_docid_posting_store(BasicType type, CollectionType col_type, bool fast_search) { + EXPECT_TRUE(make_attribute(type, col_type, fast_search)->as_docid_posting_store() != nullptr); +} + +void expect_not_docid_posting_store(BasicType type, CollectionType col_type, bool fast_search) { + EXPECT_TRUE(make_attribute(type, col_type, fast_search)->as_docid_posting_store() == nullptr); +} + +void expect_docid_with_weight_posting_store(BasicType type, CollectionType col_type, bool fast_search) { + EXPECT_TRUE(make_attribute(type, col_type, fast_search)->as_docid_with_weight_posting_store() != nullptr); +} + +void expect_not_docid_with_weight_posting_store(BasicType type, CollectionType col_type, bool fast_search) { + EXPECT_TRUE(make_attribute(type, col_type, fast_search)->as_docid_with_weight_posting_store() == nullptr); +} + +TEST(DirectPostingStoreApiTest, attributes_support_IDocidPostingStore_interface) { + expect_docid_posting_store(BasicType::INT8, CollectionType::SINGLE, true); + expect_docid_posting_store(BasicType::INT16, CollectionType::SINGLE, true); + expect_docid_posting_store(BasicType::INT32, CollectionType::SINGLE, true); + expect_docid_posting_store(BasicType::INT64, CollectionType::SINGLE, true); + expect_docid_posting_store(BasicType::STRING, CollectionType::SINGLE, true); +} + +TEST(DirectPostingStoreApiTest, attributes_do_not_support_IDocidPostingStore_interface) { + expect_not_docid_posting_store(BasicType::BOOL, CollectionType::SINGLE, true); + expect_not_docid_posting_store(BasicType::FLOAT, CollectionType::SINGLE, true); + expect_not_docid_posting_store(BasicType::DOUBLE, CollectionType::SINGLE, true); + expect_not_docid_posting_store(BasicType::INT64, CollectionType::SINGLE, false); + expect_not_docid_posting_store(BasicType::STRING, CollectionType::SINGLE, false); +} + +TEST(DirectPostingStoreApiTest, attributes_support_IDocidWithWeightPostingStore_interface) { + expect_docid_with_weight_posting_store(BasicType::INT64, CollectionType::WSET, true); + expect_docid_with_weight_posting_store(BasicType::STRING, CollectionType::WSET, true); +} + +TEST(DirectPostingStoreApiTest, attributes_do_not_support_IDocidWithWeightPostingStore_interface) { + expect_not_docid_with_weight_posting_store(BasicType::INT64, CollectionType::SINGLE, false); + expect_not_docid_with_weight_posting_store(BasicType::INT64, CollectionType::ARRAY, false); + expect_not_docid_with_weight_posting_store(BasicType::INT64, CollectionType::WSET, false); + expect_not_docid_with_weight_posting_store(BasicType::INT64, CollectionType::SINGLE, true); + expect_not_docid_with_weight_posting_store(BasicType::INT64, CollectionType::ARRAY, true); + expect_not_docid_with_weight_posting_store(BasicType::STRING, CollectionType::SINGLE, false); + expect_not_docid_with_weight_posting_store(BasicType::STRING, CollectionType::ARRAY, false); + expect_not_docid_with_weight_posting_store(BasicType::STRING, CollectionType::WSET, false); + expect_not_docid_with_weight_posting_store(BasicType::STRING, CollectionType::SINGLE, true); + expect_not_docid_with_weight_posting_store(BasicType::STRING, CollectionType::ARRAY, true); + expect_not_docid_with_weight_posting_store(BasicType::INT32, CollectionType::WSET, true); + expect_not_docid_with_weight_posting_store(BasicType::DOUBLE, CollectionType::WSET, true); +} + +void verify_valid_lookup(IDirectPostingStore::LookupResult result, bool has_weight) { + EXPECT_TRUE(result.posting_idx.valid()); + EXPECT_EQ(3u, result.posting_size); + EXPECT_EQ(has_weight ? 5 : 1, result.min_weight); + EXPECT_EQ(has_weight ? 20 : 1, result.max_weight); +} + +void verify_invalid_lookup(IDirectPostingStore::LookupResult result) { + EXPECT_FALSE(result.posting_idx.valid()); + EXPECT_EQ(0u, result.posting_size); + EXPECT_EQ(0, result.min_weight); + EXPECT_EQ(0, result.max_weight); +} + +INSTANTIATE_TEST_SUITE_P(DefaultInstantiation, + DirectPostingStoreTest, + testing::Values(TestParam(CollectionType::SINGLE, BasicType::INT64, "111", "222"), + TestParam(CollectionType::WSET, BasicType::INT64, "111", "222"), + TestParam(CollectionType::SINGLE, BasicType::STRING, "foo", "bar"), + TestParam(CollectionType::WSET, BasicType::STRING, "foo", "bar")), + testing::PrintToStringParamName()); + +TEST_P(DirectPostingStoreTest, lookup_works_correctly) { + verify_valid_lookup(api->lookup(GetParam().valid_term, api->get_dictionary_snapshot()), has_weight); + verify_invalid_lookup(api->lookup(GetParam().invalid_term, api->get_dictionary_snapshot())); +} + +template <typename DirectPostingStoreType, bool has_weight> +void verify_posting(const IDirectPostingStore& api, const vespalib::string& term) { + auto result = api.lookup(term, api.get_dictionary_snapshot()); + ASSERT_TRUE(result.posting_idx.valid()); + std::vector<typename DirectPostingStoreType::IteratorType> itr_store; + auto& real = dynamic_cast<const DirectPostingStoreType&>(api); + real.create(result.posting_idx, itr_store); + ASSERT_EQ(1u, itr_store.size()); + { + auto& itr = itr_store[0]; + if (itr.valid() && itr.getKey() < 1) { + itr.linearSeek(1); + } + ASSERT_TRUE(itr.valid()); + EXPECT_EQ(1u, itr.getKey()); // docid + if constexpr (has_weight) { + EXPECT_EQ(20, itr.getData()); // weight + } + itr.linearSeek(2); + ASSERT_TRUE(itr.valid()); + EXPECT_EQ(5u, itr.getKey()); // docid + if constexpr (has_weight) { + EXPECT_EQ(5, itr.getData()); // weight + } + itr.linearSeek(6); + ASSERT_TRUE(itr.valid()); + EXPECT_EQ(7u, itr.getKey()); // docid + if constexpr (has_weight) { + EXPECT_EQ(10, itr.getData()); // weight + } + itr.linearSeek(8); + EXPECT_FALSE(itr.valid()); + } +} + +TEST_P(DirectPostingStoreTest, iterators_are_created_correctly) { + if (has_weight) { + verify_posting<IDocidWithWeightPostingStore, true>(*api, GetParam().valid_term); + } else { + verify_posting<IDocidPostingStore, false>(*api, GetParam().valid_term); + } +} + +TEST_P(DirectPostingStoreTest, collect_folded_works) +{ + if (GetParam().type == BasicType::STRING) { + auto* sa = static_cast<StringAttribute*>(attr.get()); + set_doc(sa, 2, "bar", 30); + attr->commit(); + set_doc(sa, 3, "FOO", 30); + attr->commit(); + auto snapshot = api->get_dictionary_snapshot(); + auto lookup = api->lookup(GetParam().valid_term, snapshot); + std::vector<vespalib::string> folded; + std::function<void(vespalib::datastore::EntryRef)> save_folded = [&folded,sa](vespalib::datastore::EntryRef enum_idx) { folded.emplace_back(sa->getFromEnum(enum_idx.ref())); }; + api->collect_folded(lookup.enum_idx, snapshot, save_folded); + std::vector<vespalib::string> expected_folded{"FOO", "foo"}; + EXPECT_EQ(expected_folded, folded); + } else { + auto* ia = dynamic_cast<IntegerAttributeTemplate<int64_t>*>(attr.get()); + set_doc(ia, 2, int64_t(112), 30); + attr->commit(); + auto snapshot = api->get_dictionary_snapshot(); + auto lookup = api->lookup(GetParam().valid_term, snapshot); + std::vector<int64_t> folded; + std::function<void(vespalib::datastore::EntryRef)> save_folded = [&folded, ia]( + vespalib::datastore::EntryRef enum_idx) { folded.emplace_back(ia->getFromEnum(enum_idx.ref())); }; + api->collect_folded(lookup.enum_idx, snapshot, save_folded); + std::vector<int64_t> expected_folded{int64_t(111)}; + EXPECT_EQ(expected_folded, folded); + } +} + +class Verifier : public search::test::SearchIteratorVerifier { +public: + Verifier(); + ~Verifier(); + SearchIterator::UP create(bool strict) const override { + (void) strict; + const auto* api = _attr->as_docid_with_weight_posting_store(); + assert(api != nullptr); + auto dict_entry = api->lookup("123", api->get_dictionary_snapshot()); + assert(dict_entry.posting_idx.valid()); + return std::make_unique<queryeval::DocidWithWeightSearchIterator>(_tfmd, *api, dict_entry); + } +private: + mutable fef::TermFieldMatchData _tfmd; + AttributeVector::SP _attr; +}; + +Verifier::Verifier() + : _attr(make_attribute(BasicType::INT64, CollectionType::WSET, true)) +{ + add_docs(_attr, getDocIdLimit()); + auto docids = getExpectedDocIds(); + auto* int_attr = static_cast<IntegerAttribute*>(_attr.get()); + for (auto docid : docids) { + set_doc(int_attr, docid, int64_t(123), 1); + } +} +Verifier::~Verifier() {} + +TEST(VerifierTest, verify_document_weight_search_iterator) { + Verifier verifier; + verifier.verify(); +} + +GTEST_MAIN_RUN_ALL_TESTS() diff --git a/searchlib/src/tests/attribute/document_weight_iterator/.gitignore b/searchlib/src/tests/attribute/document_weight_iterator/.gitignore deleted file mode 100644 index 08cae9a48df..00000000000 --- a/searchlib/src/tests/attribute/document_weight_iterator/.gitignore +++ /dev/null @@ -1 +0,0 @@ -searchlib_document_weight_iterator_test_app diff --git a/searchlib/src/tests/attribute/document_weight_iterator/CMakeLists.txt b/searchlib/src/tests/attribute/document_weight_iterator/CMakeLists.txt deleted file mode 100644 index 4cb480068e3..00000000000 --- a/searchlib/src/tests/attribute/document_weight_iterator/CMakeLists.txt +++ /dev/null @@ -1,9 +0,0 @@ -# Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -vespa_add_executable(searchlib_document_weight_iterator_test_app TEST - SOURCES - document_weight_iterator_test.cpp - DEPENDS - searchlib - searchlib_test -) -vespa_add_test(NAME searchlib_document_weight_iterator_test_app COMMAND searchlib_document_weight_iterator_test_app) diff --git a/searchlib/src/tests/attribute/document_weight_iterator/document_weight_iterator_test.cpp b/searchlib/src/tests/attribute/document_weight_iterator/document_weight_iterator_test.cpp deleted file mode 100644 index 28416d09d6f..00000000000 --- a/searchlib/src/tests/attribute/document_weight_iterator/document_weight_iterator_test.cpp +++ /dev/null @@ -1,226 +0,0 @@ -// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. - -#include <vespa/searchcommon/attribute/config.h> -#include <vespa/searchlib/attribute/attribute.h> -#include <vespa/searchlib/attribute/attribute_read_guard.h> -#include <vespa/searchlib/attribute/attributefactory.h> -#include <vespa/searchlib/attribute/attributeguard.h> -#include <vespa/searchlib/attribute/attributememorysavetarget.h> -#include <vespa/searchlib/attribute/i_docid_with_weight_posting_store.h> -#include <vespa/searchlib/index/dummyfileheadercontext.h> -#include <vespa/searchlib/queryeval/document_weight_search_iterator.h> -#include <vespa/searchlib/test/searchiteratorverifier.h> -#include <vespa/searchlib/util/randomgenerator.h> -#include <vespa/vespalib/test/insertion_operators.h> -#include <vespa/vespalib/testkit/test_kit.h> - -#include <vespa/log/log.h> -LOG_SETUP("document_weight_iterator_test"); - -using namespace search; -using namespace search::attribute; - -AttributeVector::SP make_attribute(BasicType type, CollectionType collection, bool fast_search) { - Config cfg(type, collection); - cfg.setFastSearch(fast_search); - return AttributeFactory::createAttribute("my_attribute", cfg); -} - -void add_docs(AttributeVector::SP attr_ptr, size_t limit = 1000) { - AttributeVector::DocId docid; - for (size_t i = 0; i < limit; ++i) { - attr_ptr->addDoc(docid); - } - attr_ptr->commit(); - ASSERT_EQUAL((limit - 1), docid); -} - -template <typename ATTR, typename KEY> -void set_doc(ATTR *attr, uint32_t docid, KEY key, int32_t weight) { - attr->clearDoc(docid); - attr->append(docid, key, weight); - attr->commit(); -} - -void populate_long(AttributeVector::SP attr_ptr) { - IntegerAttribute *attr = static_cast<IntegerAttribute *>(attr_ptr.get()); - set_doc(attr, 1, int64_t(111), 20); - set_doc(attr, 5, int64_t(111), 5); - set_doc(attr, 7, int64_t(111), 10); -} - -void populate_string(AttributeVector::SP attr_ptr) { - StringAttribute *attr = static_cast<StringAttribute *>(attr_ptr.get()); - set_doc(attr, 1, "foo", 20); - set_doc(attr, 5, "foo", 5); - set_doc(attr, 7, "foo", 10); -} - -struct LongFixture { - AttributeVector::SP attr; - const IDocidWithWeightPostingStore *api; - LongFixture() : attr(make_attribute(BasicType::INT64, CollectionType::WSET, true)), - api(attr->as_docid_with_weight_posting_store()) - { - ASSERT_TRUE(api != nullptr); - add_docs(attr); - populate_long(attr); - } -}; - -struct StringFixture { - AttributeVector::SP attr; - const IDocidWithWeightPostingStore *api; - StringFixture() : attr(make_attribute(BasicType::STRING, CollectionType::WSET, true)), - api(attr->as_docid_with_weight_posting_store()) - { - ASSERT_TRUE(api != nullptr); - add_docs(attr); - populate_string(attr); - } -}; - -TEST("require that appropriate attributes support the document weight attribute interface") { - EXPECT_TRUE(make_attribute(BasicType::INT64, CollectionType::WSET, true)->as_docid_with_weight_posting_store() != nullptr); - EXPECT_TRUE(make_attribute(BasicType::STRING, CollectionType::WSET, true)->as_docid_with_weight_posting_store() != nullptr); -} - -TEST("require that inappropriate attributes do not support the document weight attribute interface") { - EXPECT_TRUE(make_attribute(BasicType::INT64, CollectionType::SINGLE, false)->as_docid_with_weight_posting_store() == nullptr); - EXPECT_TRUE(make_attribute(BasicType::INT64, CollectionType::ARRAY, false)->as_docid_with_weight_posting_store() == nullptr); - EXPECT_TRUE(make_attribute(BasicType::INT64, CollectionType::WSET, false)->as_docid_with_weight_posting_store() == nullptr); - EXPECT_TRUE(make_attribute(BasicType::INT64, CollectionType::SINGLE, true)->as_docid_with_weight_posting_store() == nullptr); - EXPECT_TRUE(make_attribute(BasicType::INT64, CollectionType::ARRAY, true)->as_docid_with_weight_posting_store() == nullptr); - EXPECT_TRUE(make_attribute(BasicType::STRING, CollectionType::SINGLE, false)->as_docid_with_weight_posting_store() == nullptr); - EXPECT_TRUE(make_attribute(BasicType::STRING, CollectionType::ARRAY, false)->as_docid_with_weight_posting_store() == nullptr); - EXPECT_TRUE(make_attribute(BasicType::STRING, CollectionType::WSET, false)->as_docid_with_weight_posting_store() == nullptr); - EXPECT_TRUE(make_attribute(BasicType::STRING, CollectionType::SINGLE, true)->as_docid_with_weight_posting_store() == nullptr); - EXPECT_TRUE(make_attribute(BasicType::STRING, CollectionType::ARRAY, true)->as_docid_with_weight_posting_store() == nullptr); - EXPECT_TRUE(make_attribute(BasicType::INT32, CollectionType::WSET, true)->as_docid_with_weight_posting_store() == nullptr); - EXPECT_TRUE(make_attribute(BasicType::DOUBLE, CollectionType::WSET, true)->as_docid_with_weight_posting_store() == nullptr); -} - -void verify_valid_lookup(IDirectPostingStore::LookupResult result) { - EXPECT_TRUE(result.posting_idx.valid()); - EXPECT_EQUAL(3u, result.posting_size); - EXPECT_EQUAL(5, result.min_weight); - EXPECT_EQUAL(20, result.max_weight); -} - -void verify_invalid_lookup(IDirectPostingStore::LookupResult result) { - EXPECT_FALSE(result.posting_idx.valid()); - EXPECT_EQUAL(0u, result.posting_size); - EXPECT_EQUAL(0, result.min_weight); - EXPECT_EQUAL(0, result.max_weight); -} - -TEST_F("require that integer lookup works correctly", LongFixture) { - verify_valid_lookup(f1.api->lookup("111", f1.api->get_dictionary_snapshot())); - verify_invalid_lookup(f1.api->lookup("222", f1.api->get_dictionary_snapshot())); -} - -TEST_F("require string lookup works correctly", StringFixture) { - verify_valid_lookup(f1.api->lookup("foo", f1.api->get_dictionary_snapshot())); - verify_invalid_lookup(f1.api->lookup("bar", f1.api->get_dictionary_snapshot())); -} - -void verify_posting(const IDocidWithWeightPostingStore &api, const char *term) { - auto result = api.lookup(term, api.get_dictionary_snapshot()); - ASSERT_TRUE(result.posting_idx.valid()); - std::vector<DocidWithWeightIterator> itr_store; - api.create(result.posting_idx, itr_store); - ASSERT_EQUAL(1u, itr_store.size()); - { - DocidWithWeightIterator &itr = itr_store[0]; - if (itr.valid() && itr.getKey() < 1) { - itr.linearSeek(1); - } - ASSERT_TRUE(itr.valid()); - EXPECT_EQUAL(1u, itr.getKey()); // docid - EXPECT_EQUAL(20, itr.getData()); // weight - itr.linearSeek(2); - ASSERT_TRUE(itr.valid()); - EXPECT_EQUAL(5u, itr.getKey()); // docid - EXPECT_EQUAL(5, itr.getData()); // weight - itr.linearSeek(6); - ASSERT_TRUE(itr.valid()); - EXPECT_EQUAL(7u, itr.getKey()); // docid - EXPECT_EQUAL(10, itr.getData()); // weight - itr.linearSeek(8); - EXPECT_FALSE(itr.valid()); - } -} - -TEST_F("require that integer iterators are created correctly", LongFixture) { - verify_posting(*f1.api, "111"); -} - -TEST_F("require that string iterators are created correctly", StringFixture) { - verify_posting(*f1.api, "foo"); -} - -TEST_F("require that collect_folded works for string", StringFixture) -{ - StringAttribute *attr = static_cast<StringAttribute *>(f1.attr.get()); - set_doc(attr, 2, "bar", 30); - attr->commit(); - set_doc(attr, 3, "FOO", 30); - attr->commit(); - auto dictionary_snapshot = f1.api->get_dictionary_snapshot(); - auto lookup1 = f1.api->lookup("foo", dictionary_snapshot); - std::vector<vespalib::string> folded; - std::function<void(vespalib::datastore::EntryRef)> save_folded = [&folded,attr](vespalib::datastore::EntryRef enum_idx) { folded.emplace_back(attr->getFromEnum(enum_idx.ref())); }; - f1.api->collect_folded(lookup1.enum_idx, dictionary_snapshot, save_folded); - std::vector<vespalib::string> expected_folded{"FOO", "foo"}; - EXPECT_EQUAL(expected_folded, folded); -} - -TEST_F("require that collect_folded works for integers", LongFixture) -{ - IntegerAttributeTemplate<int64_t> *attr = dynamic_cast<IntegerAttributeTemplate<int64_t> *>(f1.attr.get()); - set_doc(attr, 2, int64_t(112), 30); - attr->commit(); - auto dictionary_snapshot = f1.api->get_dictionary_snapshot(); - auto lookup1 = f1.api->lookup("111", dictionary_snapshot); - std::vector<int64_t> folded; - std::function<void(vespalib::datastore::EntryRef)> save_folded = [&folded,attr](vespalib::datastore::EntryRef enum_idx) { folded.emplace_back(attr->getFromEnum(enum_idx.ref())); }; - f1.api->collect_folded(lookup1.enum_idx, dictionary_snapshot, save_folded); - std::vector<int64_t> expected_folded{int64_t(111)}; - EXPECT_EQUAL(expected_folded, folded); -} - -class Verifier : public search::test::SearchIteratorVerifier { -public: - Verifier(); - ~Verifier(); - SearchIterator::UP create(bool strict) const override { - (void) strict; - const auto* api = _attr->as_docid_with_weight_posting_store(); - ASSERT_TRUE(api != nullptr); - auto dict_entry = api->lookup("123", api->get_dictionary_snapshot()); - ASSERT_TRUE(dict_entry.posting_idx.valid()); - return std::make_unique<queryeval::DocumentWeightSearchIterator>(_tfmd, *api, dict_entry); - } -private: - mutable fef::TermFieldMatchData _tfmd; - AttributeVector::SP _attr; -}; - -Verifier::Verifier() - : _attr(make_attribute(BasicType::INT64, CollectionType::WSET, true)) -{ - add_docs(_attr, getDocIdLimit()); - auto docids = getExpectedDocIds(); - IntegerAttribute *int_attr = static_cast<IntegerAttribute *>(_attr.get()); - for (auto docid: docids) { - set_doc(int_attr, docid, int64_t(123), 1); - } -} -Verifier::~Verifier() {} - -TEST("verify document weight search iterator") { - Verifier verifier; - verifier.verify(); -} - -TEST_MAIN() { TEST_RUN_ALL(); } diff --git a/searchlib/src/tests/attribute/document_weight_or_filter_search/CMakeLists.txt b/searchlib/src/tests/attribute/document_weight_or_filter_search/CMakeLists.txt deleted file mode 100644 index b2f86a9ddec..00000000000 --- a/searchlib/src/tests/attribute/document_weight_or_filter_search/CMakeLists.txt +++ /dev/null @@ -1,10 +0,0 @@ -# Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -vespa_add_executable(searchlib_document_weight_or_filter_search_test_app TEST - SOURCES - document_weight_or_filter_search_test.cpp - DEPENDS - searchlib - searchlib_test - GTest::GTest -) -vespa_add_test(NAME searchlib_document_weight_or_filter_search_test_app COMMAND searchlib_document_weight_or_filter_search_test_app) diff --git a/searchlib/src/tests/attribute/multi_term_or_filter_search/CMakeLists.txt b/searchlib/src/tests/attribute/multi_term_or_filter_search/CMakeLists.txt new file mode 100644 index 00000000000..4ec5d849ad3 --- /dev/null +++ b/searchlib/src/tests/attribute/multi_term_or_filter_search/CMakeLists.txt @@ -0,0 +1,10 @@ +# Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +vespa_add_executable(searchlib_multi_term_or_filter_search_test_app TEST + SOURCES + multi_term_or_filter_search_test.cpp + DEPENDS + searchlib + searchlib_test + GTest::GTest +) +vespa_add_test(NAME searchlib_multi_term_or_filter_search_test_app COMMAND searchlib_multi_term_or_filter_search_test_app) diff --git a/searchlib/src/tests/attribute/document_weight_or_filter_search/document_weight_or_filter_search_test.cpp b/searchlib/src/tests/attribute/multi_term_or_filter_search/multi_term_or_filter_search_test.cpp index ae4812b5437..552a128c518 100644 --- a/searchlib/src/tests/attribute/document_weight_or_filter_search/document_weight_or_filter_search_test.cpp +++ b/searchlib/src/tests/attribute/multi_term_or_filter_search/multi_term_or_filter_search_test.cpp @@ -1,30 +1,34 @@ // Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -#include <vespa/vespalib/gtest/gtest.h> #include <vespa/searchlib/attribute/i_direct_posting_store.h> -#include <vespa/searchlib/attribute/document_weight_or_filter_search.h> -#include <vespa/searchlib/queryeval/searchiterator.h> +#include <vespa/searchlib/attribute/multi_term_or_filter_search.h> #include <vespa/searchlib/common/bitvector.h> +#include <vespa/searchlib/fef/termfieldmatchdata.h> +#include <vespa/searchlib/queryeval/searchiterator.h> +#include <vespa/vespalib/gtest/gtest.h> #define ENABLE_GTEST_MIGRATION #include <vespa/searchlib/test/searchiteratorverifier.h> using PostingList = search::attribute::PostingListTraits<int32_t>::PostingStoreBase; using Iterator = search::attribute::PostingListTraits<int32_t>::const_iterator; using KeyData = PostingList::KeyDataType; + using search::BitVector; -using search::attribute::DocumentWeightOrFilterSearch; +using search::attribute::MultiTermOrFilterSearch; +using search::fef::TermFieldMatchData; using search::queryeval::SearchIterator; using vespalib::datastore::EntryRef; -class DocumentWeightOrFilterSearchTest : public ::testing::Test { +class MultiTermOrFilterSearchTest : public ::testing::Test { PostingList _postings; + mutable TermFieldMatchData _tfmd; vespalib::GenerationHandler _gens; std::vector<EntryRef> _trees; uint32_t _range_start; uint32_t _range_end; public: - DocumentWeightOrFilterSearchTest(); - ~DocumentWeightOrFilterSearchTest() override; + MultiTermOrFilterSearchTest(); + ~MultiTermOrFilterSearchTest() override; void inc_generation(); size_t num_trees() const { return _trees.size(); } Iterator get_tree(size_t idx) const { @@ -62,7 +66,7 @@ public: for (size_t i = 0; i < num_trees(); ++i) { iterators.emplace_back(get_tree(i)); } - auto result = DocumentWeightOrFilterSearch::create(std::move(iterators)); + auto result = MultiTermOrFilterSearch::create(std::move(iterators), _tfmd); result->initRange(_range_start, _range_end); return result; }; @@ -73,6 +77,8 @@ public: while (doc_id < _range_end) { if (iterator.seek(doc_id)) { result.emplace_back(doc_id); + iterator.unpack(doc_id); + EXPECT_EQ(doc_id, _tfmd.getDocId()); ++doc_id; } else { doc_id = std::max(doc_id + 1, iterator.getDocId()); @@ -121,7 +127,7 @@ public: } }; -DocumentWeightOrFilterSearchTest::DocumentWeightOrFilterSearchTest() +MultiTermOrFilterSearchTest::MultiTermOrFilterSearchTest() : _postings(true), _gens(), _range_start(1), @@ -129,7 +135,7 @@ DocumentWeightOrFilterSearchTest::DocumentWeightOrFilterSearchTest() { } -DocumentWeightOrFilterSearchTest::~DocumentWeightOrFilterSearchTest() +MultiTermOrFilterSearchTest::~MultiTermOrFilterSearchTest() { for (auto& tree : _trees) { _postings.clear(tree); @@ -140,7 +146,7 @@ DocumentWeightOrFilterSearchTest::~DocumentWeightOrFilterSearchTest() } void -DocumentWeightOrFilterSearchTest::inc_generation() +MultiTermOrFilterSearchTest::inc_generation() { _postings.freeze(); _postings.assign_generation(_gens.getCurrentGeneration()); @@ -148,19 +154,19 @@ DocumentWeightOrFilterSearchTest::inc_generation() _postings.reclaim_memory(_gens.get_oldest_used_generation()); } -TEST_F(DocumentWeightOrFilterSearchTest, daat_or) +TEST_F(MultiTermOrFilterSearchTest, daat_or) { make_sample_data(); expect_result(eval_daat(*make_iterator()), { 3, 10, 11, 14, 17, 20 }); } -TEST_F(DocumentWeightOrFilterSearchTest, taat_get_hits) +TEST_F(MultiTermOrFilterSearchTest, taat_get_hits) { make_sample_data(); expect_result(frombv(*make_iterator()->get_hits(get_range_start())), { 3, 10, 11, 14, 17, 20 }); } -TEST_F(DocumentWeightOrFilterSearchTest, taat_or_hits_into) +TEST_F(MultiTermOrFilterSearchTest, taat_or_hits_into) { make_sample_data(); auto bv = tobv({13, 14}); @@ -168,7 +174,7 @@ TEST_F(DocumentWeightOrFilterSearchTest, taat_or_hits_into) expect_result(frombv(*bv), { 3, 10, 11, 13, 14, 17, 20 }); } -TEST_F(DocumentWeightOrFilterSearchTest, taat_and_hits_into) +TEST_F(MultiTermOrFilterSearchTest, taat_and_hits_into) { make_sample_data(); auto bv = tobv({13, 14}); @@ -176,21 +182,21 @@ TEST_F(DocumentWeightOrFilterSearchTest, taat_and_hits_into) expect_result(frombv(*bv), { 14 }); } -TEST_F(DocumentWeightOrFilterSearchTest, daat_or_ranged) +TEST_F(MultiTermOrFilterSearchTest, daat_or_ranged) { make_sample_data(); set_range(4, 15); expect_result(eval_daat(*make_iterator()), {10, 11, 14 }); } -TEST_F(DocumentWeightOrFilterSearchTest, taat_get_hits_ranged) +TEST_F(MultiTermOrFilterSearchTest, taat_get_hits_ranged) { make_sample_data(); set_range(4, 15); expect_result(frombv(*make_iterator()->get_hits(get_range_start())), { 10, 11, 14 }); } -TEST_F(DocumentWeightOrFilterSearchTest, taat_or_hits_into_ranged) +TEST_F(MultiTermOrFilterSearchTest, taat_or_hits_into_ranged) { make_sample_data(); set_range(4, 15); @@ -199,7 +205,7 @@ TEST_F(DocumentWeightOrFilterSearchTest, taat_or_hits_into_ranged) expect_result(frombv(*bv), { 10, 11, 13, 14 }); } -TEST_F(DocumentWeightOrFilterSearchTest, taat_and_hits_into_ranged) +TEST_F(MultiTermOrFilterSearchTest, taat_and_hits_into_ranged) { make_sample_data(); set_range(4, 15); @@ -211,9 +217,9 @@ TEST_F(DocumentWeightOrFilterSearchTest, taat_and_hits_into_ranged) namespace { class Verifier : public search::test::SearchIteratorVerifier { - DocumentWeightOrFilterSearchTest &_test; + MultiTermOrFilterSearchTest &_test; public: - Verifier(DocumentWeightOrFilterSearchTest &test, int num_trees) + Verifier(MultiTermOrFilterSearchTest &test, int num_trees) : _test(test) { std::vector<std::vector<uint32_t>> trees(num_trees); @@ -239,7 +245,7 @@ public: }; -TEST_F(DocumentWeightOrFilterSearchTest, iterator_conformance) +TEST_F(MultiTermOrFilterSearchTest, iterator_conformance) { { Verifier verifier(*this, 1); diff --git a/searchlib/src/tests/attribute/searchable/attribute_searchable_adapter_test.cpp b/searchlib/src/tests/attribute/searchable/attribute_searchable_adapter_test.cpp index 8831bd1ec75..ecc03ac54c5 100644 --- a/searchlib/src/tests/attribute/searchable/attribute_searchable_adapter_test.cpp +++ b/searchlib/src/tests/attribute/searchable/attribute_searchable_adapter_test.cpp @@ -488,11 +488,11 @@ TEST("require that direct attribute iterators work") { EXPECT_TRUE(result.has_minmax); EXPECT_EQUAL(100, result.min_weight); EXPECT_EQUAL(1000, result.max_weight); - EXPECT_TRUE(result.iterator_dump.find("DocumentWeightSearchIterator") != vespalib::string::npos); + EXPECT_TRUE(result.iterator_dump.find("DocidWithWeightSearchIterator") != vespalib::string::npos); } else { EXPECT_EQUAL(num_docs, result.est_hits); EXPECT_FALSE(result.has_minmax); - EXPECT_TRUE(result.iterator_dump.find("DocumentWeightSearchIterator") == vespalib::string::npos); + EXPECT_TRUE(result.iterator_dump.find("DocidWithWeightSearchIterator") == vespalib::string::npos); } ASSERT_EQUAL(3u, result.hits.size()); EXPECT_FALSE(result.est_empty); @@ -513,7 +513,7 @@ TEST("require that single weighted set turns filter on filter fields") { SimpleStringTerm node("foo", "", 0, Weight(1)); Result result = do_search(attribute_manager, node, strict); EXPECT_EQUAL(3u, result.est_hits); - EXPECT_TRUE(result.iterator_dump.find("DocumentWeightSearchIterator") == vespalib::string::npos); + EXPECT_TRUE(result.iterator_dump.find("DocidWithWeightSearchIterator") == vespalib::string::npos); EXPECT_TRUE(result.iterator_dump.find("FilterAttributePostingListIteratorT") != vespalib::string::npos); ASSERT_EQUAL(3u, result.hits.size()); EXPECT_FALSE(result.est_empty); diff --git a/searchlib/src/tests/attribute/searchcontext/searchcontext_test.cpp b/searchlib/src/tests/attribute/searchcontext/searchcontext_test.cpp index 6e334fffa75..741a86b0beb 100644 --- a/searchlib/src/tests/attribute/searchcontext/searchcontext_test.cpp +++ b/searchlib/src/tests/attribute/searchcontext/searchcontext_test.cpp @@ -468,7 +468,7 @@ template <typename V, typename T> ResultSetPtr SearchContextTest::performSearch(const V & vec, const T & term) { - return performSearch(search::queryeval::ExecuteInfo::TRUE, vec, term, TermType::WORD); + return performSearch(queryeval::ExecuteInfo::TRUE, vec, term, TermType::WORD); } template <typename V, typename T> @@ -503,7 +503,7 @@ void SearchContextTest::performSearch(const V & vec, const vespalib::string & term, const DocSet & expected, TermType termType) { - performSearch(search::queryeval::ExecuteInfo::TRUE, vec, term, expected, termType); + performSearch(queryeval::ExecuteInfo::TRUE, vec, term, expected, termType); } void @@ -1113,7 +1113,7 @@ SearchContextTest::performRangeSearch(const VectorType & vec, const vespalib::st { for (size_t num_threads : {1,3}) { vespalib::SimpleThreadBundle thread_bundle(num_threads); - auto executeInfo = search::queryeval::ExecuteInfo::create(true, 1.0, nullptr, thread_bundle, true, true); + auto executeInfo = queryeval::ExecuteInfo::create(true, 1.0, vespalib::Doom::never(), thread_bundle); performSearch(executeInfo, vec, term, expected, TermType::WORD); } } diff --git a/searchlib/src/tests/attribute/stringattribute/stringattribute_test.cpp b/searchlib/src/tests/attribute/stringattribute/stringattribute_test.cpp index 1beb2b1e501..1bfb9fb41f9 100644 --- a/searchlib/src/tests/attribute/stringattribute/stringattribute_test.cpp +++ b/searchlib/src/tests/attribute/stringattribute/stringattribute_test.cpp @@ -1,5 +1,6 @@ // Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. #include <vespa/vespalib/testkit/testapp.h> +#include <vespa/searchcommon/attribute/config.h> #include <vespa/searchlib/attribute/enumstore.h> #include <vespa/searchlib/attribute/singlestringattribute.h> #include <vespa/searchlib/attribute/singlestringpostattribute.h> @@ -8,7 +9,6 @@ #include <vespa/searchlib/attribute/enumstore.hpp> #include <vespa/searchlib/attribute/single_string_enum_search_context.h> -#include <vespa/searchlib/attribute/multistringpostattribute.hpp> #include <vespa/log/log.h> LOG_SETUP("stringattribute_test"); diff --git a/searchlib/src/tests/attribute/tensorattribute/tensorattribute_test.cpp b/searchlib/src/tests/attribute/tensorattribute/tensorattribute_test.cpp index 81862b74eb2..b1b2235165f 100644 --- a/searchlib/src/tests/attribute/tensorattribute/tensorattribute_test.cpp +++ b/searchlib/src/tests/attribute/tensorattribute/tensorattribute_test.cpp @@ -7,7 +7,6 @@ #include <vespa/searchlib/tensor/dense_tensor_attribute.h> #include <vespa/searchlib/tensor/direct_tensor_attribute.h> #include <vespa/searchlib/tensor/doc_vector_access.h> -#include <vespa/searchlib/tensor/distance_functions.h> #include <vespa/searchlib/tensor/hnsw_index.h> #include <vespa/searchlib/tensor/mips_distance_transform.h> #include <vespa/searchlib/tensor/nearest_neighbor_index.h> @@ -25,7 +24,6 @@ #include <vespa/vespalib/testkit/test_kit.h> #include <vespa/vespalib/util/mmap_file_allocator_factory.h> #include <vespa/searchlib/util/bufferwriter.h> -#include <vespa/vespalib/util/fake_doom.h> #include <vespa/vespalib/util/threadstackexecutor.h> #include <vespa/document/base/exceptions.h> #include <vespa/eval/eval/fast_value.h> @@ -132,7 +130,7 @@ private: int _index_value; public: - MockIndexSaver(int index_value) : _index_value(index_value) {} + explicit MockIndexSaver(int index_value) noexcept : _index_value(index_value) {} void save(search::BufferWriter& writer) const override { writer.write(&_index_value, sizeof(int)); writer.flush(); @@ -158,7 +156,7 @@ public: class MockPrepareResult : public PrepareResult { public: uint32_t docid; - MockPrepareResult(uint32_t docid_in) : docid(docid_in) {} + explicit MockPrepareResult(uint32_t docid_in) noexcept : docid(docid_in) {} }; class MockNearestNeighborIndex : public NearestNeighborIndex { @@ -177,7 +175,7 @@ private: int _index_value; public: - MockNearestNeighborIndex(const DocVectorAccess& vectors) + explicit MockNearestNeighborIndex(const DocVectorAccess& vectors) : _vectors(vectors), _adds(), _removes(), @@ -279,11 +277,11 @@ public: } vespalib::MemoryUsage update_stat(const CompactionStrategy&) override { ++_memory_usage_cnt; - return vespalib::MemoryUsage(); + return {}; } vespalib::MemoryUsage memory_usage() const override { ++_memory_usage_cnt; - return vespalib::MemoryUsage(); + return {}; } void populate_address_space_usage(AddressSpaceUsage&) const override {} void get_state(const vespalib::slime::Inserter&) const override {} @@ -293,7 +291,7 @@ public: if (_index_value != 0) { return std::make_unique<MockIndexSaver>(_index_value); } - return std::unique_ptr<NearestNeighborIndexSaver>(); + return {}; } std::unique_ptr<NearestNeighborIndexLoader> make_loader(FastOS_FileInterface& file, const vespalib::GenericHeader& header) override { (void) header; @@ -310,7 +308,7 @@ public: (void) explore_k; (void) doom; (void) distance_threshold; - return std::vector<Neighbor>(); + return {}; } std::vector<Neighbor> find_top_k_with_filter(uint32_t k, const search::tensor::BoundDistanceFunction &df, @@ -324,7 +322,7 @@ public: (void) filter; (void) doom; (void) distance_threshold; - return std::vector<Neighbor>(); + return {}; } search::tensor::DistanceFunctionFactory &distance_function_factory() const override { @@ -427,7 +425,7 @@ struct Fixture { FixtureTraits _traits; vespalib::string _mmap_allocator_base_dir; - Fixture(const vespalib::string &typeSpec, FixtureTraits traits = FixtureTraits()); + explicit Fixture(const vespalib::string &typeSpec, FixtureTraits traits = FixtureTraits()); ~Fixture(); @@ -589,7 +587,7 @@ struct Fixture { } TensorSpec expEmptyDenseTensor() const { - return TensorSpec(denseSpec); + return {denseSpec}; } vespalib::string expEmptyDenseTensorSpec() const { @@ -1296,12 +1294,10 @@ template <typename ParentT> class NearestNeighborBlueprintFixtureBase : public ParentT { private: std::unique_ptr<Value> _query_tensor; - vespalib::FakeDoom _no_doom; public: NearestNeighborBlueprintFixtureBase() - : _query_tensor(), - _no_doom() + : _query_tensor() { this->set_tensor(1, vec_2d(1, 1)); this->set_tensor(2, vec_2d(2, 2)); @@ -1329,7 +1325,7 @@ public: std::make_unique<DistanceCalculator>(this->as_dense_tensor(), create_query_tensor(vec_2d(17, 42))), 3, approximate, 5, 100100.25, - global_filter_lower_limit, 1.0, target_hits_max_adjustment_factor, _no_doom.get_doom()); + global_filter_lower_limit, 1.0, target_hits_max_adjustment_factor, vespalib::Doom::never()); EXPECT_EQUAL(11u, bp->getState().estimate().estHits); EXPECT_EQUAL(100100.25 * 100100.25, bp->get_distance_threshold()); return bp; |