diff options
Diffstat (limited to 'searchlib')
7 files changed, 516 insertions, 222 deletions
diff --git a/searchlib/src/tests/attribute/searchable/CMakeLists.txt b/searchlib/src/tests/attribute/searchable/CMakeLists.txt index 29a4c122bf7..c3af20cc673 100644 --- a/searchlib/src/tests/attribute/searchable/CMakeLists.txt +++ b/searchlib/src/tests/attribute/searchable/CMakeLists.txt @@ -19,6 +19,7 @@ vespa_add_executable(searchlib_attribute_blueprint_test_app TEST attributeblueprint_test.cpp DEPENDS searchlib + searchlib_test GTest::GTest ) vespa_add_test(NAME searchlib_attribute_blueprint_test_app COMMAND searchlib_attribute_blueprint_test_app) diff --git a/searchlib/src/tests/attribute/searchable/attributeblueprint_test.cpp b/searchlib/src/tests/attribute/searchable/attributeblueprint_test.cpp index 29dbf33d29c..b211261ef57 100644 --- a/searchlib/src/tests/attribute/searchable/attributeblueprint_test.cpp +++ b/searchlib/src/tests/attribute/searchable/attributeblueprint_test.cpp @@ -1,10 +1,15 @@ // Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#include <vespa/eval/eval/tensor_spec.h> +#include <vespa/eval/eval/value.h> +#include <vespa/eval/eval/value_codec.h> +#include <vespa/searchcommon/attribute/config.h> +#include <vespa/searchcommon/attribute/iattributecontext.h> +#include <vespa/searchlib/attribute/attribute.h> #include <vespa/searchlib/attribute/attribute_blueprint_factory.h> #include <vespa/searchlib/attribute/attribute_read_guard.h> #include <vespa/searchlib/attribute/attributecontext.h> #include <vespa/searchlib/attribute/attributefactory.h> -#include <vespa/searchlib/attribute/attribute.h> #include <vespa/searchlib/fef/matchdata.h> #include <vespa/searchlib/query/tree/location.h> #include <vespa/searchlib/query/tree/point.h> @@ -14,11 +19,7 @@ #include <vespa/searchlib/queryeval/leaf_blueprints.h> #include <vespa/searchlib/queryeval/nearest_neighbor_blueprint.h> #include <vespa/searchlib/tensor/dense_tensor_attribute.h> -#include <vespa/searchcommon/attribute/iattributecontext.h> -#include <vespa/searchcommon/attribute/config.h> -#include <vespa/eval/eval/tensor_spec.h> -#include <vespa/eval/eval/value.h> -#include <vespa/eval/eval/value_codec.h> +#include <vespa/searchlib/test/attribute_builder.h> #include <vespa/vespalib/gtest/gtest.h> #include <vespa/log/log.h> @@ -28,14 +29,18 @@ using search::AttributeGuard; using search::AttributeVector; using search::IAttributeManager; using search::attribute::IAttributeContext; +using search::attribute::test::AttributeBuilder; using search::fef::MatchData; using search::fef::TermFieldMatchData; using search::query::Location; using search::query::Node; using search::query::Point; +using search::query::SimpleDotProduct; using search::query::SimpleLocationTerm; using search::query::SimplePrefixTerm; using search::query::SimpleStringTerm; +using search::query::SimpleWandTerm; +using search::query::SimpleWeightedSetTerm; using search::query::Weight; using search::queryeval::Blueprint; using search::queryeval::EmptyBlueprint; @@ -44,6 +49,7 @@ using search::queryeval::FieldSpec; using search::queryeval::FilterWrapper; using search::queryeval::NearestNeighborBlueprint; using search::queryeval::SearchIterator; +using search::queryeval::SimpleResult; using std::string; using std::vector; using vespalib::eval::TensorSpec; @@ -100,7 +106,7 @@ public: } }; -constexpr uint32_t DOCID_LIMIT = 3; +constexpr uint32_t DOCID_LIMIT = 4; bool do_search(const Node &node, IAttributeManager &attribute_manager, bool expect_attribute_search_context = true) @@ -112,7 +118,7 @@ do_search(const Node &node, IAttributeManager &attribute_manager, bool expect_at Blueprint::UP result = source.createBlueprint(requestContext, FieldSpec(field, 0, 0), node); assert(result.get()); EXPECT_TRUE(!result->getState().estimate().empty); - EXPECT_EQ(3u, result->getState().estimate().estHits); + EXPECT_EQ(DOCID_LIMIT, result->getState().estimate().estHits); if (expect_attribute_search_context) { EXPECT_TRUE(result->get_attribute_search_context() != nullptr); } else { @@ -122,9 +128,10 @@ do_search(const Node &node, IAttributeManager &attribute_manager, bool expect_at result->setDocIdLimit(DOCID_LIMIT); SearchIterator::UP iterator = result->createSearch(*md, true); assert((bool)iterator); - iterator->initRange(1, 3); + iterator->initRange(1, DOCID_LIMIT); EXPECT_TRUE(!iterator->seek(1)); - return iterator->seek(2); + EXPECT_TRUE(!iterator->seek(2)); + return iterator->seek(3); } bool @@ -144,73 +151,33 @@ downcast(ParentType& parent) return *result; } -struct StringAttributeFiller { - using ValueType = vespalib::string; - static void add(AttributeVector& attr, const vespalib::string& value) { - auto& real = downcast<StringAttribute>(attr); - real.update(attr.getNumDocs() - 1, value); - real.commit(); - } -}; - -struct WsetStringAttributeFiller { - using ValueType = vespalib::string; - static void add(AttributeVector& attr, const vespalib::string& value) { - auto& real = downcast<StringAttribute>(attr); - uint32_t docid = attr.getNumDocs() - 1; - real.append(docid, value, 1); - real.commit(); - } -}; - -struct IntegerAttributeFiller { - using ValueType = int64_t; - static void add(AttributeVector& attr, int64_t value) { - auto& real = downcast<IntegerAttribute>(attr); - real.update(attr.getNumDocs() - 1, value); - real.commit(); - } -}; - -template <typename FillerType> -void -fill(AttributeVector& attr, typename FillerType::ValueType value) +AttributeVector::SP +make_string_attribute(const std::vector<vespalib::string>& values) { - AttributeVector::DocId docid; - attr.addDoc(docid); - attr.addDoc(docid); - attr.addDoc(docid); - assert(DOCID_LIMIT-1 == docid); - FillerType::add(attr, value); + Config cfg(BasicType::STRING, CollectionType::SINGLE); + return AttributeBuilder(field, cfg).fill(values).get(); } AttributeVector::SP make_string_attribute(const std::string& value) { - Config cfg(BasicType::STRING, CollectionType::SINGLE); - auto attr = AttributeFactory::createAttribute(field, cfg); - fill<StringAttributeFiller>(*attr, value); - return attr; + return make_string_attribute({"", "", value}); } AttributeVector::SP -make_wset_string_attribute(const std::string& value) +make_wset_string_attribute(const std::vector<std::vector<vespalib::string>>& values) { Config cfg(BasicType::STRING, CollectionType::WSET); // fast-search is needed to trigger use of DirectAttributeBlueprint. cfg.setFastSearch(true); - auto attr = AttributeFactory::createAttribute(field, cfg); - fill<WsetStringAttributeFiller>(*attr, value); - return attr; + return AttributeBuilder(field, cfg).fill_array(values).get(); } AttributeVector::SP make_int_attribute(int64_t value) { Config cfg(BasicType::INT32, CollectionType::SINGLE); - auto attr = AttributeFactory::createAttribute(field, cfg); - fill<IntegerAttributeFiller>(*attr, value); - return attr; + return AttributeBuilder(field, cfg).fill({-1, -1, value}).get(); } AttributeVector::SP @@ -218,9 +185,7 @@ make_fast_search_long_attribute(int64_t value) { Config cfg(BasicType::fromType(int64_t()), CollectionType::SINGLE); cfg.setFastSearch(true); - auto attr = AttributeFactory::createAttribute(field, cfg); - fill<IntegerAttributeFiller>(*attr, value); - return attr; + return AttributeBuilder(field, cfg).fill({-1, -1, value}).get(); } MyAttributeManager @@ -322,17 +287,21 @@ make_int_attribute(const vespalib::string& name) return AttributeFactory::createAttribute(name, cfg); } +using BFC = Blueprint::FilterConstraint; + class BlueprintFactoryFixture { public: + AttributeVector::SP attr; MyAttributeManager mgr; vespalib::string attr_name; AttributeContext attr_ctx; FakeRequestContext request_ctx; AttributeBlueprintFactory source; - BlueprintFactoryFixture(AttributeVector::SP attr) - : mgr(attr), - attr_name(attr->getName()), + BlueprintFactoryFixture(AttributeVector::SP attr_in) + : attr(attr_in), + mgr(attr_in), + attr_name(attr_in->getName()), attr_ctx(mgr), request_ctx(&attr_ctx), source() @@ -345,12 +314,30 @@ public: result->setDocIdLimit(DOCID_LIMIT); return result; } + void expect_document_weight_attribute() { + EXPECT_TRUE(attr->asDocumentWeightAttribute() != nullptr); + } + void expect_filter_search(const SimpleResult& upper_and_lower, const Node& term) { + expect_filter_search(upper_and_lower, upper_and_lower, term); + } + void expect_filter_search(const SimpleResult& upper, const SimpleResult& lower, const Node& term) { + auto blueprint = create_blueprint(term); + auto upper_itr = blueprint->createFilterSearch(true, BFC::UPPER_BOUND); + auto lower_itr = blueprint->createFilterSearch(true, BFC::LOWER_BOUND); + EXPECT_EQ(upper, SimpleResult().search(*upper_itr, DOCID_LIMIT)); + EXPECT_EQ(lower, SimpleResult().search(*lower_itr, DOCID_LIMIT)); + } + void expect_filter_wrapper(const Node& term) { + auto blueprint = create_blueprint(term); + auto itr = blueprint->createFilterSearch(true, BFC::UPPER_BOUND); + downcast<FilterWrapper>(*itr); + } }; class NearestNeighborFixture : public BlueprintFactoryFixture { public: - NearestNeighborFixture(AttributeVector::SP attr) - : BlueprintFactoryFixture(std::move(attr)) + NearestNeighborFixture(AttributeVector::SP attr_in) + : BlueprintFactoryFixture(std::move(attr_in)) { } ~NearestNeighborFixture() {} @@ -422,30 +409,60 @@ TEST(AttributeBlueprintTest, empty_blueprint_is_created_when_nearest_neighbor_te expect_empty_blueprint(make_tensor_attribute(field, "tensor(x[2])"), dense_x_3); // tensor types are not same size } -TEST(AttributeBlueprintTest, attribute_field_blueprint_wraps_filter_search_iterator) +TEST(AttributeBlueprintTest, attribute_field_blueprint_creates_exact_filter_search) { - BlueprintFactoryFixture f(make_string_attribute("foo")); + BlueprintFactoryFixture f(make_string_attribute({"foo", "x", "foo"})); SimpleStringTerm term("foo", field, 0, Weight(0)); - auto blueprint = f.create_blueprint(term); - - auto itr = blueprint->createFilterSearch(true, Blueprint::FilterConstraint::UPPER_BOUND); - auto& wrapper = downcast<FilterWrapper>(*itr); - wrapper.initRange(1, 3); - EXPECT_FALSE(wrapper.seek(1)); - EXPECT_TRUE(wrapper.seek(2)); + f.expect_filter_search(SimpleResult({1, 3}), term); + f.expect_filter_wrapper(term); } -TEST(AttributeBlueprintTest, direct_attribute_blueprint_wraps_filter_search_iterator) +TEST(AttributeBlueprintTest, direct_attribute_blueprint_creates_exact_filter_search) { - BlueprintFactoryFixture f(make_wset_string_attribute("foo")); + BlueprintFactoryFixture f(make_wset_string_attribute({{"foo"}, {}, {"foo"}})); + f.expect_document_weight_attribute(); SimpleStringTerm term("foo", field, 0, Weight(0)); - auto blueprint = f.create_blueprint(term); + f.expect_filter_search(SimpleResult({1, 3}), term); + f.expect_filter_wrapper(term); +} + +TEST(AttributeBlueprintTest, direct_wand_blueprint_creates_or_like_filter_search) +{ + BlueprintFactoryFixture f(make_wset_string_attribute({{"foo"}, {"x"}, {"bar"}})); + f.expect_document_weight_attribute(); + SimpleWandTerm term(2, field, 0, Weight(0), DOCID_LIMIT, 1000, 1.0); + term.addTerm("foo", Weight(10)); + term.addTerm("bar", Weight(20)); + f.expect_filter_search(SimpleResult({1, 3}), SimpleResult(), term); +} - auto itr = blueprint->createFilterSearch(true, Blueprint::FilterConstraint::UPPER_BOUND); - auto& wrapper = downcast<FilterWrapper>(*itr); - wrapper.initRange(1, 3); - EXPECT_FALSE(wrapper.seek(1)); - EXPECT_TRUE(wrapper.seek(2)); +TEST(AttributeBlueprintTest, direct_weighted_set_blueprint_creates_or_like_filter_search) +{ + BlueprintFactoryFixture f(make_wset_string_attribute({{"foo"}, {"x"}, {"bar"}})); + f.expect_document_weight_attribute(); + { + SimpleWeightedSetTerm term(2, field, 0, Weight(0)); + term.addTerm("foo", Weight(10)); + term.addTerm("bar", Weight(20)); + f.expect_filter_search(SimpleResult({1, 3}), term); + } + { + SimpleDotProduct term(2, field, 0, Weight(0)); + term.addTerm("foo", Weight(10)); + term.addTerm("bar", Weight(20)); + f.expect_filter_search(SimpleResult({1, 3}), term); + } +} + +TEST(AttributeBlueprintTest, attribute_weighted_set_blueprint_creates_or_like_filter_search) +{ + BlueprintFactoryFixture f(make_string_attribute({"foo", "x", "bar"})); + { + SimpleWeightedSetTerm term(2, field, 0, Weight(0)); + term.addTerm("foo", Weight(10)); + term.addTerm("bar", Weight(20)); + f.expect_filter_search(SimpleResult({1, 3}), term); + } } GTEST_MAIN_RUN_ALL_TESTS() diff --git a/searchlib/src/tests/diskindex/diskindex/CMakeLists.txt b/searchlib/src/tests/diskindex/diskindex/CMakeLists.txt index 9a19c1bf1d5..7dff06507f4 100644 --- a/searchlib/src/tests/diskindex/diskindex/CMakeLists.txt +++ b/searchlib/src/tests/diskindex/diskindex/CMakeLists.txt @@ -3,7 +3,6 @@ vespa_add_executable(searchlib_diskindex_test_app TEST SOURCES diskindex_test.cpp DEPENDS - searchlib searchlib_test ) vespa_add_test(NAME searchlib_diskindex_test_app COMMAND searchlib_diskindex_test_app) diff --git a/searchlib/src/tests/diskindex/diskindex/diskindex_test.cpp b/searchlib/src/tests/diskindex/diskindex/diskindex_test.cpp index 2e8617b4dcb..d153481ef36 100644 --- a/searchlib/src/tests/diskindex/diskindex/diskindex_test.cpp +++ b/searchlib/src/tests/diskindex/diskindex/diskindex_test.cpp @@ -1,9 +1,9 @@ // Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -#include <vespa/vespalib/testkit/testapp.h> #include <vespa/searchlib/common/bitvectoriterator.h> #include <vespa/searchlib/diskindex/disktermblueprint.h> #include <vespa/searchlib/test/diskindex/testdiskindex.h> +#define ENABLE_GTEST_MIGRATION #include <vespa/searchlib/test/searchiteratorverifier.h> #include <vespa/searchlib/test/fakedata/fakeword.h> #include <vespa/searchlib/diskindex/zcposocciterators.h> @@ -12,38 +12,43 @@ #include <vespa/searchlib/queryeval/leaf_blueprints.h> #include <vespa/searchlib/queryeval/emptysearch.h> #include <vespa/searchlib/queryeval/fake_requestcontext.h> +#include <vespa/searchlib/queryeval/simpleresult.h> #include <vespa/searchlib/index/dummyfileheadercontext.h> #include <vespa/searchlib/test/fakedata/fpfactory.h> +#include <vespa/vespalib/gtest/gtest.h> +#include <vespa/vespalib/stllike/asciistream.h> #include <filesystem> -#include <iostream> #include <set> +using search::BitVector; using search::BitVectorIterator; -using namespace search::fef; -using namespace search::index; -using namespace search::query; -using namespace search::queryeval; -using namespace search::queryeval::blueprint; +using search::diskindex::DiskIndex; +using search::diskindex::DiskTermBlueprint; +using search::diskindex::TestDiskIndex; +using search::diskindex::ZcRareWordPosOccIterator; +using search::fef::TermFieldMatchDataArray; +using search::index::DummyFileHeaderContext; +using search::index::PostingListHandle; +using search::index::Schema; +using search::query::SimpleStringTerm; +using search::queryeval::Blueprint; +using search::queryeval::BooleanMatchIteratorWrapper; +using search::queryeval::EmptyBlueprint; +using search::queryeval::EmptySearch; +using search::queryeval::ExecuteInfo; +using search::queryeval::FakeRequestContext; +using search::queryeval::FieldSpec; +using search::queryeval::LeafBlueprint; +using search::queryeval::SearchIterator; +using search::queryeval::SimpleResult; using search::test::SearchIteratorVerifier; -using namespace search::fakedata; +using search::fakedata::FPFactory; +using search::fakedata::FakePosting; +using search::fakedata::FakeWord; +using search::fakedata::getFPFactory; +using LookupResult = DiskIndex::LookupResult; -namespace search { -namespace diskindex { - -typedef DiskIndex::LookupResult LookupResult; - -std::string -toString(SearchIterator & sb) -{ - std::ostringstream oss; - bool first = true; - for (sb.seek(1u); ! sb.isAtEnd(); sb.seek(sb.getDocId() + 1)) { - if (!first) oss << ","; - oss << sb.getDocId(); - first = false; - } - return oss.str(); -} +namespace { SimpleStringTerm makeTerm(const std::string & term) @@ -51,23 +56,6 @@ makeTerm(const std::string & term) return SimpleStringTerm(term, "field", 0, search::query::Weight(0)); } -class Test : public vespalib::TestApp, public TestDiskIndex { -private: - FakeRequestContext _requestContext; - - void requireThatLookupIsWorking(bool fieldEmpty, bool docEmpty, bool wordEmpty); - void requireThatWeCanReadPostingList(); - void require_that_we_can_get_field_length_info(); - void requireThatWeCanReadBitVector(); - void requireThatBlueprintIsCreated(); - void requireThatBlueprintCanCreateSearchIterators(); - void requireThatSearchIteratorsConforms(); -public: - Test(); - ~Test(); - int Main() override; -}; - class Verifier : public SearchIteratorVerifier { public: Verifier(FakePosting::SP fp); @@ -96,14 +84,67 @@ Verifier::Verifier(FakePosting::SP fp) Verifier::~Verifier() = default; +struct EmptySettings +{ + bool _empty_field; + bool _empty_doc; + bool _empty_word; + EmptySettings() + : _empty_field(false), + _empty_doc(false), + _empty_word(false) + { + } + EmptySettings empty_field() && { _empty_field = true; return *this; } + EmptySettings empty_doc() && { _empty_doc = true; return *this; } + EmptySettings empty_word() && { _empty_word = true; return *this; } +}; + +struct IOSettings +{ + bool _use_directio; + bool _use_mmap; + IOSettings() + : _use_directio(false), + _use_mmap(false) + { + } + IOSettings use_directio() && { _use_directio = true; return *this; } + IOSettings use_mmap() && { _use_mmap = true; return *this; } +}; + +class DiskIndexTest : public ::testing::Test, public TestDiskIndex { +private: + FakeRequestContext _requestContext; + +protected: + void requireThatLookupIsWorking(const EmptySettings& empty_settings); + void requireThatWeCanReadPostingList(); + void require_that_we_can_get_field_length_info(); + void requireThatWeCanReadBitVector(); + void requireThatBlueprintIsCreated(); + void requireThatBlueprintCanCreateSearchIterators(); + void requireThatSearchIteratorsConforms(); + void build_index(const IOSettings& io_settings, const EmptySettings& empty_settings); + void test_empty_settings(const EmptySettings& empty_settings); + void test_io_settings(const IOSettings& io_settings); +public: + DiskIndexTest(); + ~DiskIndexTest(); +}; + +DiskIndexTest::DiskIndexTest() = default; + +DiskIndexTest::~DiskIndexTest() = default; + void -Test::requireThatSearchIteratorsConforms() +DiskIndexTest::requireThatSearchIteratorsConforms() { FakePosting::SP tmp; Verifier verTmp(tmp); Schema schema; schema.addIndexField(Schema::IndexField("a", Schema::DataType::STRING)); - bitcompression::PosOccFieldsParams params; + search::bitcompression::PosOccFieldsParams params; params.setSchemaParams(schema, 0); search::fakedata::FakeWord fw(verTmp.getDocIdLimit(), verTmp.getExpectedDocIds(), "a", params, 0); TermFieldMatchData md; @@ -118,22 +159,23 @@ Test::requireThatSearchIteratorsConforms() "EGCompr64FilterOcc", "EGCompr64LEFilterOcc", "EGCompr64NoSkipFilterOcc", "EGCompr64SkipFilterOcc" }; for (auto postingType : search::fakedata::getPostingTypes()) { + SCOPED_TRACE(postingType); if (ignored.find(postingType) == ignored.end()) { - std::cerr << "Verifying " << postingType << std::endl; std::unique_ptr<FPFactory> ff(getFPFactory(postingType, schema)); ff->setup(v); FakePosting::SP f(ff->make(fw)); Verifier verifier(f); - TEST_DO(verifier.verify()); + verifier.verify(); } } } void -Test::requireThatLookupIsWorking(bool fieldEmpty, - bool docEmpty, - bool wordEmpty) +DiskIndexTest::requireThatLookupIsWorking(const EmptySettings& empty_settings) { + auto fieldEmpty = empty_settings._empty_field; + auto docEmpty = empty_settings._empty_doc; + auto wordEmpty = empty_settings._empty_word; uint32_t f1(_schema.getIndexFieldId("f1")); uint32_t f2(_schema.getIndexFieldId("f2")); uint32_t f3(_schema.getIndexFieldId("f3")); @@ -149,8 +191,8 @@ Test::requireThatLookupIsWorking(bool fieldEmpty, if (wordEmpty || fieldEmpty || docEmpty) { EXPECT_TRUE(!r || r->counts._numDocs == 0); } else { - EXPECT_EQUAL(1u, r->wordNum); - EXPECT_EQUAL(2u, r->counts._numDocs); + EXPECT_EQ(1u, r->wordNum); + EXPECT_EQ(2u, r->counts._numDocs); } r = _index->lookup(f1, "w2"); EXPECT_TRUE(!r || r->counts._numDocs == 0); @@ -160,15 +202,15 @@ Test::requireThatLookupIsWorking(bool fieldEmpty, if (wordEmpty || fieldEmpty || docEmpty) { EXPECT_TRUE(!r || r->counts._numDocs == 0); } else { - EXPECT_EQUAL(1u, r->wordNum); - EXPECT_EQUAL(3u, r->counts._numDocs); + EXPECT_EQ(1u, r->wordNum); + EXPECT_EQ(3u, r->counts._numDocs); } r = _index->lookup(f2, "w2"); if (wordEmpty || fieldEmpty || docEmpty) { EXPECT_TRUE(!r || r->counts._numDocs == 0); } else { - EXPECT_EQUAL(2u, r->wordNum); - EXPECT_EQUAL(17u, r->counts._numDocs); + EXPECT_EQ(2u, r->wordNum); + EXPECT_EQ(17u, r->counts._numDocs); } } { // field 'f3' doesn't exist @@ -180,35 +222,34 @@ Test::requireThatLookupIsWorking(bool fieldEmpty, } void -Test::requireThatWeCanReadPostingList() +DiskIndexTest::requireThatWeCanReadPostingList() { TermFieldMatchDataArray mda; { // field 'f1' LookupResult::UP r = _index->lookup(0, "w1"); PostingListHandle::UP h = _index->readPostingList(*r); SearchIterator * sb = h->createIterator(r->counts, mda); - sb->initFullRange(); - EXPECT_EQUAL("1,3", toString(*sb)); + EXPECT_EQ(SimpleResult({1,3}), SimpleResult().search(*sb)); delete sb; } } void -Test::require_that_we_can_get_field_length_info() +DiskIndexTest::require_that_we_can_get_field_length_info() { auto info = _index->get_field_length_info("f1"); - EXPECT_EQUAL(3.5, info.get_average_field_length()); - EXPECT_EQUAL(21u, info.get_num_samples()); + EXPECT_EQ(3.5, info.get_average_field_length()); + EXPECT_EQ(21u, info.get_num_samples()); info = _index->get_field_length_info("f2"); - EXPECT_EQUAL(4.0, info.get_average_field_length()); - EXPECT_EQUAL(23u, info.get_num_samples()); + EXPECT_EQ(4.0, info.get_average_field_length()); + EXPECT_EQ(23u, info.get_num_samples()); info = _index->get_field_length_info("f3"); - EXPECT_EQUAL(0.0, info.get_average_field_length()); - EXPECT_EQUAL(0u, info.get_num_samples()); + EXPECT_EQ(0.0, info.get_average_field_length()); + EXPECT_EQ(0u, info.get_num_samples()); } void -Test::requireThatWeCanReadBitVector() +DiskIndexTest::requireThatWeCanReadBitVector() { { // word 'w1' LookupResult::UP r = _index->lookup(1, "w1"); @@ -229,7 +270,7 @@ Test::requireThatWeCanReadBitVector() } void -Test::requireThatBlueprintIsCreated() +DiskIndexTest::requireThatBlueprintIsCreated() { { // unknown field Blueprint::UP b = @@ -245,7 +286,7 @@ Test::requireThatBlueprintIsCreated() Blueprint::UP b = _index->createBlueprint(_requestContext, FieldSpec("f1", 0, 0), makeTerm("w1")); EXPECT_TRUE(dynamic_cast<DiskTermBlueprint *>(b.get()) != NULL); - EXPECT_EQUAL(2u, b->getState().estimate().estHits); + EXPECT_EQ(2u, b->getState().estimate().estHits); EXPECT_TRUE(!b->getState().estimate().empty); } { // known field & word without hits @@ -254,121 +295,190 @@ Test::requireThatBlueprintIsCreated() // std::cerr << "BP = " << typeid(*b).name() << std::endl; EXPECT_TRUE((dynamic_cast<DiskTermBlueprint *>(b.get()) != NULL) || (dynamic_cast<EmptyBlueprint *>(b.get()) != NULL)); - EXPECT_EQUAL(0u, b->getState().estimate().estHits); + EXPECT_EQ(0u, b->getState().estimate().estHits); EXPECT_TRUE(b->getState().estimate().empty); } } void -Test::requireThatBlueprintCanCreateSearchIterators() +DiskIndexTest::requireThatBlueprintCanCreateSearchIterators() { TermFieldMatchData md; TermFieldMatchDataArray mda; mda.add(&md); Blueprint::UP b; SearchIterator::UP s; + SimpleResult result_f1_w1({1,3}); + SimpleResult result_f1_w2; + SimpleResult result_f2_w2({1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17}); + auto upper_bound = Blueprint::FilterConstraint::UPPER_BOUND; { // bit vector due to isFilter b = _index->createBlueprint(_requestContext, FieldSpec("f2", 0, 0, true), makeTerm("w2")); - b->fetchPostings(queryeval::ExecuteInfo::TRUE); - s = (dynamic_cast<LeafBlueprint *>(b.get()))->createLeafSearch(mda, true); + b->fetchPostings(search::queryeval::ExecuteInfo::TRUE); + auto& leaf_b = dynamic_cast<LeafBlueprint&>(*b); + s = leaf_b.createLeafSearch(mda, true); EXPECT_TRUE(dynamic_cast<BitVectorIterator *>(s.get()) != NULL); + EXPECT_EQ(result_f2_w2, SimpleResult().search(*s)); + EXPECT_EQ(result_f2_w2, SimpleResult().search(*leaf_b.createFilterSearch(true, upper_bound))); } { // bit vector due to no ranking needed b = _index->createBlueprint(_requestContext, FieldSpec("f2", 0, 0, false), makeTerm("w2")); - b->fetchPostings(queryeval::ExecuteInfo::TRUE); - s = (dynamic_cast<LeafBlueprint *>(b.get()))->createLeafSearch(mda, true); + b->fetchPostings(ExecuteInfo::TRUE); + auto& leaf_b = dynamic_cast<LeafBlueprint&>(*b); + s = leaf_b.createLeafSearch(mda, true); EXPECT_FALSE(dynamic_cast<BitVectorIterator *>(s.get()) != NULL); TermFieldMatchData md2; md2.tagAsNotNeeded(); TermFieldMatchDataArray mda2; mda2.add(&md2); EXPECT_TRUE(mda2[0]->isNotNeeded()); - s = (dynamic_cast<LeafBlueprint *>(b.get()))->createLeafSearch(mda2, false); + s = (dynamic_cast<LeafBlueprint *>(b.get()))->createLeafSearch(mda2, true); EXPECT_TRUE(dynamic_cast<BitVectorIterator *>(s.get()) != NULL); + EXPECT_EQ(result_f2_w2, SimpleResult().search(*s)); + EXPECT_EQ(result_f2_w2, SimpleResult().search(*leaf_b.createFilterSearch(true, upper_bound))); } { // fake bit vector b = _index->createBlueprint(_requestContext, FieldSpec("f1", 0, 0, true), makeTerm("w2")); // std::cerr << "BP = " << typeid(*b).name() << std::endl; - b->fetchPostings(queryeval::ExecuteInfo::TRUE); - s = (dynamic_cast<LeafBlueprint *>(b.get()))->createLeafSearch(mda, true); + b->fetchPostings(ExecuteInfo::TRUE); + auto& leaf_b = dynamic_cast<LeafBlueprint&>(*b); + s = leaf_b.createLeafSearch(mda, true); // std::cerr << "SI = " << typeid(*s).name() << std::endl; EXPECT_TRUE((dynamic_cast<BooleanMatchIteratorWrapper *>(s.get()) != NULL) || dynamic_cast<EmptySearch *>(s.get())); + EXPECT_EQ(result_f1_w2, SimpleResult().search(*s)); + EXPECT_EQ(result_f1_w2, SimpleResult().search(*leaf_b.createFilterSearch(true, upper_bound))); } { // posting list iterator b = _index->createBlueprint(_requestContext, FieldSpec("f1", 0, 0), makeTerm("w1")); - b->fetchPostings(queryeval::ExecuteInfo::TRUE); - s = (dynamic_cast<LeafBlueprint *>(b.get()))->createLeafSearch(mda, true); + b->fetchPostings(ExecuteInfo::TRUE); + auto& leaf_b = dynamic_cast<LeafBlueprint&>(*b); + s = leaf_b.createLeafSearch(mda, true); ASSERT_TRUE((dynamic_cast<ZcRareWordPosOccIterator<true, false> *>(s.get()) != NULL)); + EXPECT_EQ(result_f1_w1, SimpleResult().search(*s)); + EXPECT_EQ(result_f1_w1, SimpleResult().search(*leaf_b.createFilterSearch(true, upper_bound))); + } +} + +void +DiskIndexTest::build_index(const IOSettings& io_settings, const EmptySettings& empty_settings) +{ + vespalib::asciistream name; + int io_settings_num = 1; + if (io_settings._use_directio) { + io_settings_num += 1; + } + if (io_settings._use_mmap) { + io_settings_num += 2; } + name << "index/" << io_settings_num; + if (empty_settings._empty_field) { + name << "fe"; + } else { + buildSchema(); + } + if (empty_settings._empty_doc) { + name << "de"; + } + if (empty_settings._empty_word) { + name << "we"; + } + openIndex(name.str(), io_settings._use_directio, io_settings._use_mmap, empty_settings._empty_field, empty_settings._empty_doc, empty_settings._empty_word); } -Test::Test() = default; +void +DiskIndexTest::test_empty_settings(const EmptySettings& empty_settings) +{ + build_index(IOSettings(), empty_settings); + requireThatLookupIsWorking(empty_settings); +} -Test::~Test() = default; +void +DiskIndexTest::test_io_settings(const IOSettings& io_settings) +{ + EmptySettings empty_settings; + build_index(io_settings, empty_settings); + requireThatLookupIsWorking(empty_settings); + requireThatWeCanReadPostingList(); + require_that_we_can_get_field_length_info(); + requireThatWeCanReadBitVector(); + requireThatBlueprintIsCreated(); + requireThatBlueprintCanCreateSearchIterators(); +} -int -Test::Main() +TEST_F(DiskIndexTest, empty_settings_empty_field_empty_doc_empty_word) { - TEST_INIT("diskindex_test"); + test_empty_settings(EmptySettings().empty_field().empty_doc().empty_word()); +} - if (_argc > 0) { - DummyFileHeaderContext::setCreator(_argv[0]); - } +TEST_F(DiskIndexTest, empty_settings_empty_field_empty_doc) +{ + test_empty_settings(EmptySettings().empty_field().empty_doc()); +} - std::filesystem::create_directory(std::filesystem::path("index")); - TEST_DO(openIndex("index/1fedewe", false, false, true, true, true)); - TEST_DO(requireThatLookupIsWorking(true, true, true)); - TEST_DO(openIndex("index/1fede", false, false, true, true, false)); - TEST_DO(requireThatLookupIsWorking(true, true, false)); - TEST_DO(openIndex("index/1fewe", false, false, true, false, true)); - TEST_DO(requireThatLookupIsWorking(true, false, true)); - TEST_DO(openIndex("index/1fe", false, false, true, false, false)); - TEST_DO(requireThatLookupIsWorking(true, false, false)); - buildSchema(); - TEST_DO(openIndex("index/1dewe", false, false, false, true, true)); - TEST_DO(requireThatLookupIsWorking(false, true, true)); - TEST_DO(openIndex("index/1de", false, false, false, true, false)); - TEST_DO(requireThatLookupIsWorking(false, true, false)); - TEST_DO(openIndex("index/1we", false, false, false, false, true)); - TEST_DO(requireThatLookupIsWorking(false, false, true)); - TEST_DO(openIndex("index/1", false, false, false, false, false)); - TEST_DO(requireThatLookupIsWorking(false, false, false)); - TEST_DO(requireThatWeCanReadPostingList()); - TEST_DO(require_that_we_can_get_field_length_info()); - TEST_DO(requireThatWeCanReadBitVector()); - TEST_DO(requireThatBlueprintIsCreated()); - TEST_DO(requireThatBlueprintCanCreateSearchIterators()); - - TEST_DO(openIndex("index/2", true, false, false, false, false)); - TEST_DO(requireThatLookupIsWorking(false, false, false)); - TEST_DO(requireThatWeCanReadPostingList()); - TEST_DO(require_that_we_can_get_field_length_info()); - TEST_DO(requireThatWeCanReadBitVector()); - TEST_DO(requireThatBlueprintIsCreated()); - TEST_DO(requireThatBlueprintCanCreateSearchIterators()); - - TEST_DO(openIndex("index/3", false, true, false, false, false)); - TEST_DO(requireThatLookupIsWorking(false, false, false)); - TEST_DO(requireThatWeCanReadPostingList()); - TEST_DO(require_that_we_can_get_field_length_info()); - TEST_DO(requireThatWeCanReadBitVector()); - TEST_DO(requireThatBlueprintIsCreated()); - TEST_DO(requireThatBlueprintCanCreateSearchIterators()); - - TEST_DO(openIndex("index/4", true, true, false, false, false)); - TEST_DO(requireThatLookupIsWorking(false, false, false)); - TEST_DO(requireThatWeCanReadPostingList()); - TEST_DO(require_that_we_can_get_field_length_info()); - TEST_DO(requireThatWeCanReadBitVector()); - TEST_DO(requireThatBlueprintIsCreated()); - TEST_DO(requireThatBlueprintCanCreateSearchIterators()); - TEST_DO(requireThatSearchIteratorsConforms()); - - TEST_DONE(); +TEST_F(DiskIndexTest, empty_settings_empty_field_empty_word) +{ + test_empty_settings(EmptySettings().empty_field().empty_word()); } +TEST_F(DiskIndexTest, empty_settings_empty_field) +{ + test_empty_settings(EmptySettings().empty_field()); } + +TEST_F(DiskIndexTest, empty_settings_empty_doc_empty_word) +{ + test_empty_settings(EmptySettings().empty_doc().empty_word()); } -TEST_APPHOOK(search::diskindex::Test); +TEST_F(DiskIndexTest, empty_settings_empty_doc) +{ + test_empty_settings(EmptySettings().empty_doc()); +} + +TEST_F(DiskIndexTest, empty_settings_empty_word) +{ + test_empty_settings(EmptySettings().empty_word()); +} + +TEST_F(DiskIndexTest, io_settings_normal) +{ + test_io_settings(IOSettings()); +} + +TEST_F(DiskIndexTest, io_settings_directio) +{ + test_io_settings(IOSettings().use_directio()); +} + +TEST_F(DiskIndexTest, io_settings_mmap) +{ + test_io_settings(IOSettings().use_mmap()); +} + +TEST_F(DiskIndexTest, io_settings_directio_mmap) +{ + test_io_settings(IOSettings().use_directio().use_mmap()); +} + +TEST_F(DiskIndexTest, search_iterators_conformance) +{ + requireThatSearchIteratorsConforms(); +} + +} + +int +main(int argc, char* argv[]) +{ + if (argc > 0) { + DummyFileHeaderContext::setCreator(argv[0]); + } + ::testing::InitGoogleTest(&argc, argv); + std::filesystem::path index_path("index"); + std::filesystem::remove_all(index_path); + std::filesystem::create_directory(index_path); + auto rval = RUN_ALL_TESTS(); + std::filesystem::remove_all(index_path); + return rval; +} diff --git a/searchlib/src/vespa/searchlib/test/CMakeLists.txt b/searchlib/src/vespa/searchlib/test/CMakeLists.txt index b53b3097850..7decdb992e6 100644 --- a/searchlib/src/vespa/searchlib/test/CMakeLists.txt +++ b/searchlib/src/vespa/searchlib/test/CMakeLists.txt @@ -1,6 +1,7 @@ # Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. vespa_add_library(searchlib_test SOURCES + attribute_builder.cpp document_weight_attribute_helper.cpp doc_builder.cpp imported_attribute_fixture.cpp diff --git a/searchlib/src/vespa/searchlib/test/attribute_builder.cpp b/searchlib/src/vespa/searchlib/test/attribute_builder.cpp new file mode 100644 index 00000000000..b804723d3be --- /dev/null +++ b/searchlib/src/vespa/searchlib/test/attribute_builder.cpp @@ -0,0 +1,125 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include "attribute_builder.h" +#include <vespa/searchlib/attribute/attributefactory.h> +#include <vespa/searchlib/attribute/attributevector.h> +#include <vespa/searchlib/attribute/integerbase.h> +#include <vespa/searchlib/attribute/stringbase.h> +#include <cassert> + +namespace search::attribute::test { + +AttributeBuilder::AttributeBuilder(const vespalib::string& name, const Config& cfg) + : _attr_ptr(AttributeFactory::createAttribute(name, cfg)), + _attr(*_attr_ptr) +{ +} + +AttributeBuilder::AttributeBuilder(AttributeVector& attr) + : _attr_ptr(), + _attr(attr) +{ +} + +namespace { + +void +add_docs(AttributeVector& attr, size_t num_docs) +{ + attr.addReservedDoc(); + attr.addDocs(num_docs); +} + +template <typename AttrType, typename ValueType> +void +fill_helper(AttributeVector& attr, const std::vector<ValueType>& values) +{ + assert(attr.getConfig().collectionType() == CollectionType::SINGLE); + add_docs(attr, values.size()); + auto& real = dynamic_cast<AttrType&>(attr); + for (size_t i = 0; i < values.size(); ++i) { + uint32_t docid = (i + 1); + real.update(docid, values[i]); + } + attr.commit(true); +} + +template <typename AttrType, typename ValueType> +void +fill_array_helper(AttributeVector& attr, const std::vector<std::vector<ValueType>>& values) +{ + assert((attr.getConfig().collectionType() == CollectionType::ARRAY) || + (attr.getConfig().collectionType() == CollectionType::WSET)); + add_docs(attr, values.size()); + auto& real = dynamic_cast<AttrType&>(attr); + for (size_t i = 0; i < values.size(); ++i) { + uint32_t docid = (i + 1); + for (auto value : values[i]) { + real.append(docid, value, 1); + } + } + attr.commit(true); +} + +template <typename AttrType, typename ValueType> +void +fill_wset_helper(AttributeVector& attr, const std::vector<std::vector<std::pair<ValueType, int32_t>>>& values) +{ + assert(attr.getConfig().collectionType() == CollectionType::WSET); + add_docs(attr, values.size()); + auto& real = dynamic_cast<AttrType&>(attr); + for (size_t i = 0; i < values.size(); ++i) { + uint32_t docid = (i + 1); + for (auto value : values[i]) { + real.append(docid, value.first, value.second); + } + } + attr.commit(true); +} + +} + +AttributeBuilder& +AttributeBuilder::fill(const std::vector<int64_t>& values) +{ + fill_helper<IntegerAttribute, int64_t>(_attr, values); + return *this; +} + +AttributeBuilder& +AttributeBuilder::fill_array(const std::vector<std::vector<int64_t>>& values) +{ + fill_array_helper<IntegerAttribute, int64_t>(_attr, values); + return *this; +} + +AttributeBuilder& +AttributeBuilder::fill_wset(const std::vector<std::vector<std::pair<int64_t, int32_t>>>& values) +{ + fill_wset_helper<IntegerAttribute, int64_t>(_attr, values); + return *this; +} + +AttributeBuilder& +AttributeBuilder::fill(const std::vector<vespalib::string>& values) +{ + fill_helper<StringAttribute, vespalib::string>(_attr, values); + return *this; +} + +AttributeBuilder& +AttributeBuilder::fill_array(const std::vector<std::vector<vespalib::string>>& values) +{ + fill_array_helper<StringAttribute, vespalib::string>(_attr, values); + return *this; +} + +AttributeBuilder& +AttributeBuilder::fill_wset(const std::vector<std::vector<std::pair<vespalib::string, int32_t>>>& values) +{ + fill_wset_helper<StringAttribute, vespalib::string>(_attr, values); + return *this; +} + +} + diff --git a/searchlib/src/vespa/searchlib/test/attribute_builder.h b/searchlib/src/vespa/searchlib/test/attribute_builder.h new file mode 100644 index 00000000000..6d3099b5ea1 --- /dev/null +++ b/searchlib/src/vespa/searchlib/test/attribute_builder.h @@ -0,0 +1,41 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include <vespa/searchcommon/attribute/config.h> +#include <vespa/vespalib/stllike/string.h> +#include <memory> +#include <utility> +#include <vector> + +namespace search { class AttributeVector; } +namespace search::attribute { class Config; } + +namespace search::attribute::test { + +/** + * Helper class used to build and fill AttributeVector instances in unit tests. + */ +class AttributeBuilder { +private: + std::shared_ptr<AttributeVector> _attr_ptr; + AttributeVector& _attr; + +public: + AttributeBuilder(const vespalib::string& name, const Config& cfg); + AttributeBuilder(AttributeVector& attr); + + // Fill functions for integer attributes + AttributeBuilder& fill(const std::vector<int64_t>& values); + AttributeBuilder& fill_array(const std::vector<std::vector<int64_t>>& values); + AttributeBuilder& fill_wset(const std::vector<std::vector<std::pair<int64_t, int32_t>>>& values); + + // Fill functions for string attributes + AttributeBuilder& fill(const std::vector<vespalib::string>& values); + AttributeBuilder& fill_array(const std::vector<std::vector<vespalib::string>>& values); + AttributeBuilder& fill_wset(const std::vector<std::vector<std::pair<vespalib::string, int32_t>>>& values); + + std::shared_ptr<AttributeVector> get() const { return _attr_ptr; } +}; + +} |