diff options
16 files changed, 732 insertions, 234 deletions
diff --git a/searchcommon/src/vespa/searchcommon/attribute/search_context_params.h b/searchcommon/src/vespa/searchcommon/attribute/search_context_params.h index d15a97629f0..96c3d7f3470 100644 --- a/searchcommon/src/vespa/searchcommon/attribute/search_context_params.h +++ b/searchcommon/src/vespa/searchcommon/attribute/search_context_params.h @@ -2,6 +2,8 @@ #pragma once +#include <stddef.h> + namespace search { namespace attribute { diff --git a/searchcommon/src/vespa/searchcommon/common/range.h b/searchcommon/src/vespa/searchcommon/common/range.h index f33630daaf4..5bcf2355eb9 100644 --- a/searchcommon/src/vespa/searchcommon/common/range.h +++ b/searchcommon/src/vespa/searchcommon/common/range.h @@ -4,6 +4,9 @@ #pragma once +#include <limits> +#include <stdint.h> + namespace search { diff --git a/searchcorespi/src/vespa/searchcorespi/index/fusionrunner.cpp b/searchcorespi/src/vespa/searchcorespi/index/fusionrunner.cpp index 339ceab83c8..84c34714526 100644 --- a/searchcorespi/src/vespa/searchcorespi/index/fusionrunner.cpp +++ b/searchcorespi/src/vespa/searchcorespi/index/fusionrunner.cpp @@ -39,6 +39,9 @@ FusionRunner::FusionRunner(const string &base_dir, _fileHeaderContext(fileHeaderContext) { } +FusionRunner::~FusionRunner() { +} + namespace { void readSelectorArray(const string &selector_name, SelectorArray &selector_array, diff --git a/searchcorespi/src/vespa/searchcorespi/index/fusionrunner.h b/searchcorespi/src/vespa/searchcorespi/index/fusionrunner.h index 097b76bc4cc..ea197378fac 100644 --- a/searchcorespi/src/vespa/searchcorespi/index/fusionrunner.h +++ b/searchcorespi/src/vespa/searchcorespi/index/fusionrunner.h @@ -44,6 +44,7 @@ public: const search::index::Schema &schema, const search::TuneFileAttributes &tuneFileAttributes, const search::common::FileHeaderContext &fileHeaderContext); + ~FusionRunner(); /** * Combine the indexes specified by the ids by running fusion. 
diff --git a/searchlib/CMakeLists.txt b/searchlib/CMakeLists.txt index ad62331ac2b..290ac63939b 100644 --- a/searchlib/CMakeLists.txt +++ b/searchlib/CMakeLists.txt @@ -92,6 +92,7 @@ vespa_define_module( src/tests/attribute/stringattribute src/tests/attribute/tensorattribute src/tests/attribute/imported_attribute_vector + src/tests/attribute/imported_search_context src/tests/bitcompression/expgolomb src/tests/bitvector src/tests/btree diff --git a/searchlib/src/tests/attribute/imported_attribute_vector/imported_attribute_vector_test.cpp b/searchlib/src/tests/attribute/imported_attribute_vector/imported_attribute_vector_test.cpp index 2503d3c564a..b46875cf3e6 100644 --- a/searchlib/src/tests/attribute/imported_attribute_vector/imported_attribute_vector_test.cpp +++ b/searchlib/src/tests/attribute/imported_attribute_vector/imported_attribute_vector_test.cpp @@ -1,237 +1,13 @@ // Copyright 2017 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -#include <vespa/document/base/documentid.h> -#include <vespa/document/base/globalid.h> -#include <vespa/searchlib/attribute/attributefactory.h> -#include <vespa/searchlib/attribute/attributeguard.h> -#include <vespa/searchlib/attribute/floatbase.h> -#include <vespa/searchlib/attribute/imported_attribute_vector.h> -#include <vespa/searchlib/attribute/integerbase.h> -#include <vespa/searchlib/attribute/not_implemented_attribute.h> -#include <vespa/searchlib/attribute/stringbase.h> -#include <vespa/searchlib/test/mock_gid_to_lid_mapping.h> -#include <vespa/searchcommon/attribute/attributecontent.h> -#include <vespa/vespalib/testkit/testapp.h> -#include <algorithm> -#include <future> -#include <map> -#include <memory> -#include <vector> +#include <vespa/searchlib/test/imported_attribute_fixture.h> +#include <vespa/searchlib/fef/termfieldmatchdata.h> +#include <vespa/searchcommon/attribute/search_context_params.h> namespace search { namespace attribute { -using document::DocumentId; 
-using document::GlobalId; -using DocId = IAttributeVector::DocId; -using WeightedInt = IAttributeVector::WeightedInt; -using WeightedFloat = IAttributeVector::WeightedFloat; -using WeightedString = IAttributeVector::WeightedString; -using WeightedConstChar = IAttributeVector::WeightedConstChar; -using WeightedEnum = IAttributeVector::WeightedEnum; -using test::MockGidToLidMapperFactory; - -std::shared_ptr<ReferenceAttribute> create_reference_attribute(vespalib::stringref name = "ref") { - return std::make_shared<ReferenceAttribute>(name, Config(BasicType::REFERENCE)); -} - -template <typename AttrVecType> -std::shared_ptr<AttrVecType> create_typed_attribute(BasicType basic_type, - CollectionType collection_type, - vespalib::stringref name = "parent") { - return std::dynamic_pointer_cast<AttrVecType>( - AttributeFactory::createAttribute(name, Config(basic_type, collection_type))); -} - -template <typename AttrVecType> -std::shared_ptr<AttrVecType> create_single_attribute(BasicType type, vespalib::stringref name = "parent") { - return create_typed_attribute<AttrVecType>(type, CollectionType::SINGLE, name); -} - -template <typename AttrVecType> -std::shared_ptr<AttrVecType> create_array_attribute(BasicType type, vespalib::stringref name = "parent") { - return create_typed_attribute<AttrVecType>(type, CollectionType::ARRAY, name); -} - -template <typename AttrVecType> -std::shared_ptr<AttrVecType> create_wset_attribute(BasicType type, vespalib::stringref name = "parent") { - return create_typed_attribute<AttrVecType>(type, CollectionType::WSET, name); -} - -template <typename VectorType> -void add_n_docs_with_undefined_values(VectorType& vec, size_t n) { - vec.addDocs(n); - vec.commit(); -} - -GlobalId dummy_gid(uint32_t doc_index) { - return DocumentId(vespalib::make_string("id:foo:bar::%u", doc_index)).getGlobalId(); -} - -struct Fixture { - std::shared_ptr<AttributeVector> target_attr; - std::shared_ptr<ReferenceAttribute> reference_attr; - 
std::shared_ptr<ImportedAttributeVector> imported_attr; - std::shared_ptr<MockGidToLidMapperFactory> mapper_factory; - - Fixture(); - ~Fixture(); - - void map_reference(DocId from_lid, GlobalId via_gid, DocId to_lid) { - assert(from_lid < reference_attr->getNumDocs()); - reference_attr->update(from_lid, via_gid); - reference_attr->commit(); - mapper_factory->_map[via_gid] = to_lid; - } - - std::shared_ptr<ImportedAttributeVector> create_attribute_vector_from_members(vespalib::stringref name = "imported") { - return std::make_shared<ImportedAttributeVector>(name, reference_attr, target_attr); - } - - template <typename AttrVecType> - std::shared_ptr<AttrVecType> target_attr_as() { - auto ptr = std::dynamic_pointer_cast<AttrVecType>(target_attr); - assert(ptr.get() != nullptr); - return ptr; - } - - void reset_with_new_target_attr(std::shared_ptr<AttributeVector> new_target) { - target_attr = std::move(new_target); - imported_attr = create_attribute_vector_from_members(); - } - - template <typename ValueType> - struct LidToLidMapping { - DocId _from_lid; - GlobalId _via_gid; - DocId _to_lid; - ValueType _value_in_target_attr; - - LidToLidMapping(DocId from_lid, - GlobalId via_gid, - DocId to_lid, - ValueType value_in_target_attr) - : _from_lid(from_lid), - _via_gid(via_gid), - _to_lid(to_lid), - _value_in_target_attr(std::move(value_in_target_attr)) - {} - }; - - void set_up_attribute_vectors_before_adding_mappings() { - // Make a sneaky assumption that no tests try to use a lid > 9 - add_n_docs_with_undefined_values(*reference_attr, 10); - add_n_docs_with_undefined_values(*target_attr, 10); - } - - template <typename AttrVecType, typename MappingsType, typename ValueAssigner> - void set_up_and_map(const MappingsType& mappings, ValueAssigner assigner) { - set_up_attribute_vectors_before_adding_mappings(); - auto subtyped_target = target_attr_as<AttrVecType>(); - for (auto& m : mappings) { - map_reference(m._from_lid, m._via_gid, m._to_lid); - 
assigner(*subtyped_target, m); - } - subtyped_target->commit(); - } - - template <typename AttrVecType, typename ValueType> - void reset_with_single_value_reference_mappings( - BasicType type, - const std::vector<LidToLidMapping<ValueType>>& mappings) { - reset_with_new_target_attr(create_single_attribute<AttrVecType>(type)); - // Fun experiment: rename `auto& mapping` to `auto& m` and watch GCC howl about - // shadowing a variable... that exists in the set_up_and_map function! - set_up_and_map<AttrVecType>(mappings, [this](auto& target_vec, auto& mapping) { - ASSERT_TRUE(target_vec.update(mapping._to_lid, mapping._value_in_target_attr)); - }); - } - - template <typename AttrVecType, typename ValueType> - void reset_with_array_value_reference_mappings( - BasicType type, - const std::vector<LidToLidMapping<std::vector<ValueType>>> &mappings) { - reset_with_new_target_attr(create_array_attribute<AttrVecType>(type)); - set_up_and_map<AttrVecType>(mappings, [this](auto& target_vec, auto& mapping) { - constexpr uint32_t weight = 1; - for (const auto& v : mapping._value_in_target_attr) { - ASSERT_TRUE(target_vec.append(mapping._to_lid, v, weight)); - } - }); - } - - template <typename AttrVecType, typename WeightedValueType> - void reset_with_wset_value_reference_mappings( - BasicType type, - const std::vector<LidToLidMapping<std::vector<WeightedValueType>>> &mappings) { - reset_with_new_target_attr(create_wset_attribute<AttrVecType>(type)); - set_up_and_map<AttrVecType>(mappings, [this](auto& target_vec, auto& mapping) { - for (const auto& v : mapping._value_in_target_attr) { - ASSERT_TRUE(target_vec.append(mapping._to_lid, v.value(), v.weight())); - } - }); - } -}; - -Fixture::Fixture() - : target_attr(create_single_attribute<IntegerAttribute>(BasicType::INT32)), - reference_attr(create_reference_attribute()), - imported_attr(create_attribute_vector_from_members()), - mapper_factory(std::make_shared<MockGidToLidMapperFactory>()) -{ - 
reference_attr->setGidToLidMapperFactory(mapper_factory); -} - -Fixture::~Fixture() {} - -template <typename AttrValueType, typename PredicateType> -void assert_multi_value_matches(const Fixture& f, - DocId lid, - const std::vector<AttrValueType>& expected, - PredicateType predicate) { - AttributeContent<AttrValueType> content; - content.fill(*f.imported_attr, lid); - EXPECT_EQUAL(expected.size(), content.size()); - std::vector<AttrValueType> actual(content.begin(), content.end()); - EXPECT_TRUE(std::equal(expected.begin(), expected.end(), - actual.begin(), actual.end(), predicate)); -} - -template <typename AttrValueType> -void assert_multi_value_matches(const Fixture& f, - DocId lid, - const std::vector<AttrValueType>& expected) { - assert_multi_value_matches(f, lid, expected, std::equal_to<AttrValueType>()); -} - -// Simple wrappers to avoid ugly "f.template reset..." syntax. -template <typename AttrVecType, typename ValueType> -void reset_with_single_value_reference_mappings( - Fixture& f, - BasicType type, - const std::vector<Fixture::LidToLidMapping<ValueType>>& mappings) { - f.reset_with_single_value_reference_mappings<AttrVecType, ValueType>(type, mappings); -} - -template <typename AttrVecType, typename ValueType> -void reset_with_array_value_reference_mappings( - Fixture& f, - BasicType type, - const std::vector<Fixture::LidToLidMapping<std::vector<ValueType>>> &mappings) { - f.reset_with_array_value_reference_mappings<AttrVecType, ValueType>(type, mappings); -} - -template <typename AttrVecType, typename WeightedValueType> -void reset_with_wset_value_reference_mappings( - Fixture& f, - BasicType type, - const std::vector<Fixture::LidToLidMapping<std::vector<WeightedValueType>>> &mappings) { - f.reset_with_wset_value_reference_mappings<AttrVecType, WeightedValueType>(type, mappings); -} - -bool has_active_enum_guards(AttributeVector &attr) { - return std::async(std::launch::async, [&attr] { return attr.hasActiveEnumGuards(); }).get(); -} +using Fixture = 
ImportedAttributeFixture; TEST_F("Accessors return expected attributes", Fixture) { EXPECT_EQUAL(f.imported_attr->getReferenceAttribute().get(), @@ -343,6 +119,10 @@ TEST_F("getFixedWidth() is inherited from target attribute vector", Fixture) { f.imported_attr->getFixedWidth()); } +TEST_F("asDocumentWeightAttribute() returns nullptr", Fixture) { + EXPECT_TRUE(f.imported_attr->asDocumentWeightAttribute() == nullptr); +} + TEST_F("Multi-valued integer attribute values can be retrieved via reference", Fixture) { const std::vector<int64_t> doc3_values({1234}); const std::vector<int64_t> doc7_values({5678, 9876, 555, 777}); @@ -445,6 +225,19 @@ TEST_F("hasEnum() is true for enum target attribute vector", SingleStringAttrFix EXPECT_TRUE(f.imported_attr->hasEnum()); } +TEST_F("createSearchContext() returns an imported search context", SingleStringAttrFixture) { + auto ctx = f.imported_attr->createSearchContext(word_term("bar"), SearchContextParams()); + ASSERT_TRUE(ctx.get() != nullptr); + fef::TermFieldMatchData match; + // Iterator specifics are tested in imported_search_context_test, so just make sure + // we get the expected iterator functionality. In this case, a non-strict iterator. + auto iter = ctx->createIterator(&match, false); + EXPECT_FALSE(iter->seek(DocId(1))); + EXPECT_FALSE(iter->seek(DocId(2))); + EXPECT_FALSE(iter->seek(DocId(3))); + EXPECT_TRUE(iter->seek(DocId(4))); +} + bool string_eq(const char* lhs, const char* rhs) noexcept { return strcmp(lhs, rhs) == 0; }; diff --git a/searchlib/src/tests/attribute/imported_search_context/CMakeLists.txt b/searchlib/src/tests/attribute/imported_search_context/CMakeLists.txt new file mode 100644 index 00000000000..228dfe20b0b --- /dev/null +++ b/searchlib/src/tests/attribute/imported_search_context/CMakeLists.txt @@ -0,0 +1,8 @@ +# Copyright 2017 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+vespa_add_executable(searchlib_imported_search_context_test_app TEST + SOURCES + imported_search_context_test.cpp + DEPENDS + searchlib +) +vespa_add_test(NAME searchlib_imported_search_context_test_app COMMAND searchlib_imported_search_context_test_app) diff --git a/searchlib/src/tests/attribute/imported_search_context/imported_search_context_test.cpp b/searchlib/src/tests/attribute/imported_search_context/imported_search_context_test.cpp new file mode 100644 index 00000000000..55675089a41 --- /dev/null +++ b/searchlib/src/tests/attribute/imported_search_context/imported_search_context_test.cpp @@ -0,0 +1,295 @@ +// Copyright 2017 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include <vespa/searchcommon/attribute/search_context_params.h> +#include <vespa/searchlib/test/imported_attribute_fixture.h> +#include <vespa/searchlib/attribute/imported_search_context.h> +#include <vespa/searchlib/fef/termfieldmatchdata.h> + +namespace search { +namespace attribute { + +using fef::TermFieldMatchData; +using vespalib::Trinary; + +struct Fixture : ImportedAttributeFixture { + std::unique_ptr<ImportedSearchContext> create_context(std::unique_ptr<QueryTermSimple> term) { + return std::make_unique<ImportedSearchContext>(std::move(term), SearchContextParams(), *imported_attr); + } + + std::unique_ptr<queryeval::SearchIterator> create_iterator( + ImportedSearchContext& ctx, + TermFieldMatchData& match, + bool strict) { + auto iter = ctx.createIterator(&match, strict); + assert(iter.get() != nullptr); + iter->initRange(DocId(1), reference_attr->getNumDocs() + 1); + return iter; + } + + std::unique_ptr<queryeval::SearchIterator> create_non_strict_iterator( + ImportedSearchContext& ctx, + TermFieldMatchData& match) { + return create_iterator(ctx, match, false); + } + + std::unique_ptr<queryeval::SearchIterator> create_strict_iterator( + ImportedSearchContext& ctx, + TermFieldMatchData& match) { + return create_iterator(ctx, 
match, true); + } +}; + +template <typename Iterator> +bool is_hit_with_weight(Iterator& iter, TermFieldMatchData& match, DocId lid, int32_t weight) { + if (!EXPECT_TRUE(iter.seek(lid))) { + return false; + } + iter.unpack(lid); + return (EXPECT_EQUAL(lid, match.getDocId()) && + EXPECT_EQUAL(weight, match.getWeight())); +} + +template <typename Iterator> +bool is_strict_hit_with_weight(Iterator& iter, TermFieldMatchData& match, + DocId seek_lid, DocId expected_lid, int32_t weight) { + iter.seek(seek_lid); + if (!EXPECT_EQUAL(expected_lid, iter.getDocId())) { + return false; + } + iter.unpack(expected_lid); + return (EXPECT_EQUAL(expected_lid, match.getDocId()) && + EXPECT_EQUAL(weight, match.getWeight())); +} + +TEST_F("approximateHits() returns document count of reference attribute", Fixture) { + add_n_docs_with_undefined_values(*f.reference_attr, 101); + + auto ctx = f.create_context(word_term("foo")); + EXPECT_EQUAL(101, ctx->approximateHits()); +} + +TEST_F("attributeName() returns imported attribute name", Fixture) { + auto ctx = f.create_context(word_term("foo")); + EXPECT_EQUAL(f.default_imported_attr_name(), ctx->attributeName()); +} + +TEST_F("valid() forwards to target search context", Fixture) { + auto ctx = f.create_context(word_term("foo")); + EXPECT_EQUAL(ctx->target_search_context().valid(), ctx->valid()); +} + +TEST_F("getAsIntegerTerm() forwards to target search context", Fixture) { + auto ctx = f.create_context(word_term("foo")); + // No operator== or printing for Range, so doing this the hard way + // TODO could add the darn things + auto expected_range = ctx->target_search_context().getAsIntegerTerm(); + auto actual_range = ctx->getAsIntegerTerm(); + EXPECT_EQUAL(expected_range.lower(), actual_range.lower()); + EXPECT_EQUAL(expected_range.upper(), actual_range.upper()); +} + +/* + FIXME this seems to not actually be implemented as expected by the target search context...! SIGSEGVs. 
+TEST_F("queryTerm() returns term context was created with", Fixture) { + auto ctx = f.create_context(word_term("helloworld")); + EXPECT_EQUAL(std::string("helloworld"), std::string(ctx->queryTerm().getTerm())); +} +*/ + +TEST_F("Non-strict iterator not marked as strict", Fixture) { + auto ctx = f.create_context(word_term("5678")); + TermFieldMatchData match; + auto iter = f.create_non_strict_iterator(*ctx, match); + + EXPECT_TRUE(iter->is_strict() == Trinary::False); // No EXPECT_EQUALS printing of Trinary... +} + +TEST_F("Non-strict iterator seek forwards to target attribute", Fixture) { + reset_with_single_value_reference_mappings<IntegerAttribute, int32_t>( + f, BasicType::INT32, + {{DocId(1), dummy_gid(3), DocId(3), 1234}, + {DocId(3), dummy_gid(7), DocId(7), 5678}, + {DocId(5), dummy_gid(8), DocId(8), 7890}}); + + auto ctx = f.create_context(word_term("5678")); + TermFieldMatchData match; + auto iter = f.create_non_strict_iterator(*ctx, match); + + EXPECT_FALSE(iter->isAtEnd()); + EXPECT_EQUAL(iter->beginId(), iter->getDocId()); + + EXPECT_FALSE(iter->seek(DocId(1))); + EXPECT_EQUAL(iter->beginId(), iter->getDocId()); // Non-strict iterator does not change current ID + + EXPECT_TRUE(iter->seek(DocId(3))); + EXPECT_EQUAL(DocId(3), iter->getDocId()); + + EXPECT_FALSE(iter->seek(DocId(5))); + EXPECT_EQUAL(DocId(3), iter->getDocId()); // Still unchanged +} + +TEST_F("Non-strict iterator unpacks target match data for single value hit", Fixture) { + reset_with_single_value_reference_mappings<IntegerAttribute, int32_t>( + f, BasicType::INT32, + {{DocId(1), dummy_gid(3), DocId(3), 1234}, + {DocId(2), dummy_gid(4), DocId(4), 1234}}); + + auto ctx = f.create_context(word_term("1234")); + TermFieldMatchData match; + auto iter = f.create_non_strict_iterator(*ctx, match); + + EXPECT_TRUE(is_hit_with_weight(*iter, match, DocId(1), 1)); + EXPECT_TRUE(is_hit_with_weight(*iter, match, DocId(2), 1)); +} + +struct ArrayValueFixture : Fixture { + ArrayValueFixture() { + const 
std::vector<int64_t> doc3_values({1234}); + const std::vector<int64_t> doc7_values({1234, 1234, 1234, 777}); + const std::vector<int64_t> doc8_values({}); + reset_with_array_value_reference_mappings<IntegerAttribute, int64_t>( + BasicType::INT64, + {{DocId(1), dummy_gid(3), DocId(3), doc3_values}, + {DocId(4), dummy_gid(7), DocId(7), doc7_values}, + {DocId(5), dummy_gid(8), DocId(8), doc8_values}}); + } +}; + +TEST_F("Non-strict iterator unpacks target match data for array hit", ArrayValueFixture) { + auto ctx = f.create_context(word_term("1234")); + TermFieldMatchData match; + auto iter = f.create_non_strict_iterator(*ctx, match); + + EXPECT_TRUE(is_hit_with_weight(*iter, match, DocId(1), 1)); + EXPECT_FALSE(iter->seek(DocId(2))); + EXPECT_FALSE(iter->seek(DocId(3))); + EXPECT_TRUE(is_hit_with_weight(*iter, match, DocId(4), 3)); +} + +struct WsetValueFixture : Fixture { + WsetValueFixture() { + std::vector<WeightedString> doc3_values{{WeightedString("foo", -5)}}; + std::vector<WeightedString> doc4_values{{WeightedString("baz", 10)}}; + std::vector<WeightedString> doc7_values{{WeightedString("bar", 7), WeightedString("foo", 42)}}; + reset_with_wset_value_reference_mappings<StringAttribute, WeightedString>( + BasicType::STRING, + {{DocId(2), dummy_gid(3), DocId(3), doc3_values}, + {DocId(4), dummy_gid(4), DocId(4), doc4_values}, + {DocId(6), dummy_gid(7), DocId(7), doc7_values}}); + } +}; + +TEST_F("Non-strict iterator unpacks target match data for weighted set hit", WsetValueFixture) { + auto ctx = f.create_context(word_term("foo")); + TermFieldMatchData match; + auto iter = f.create_non_strict_iterator(*ctx, match); + + EXPECT_TRUE(is_hit_with_weight(*iter, match, DocId(2), -5)); + EXPECT_TRUE(is_hit_with_weight(*iter, match, DocId(6), 42)); +} + +TEST_F("Strict iterator is marked as strict", Fixture) { + auto ctx = f.create_context(word_term("5678")); + TermFieldMatchData match; + auto iter = f.create_strict_iterator(*ctx, match); + + 
EXPECT_TRUE(iter->is_strict() == Trinary::True); // No EXPECT_EQUALS printing of Trinary... +} + +struct SingleValueFixture : Fixture { + SingleValueFixture() { + reset_with_single_value_reference_mappings<IntegerAttribute, int32_t>( + BasicType::INT32, + {{DocId(3), dummy_gid(5), DocId(5), 5678}, + {DocId(4), dummy_gid(6), DocId(6), 1234}, + {DocId(5), dummy_gid(8), DocId(8), 5678}, + {DocId(7), dummy_gid(9), DocId(9), 4321}}); + } +}; + +TEST_F("Strict iterator seeks to first available hit LID", SingleValueFixture) { + auto ctx = f.create_context(word_term("5678")); + TermFieldMatchData match; + auto iter = f.create_strict_iterator(*ctx, match); + + EXPECT_FALSE(iter->isAtEnd()); + EXPECT_EQUAL(iter->beginId(), iter->getDocId()); + + EXPECT_FALSE(iter->seek(DocId(1))); + EXPECT_FALSE(iter->isAtEnd()); + EXPECT_EQUAL(DocId(3), iter->getDocId()); + + EXPECT_TRUE(iter->seek(DocId(3))); + EXPECT_FALSE(iter->isAtEnd()); + EXPECT_EQUAL(DocId(3), iter->getDocId()); + + EXPECT_FALSE(iter->seek(DocId(4))); + EXPECT_FALSE(iter->isAtEnd()); + EXPECT_EQUAL(DocId(5), iter->getDocId()); + + // Seeking beyond last hit exhausts doc id limit and marks iterator as done + EXPECT_FALSE(iter->seek(DocId(6))); + EXPECT_TRUE(iter->isAtEnd()); +} + +TEST_F("Strict iterator unpacks target match data for single value hit", SingleValueFixture) { + auto ctx = f.create_context(word_term("5678")); + TermFieldMatchData match; + auto iter = f.create_strict_iterator(*ctx, match); + + EXPECT_TRUE(is_strict_hit_with_weight(*iter, match, DocId(1), DocId(3), 1)); + EXPECT_TRUE(is_strict_hit_with_weight(*iter, match, DocId(2), DocId(3), 1)); + EXPECT_TRUE(is_strict_hit_with_weight(*iter, match, DocId(3), DocId(3), 1)); + EXPECT_TRUE(is_strict_hit_with_weight(*iter, match, DocId(4), DocId(5), 1)); +} + +TEST_F("Strict iterator unpacks target match data for array hit", ArrayValueFixture) { + auto ctx = f.create_context(word_term("1234")); + TermFieldMatchData match; + auto iter = 
f.create_strict_iterator(*ctx, match); + + EXPECT_TRUE(is_strict_hit_with_weight(*iter, match, DocId(1), DocId(1), 1)); + EXPECT_TRUE(is_strict_hit_with_weight(*iter, match, DocId(2), DocId(4), 3)); + EXPECT_TRUE(is_strict_hit_with_weight(*iter, match, DocId(3), DocId(4), 3)); + EXPECT_TRUE(is_strict_hit_with_weight(*iter, match, DocId(4), DocId(4), 3)); +} + +TEST_F("Strict iterator unpacks target match data for weighted set hit", WsetValueFixture) { + auto ctx = f.create_context(word_term("foo")); + TermFieldMatchData match; + auto iter = f.create_strict_iterator(*ctx, match); + + EXPECT_TRUE(is_strict_hit_with_weight(*iter, match, DocId(1), DocId(2), -5)); + EXPECT_TRUE(is_strict_hit_with_weight(*iter, match, DocId(2), DocId(2), -5)); + EXPECT_TRUE(is_strict_hit_with_weight(*iter, match, DocId(3), DocId(6), 42)); +} + +TEST_F("cmp() performs GID mapping and forwards to target attribute", SingleValueFixture) { + auto ctx = f.create_context(word_term("5678")); + EXPECT_FALSE(ctx->cmp(DocId(2))); + EXPECT_TRUE(ctx->cmp(DocId(3))); + EXPECT_FALSE(ctx->cmp(DocId(4))); + EXPECT_TRUE(ctx->cmp(DocId(5))); +} + +TEST_F("cmp(weight) performs GID mapping and forwards to target attribute", WsetValueFixture) { + auto ctx = f.create_context(word_term("foo")); + int32_t weight = 0; + EXPECT_FALSE(ctx->cmp(DocId(1), weight)); + EXPECT_EQUAL(0, weight); // Unchanged + + EXPECT_TRUE(ctx->cmp(DocId(2), weight)); + EXPECT_EQUAL(-5, weight); + + EXPECT_TRUE(ctx->cmp(DocId(6), weight)); + EXPECT_EQUAL(42, weight); +} + +// TODO test multiple iterators created from same context +// TODO test non-mapped lid +// TODO test seek outside lid limit + +} // attribute +} // search + +TEST_MAIN() { TEST_RUN_ALL(); }
\ No newline at end of file diff --git a/searchlib/src/vespa/searchlib/attribute/CMakeLists.txt b/searchlib/src/vespa/searchlib/attribute/CMakeLists.txt index 66a614379a8..e167dc38f22 100644 --- a/searchlib/src/vespa/searchlib/attribute/CMakeLists.txt +++ b/searchlib/src/vespa/searchlib/attribute/CMakeLists.txt @@ -45,6 +45,7 @@ vespa_add_library(searchlib_attribute OBJECT iattributemanager.cpp iattributesavetarget.cpp imported_attribute_vector.cpp + imported_search_context.cpp integerbase.cpp ipostinglistsearchcontext.cpp iterator_pack.cpp diff --git a/searchlib/src/vespa/searchlib/attribute/attributeiterators.hpp b/searchlib/src/vespa/searchlib/attribute/attributeiterators.hpp index 4c48920406b..219ef2221f2 100644 --- a/searchlib/src/vespa/searchlib/attribute/attributeiterators.hpp +++ b/searchlib/src/vespa/searchlib/attribute/attributeiterators.hpp @@ -208,7 +208,7 @@ FilterAttributeIteratorT<SC>::visitMembers(vespalib::ObjectVisitor &visitor) con template <typename SC> AttributeIteratorT<SC>::AttributeIteratorT(const SC &searchContext, fef::TermFieldMatchData *matchData) - : AttributeIterator(matchData, searchContext._attr.getCommittedDocIdLimit()), + : AttributeIterator(matchData, searchContext.attribute().getCommittedDocIdLimit()), _searchContext(searchContext) { } diff --git a/searchlib/src/vespa/searchlib/attribute/imported_attribute_vector.cpp b/searchlib/src/vespa/searchlib/attribute/imported_attribute_vector.cpp index 5e2973790fc..05d925ee7d1 100644 --- a/searchlib/src/vespa/searchlib/attribute/imported_attribute_vector.cpp +++ b/searchlib/src/vespa/searchlib/attribute/imported_attribute_vector.cpp @@ -1,7 +1,9 @@ // Copyright 2017 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
#include "imported_attribute_vector.h" +#include "imported_search_context.h" #include "attributeguard.h" +#include <vespa/searchlib/query/queryterm.h> #include <vespa/vespalib/util/exceptions.h> namespace search { @@ -94,9 +96,7 @@ bool ImportedAttributeVector::findEnum(const char *value, EnumHandle &e) const { std::unique_ptr<ISearchContext> ImportedAttributeVector::createSearchContext(std::unique_ptr<QueryTermSimple> term, const SearchContextParams &params) const { - (void) term; - (void) params; - return std::unique_ptr<ISearchContext>(); + return std::make_unique<ImportedSearchContext>(std::move(term), params, *this); } const IDocumentWeightAttribute *ImportedAttributeVector::asDocumentWeightAttribute() const { diff --git a/searchlib/src/vespa/searchlib/attribute/imported_attribute_vector.h b/searchlib/src/vespa/searchlib/attribute/imported_attribute_vector.h index 95a817bac35..556ce7c2722 100644 --- a/searchlib/src/vespa/searchlib/attribute/imported_attribute_vector.h +++ b/searchlib/src/vespa/searchlib/attribute/imported_attribute_vector.h @@ -89,7 +89,7 @@ private: vespalib::string _name; std::shared_ptr<ReferenceAttribute> _reference_attribute; - std::shared_ptr<AttributeVector> _target_attribute; + std::shared_ptr<AttributeVector> _target_attribute; }; } // attribute diff --git a/searchlib/src/vespa/searchlib/attribute/imported_search_context.cpp b/searchlib/src/vespa/searchlib/attribute/imported_search_context.cpp new file mode 100644 index 00000000000..51b765f725a --- /dev/null +++ b/searchlib/src/vespa/searchlib/attribute/imported_search_context.cpp @@ -0,0 +1,71 @@ +// Copyright 2017 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+ +#include "imported_search_context.h" +#include "attributeiterators.hpp" +#include "imported_attribute_vector.h" +#include "reference_attribute.h" +#include <vespa/searchcommon/attribute/search_context_params.h> +#include <vespa/searchlib/fef/fef.h> +#include <vespa/searchlib/query/queryterm.h> + +namespace search { +namespace attribute { + +ImportedSearchContext::ImportedSearchContext( + std::unique_ptr<QueryTermSimple> term, + const SearchContextParams& params, + const ImportedAttributeVector& imported_attribute) + : _imported_attribute(imported_attribute), + _reference_attribute(*_imported_attribute.getReferenceAttribute()), + _target_attribute(*_imported_attribute.getTargetAttribute()), + _target_search_context(_target_attribute.getSearch(std::move(term), params)) +{ +} + +ImportedSearchContext::~ImportedSearchContext() { +} + +unsigned int ImportedSearchContext::approximateHits() const { + return _reference_attribute.getNumDocs(); +} + +std::unique_ptr<queryeval::SearchIterator> +ImportedSearchContext::createIterator(fef::TermFieldMatchData* matchData, bool strict) { + if (!strict) { + return std::make_unique<AttributeIteratorT<ImportedSearchContext>>(*this, matchData); + } else { + return std::make_unique<AttributeIteratorStrict<ImportedSearchContext>>(*this, matchData); + } +} + +void ImportedSearchContext::fetchPostings(bool strict) { + (void)strict; + // Imported attributes do not have posting lists (at least not currently), so this is a no-op. 
+} + +bool ImportedSearchContext::valid() const { + return _target_search_context->valid(); +} + +Int64Range ImportedSearchContext::getAsIntegerTerm() const { + return _target_search_context->getAsIntegerTerm(); +} + +const QueryTermBase& ImportedSearchContext::queryTerm() const { + return _target_search_context->queryTerm(); +} + +const vespalib::string& ImportedSearchContext::attributeName() const { + return _imported_attribute.getName(); +} + +bool ImportedSearchContext::cmp(DocId docId, int32_t& weight) const { + return _target_search_context->cmp(_reference_attribute.getReferencedLid(docId), weight); +} + +bool ImportedSearchContext::cmp(DocId docId) const { + return _target_search_context->cmp(_reference_attribute.getReferencedLid(docId)); +} + +} // attribute +} // search
\ No newline at end of file diff --git a/searchlib/src/vespa/searchlib/attribute/imported_search_context.h b/searchlib/src/vespa/searchlib/attribute/imported_search_context.h new file mode 100644 index 00000000000..1a383b2705e --- /dev/null +++ b/searchlib/src/vespa/searchlib/attribute/imported_search_context.h @@ -0,0 +1,71 @@ +// Copyright 2017 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include "attributevector.h" +#include <vespa/searchcommon/attribute/i_search_context.h> +#include <memory> + +namespace search { + +namespace fef { +class TermFieldMatchData; +} + +namespace attribute { + +class ReferenceAttribute; +class ImportedAttributeVector; +class SearchContextParams; + +/** + * Search context exposing iteration over an imported attribute vector. + * + * Iterator doc id matching is performed via the GID->LID indirection of the + * associated reference attribute. This means that if the _referenced_ document + * matches the search term, the doc id of the _referring_ document will be + * considered a match. 
+ */ +class ImportedSearchContext : public ISearchContext { + const ImportedAttributeVector& _imported_attribute; + const ReferenceAttribute& _reference_attribute; + const AttributeVector& _target_attribute; + std::unique_ptr<AttributeVector::SearchContext> _target_search_context; +public: + ImportedSearchContext(std::unique_ptr<QueryTermSimple> term, + const SearchContextParams& params, + const ImportedAttributeVector& imported_attribute); + ~ImportedSearchContext(); + + unsigned int approximateHits() const override; + + std::unique_ptr<queryeval::SearchIterator> + createIterator(fef::TermFieldMatchData* matchData, bool strict) override; + + void fetchPostings(bool strict) override; + + bool valid() const override; + + Int64Range getAsIntegerTerm() const override; + + const QueryTermBase& queryTerm() const override; + + const vespalib::string& attributeName() const override; + + using DocId = IAttributeVector::DocId; + + bool cmp(DocId docId, int32_t& weight) const; + bool cmp(DocId docId) const; + + const ReferenceAttribute& attribute() const noexcept { return _reference_attribute; } + + const AttributeVector::SearchContext& target_search_context() const noexcept { + return *_target_search_context; + } +}; + +} // attribute +} // search + + + diff --git a/searchlib/src/vespa/searchlib/queryeval/searchiterator.h b/searchlib/src/vespa/searchlib/queryeval/searchiterator.h index 17fa8fd9902..89abe560b25 100644 --- a/searchlib/src/vespa/searchlib/queryeval/searchiterator.h +++ b/searchlib/src/vespa/searchlib/queryeval/searchiterator.h @@ -271,7 +271,7 @@ public: * * @return global posting info or NULL if no info is available. **/ - virtual const PostingInfo *getPostingInfo() const { return NULL; } + virtual const PostingInfo *getPostingInfo() const { return nullptr; } /** * Create a human-readable representation of this object. 
This diff --git a/searchlib/src/vespa/searchlib/test/imported_attribute_fixture.h b/searchlib/src/vespa/searchlib/test/imported_attribute_fixture.h new file mode 100644 index 00000000000..21b3d8e8040 --- /dev/null +++ b/searchlib/src/vespa/searchlib/test/imported_attribute_fixture.h @@ -0,0 +1,249 @@ +// Copyright 2017 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include "mock_gid_to_lid_mapping.h" +#include <vespa/document/base/documentid.h> +#include <vespa/document/base/globalid.h> +#include <vespa/searchlib/attribute/attributefactory.h> +#include <vespa/searchlib/attribute/attributeguard.h> +#include <vespa/searchlib/attribute/floatbase.h> +#include <vespa/searchlib/attribute/imported_attribute_vector.h> +#include <vespa/searchlib/attribute/integerbase.h> +#include <vespa/searchlib/attribute/not_implemented_attribute.h> +#include <vespa/searchlib/attribute/stringbase.h> +#include <vespa/searchlib/query/queryterm.h> +#include <vespa/searchcommon/attribute/attributecontent.h> +#include <vespa/vespalib/testkit/testapp.h> +#include <algorithm> +#include <future> +#include <map> +#include <memory> +#include <vector> + +namespace search { +namespace attribute { + +using document::DocumentId; +using document::GlobalId; +using DocId = IAttributeVector::DocId; +using WeightedInt = IAttributeVector::WeightedInt; +using WeightedFloat = IAttributeVector::WeightedFloat; +using WeightedString = IAttributeVector::WeightedString; +using WeightedConstChar = IAttributeVector::WeightedConstChar; +using WeightedEnum = IAttributeVector::WeightedEnum; +using test::MockGidToLidMapperFactory; + +std::shared_ptr<ReferenceAttribute> create_reference_attribute(vespalib::stringref name = "ref") { + return std::make_shared<ReferenceAttribute>(name, Config(BasicType::REFERENCE)); +} + +template<typename AttrVecType> +std::shared_ptr<AttrVecType> create_typed_attribute(BasicType basic_type, + CollectionType 
collection_type, + vespalib::stringref name = "parent") { + return std::dynamic_pointer_cast<AttrVecType>( + AttributeFactory::createAttribute(name, Config(basic_type, collection_type))); +} + +template<typename AttrVecType> +std::shared_ptr<AttrVecType> create_single_attribute(BasicType type, vespalib::stringref name = "parent") { + return create_typed_attribute<AttrVecType>(type, CollectionType::SINGLE, name); +} + +template<typename AttrVecType> +std::shared_ptr<AttrVecType> create_array_attribute(BasicType type, vespalib::stringref name = "parent") { + return create_typed_attribute<AttrVecType>(type, CollectionType::ARRAY, name); +} + +template<typename AttrVecType> +std::shared_ptr<AttrVecType> create_wset_attribute(BasicType type, vespalib::stringref name = "parent") { + return create_typed_attribute<AttrVecType>(type, CollectionType::WSET, name); +} + +template<typename VectorType> +void add_n_docs_with_undefined_values(VectorType &vec, size_t n) { + vec.addDocs(n); + vec.commit(); +} + +GlobalId dummy_gid(uint32_t doc_index) { + return DocumentId(vespalib::make_string("id:foo:bar::%u", doc_index)).getGlobalId(); +} + +std::unique_ptr<QueryTermSimple> word_term(vespalib::stringref term) { + return std::make_unique<QueryTermSimple>(term, QueryTerm::WORD); +} + +struct ImportedAttributeFixture { + std::shared_ptr<AttributeVector> target_attr; + std::shared_ptr<ReferenceAttribute> reference_attr; + std::shared_ptr<ImportedAttributeVector> imported_attr; + std::shared_ptr<MockGidToLidMapperFactory> mapper_factory; + + ImportedAttributeFixture(); + + ~ImportedAttributeFixture(); + + void map_reference(DocId from_lid, GlobalId via_gid, DocId to_lid) { + assert(from_lid < reference_attr->getNumDocs()); + reference_attr->update(from_lid, via_gid); + reference_attr->commit(); + mapper_factory->_map[via_gid] = to_lid; + } + + static vespalib::stringref default_imported_attr_name() { + return "imported"; + } + + std::shared_ptr<ImportedAttributeVector> + 
create_attribute_vector_from_members(vespalib::stringref name = default_imported_attr_name()) { + return std::make_shared<ImportedAttributeVector>(name, reference_attr, target_attr); + } + + template<typename AttrVecType> + std::shared_ptr<AttrVecType> target_attr_as() { + auto ptr = std::dynamic_pointer_cast<AttrVecType>(target_attr); + assert(ptr.get() != nullptr); + return ptr; + } + + void reset_with_new_target_attr(std::shared_ptr<AttributeVector> new_target) { + target_attr = std::move(new_target); + imported_attr = create_attribute_vector_from_members(); + } + + template<typename ValueType> + struct LidToLidMapping { + DocId _from_lid; + GlobalId _via_gid; + DocId _to_lid; + ValueType _value_in_target_attr; + + LidToLidMapping(DocId from_lid, + GlobalId via_gid, + DocId to_lid, + ValueType value_in_target_attr) + : _from_lid(from_lid), + _via_gid(via_gid), + _to_lid(to_lid), + _value_in_target_attr(std::move(value_in_target_attr)) {} + }; + + void set_up_attribute_vectors_before_adding_mappings() { + // Make a sneaky assumption that no tests try to use a lid > 9 + add_n_docs_with_undefined_values(*reference_attr, 10); + target_attr->addReservedDoc(); + add_n_docs_with_undefined_values(*target_attr, 10); + } + + template<typename AttrVecType, typename MappingsType, typename ValueAssigner> + void set_up_and_map(const MappingsType &mappings, ValueAssigner assigner) { + set_up_attribute_vectors_before_adding_mappings(); + auto subtyped_target = target_attr_as<AttrVecType>(); + for (auto &m : mappings) { + map_reference(m._from_lid, m._via_gid, m._to_lid); + assigner(*subtyped_target, m); + } + subtyped_target->commit(); + } + + template<typename AttrVecType, typename ValueType> + void reset_with_single_value_reference_mappings( + BasicType type, + const std::vector<LidToLidMapping<ValueType>> &mappings) { + reset_with_new_target_attr(create_single_attribute<AttrVecType>(type)); + // Fun experiment: rename `auto& mapping` to `auto& m` and watch GCC howl about + 
// shadowing a variable... that exists in the set_up_and_map function! + set_up_and_map<AttrVecType>(mappings, [this](auto &target_vec, auto &mapping) { + ASSERT_TRUE(target_vec.update(mapping._to_lid, mapping._value_in_target_attr)); + }); + } + + template<typename AttrVecType, typename ValueType> + void reset_with_array_value_reference_mappings( + BasicType type, + const std::vector<LidToLidMapping<std::vector<ValueType>>> &mappings) { + reset_with_new_target_attr(create_array_attribute<AttrVecType>(type)); + set_up_and_map<AttrVecType>(mappings, [this](auto &target_vec, auto &mapping) { + constexpr uint32_t weight = 1; + for (const auto &v : mapping._value_in_target_attr) { + ASSERT_TRUE(target_vec.append(mapping._to_lid, v, weight)); + } + }); + } + + template<typename AttrVecType, typename WeightedValueType> + void reset_with_wset_value_reference_mappings( + BasicType type, + const std::vector<LidToLidMapping<std::vector<WeightedValueType>>> &mappings) { + reset_with_new_target_attr(create_wset_attribute<AttrVecType>(type)); + set_up_and_map<AttrVecType>(mappings, [this](auto &target_vec, auto &mapping) { + for (const auto &v : mapping._value_in_target_attr) { + ASSERT_TRUE(target_vec.append(mapping._to_lid, v.value(), v.weight())); + } + }); + } +}; + +ImportedAttributeFixture::ImportedAttributeFixture() + : target_attr(create_single_attribute<IntegerAttribute>(BasicType::INT32)), + reference_attr(create_reference_attribute()), + imported_attr(create_attribute_vector_from_members()), + mapper_factory(std::make_shared<MockGidToLidMapperFactory>()) { + reference_attr->setGidToLidMapperFactory(mapper_factory); +} + +ImportedAttributeFixture::~ImportedAttributeFixture() {} + +template<typename AttrValueType, typename PredicateType> +void assert_multi_value_matches(const ImportedAttributeFixture &f, + DocId lid, + const std::vector<AttrValueType> &expected, + PredicateType predicate) { + AttributeContent<AttrValueType> content; + content.fill(*f.imported_attr, 
lid); + EXPECT_EQUAL(expected.size(), content.size()); + std::vector<AttrValueType> actual(content.begin(), content.end()); + EXPECT_TRUE(std::equal(expected.begin(), expected.end(), + actual.begin(), actual.end(), predicate)); +} + +template<typename AttrValueType> +void assert_multi_value_matches(const ImportedAttributeFixture &f, + DocId lid, + const std::vector<AttrValueType> &expected) { + assert_multi_value_matches(f, lid, expected, std::equal_to<AttrValueType>()); +} + +// Simple wrappers to avoid ugly "f.template reset..." syntax. +template<typename AttrVecType, typename ValueType> +void reset_with_single_value_reference_mappings( + ImportedAttributeFixture &f, + BasicType type, + const std::vector<ImportedAttributeFixture::LidToLidMapping<ValueType>> &mappings) { + f.reset_with_single_value_reference_mappings<AttrVecType, ValueType>(type, mappings); +} + +template<typename AttrVecType, typename ValueType> +void reset_with_array_value_reference_mappings( + ImportedAttributeFixture &f, + BasicType type, + const std::vector<ImportedAttributeFixture::LidToLidMapping<std::vector<ValueType>>> &mappings) { + f.reset_with_array_value_reference_mappings<AttrVecType, ValueType>(type, mappings); +} + +template<typename AttrVecType, typename WeightedValueType> +void reset_with_wset_value_reference_mappings( + ImportedAttributeFixture &f, + BasicType type, + const std::vector<ImportedAttributeFixture::LidToLidMapping<std::vector<WeightedValueType>>> &mappings) { + f.reset_with_wset_value_reference_mappings<AttrVecType, WeightedValueType>(type, mappings); +} + +bool has_active_enum_guards(AttributeVector &attr) { + return std::async(std::launch::async, [&attr] { return attr.hasActiveEnumGuards(); }).get(); +} + +} // attribute +} // search
\ No newline at end of file |