diff options
3 files changed, 94 insertions, 335 deletions
diff --git a/searchlib/src/tests/features/imported_dot_product/imported_dot_product_test.cpp b/searchlib/src/tests/features/imported_dot_product/imported_dot_product_test.cpp index a32d52d06a0..72c9f9db165 100644 --- a/searchlib/src/tests/features/imported_dot_product/imported_dot_product_test.cpp +++ b/searchlib/src/tests/features/imported_dot_product/imported_dot_product_test.cpp @@ -86,8 +86,11 @@ struct FixtureBase : ImportedAttributeFixture { const vespalib::string& vector, DocId doc_id, const vespalib::string& shared_param = "") { + check_executions<int32_t>([this](auto int_type){ this->setup_integer_mappings(int_type); }, + {{BasicType::INT32}}, + expected, vector, doc_id, shared_param); check_executions<int64_t>([this](auto int_type){ this->setup_integer_mappings(int_type); }, - {{BasicType::INT32, BasicType::INT64}}, + {{BasicType::INT64}}, expected, vector, doc_id, shared_param); } }; @@ -95,21 +98,47 @@ struct FixtureBase : ImportedAttributeFixture { struct ArrayFixture : FixtureBase { ~ArrayFixture() override; - void setup_integer_mappings(BasicType int_type) override { - reset_with_array_value_reference_mappings<IntegerAttribute, int64_t>( + template <typename T> + void setup_integer_mappings_helper(BasicType int_type) { + reset_with_array_value_reference_mappings<IntegerAttribute, T>( int_type, {{DocId(1), dummy_gid(3), DocId(3), {{2, 3, 5}}}, {DocId(3), dummy_gid(7), DocId(7), {{7, 11}}}, {DocId(5), dummy_gid(8), DocId(8), {{13, 17, 19, 23}}}}); } + void setup_integer_mappings(BasicType int_type) override { + switch (int_type.type()) { + case BasicType::INT32: + setup_integer_mappings_helper<int32_t>(int_type); + break; + case BasicType::INT64: + setup_integer_mappings_helper<int64_t>(int_type); + break; + default: + TEST_FATAL("unexpected integer type"); + } + } - void setup_float_mappings(BasicType float_type) { - reset_with_array_value_reference_mappings<FloatingPointAttribute, double>( + template <typename T> + void setup_float_mappings_helper(BasicType float_type) { + reset_with_array_value_reference_mappings<FloatingPointAttribute, T>( float_type, {{DocId(2), dummy_gid(4), DocId(4), {{2.2, 3.3, 5.5}}}, {DocId(4), dummy_gid(8), DocId(8), {{7.7, 11.11}}}, {DocId(6), dummy_gid(9), DocId(9), {{13.1, 17.2, 19.3, 23.4}}}}); } + void setup_float_mappings(BasicType float_type) { + switch(float_type.type()) { + case BasicType::FLOAT: + setup_float_mappings_helper<float>(float_type); + break; + case BasicType::DOUBLE: + setup_float_mappings_helper<double>(float_type); + break; + default: + TEST_FATAL("unexpected float type"); + } + } template <typename ExpectedType> void check_prepare_state_output(const vespalib::eval::Value & tensor, const ExpectedType & expected) { @@ -163,8 +192,11 @@ struct ArrayFixture : FixtureBase { void check_all_float_executions(feature_t expected, const vespalib::string& vector, DocId doc_id, const vespalib::string& shared_param = "") { + check_executions<float>([this](auto float_type){ this->setup_float_mappings(float_type); }, + {{BasicType::FLOAT}}, + expected, vector, doc_id, shared_param); check_executions<double>([this](auto float_type){ this->setup_float_mappings(float_type); }, - {{BasicType::FLOAT, BasicType::DOUBLE}}, + {{BasicType::DOUBLE}}, expected, vector, doc_id, shared_param); } }; @@ -187,9 +219,9 @@ TEST_F("Zero-length float/double array query vector evaluates to zero", ArrayFix f.check_all_float_executions(0, "[]", DocId(1)); } -TEST_F("prepareSharedState emits i64 vector for i32 imported attribute", ArrayFixture) { +TEST_F("prepareSharedState emits i32 vector for i32 imported attribute", ArrayFixture) { f.setup_integer_mappings(BasicType::INT32); - f.template check_prepare_state_output("[101 202 303]", dotproduct::ArrayParam<int64_t>({101, 202, 303})); + f.template check_prepare_state_output("[101 202 303]", dotproduct::ArrayParam<int32_t>({101, 202, 303})); } TEST_F("prepareSharedState emits i64 vector for i64 imported attribute", ArrayFixture) { @@ -197,9 +229,9 @@ TEST_F("prepareSharedState emits i64 vector for i64 imported attribute", ArrayFi f.template check_prepare_state_output("[101 202 303]", dotproduct::ArrayParam<int64_t>({101, 202, 303})); } -TEST_F("prepareSharedState emits double vector for float imported attribute", ArrayFixture) { +TEST_F("prepareSharedState emits float vector for float imported attribute", ArrayFixture) { f.setup_float_mappings(BasicType::FLOAT); - f.template check_prepare_state_output("[10.1 20.2 30.3]", dotproduct::ArrayParam<double>({10.1, 20.2, 30.3})); + f.template check_prepare_state_output("[10.1 20.2 30.3]", dotproduct::ArrayParam<float>({10.1, 20.2, 30.3})); } TEST_F("prepareSharedState emits double vector for double imported attribute", ArrayFixture) { diff --git a/searchlib/src/vespa/searchlib/features/dotproductfeature.cpp b/searchlib/src/vespa/searchlib/features/dotproductfeature.cpp index 1ee5fe90773..98f276d1fed 100644 --- a/searchlib/src/vespa/searchlib/features/dotproductfeature.cpp +++ b/searchlib/src/vespa/searchlib/features/dotproductfeature.cpp @@ -4,12 +4,8 @@ #include "valuefeature.h" #include "weighted_set_parser.hpp" #include "array_parser.hpp" +#include <vespa/searchcommon/attribute/i_multi_value_attribute.h> #include <vespa/searchlib/fef/properties.h> -#include <vespa/searchlib/attribute/integerbase.h> -#include <vespa/searchlib/attribute/imported_attribute_vector_read_guard.h> -#include <vespa/searchlib/attribute/floatbase.h> -#include <vespa/searchlib/attribute/multinumericattribute.h> -#include <vespa/searchlib/attribute/multienumattribute.h> #include <vespa/eval/eval/fast_value.h> #include <vespa/eval/eval/value_codec.h> #include <vespa/vespalib/objects/nbostream.h> @@ -294,108 +290,6 @@ SparseDotProductByArrayReadViewExecutor<BaseType>::getAttributeValues(uint32_t d return vespalib::ConstArrayRef(_scratch.data(), i); } -template <typename A> -DotProductByCopyExecutor<A>::DotProductByCopyExecutor(const A * attribute, const V & queryVector) : - DotProductExecutor<A>(attribute, queryVector), - _copy(static_cast<size_t>(attribute->getMaxValueCount())) -{ -} - -template <typename A> -DotProductByCopyExecutor<A>::~DotProductByCopyExecutor() = default; - -template <typename A> -vespalib::ConstArrayRef<typename A::BaseType> -DotProductByCopyExecutor<A>::getAttributeValues(uint32_t docId) -{ - size_t count = this->_attribute->getAll(docId, &_copy[0], _copy.size()); - if (count > _copy.size()) { - _copy.resize(count); - count = this->_attribute->getAll(docId, &_copy[0], _copy.size()); - } - return vespalib::ConstArrayRef(_copy.data(), count); -} - -template <typename A> -SparseDotProductByCopyExecutor<A>::SparseDotProductByCopyExecutor(const A * attribute, const V & queryVector, const IV & queryIndexes) : - SparseDotProductExecutorBase<typename A::BaseType>(queryVector, queryIndexes), - _attribute(attribute), - _copy(std::max(static_cast<size_t>(attribute->getMaxValueCount()), queryIndexes.size())) -{ -} - -template <typename A> -SparseDotProductByCopyExecutor<A>::~SparseDotProductByCopyExecutor() = default; - -template <typename A> -vespalib::ConstArrayRef<typename A::BaseType> -SparseDotProductByCopyExecutor<A>::getAttributeValues(uint32_t docId) -{ - size_t count = this->_attribute->getAll(docId, &_copy[0], _copy.size()); - if (count > _copy.size()) { - _copy.resize(count); - count = this->_attribute->getAll(docId, &_copy[0], _copy.size()); - } - size_t i(0); - for (const IV & iv(this->_queryIndexes); (i < iv.size()) && (iv[i] < count); i++) { - _copy[i] = _copy[iv[i]]; - } - return vespalib::ConstArrayRef(_copy.data(), i); -} - -template <typename BaseType> -DotProductByContentFillExecutor<BaseType>::DotProductByContentFillExecutor( - const attribute::IAttributeVector * attribute, - const V & queryVector) - : DotProductExecutorBase<BaseType>(queryVector), - _attribute(attribute), - _filler() -{ - _filler.allocate(attribute->getMaxValueCount()); -} - -template <typename BaseType> -DotProductByContentFillExecutor<BaseType>::~DotProductByContentFillExecutor() = default; - -template <typename BaseType> -vespalib::ConstArrayRef<BaseType> -DotProductByContentFillExecutor<BaseType>::getAttributeValues(uint32_t docid) { - _filler.fill(*_attribute, docid); - return vespalib::ConstArrayRef(_filler.data(), _filler.size()); -} - -template <typename BaseType> -SparseDotProductByContentFillExecutor<BaseType>::SparseDotProductByContentFillExecutor( - const attribute::IAttributeVector * attribute, - const V & queryVector, - const IV & queryIndexes) - : DotProductExecutorBase<BaseType>(queryVector), - _attribute(attribute), - _queryIndexes(queryIndexes), - _filler() -{ - _filler.allocate(std::max(static_cast<size_t>(attribute->getMaxValueCount()), queryIndexes.size())); -} - -template <typename BaseType> -SparseDotProductByContentFillExecutor<BaseType>::~SparseDotProductByContentFillExecutor() = default; - -template <typename BaseType> -vespalib::ConstArrayRef<BaseType> -SparseDotProductByContentFillExecutor<BaseType>::getAttributeValues(uint32_t docid) -{ - _filler.fill(*_attribute, docid); - - const size_t count = _filler.size(); - BaseType * data = _filler.data(); - size_t i = 0; - for (; (i < _queryIndexes.size()) && (_queryIndexes[i] < count); ++i) { - data[i] = data[_queryIndexes[i]]; - } - - return vespalib::ConstArrayRef(data, i); -} - } namespace { @@ -480,6 +374,7 @@ template ArrayParam<int64_t>::ArrayParam(const Property & prop); #ifdef __clang__ template ArrayParam<int64_t>::~ArrayParam(); #endif +template struct ArrayParam<int32_t>; template struct ArrayParam<double>; template struct ArrayParam<float>; @@ -500,91 +395,44 @@ make_multi_value_read_view(const IAttributeVector& attribute, vespalib::Stash& s return nullptr; } -// Precondition: attribute->isImported() == false -template <typename A> +template <typename T> FeatureExecutor & createForDirectArrayImpl(const IAttributeVector * attribute, - const std::vector<typename A::BaseType> & values, + const std::vector<T> & values, const std::vector<uint32_t> & indexes, vespalib::Stash & stash) { if (values.empty()) { return stash.create<SingleZeroValueExecutor>(); } - const A * iattr = dynamic_cast<const A *>(attribute); - using T = typename A::BaseType; - using VT = T; - auto array_read_view = make_multi_value_read_view<VT>(*attribute, stash); - if (indexes.empty()) { - if (array_read_view != nullptr) { + auto array_read_view = make_multi_value_read_view<T>(*attribute, stash); + if (array_read_view != nullptr) { + if (indexes.empty()) { return stash.create<dotproduct::array::DotProductByArrayReadViewExecutor<T>>(array_read_view, values); } else { - return stash.create<dotproduct::array::DotProductByCopyExecutor<A>>(iattr, values); - } - } else { - if (array_read_view != nullptr) { return stash.create<dotproduct::array::SparseDotProductByArrayReadViewExecutor<T>>(array_read_view, values, indexes); - } else { - return stash.create<dotproduct::array::SparseDotProductByCopyExecutor<A>>(iattr, values, indexes); } } return stash.create<SingleZeroValueExecutor>(); } -template <typename BaseType> -FeatureExecutor & -createForImportedArrayImpl(const IAttributeVector * attribute, - const std::vector<BaseType> & values, - const std::vector<uint32_t> & indexes, - vespalib::Stash & stash) { - if (values.empty()) { - return stash.create<SingleZeroValueExecutor>(); - } - if (indexes.empty()) { - using ExecutorType = dotproduct::array::DotProductByContentFillExecutor<BaseType>; - return stash.create<ExecutorType>(attribute, values); - } else { - using ExecutorType = dotproduct::array::SparseDotProductByContentFillExecutor<BaseType>; - return stash.create<ExecutorType>(attribute, values, indexes); - } -} - -template <typename BaseType> -FeatureExecutor& -createForImportedArray(const IAttributeVector * attribute, - const Property & prop, - vespalib::Stash & stash) { - std::vector<BaseType> values; - std::vector<uint32_t> indexes; - parseVectors(prop, values, indexes); - return createForImportedArrayImpl<BaseType>(attribute, values, indexes, stash); -} - -template <typename BaseType> -FeatureExecutor& -createForImportedArray(const IAttributeVector * attribute, - const ArrayParam<BaseType> & arguments, - vespalib::Stash & stash) { - return createForImportedArrayImpl<BaseType>(attribute, arguments.values, arguments.indexes, stash); -} - -template <typename A> +template <typename T> FeatureExecutor & createForDirectArray(const IAttributeVector * attribute, const Property & prop, vespalib::Stash & stash) { - std::vector<typename A::BaseType> values; + std::vector<T> values; std::vector<uint32_t> indexes; parseVectors(prop, values, indexes); - return createForDirectArrayImpl<A>(attribute, values, indexes, stash); + return createForDirectArrayImpl<T>(attribute, values, indexes, stash); } -template <typename A> +template <typename T> FeatureExecutor & createForDirectArray(const IAttributeVector * attribute, - const ArrayParam<typename A::BaseType> & arguments, + const ArrayParam<T> & arguments, vespalib::Stash & stash) { - return createForDirectArrayImpl<A>(attribute, arguments.values, arguments.indexes, stash); + return createForDirectArrayImpl<T>(attribute, arguments.values, arguments.indexes, stash); } template<typename T> @@ -656,33 +504,19 @@ FeatureExecutor & createFromObject(const IAttributeVector * attribute, const fef::Anything & object, vespalib::Stash &stash) { if (attribute->getCollectionType() == attribute::CollectionType::ARRAY) { - if (!attribute->isImported()) { - switch (attribute->getBasicType()) { - case BasicType::INT8: - return createForDirectArray<IntegerAttributeTemplate<int8_t>>(attribute, dynamic_cast<const ArrayParam<int8_t> &>(object), stash); - case BasicType::INT32: - return createForDirectArray<IntegerAttributeTemplate<int32_t>>(attribute, dynamic_cast<const ArrayParam<int32_t> &>(object), stash); - case BasicType::INT64: - return createForDirectArray<IntegerAttributeTemplate<int64_t>>(attribute, dynamic_cast<const ArrayParam<int64_t> &>(object), stash); - case BasicType::FLOAT: - return createForDirectArray<FloatingPointAttributeTemplate<float>>(attribute, dynamic_cast<const ArrayParam<float> &>(object), stash); - case BasicType::DOUBLE: - return createForDirectArray<FloatingPointAttributeTemplate<double>>(attribute, dynamic_cast<const ArrayParam<double> &>(object), stash); - default: - break; - } - } else { - switch (attribute->getBasicType()) { - case BasicType::INT8: - case BasicType::INT32: - case BasicType::INT64: - return createForImportedArray<int64_t>(attribute, dynamic_cast<const ArrayParam<int64_t> &>(object), stash); - case BasicType::FLOAT: - case BasicType::DOUBLE: - return createForImportedArray<double>(attribute, dynamic_cast<const ArrayParam<double> &>(object), stash); - default: - break; - } + switch (attribute->getBasicType()) { + case BasicType::INT8: + return createForDirectArray<int8_t>(attribute, dynamic_cast<const ArrayParam<int8_t> &>(object), stash); + case BasicType::INT32: + return createForDirectArray<int32_t>(attribute, dynamic_cast<const ArrayParam<int32_t> &>(object), stash); + case BasicType::INT64: + return createForDirectArray<int64_t>(attribute, dynamic_cast<const ArrayParam<int64_t> &>(object), stash); + case BasicType::FLOAT: + return createForDirectArray<float>(attribute, dynamic_cast<const ArrayParam<float> &>(object), stash); + case BasicType::DOUBLE: + return createForDirectArray<double>(attribute, dynamic_cast<const ArrayParam<double> &>(object), stash); + default: + break; } } else if (attribute->getCollectionType() == attribute::CollectionType::WSET) { using namespace dotproduct::wset; @@ -727,37 +561,19 @@ createFromObject(const IAttributeVector * attribute, const fef::Anything & objec FeatureExecutor * createTypedArrayExecutor(const IAttributeVector * attribute, const Property & prop, vespalib::Stash & stash) { - if (!attribute->isImported()) { - switch (attribute->getBasicType()) { - case BasicType::INT8: - return &createForDirectArray<IntegerAttributeTemplate<int8_t>>(attribute, prop, stash); - case BasicType::INT32: - return &createForDirectArray<IntegerAttributeTemplate<int32_t>>(attribute, prop, stash); - case BasicType::INT64: - return &createForDirectArray<IntegerAttributeTemplate<int64_t>>(attribute, prop, stash); - case BasicType::FLOAT: - return &createForDirectArray<FloatingPointAttributeTemplate<float>>(attribute, prop, stash); - case BasicType::DOUBLE: - return &createForDirectArray<FloatingPointAttributeTemplate<double>>(attribute, prop, stash); - default: - break; - } - } else { - // When using AttributeContent, integers are always extracted as largeint_t and - // floats always as double. This means that we cannot allow type specializations - // on int32_t or float, or reinterpreting type casts will end up pointing at - // data that is not of the correct size. Which would be Bad(tm). - switch (attribute->getBasicType()) { - case BasicType::INT8: - case BasicType::INT32: - case BasicType::INT64: - return &createForImportedArray<IAttributeVector::largeint_t>(attribute, prop, stash); - case BasicType::FLOAT: - case BasicType::DOUBLE: - return &createForImportedArray<double>(attribute, prop, stash); - default: - break; - } + switch (attribute->getBasicType()) { + case BasicType::INT8: + return &createForDirectArray<int8_t>(attribute, prop, stash); + case BasicType::INT32: + return &createForDirectArray<int32_t>(attribute, prop, stash); + case BasicType::INT64: + return &createForDirectArray<int64_t>(attribute, prop, stash); + case BasicType::FLOAT: + return &createForDirectArray<float>(attribute, prop, stash); + case BasicType::DOUBLE: + return &createForDirectArray<double>(attribute, prop, stash); + default: + break; } return nullptr; } @@ -836,35 +652,19 @@ createFromString(const IAttributeVector * attribute, const Property & prop, vesp fef::Anything::UP attemptParseArrayQueryVector(const IAttributeVector & attribute, const Property & prop) { - if (!attribute.isImported()) { - switch (attribute.getBasicType()) { - case BasicType::INT8: - return std::make_unique<ArrayParam<int8_t>>(prop); - case BasicType::INT32: - return std::make_unique<ArrayParam<int32_t>>(prop); - case BasicType::INT64: - return std::make_unique<ArrayParam<int64_t>>(prop); - case BasicType::FLOAT: - return std::make_unique<ArrayParam<float>>(prop); - case BasicType::DOUBLE: - return std::make_unique<ArrayParam<double>>(prop); - default: - break; - } - } else { - // See rationale in createTypedArrayExecutor() as to why we promote < 64 bit types - // to their full-width equivalent when dealing with imported attributes. - switch (attribute.getBasicType()) { - case BasicType::INT8: - case BasicType::INT32: - case BasicType::INT64: - return std::make_unique<ArrayParam<int64_t>>(prop); - case BasicType::FLOAT: - case BasicType::DOUBLE: - return std::make_unique<ArrayParam<double>>(prop); - default: - break; - } + switch (attribute.getBasicType()) { + case BasicType::INT8: + return std::make_unique<ArrayParam<int8_t>>(prop); + case BasicType::INT32: + return std::make_unique<ArrayParam<int32_t>>(prop); + case BasicType::INT64: + return std::make_unique<ArrayParam<int64_t>>(prop); + case BasicType::FLOAT: + return std::make_unique<ArrayParam<float>>(prop); + case BasicType::DOUBLE: + return std::make_unique<ArrayParam<double>>(prop); + default: + break; } return std::unique_ptr<fef::Anything>(); } diff --git a/searchlib/src/vespa/searchlib/features/dotproductfeature.h b/searchlib/src/vespa/searchlib/features/dotproductfeature.h index ee0a85e689b..1339dbf985e 100644 --- a/searchlib/src/vespa/searchlib/features/dotproductfeature.h +++ b/searchlib/src/vespa/searchlib/features/dotproductfeature.h @@ -208,43 +208,6 @@ public: ~DotProductExecutor(); }; -template <typename A> -class DotProductByCopyExecutor : public DotProductExecutor<A> { -public: - typedef typename DotProductExecutor<A>::V V; - DotProductByCopyExecutor(const A * attribute, const V & queryVector); - ~DotProductByCopyExecutor(); -private: - vespalib::ConstArrayRef<typename A::BaseType> getAttributeValues(uint32_t docid) final override; - std::vector<typename A::BaseType> _copy; -}; - -/** - * Dot product executor which uses AttributeContent for the specified base value type - * to extract array elements from a given attribute vector. Used for "synthetic" - * attribute vectors such as imported attributes, where we cannot directly access - * the memory of the underlying attribute store. - * - * Some caveats: - * - 64 bit value type width is enforced, so 32-bit value types will not benefit - * from extra SIMD register capacity. - * - Additional overhead caused by call indirection and copy step. - */ -template <typename BaseType> -class DotProductByContentFillExecutor : public DotProductExecutorBase<BaseType> { -public: - using V = typename DotProductExecutorBase<BaseType>::V; - using ValueFiller = attribute::AttributeContent<BaseType>; - - DotProductByContentFillExecutor(const attribute::IAttributeVector * attribute, const V & queryVector); - ~DotProductByContentFillExecutor(); -private: - vespalib::ConstArrayRef<BaseType> getAttributeValues(uint32_t docid) final override; - - const attribute::IAttributeVector* _attribute; - ValueFiller _filler; -}; - template <typename BaseType> class SparseDotProductExecutorBase : public DotProductExecutorBase<BaseType> { public: @@ -272,42 +235,6 @@ private: const ArrayReadView* _array_read_view; }; -template <typename A> -class SparseDotProductByCopyExecutor : public SparseDotProductExecutorBase<typename A::BaseType> { -public: - typedef std::vector<uint32_t> IV; - typedef typename SparseDotProductExecutorBase<typename A::BaseType>::V V; - SparseDotProductByCopyExecutor(const A * attribute, const V & queryVector, const IV & queryIndexes); - ~SparseDotProductByCopyExecutor(); -private: - vespalib::ConstArrayRef<typename A::BaseType> getAttributeValues(uint32_t docid) final override; - const A* _attribute; - std::vector<typename A::BaseType> _copy; -}; - -/** - * Dot product executor which uses AttributeContent for fetching values. See - * DotProductByContentFillExecutor for a more in-depth description and caveats. - */ -template <typename BaseType> -class SparseDotProductByContentFillExecutor : public DotProductExecutorBase<BaseType> { -public: - using IV = std::vector<uint32_t>; - using V = typename DotProductExecutorBase<BaseType>::V; - using ValueFiller = attribute::AttributeContent<BaseType>; - - SparseDotProductByContentFillExecutor(const attribute::IAttributeVector * attribute, - const V & queryVector, - const IV & queryIndexes); - ~SparseDotProductByContentFillExecutor() override; -private: - vespalib::ConstArrayRef<BaseType> getAttributeValues(uint32_t docid) final override; - - const attribute::IAttributeVector* _attribute; - IV _queryIndexes; - ValueFiller _filler; -}; - } } |