diff options
author | Henning Baldersheim <balder@yahoo-inc.com> | 2019-06-13 20:09:45 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2019-06-13 20:09:45 +0200 |
commit | f8a628da56286741785ba6f450148bd4da990974 (patch) | |
tree | f03e4ec3c0f2888422d6b13d981052233133eb03 | |
parent | 98187c9181c2acc140e8556a5a9cfd8b8bda1764 (diff) | |
parent | 59043d984c5a0928df7780762301693c90a32bac (diff) |
Merge pull request #9773 from vespa-engine/balder/lookup-attribute-once
Implement prepareSharedState and lookup attribute once.
8 files changed, 396 insertions, 225 deletions
diff --git a/searchlib/src/vespa/searchlib/features/attributefeature.cpp b/searchlib/src/vespa/searchlib/features/attributefeature.cpp index 1e18a2d3af8..4fff5ae5b3f 100644 --- a/searchlib/src/vespa/searchlib/features/attributefeature.cpp +++ b/searchlib/src/vespa/searchlib/features/attributefeature.cpp @@ -324,7 +324,7 @@ AttributeBlueprint::createInstance() const } #define CREATE_AND_RETURN_IF_SINGLE_NUMERIC(a, T) \ - if (dynamic_cast<const SingleValueNumericAttribute<T> *>(a) != NULL) { \ + if (dynamic_cast<const SingleValueNumericAttribute<T> *>(a) != nullptr) { \ return stash.create<SingleAttributeExecutor<SingleValueNumericAttribute<T>>>(*static_cast<const SingleValueNumericAttribute<T> *>(a)); \ } @@ -333,7 +333,7 @@ namespace { fef::FeatureExecutor & createAttributeExecutor(const IAttributeVector *attribute, const vespalib::string &attrName, const vespalib::string &extraParam, vespalib::Stash &stash) { - if (attribute == NULL) { + if (attribute == nullptr) { LOG(warning, "The attribute vector '%s' was not found in the attribute manager, returning default values.", attrName.c_str()); std::vector<feature_t> values(4, 0.0f); @@ -382,7 +382,7 @@ createTensorAttributeExecutor(const IAttributeVector *attribute, const vespalib: const ValueType &tensorType, vespalib::Stash &stash) { - if (attribute == NULL) { + if (attribute == nullptr) { LOG(warning, "The attribute vector '%s' was not found in the attribute manager." 
" Returning empty tensor.", attrName.c_str()); return ConstantTensorExecutor::createEmpty(tensorType, stash); diff --git a/searchlib/src/vespa/searchlib/features/attributefeature.h b/searchlib/src/vespa/searchlib/features/attributefeature.h index 47597823f08..e1e3ddf7300 100644 --- a/searchlib/src/vespa/searchlib/features/attributefeature.h +++ b/searchlib/src/vespa/searchlib/features/attributefeature.h @@ -22,7 +22,7 @@ private: public: AttributeBlueprint(); - ~AttributeBlueprint(); + ~AttributeBlueprint() override; void visitDumpFeatures(const fef::IIndexEnvironment & env, fef::IDumpFeatureVisitor & visitor) const override; fef::Blueprint::UP createInstance() const override; diff --git a/searchlib/src/vespa/searchlib/features/dotproductfeature.cpp b/searchlib/src/vespa/searchlib/features/dotproductfeature.cpp index 1f51ee5cef6..1560d043be2 100644 --- a/searchlib/src/vespa/searchlib/features/dotproductfeature.cpp +++ b/searchlib/src/vespa/searchlib/features/dotproductfeature.cpp @@ -43,16 +43,32 @@ VectorBase<DimensionVType, DimensionHType, ComponentType, HashMapComparator>::sy return *this; } -template VectorBase<int64_t, int64_t, double> & VectorBase<int64_t, int64_t, double>::syncMap(); +template class VectorBase<int64_t, int64_t, double>; +template class VectorBase<uint32_t, uint32_t, double>; + +template class IntegerVectorT<int64_t>; template <typename Vector, typename Buffer> -DotProductExecutorByCopy<Vector, Buffer>::DotProductExecutorByCopy(const IAttributeVector * attribute, Vector queryVector) : +DotProductExecutorByCopy<Vector, Buffer>::DotProductExecutorByCopy(const IAttributeVector * attribute, const Vector & queryVector) : FeatureExecutor(), _attribute(attribute), - _queryVector(std::move(queryVector)), - _end(_queryVector.syncMap().getDimMap().end()), - _buffer() + _queryVector(queryVector), + _end(_queryVector.getDimMap().end()), + _buffer(), + _backing() +{ + _buffer.allocate(_attribute->getMaxValueCount()); +} + +template <typename Vector, 
typename Buffer> +DotProductExecutorByCopy<Vector, Buffer>::DotProductExecutorByCopy(const IAttributeVector * attribute, std::unique_ptr<Vector> queryVector) : + FeatureExecutor(), + _attribute(attribute), + _queryVector(*queryVector), + _end(_queryVector.getDimMap().end()), + _buffer(), + _backing(std::move(queryVector)) { _buffer.allocate(_attribute->getMaxValueCount()); } @@ -79,10 +95,10 @@ StringVector::StringVector() = default; StringVector::~StringVector() = default; template <typename BaseType> -DotProductExecutorBase<BaseType>::DotProductExecutorBase(V queryVector) +DotProductExecutorBase<BaseType>::DotProductExecutorBase(const V & queryVector) : FeatureExecutor(), - _queryVector(std::move(queryVector)), - _end(_queryVector.syncMap().getDimMap().end()) + _queryVector(queryVector), + _end(_queryVector.getDimMap().end()) { } @@ -104,9 +120,18 @@ void DotProductExecutorBase<BaseType>::execute(uint32_t docId) { } template <typename A> -DotProductExecutor<A>::DotProductExecutor(const A * attribute, V queryVector) : - DotProductExecutorBase<typename A::BaseType>(std::move(queryVector)), - _attribute(attribute) +DotProductExecutor<A>::DotProductExecutor(const A * attribute, const V & queryVector) : + DotProductExecutorBase<typename A::BaseType>(queryVector), + _attribute(attribute), + _backing() +{ +} + +template <typename A> +DotProductExecutor<A>::DotProductExecutor(const A * attribute, std::unique_ptr<V> queryVector) : + DotProductExecutorBase<typename A::BaseType>(*queryVector), + _attribute(attribute), + _backing(std::move(queryVector)) { } @@ -127,19 +152,32 @@ public: using V = VectorBase<EnumHandle, EnumHandle, feature_t>; private: const IWeightedIndexVector * _attribute; - V _queryVector; + const V & _queryVector; const typename V::HashMap::const_iterator _end; + std::unique_ptr<V> _backing; public: - DotProductExecutorByEnum(const IWeightedIndexVector * attribute, V queryVector); + DotProductExecutorByEnum(const IWeightedIndexVector * attribute, const V 
& queryVector); + DotProductExecutorByEnum(const IWeightedIndexVector * attribute, std::unique_ptr<V> queryVector); ~DotProductExecutorByEnum() override; void execute(uint32_t docId) override; }; -DotProductExecutorByEnum::DotProductExecutorByEnum(const IWeightedIndexVector * attribute, V queryVector) +DotProductExecutorByEnum::DotProductExecutorByEnum(const IWeightedIndexVector * attribute, const V & queryVector) + : FeatureExecutor(), + _attribute(attribute), + _queryVector(queryVector), + _end(_queryVector.getDimMap().end()), + _backing() +{ +} + + +DotProductExecutorByEnum::DotProductExecutorByEnum(const IWeightedIndexVector * attribute, std::unique_ptr<V> queryVector) : FeatureExecutor(), _attribute(attribute), - _queryVector(std::move(queryVector)), - _end(_queryVector.syncMap().getDimMap().end()) + _queryVector(*queryVector), + _end(_queryVector.getDimMap().end()), + _backing(std::move(queryVector)) { } @@ -351,51 +389,6 @@ size_t SparseDotProductByContentFillExecutor<BaseType>::getAttributeValues(uint3 } -DotProductBlueprint::DotProductBlueprint() : - Blueprint("dotProduct"), - _defaultAttribute(), - _queryVector() -{ } - -DotProductBlueprint::~DotProductBlueprint() = default; - -vespalib::string -DotProductBlueprint::getAttribute(const IQueryEnvironment & env) const -{ - Property prop = env.getProperties().lookup(getBaseName(), _defaultAttribute + ".override.name"); - if (prop.found() && !prop.get().empty()) { - return prop.get(); - } - return _defaultAttribute; -} - -void -DotProductBlueprint::visitDumpFeatures(const IIndexEnvironment &, IDumpFeatureVisitor &) const -{ -} - -bool -DotProductBlueprint::setup(const IIndexEnvironment & env, const ParameterList & params) -{ - _defaultAttribute = params[0].getValue(); - _queryVector = params[1].getValue(); - describeOutput("scalar", "The result after calculating the dot product of the vector represented by the weighted set " - "and the vector sent down with the query"); - 
env.hintAttributeAccess(_defaultAttribute); - return true; -} - -ParameterDescriptions -DotProductBlueprint::getDescriptions() const { - return ParameterDescriptions().desc().attribute(ParameterDataTypeSet::normalTypeSet(), ParameterCollection::ANY).string(); -} - -Blueprint::UP -DotProductBlueprint::createInstance() const -{ - return std::make_unique<DotProductBlueprint>(); -} - namespace { template <typename T, typename AsT = T> @@ -503,7 +496,7 @@ createForDirectArrayImpl(const IAttributeVector * attribute, if (supportsGetRawValues<A,VT>(*iattr)) { using ExactA = MultiValueNumericAttribute<A, VT>; - const ExactA * exactA = dynamic_cast<const ExactA *>(iattr); + auto * exactA = dynamic_cast<const ExactA *>(iattr); if (exactA != nullptr) { return stash.create<dotproduct::array::DotProductExecutor<ExactA>>(exactA, values); } @@ -577,6 +570,35 @@ createForDirectArray(const IAttributeVector * attribute, return createForDirectArrayImpl<A>(attribute, arguments.values, arguments.indexes, stash); } +template <typename A, typename V> +FeatureExecutor & +createForDirectWSetImpl(const IAttributeVector * attribute, V && vector, vespalib::Stash & stash) +{ + using namespace dotproduct::wset; + using T = typename A::BaseType; + const A * iattr = dynamic_cast<const A *>(attribute); + using VT = multivalue::WeightedValue<T>; + using ExactA = MultiValueNumericAttribute<A, VT>; + if (!attribute->isImported() && (iattr != nullptr) && supportsGetRawValues<A, VT>(*iattr)) { + auto * exactA = dynamic_cast<const ExactA *>(iattr); + if (exactA != nullptr) { + return stash.create<DotProductExecutor<ExactA>>(exactA, std::forward<V>(vector)); + } + return stash.create<DotProductExecutor<A>>(iattr, std::forward<V>(vector)); + } + return stash.create<DotProductExecutorByCopy<IntegerVectorT<T>, WeightedIntegerContent>>(attribute, std::forward<V>(vector)); +} + +template <typename T> +FeatureExecutor & +createForDirectIntegerWSet(const IAttributeVector * attribute, const 
dotproduct::wset::IntegerVectorT<T> & vector, vespalib::Stash & stash) +{ + using namespace dotproduct::wset; + return vector.empty() + ? stash.create<SingleZeroValueExecutor>() + : createForDirectWSetImpl<IntegerAttributeTemplate<T>>(attribute, vector, stash); +} + FeatureExecutor & createFromObject(const IAttributeVector * attribute, const fef::Anything & object, vespalib::Stash &stash) { @@ -609,6 +631,35 @@ createFromObject(const IAttributeVector * attribute, const fef::Anything & objec break; } } + } else if (attribute->getCollectionType() == attribute::CollectionType::WSET) { + using namespace dotproduct::wset; + if (attribute->hasEnum()) { + const auto & vector = dynamic_cast<const EnumVector &>(object); + if (vector.empty()) { + return stash.create<SingleZeroValueExecutor>(); + } + const auto * getEnumHandles = dynamic_cast<const IWeightedIndexVector *>(attribute); + if (supportsGetEnumHandles(getEnumHandles)) { + return stash.create<DotProductExecutorByEnum>(getEnumHandles, vector); + } + return stash.create<DotProductExecutorByCopy<EnumVector, WeightedEnumContent>>(attribute, vector); + } else { + if (attribute->isStringType()) { + const auto & vector = dynamic_cast<const StringVector &>(object); + if (vector.empty()) { + return stash.create<SingleZeroValueExecutor>(); + } + return stash.create<DotProductExecutorByCopy<StringVector, WeightedConstCharContent>>(attribute, vector); + } else if (attribute->isIntegerType()) { + if (attribute->getBasicType() == BasicType::INT32) { + return createForDirectIntegerWSet<int32_t>(attribute, dynamic_cast<const IntegerVectorT<int32_t> &>(object), stash); + } else if (attribute->getBasicType() == BasicType::INT64) { + return createForDirectIntegerWSet<int64_t>(attribute, dynamic_cast<const IntegerVectorT<int64_t> &>(object), stash); + } else if (attribute->getBasicType() == BasicType::INT8) { + return createForDirectIntegerWSet<int8_t>(attribute, dynamic_cast<const IntegerVectorT<int8_t> &>(object), stash); + } + } + } 
} // TODO: Add support for creating executor for weighted set string / integer attribute // where the query vector is represented as an object instead of a string. @@ -654,60 +705,43 @@ createTypedArrayExecutor(const IAttributeVector * attribute, const Property & pr return nullptr; } -template <typename A, typename V> -FeatureExecutor * -createForDirectWSetImpl(const IAttributeVector * attribute, V vector, vespalib::Stash & stash) -{ - using namespace dotproduct::wset; - using T = typename A::BaseType; - const A * iattr = dynamic_cast<const A *>(attribute); - using VT = multivalue::WeightedValue<T>; - using ExactA = MultiValueNumericAttribute<A, VT>; - if (!attribute->isImported() && (iattr != nullptr) && supportsGetRawValues<A, VT>(*iattr)) { - const ExactA * exactA = dynamic_cast<const ExactA *>(iattr); - if (exactA != nullptr) { - return &stash.create<DotProductExecutor<ExactA>>(exactA, std::move(vector)); - } - return &stash.create<DotProductExecutor<A>>(iattr, std::move(vector)); - } - return &stash.create<DotProductExecutorByCopy<IntegerVectorT<T>, WeightedIntegerContent>>(attribute, std::move(vector)); -} - template <typename T> -FeatureExecutor * +FeatureExecutor & createForDirectIntegerWSet(const IAttributeVector * attribute, const Property & prop, vespalib::Stash & stash) { using namespace dotproduct::wset; - IntegerVectorT<T> vector; - WeightedSetParser::parse(prop.get(), vector); - return vector.empty() - ? &stash.create<SingleZeroValueExecutor>() + auto vector = std::make_unique<IntegerVectorT<T>>(); + WeightedSetParser::parse(prop.get(), *vector); + vector->syncMap(); + return vector->empty() + ? 
stash.create<SingleZeroValueExecutor>() : createForDirectWSetImpl<IntegerAttributeTemplate<T>>(attribute, std::move(vector), stash); } - -FeatureExecutor * +FeatureExecutor & createTypedWsetExecutor(const IAttributeVector * attribute, const Property & prop, vespalib::Stash & stash) { using namespace dotproduct::wset; if (attribute->hasEnum()) { - EnumVector vector(attribute); - WeightedSetParser::parse(prop.get(), vector); - if (vector.empty()) { - return &stash.create<SingleZeroValueExecutor>(); + auto vector = std::make_unique<EnumVector>(attribute); + WeightedSetParser::parse(prop.get(), *vector); + if (vector->empty()) { + return stash.create<SingleZeroValueExecutor>(); } - const IWeightedIndexVector * getEnumHandles = dynamic_cast<const IWeightedIndexVector *>(attribute); + vector->syncMap(); + auto * getEnumHandles = dynamic_cast<const IWeightedIndexVector *>(attribute); if (supportsGetEnumHandles(getEnumHandles)) { - return &stash.create<DotProductExecutorByEnum>(getEnumHandles, std::move(vector)); + return stash.create<DotProductExecutorByEnum>(getEnumHandles, std::move(vector)); } - return &stash.create<DotProductExecutorByCopy<EnumVector, WeightedEnumContent>>(attribute, std::move(vector)); + return stash.create<DotProductExecutorByCopy<EnumVector, WeightedEnumContent>>(attribute, std::move(vector)); } else { if (attribute->isStringType()) { - StringVector vector; - WeightedSetParser::parse(prop.get(), vector); - if (vector.empty()) { - return &stash.create<SingleZeroValueExecutor>(); + auto vector = std::make_unique<StringVector>(); + WeightedSetParser::parse(prop.get(), *vector); + if (vector->empty()) { + return stash.create<SingleZeroValueExecutor>(); } - return &stash.create<DotProductExecutorByCopy<StringVector, WeightedConstCharContent>>(attribute, std::move(vector)); + vector->syncMap(); + return stash.create<DotProductExecutorByCopy<StringVector, WeightedConstCharContent>>(attribute, std::move(vector)); } else if (attribute->isIntegerType()) { if 
(attribute->getBasicType() == BasicType::INT32) { return createForDirectIntegerWSet<int32_t>(attribute, prop, stash); @@ -718,7 +752,7 @@ createTypedWsetExecutor(const IAttributeVector * attribute, const Property & pro } } } - return nullptr; + return stash.create<SingleZeroValueExecutor>(); } FeatureExecutor & @@ -726,7 +760,7 @@ createFromString(const IAttributeVector * attribute, const Property & prop, vesp { FeatureExecutor * executor = nullptr; if (attribute->getCollectionType() == attribute::CollectionType::WSET) { - executor = createTypedWsetExecutor(attribute, prop, stash); + executor = &createTypedWsetExecutor(attribute, prop, stash); } else if (attribute->getCollectionType() == attribute::CollectionType::ARRAY) { executor = createTypedArrayExecutor(attribute, prop, stash); } @@ -830,18 +864,35 @@ createQueryVector(const IQueryEnvironment & env, const IAttributeVector * attrib Property prop = env.getProperties().lookup(baseName, queryVector); if (prop.found() && !prop.get().empty()) { if (attribute->isStringType() && attribute->hasEnum()) { - dotproduct::wset::EnumVector vector(attribute); - WeightedSetParser::parse(prop.get(), vector); + auto vector = std::make_unique<dotproduct::wset::EnumVector>(attribute); + WeightedSetParser::parse(prop.get(), *vector); + vector->syncMap(); + arguments = std::move(vector); } else if (attribute->isIntegerType()) { if (attribute->hasEnum()) { - dotproduct::wset::EnumVector vector(attribute); - WeightedSetParser::parse(prop.get(), vector); + auto vector = std::make_unique<dotproduct::wset::EnumVector>(attribute); + WeightedSetParser::parse(prop.get(), *vector); + vector->syncMap(); + arguments = std::move(vector); } else { - dotproduct::wset::IntegerVector vector; - WeightedSetParser::parse(prop.get(), vector); + if (attribute->getBasicType() == BasicType::INT32) { + auto vector = std::make_unique<dotproduct::wset::IntegerVectorT<int32_t>>(); + WeightedSetParser::parse(prop.get(), *vector); + vector->syncMap(); + 
arguments = std::move(vector); + } else if (attribute->getBasicType() == BasicType::INT64) { + auto vector = std::make_unique<dotproduct::wset::IntegerVectorT<int64_t>>(); + WeightedSetParser::parse(prop.get(), *vector); + vector->syncMap(); + arguments = std::move(vector); + } else if (attribute->getBasicType() == BasicType::INT8) { + auto vector = std::make_unique<dotproduct::wset::IntegerVectorT<int8_t>>(); + WeightedSetParser::parse(prop.get(), *vector); + vector->syncMap(); + arguments = std::move(vector); + } } } - // TODO actually use the parsed output for wset operations! } } return arguments; @@ -849,19 +900,66 @@ createQueryVector(const IQueryEnvironment & env, const IAttributeVector * attrib } +DotProductBlueprint::DotProductBlueprint() : + Blueprint("dotProduct"), + _defaultAttribute(), + _queryVector(), + _attrKey(), + _queryVectorKey() +{ } + +DotProductBlueprint::~DotProductBlueprint() = default; + +vespalib::string +DotProductBlueprint::getAttribute(const IQueryEnvironment & env) const +{ + Property prop = env.getProperties().lookup(getBaseName(), _defaultAttribute + ".override.name"); + if (prop.found() && !prop.get().empty()) { + return prop.get(); + } + return _defaultAttribute; +} + +void +DotProductBlueprint::visitDumpFeatures(const IIndexEnvironment &, IDumpFeatureVisitor &) const +{ +} + +bool +DotProductBlueprint::setup(const IIndexEnvironment & env, const ParameterList & params) +{ + _defaultAttribute = params[0].getValue(); + _queryVector = params[1].getValue(); + _attrKey = make_attribute_key(getBaseName(), _defaultAttribute); + _queryVectorKey = make_queryvector_key(getBaseName(), _queryVector); + describeOutput("scalar", "The result after calculating the dot product of the vector represented by the weighted set " + "and the vector sent down with the query"); + env.hintAttributeAccess(_defaultAttribute); + return true; +} + +ParameterDescriptions +DotProductBlueprint::getDescriptions() const { + return 
ParameterDescriptions().desc().attribute(ParameterDataTypeSet::normalTypeSet(), ParameterCollection::ANY).string(); +} + +Blueprint::UP +DotProductBlueprint::createInstance() const +{ + return std::make_unique<DotProductBlueprint>(); +} + void DotProductBlueprint::prepareSharedState(const IQueryEnvironment & env, IObjectStore & store) const { - vespalib::string attributeKey = make_attribute_key(getBaseName(), _defaultAttribute); - const IAttributeVector * attribute = lookupAndStoreAttribute(attributeKey, getAttribute(env), env, store); + const IAttributeVector * attribute = lookupAndStoreAttribute(_attrKey, getAttribute(env), env, store); if (attribute == nullptr) return; - vespalib::string queryVectorKey = make_queryvector_key(getBaseName(), _queryVector); - const fef::Anything * queryVector = env.getObjectStore().get(queryVectorKey); + const fef::Anything * queryVector = env.getObjectStore().get(_queryVectorKey); if (queryVector == nullptr) { fef::Anything::UP arguments = createQueryVector(env, attribute, getBaseName(), _queryVector); if (arguments) { - store.add(queryVectorKey, std::move(arguments)); + store.add(_queryVectorKey, std::move(arguments)); } } @@ -872,7 +970,7 @@ FeatureExecutor & DotProductBlueprint::createExecutor(const IQueryEnvironment & env, vespalib::Stash &stash) const { // Doing it "manually" here to avoid looking up attribute override unless needed. - const fef::Anything * attributeArg = env.getObjectStore().get(make_attribute_key(getBaseName(), _defaultAttribute)); + const fef::Anything * attributeArg = env.getObjectStore().get(_attrKey); const IAttributeVector * attribute = (attributeArg != nullptr) ? 
static_cast<const fef::AnyWrapper<const IAttributeVector *> *>(attributeArg)->getValue() : env.getAttributeContext().getAttribute(getAttribute(env)); @@ -882,7 +980,7 @@ DotProductBlueprint::createExecutor(const IQueryEnvironment & env, vespalib::Sta return stash.create<SingleZeroValueExecutor>(); } attribute = upgradeIfNecessary(attribute, env); - const fef::Anything * queryVectorArg = env.getObjectStore().get(make_queryvector_key(getBaseName(), _queryVector)); + const fef::Anything * queryVectorArg = env.getObjectStore().get(_queryVectorKey); if (queryVectorArg != nullptr) { return createFromObject(attribute, *queryVectorArg, stash); } else { diff --git a/searchlib/src/vespa/searchlib/features/dotproductfeature.h b/searchlib/src/vespa/searchlib/features/dotproductfeature.h index 94c72233c4b..d315a24ecb3 100644 --- a/searchlib/src/vespa/searchlib/features/dotproductfeature.h +++ b/searchlib/src/vespa/searchlib/features/dotproductfeature.h @@ -45,7 +45,7 @@ struct ArrayParam : public fef::Anything { namespace wset { template <typename DimensionVType, typename DimensionHType, typename ComponentType, typename HashMapComparator = std::equal_to<DimensionHType> > -class VectorBase { +class VectorBase : public fef::Anything { public: typedef std::pair<DimensionVType, ComponentType> Element; // <dimension, component> typedef std::vector<Element> Vector; @@ -75,6 +75,10 @@ public: } }; +extern template class VectorBase<int64_t, int64_t, double>; +extern template class VectorBase<uint32_t, uint32_t, double>; +extern template class IntegerVectorT<int64_t>; + using IntegerVector = IntegerVectorT<int64_t>; /** @@ -118,11 +122,11 @@ public: using AT = multivalue::WeightedValue<BaseType>; using V = VectorBase<BaseType, BaseType, feature_t>; private: - V _queryVector; - const typename V::HashMap::const_iterator _end; + const V & _queryVector; + const typename V::HashMap::const_iterator _end; virtual size_t getAttributeValues(uint32_t docid, const AT * & count) = 0; public: - 
DotProductExecutorBase(V queryVector); + DotProductExecutorBase(const V & queryVector); ~DotProductExecutorBase() override; void execute(uint32_t docId) override; }; @@ -135,9 +139,11 @@ public: protected: const A * _attribute; private: + std::unique_ptr<V> _backing; size_t getAttributeValues(uint32_t docid, const AT * & count) override; public: - DotProductExecutor(const A * attribute, V queryVector); + DotProductExecutor(const A * attribute, const V & queryVector); + DotProductExecutor(const A * attribute, std::unique_ptr<V> queryVector); ~DotProductExecutor(); }; @@ -149,12 +155,13 @@ template <typename Vector, typename Buffer> class DotProductExecutorByCopy final : public fef::FeatureExecutor { private: const attribute::IAttributeVector * _attribute; - Vector _queryVector; + const Vector & _queryVector; const typename Vector::HashMap::const_iterator _end; Buffer _buffer; - + std::unique_ptr<Vector> _backing; public: - DotProductExecutorByCopy(const attribute::IAttributeVector * attribute, Vector queryVector); + DotProductExecutorByCopy(const attribute::IAttributeVector * attribute, const Vector & queryVector); + DotProductExecutorByCopy(const attribute::IAttributeVector * attribute, std::unique_ptr<Vector> queryVector); ~DotProductExecutorByCopy() override; void execute(uint32_t docId) override; }; @@ -303,6 +310,8 @@ private: using IAttributeVector = attribute::IAttributeVector; vespalib::string _defaultAttribute; vespalib::string _queryVector; + vespalib::string _attrKey; + vespalib::string _queryVectorKey; vespalib::string getAttribute(const fef::IQueryEnvironment & env) const; const IAttributeVector * upgradeIfNecessary(const IAttributeVector * attribute, const fef::IQueryEnvironment & env) const; diff --git a/searchlib/src/vespa/searchlib/features/internal_max_reduce_prod_join_feature.cpp b/searchlib/src/vespa/searchlib/features/internal_max_reduce_prod_join_feature.cpp index eb73cef1f4c..fd1faeae5ea 100644 --- 
a/searchlib/src/vespa/searchlib/features/internal_max_reduce_prod_join_feature.cpp +++ b/searchlib/src/vespa/searchlib/features/internal_max_reduce_prod_join_feature.cpp @@ -22,32 +22,46 @@ using search::features::dotproduct::wset::IntegerVector; namespace search::features { +namespace { + /** * Executor used when array can be accessed directly */ -template <typename BaseType> +template<typename BaseType> class RawExecutor : public FeatureExecutor { +private: + std::unique_ptr<IntegerVector> _backing; protected: const IAttributeVector *_attribute; - IntegerVector _queryVector; + const IntegerVector &_queryVector; public: - RawExecutor(const IAttributeVector *attribute, IntegerVector queryVector); + RawExecutor(const IAttributeVector *attribute, const IntegerVector & queryVector); + RawExecutor(const IAttributeVector *attribute, std::unique_ptr<IntegerVector> queryVector); + void execute(uint32_t docId) override; }; -template <typename BaseType> -RawExecutor<BaseType>::RawExecutor(const IAttributeVector *attribute, IntegerVector queryVector) : - FeatureExecutor(), - _attribute(attribute), - _queryVector(std::move(queryVector)) +template<typename BaseType> +RawExecutor<BaseType>::RawExecutor(const IAttributeVector *attribute, std::unique_ptr<IntegerVector> queryVector) + : FeatureExecutor(), + _backing(std::move(queryVector)), + _attribute(attribute), + _queryVector(*_backing) { - _queryVector.syncMap(); } -template <typename A, typename V> -feature_t maxProduct(const A &array, size_t count, const V &query) +template<typename BaseType> +RawExecutor<BaseType>::RawExecutor(const IAttributeVector *attribute, const IntegerVector & queryVector) + : FeatureExecutor(), + _backing(), + _attribute(attribute), + _queryVector(queryVector) { +} + +template<typename A, typename V> +feature_t maxProduct(const A &array, size_t count, const V &query) { feature_t val = -std::numeric_limits<double>::max(); for (size_t i = 0; i < count; ++i) { auto itr = 
query.getDimMap().find(array[i].value()); @@ -61,10 +75,9 @@ feature_t maxProduct(const A &array, size_t count, const V &query) return val == -std::numeric_limits<double>::max() ? 0.0 : val; } -template <typename BaseType> +template<typename BaseType> void -RawExecutor<BaseType>::execute(uint32_t docId) -{ +RawExecutor<BaseType>::execute(uint32_t docId) { using A = IntegerAttributeTemplate<BaseType>; const multivalue::Value<BaseType> *values(nullptr); const A *iattr = static_cast<const A *>(_attribute); @@ -75,68 +88,40 @@ RawExecutor<BaseType>::execute(uint32_t docId) /** * Executor when array can't be accessed directly */ -template <typename BaseType> +template<typename BaseType> class BufferedExecutor : public RawExecutor<BaseType> { private: WeightedIntegerContent _buffer; public: - BufferedExecutor(const IAttributeVector *attribute, IntegerVector queryVector); + BufferedExecutor(const IAttributeVector *attribute, const IntegerVector & queryVector); + BufferedExecutor(const IAttributeVector *attribute, std::unique_ptr<IntegerVector> queryVector); + void execute(uint32_t docId) override; }; -template <typename BaseType> -BufferedExecutor<BaseType>::BufferedExecutor(const IAttributeVector *attribute, IntegerVector queryVector) : - RawExecutor<BaseType>(attribute, std::move(queryVector)), - _buffer() -{ -} - - -template <typename BaseType> -void -BufferedExecutor<BaseType>::execute(uint32_t docId) +template<typename BaseType> +BufferedExecutor<BaseType>::BufferedExecutor(const IAttributeVector *attribute, const IntegerVector & queryVector) + : RawExecutor<BaseType>(attribute, queryVector), + _buffer() { - _buffer.fill(*(this->_attribute), docId); - this->outputs().set_number(0, maxProduct(_buffer, _buffer.size(), this->_queryVector)); } -/** - * Blueprint - */ -InternalMaxReduceProdJoinBlueprint::InternalMaxReduceProdJoinBlueprint() : - Blueprint("internalMaxReduceProdJoin") +template<typename BaseType> +BufferedExecutor<BaseType>::BufferedExecutor(const 
IAttributeVector *attribute, std::unique_ptr<IntegerVector> queryVector) + : RawExecutor<BaseType>(attribute, std::move(queryVector)), + _buffer() { } -InternalMaxReduceProdJoinBlueprint::~InternalMaxReduceProdJoinBlueprint() = default; +template<typename BaseType> void -InternalMaxReduceProdJoinBlueprint::visitDumpFeatures(const IIndexEnvironment &, IDumpFeatureVisitor &) const -{ -} - -Blueprint::UP -InternalMaxReduceProdJoinBlueprint::createInstance() const -{ - return std::make_unique<InternalMaxReduceProdJoinBlueprint>(); +BufferedExecutor<BaseType>::execute(uint32_t docId) { + _buffer.fill(*(this->_attribute), docId); + this->outputs().set_number(0, maxProduct(_buffer, _buffer.size(), this->_queryVector)); } -ParameterDescriptions -InternalMaxReduceProdJoinBlueprint::getDescriptions() const -{ - return ParameterDescriptions().desc().attribute(ParameterDataTypeSet::int32OrInt64TypeSet(), ParameterCollection::ARRAY).string(); -} - -bool -InternalMaxReduceProdJoinBlueprint::setup(const IIndexEnvironment &env, const ParameterList &params) -{ - _attribute = params[0].getValue(); - _query = params[1].getValue(); - describeOutput("scalar", "Internal executor for optimized execution of reduce(join(A,Q,f(x,y)(x*y)),max)"); - env.hintAttributeAccess(_attribute); - return true; -} template<typename A> bool supportsGetRawValues(const A &attr) noexcept { @@ -150,10 +135,9 @@ bool supportsGetRawValues(const A &attr) noexcept { } } -template <typename BaseType> +template<typename BaseType, typename V> FeatureExecutor & -selectTypedExecutor(const IAttributeVector *attribute, IntegerVector vector, vespalib::Stash &stash) -{ +selectTypedExecutor(const IAttributeVector *attribute, V && vector, vespalib::Stash &stash) { if (!attribute->isImported()) { using A = IntegerAttributeTemplate<BaseType>; using VT = multivalue::Value<BaseType>; @@ -163,50 +147,127 @@ selectTypedExecutor(const IAttributeVector *attribute, IntegerVector vector, ves if (supportsGetRawValues(*iattr)) { const 
ExactA *exactA = dynamic_cast<const ExactA *>(iattr); if (exactA != nullptr) { - return stash.create<RawExecutor<BaseType>>(attribute, std::move(vector)); + return stash.create<RawExecutor<BaseType>>(attribute, std::forward<V>(vector)); } } } - return stash.create<BufferedExecutor<BaseType>>(attribute, std::move(vector)); + return stash.create<BufferedExecutor<BaseType>>(attribute, std::forward<V>(vector)); } +template<typename V> FeatureExecutor & -selectExecutor(const IAttributeVector *attribute, IntegerVector vector, vespalib::Stash &stash) -{ +selectExecutor(const IAttributeVector *attribute, V && vector, vespalib::Stash &stash) { if (attribute->getCollectionType() == CollectionType::ARRAY) { switch (attribute->getBasicType()) { case BasicType::INT32: - return selectTypedExecutor<int32_t>(attribute, std::move(vector), stash); + return selectTypedExecutor<int32_t, V>(attribute, std::forward<V>(vector), stash); case BasicType::INT64: - return selectTypedExecutor<int64_t>(attribute, std::move(vector), stash); + return selectTypedExecutor<int64_t, V>(attribute, std::forward<V>(vector), stash); default: break; } } LOG(warning, "The attribute vector '%s' is not of type " - "array<int/long>, returning executor with default value.", attribute->getName().c_str()); + "array<int/long>, returning executor with default value.", attribute->getName().c_str()); return stash.create<SingleZeroValueExecutor>(); } +vespalib::string +make_queryvector_key(const vespalib::string & base, const vespalib::string & subKey) { + vespalib::string key(base); + key.append(".vector."); + key.append(subKey); + return key; +} + +std::unique_ptr<IntegerVector> +createQueryVector(const Property & prop) { + if (prop.found() && !prop.get().empty()) { + auto vector = std::make_unique<IntegerVector>(); + WeightedSetParser::parse(prop.get(), *vector); + if (!vector->getVector().empty()) { + vector->syncMap(); + return vector; + } + } + return std::unique_ptr<IntegerVector>(); +} + +} + 
+InternalMaxReduceProdJoinBlueprint::InternalMaxReduceProdJoinBlueprint() + : Blueprint("internalMaxReduceProdJoin"), + _attribute(), + _queryVector(), + _attrKey(), + _queryVectorKey() +{ +} + +InternalMaxReduceProdJoinBlueprint::~InternalMaxReduceProdJoinBlueprint() = default; + +void +InternalMaxReduceProdJoinBlueprint::visitDumpFeatures(const IIndexEnvironment &, IDumpFeatureVisitor &) const { +} + +Blueprint::UP +InternalMaxReduceProdJoinBlueprint::createInstance() const { + return std::make_unique<InternalMaxReduceProdJoinBlueprint>(); +} + +ParameterDescriptions +InternalMaxReduceProdJoinBlueprint::getDescriptions() const { + return ParameterDescriptions().desc().attribute(ParameterDataTypeSet::int32OrInt64TypeSet(), + ParameterCollection::ARRAY).string(); +} + +bool +InternalMaxReduceProdJoinBlueprint::setup(const IIndexEnvironment &env, const ParameterList &params) { + _attribute = params[0].getValue(); + _attrKey = createAttributeKey(_attribute); + _queryVector = params[1].getValue(); + _queryVectorKey = make_queryvector_key(getBaseName(), _queryVector); + describeOutput("scalar", "Internal executor for optimized execution of reduce(join(A,Q,f(x,y)(x*y)),max)"); + env.hintAttributeAccess(_attribute); + return true; +} + +void +InternalMaxReduceProdJoinBlueprint::prepareSharedState(const fef::IQueryEnvironment & env, fef::IObjectStore & store) const +{ + const IAttributeVector * attribute = lookupAndStoreAttribute(_attrKey, _attribute, env, store); + if (attribute == nullptr) return; + + const fef::Anything * queryVector = env.getObjectStore().get(_queryVectorKey); + if (queryVector == nullptr) { + std::unique_ptr<IntegerVector> vector = createQueryVector(env.getProperties().lookup(_queryVector)); + if (vector) { + store.add(_queryVectorKey, std::move(vector)); + } + } +} FeatureExecutor & InternalMaxReduceProdJoinBlueprint::createExecutor(const IQueryEnvironment &env, vespalib::Stash &stash) const { - const IAttributeVector *attribute = 
env.getAttributeContext().getAttribute(_attribute); + const IAttributeVector * attribute = lookupAttribute(_attrKey, _attribute, env); if (attribute == nullptr) { LOG(warning, "The attribute vector '%s' was not found in the attribute manager, " - "returning executor with default value.", - _attribute.c_str()); + "returning executor with default value.", _attribute.c_str()); return stash.create<SingleZeroValueExecutor>(); } - Property prop = env.getProperties().lookup(_query); - if (prop.found() && !prop.get().empty()) { - IntegerVector vector; - WeightedSetParser::parse(prop.get(), vector); - if (!vector.getVector().empty()) { + const fef::Anything * queryVectorArg = env.getObjectStore().get(_queryVectorKey); + if (queryVectorArg != nullptr) { + // Vector is not copied as it is safe in ObjectStore + return selectExecutor<const IntegerVector &>(attribute, *dynamic_cast<const IntegerVector *>(queryVectorArg), stash); + } else { + std::unique_ptr<IntegerVector> vector = createQueryVector(env.getProperties().lookup(_queryVector)); + if (vector) { + // Vector is moved and handed over to the executor. 
return selectExecutor(attribute, std::move(vector), stash); } } + return stash.create<SingleZeroValueExecutor>(); } diff --git a/searchlib/src/vespa/searchlib/features/internal_max_reduce_prod_join_feature.h b/searchlib/src/vespa/searchlib/features/internal_max_reduce_prod_join_feature.h index 65dd0ac2082..5314687c98d 100644 --- a/searchlib/src/vespa/searchlib/features/internal_max_reduce_prod_join_feature.h +++ b/searchlib/src/vespa/searchlib/features/internal_max_reduce_prod_join_feature.h @@ -25,15 +25,18 @@ namespace search::features { class InternalMaxReduceProdJoinBlueprint : public fef::Blueprint { private: vespalib::string _attribute; - vespalib::string _query; + vespalib::string _queryVector; + vespalib::string _attrKey; + vespalib::string _queryVectorKey; public: InternalMaxReduceProdJoinBlueprint(); - ~InternalMaxReduceProdJoinBlueprint(); + ~InternalMaxReduceProdJoinBlueprint() override; fef::ParameterDescriptions getDescriptions() const override; fef::Blueprint::UP createInstance() const override; bool setup(const fef::IIndexEnvironment &env, const fef::ParameterList &params) override; + void prepareSharedState(const fef::IQueryEnvironment & queryEnv, fef::IObjectStore & objectStore) const override; fef::FeatureExecutor &createExecutor(const fef::IQueryEnvironment &env, vespalib::Stash &stash) const override; void visitDumpFeatures(const fef::IIndexEnvironment &env, fef::IDumpFeatureVisitor &visitor) const override; diff --git a/searchlib/src/vespa/searchlib/fef/blueprint.cpp b/searchlib/src/vespa/searchlib/fef/blueprint.cpp index d7c5cb665ed..7073d0c0ccd 100644 --- a/searchlib/src/vespa/searchlib/fef/blueprint.cpp +++ b/searchlib/src/vespa/searchlib/fef/blueprint.cpp @@ -10,8 +10,7 @@ LOG_SETUP(".fef.blueprint"); namespace search::fef { const FeatureType & -Blueprint::defineInput(vespalib::stringref inName, - AcceptInput accept) +Blueprint::defineInput(vespalib::stringref inName, AcceptInput accept) { assert(_dependency_handler != nullptr); return 
_dependency_handler->resolve_input(inName, accept); @@ -60,8 +59,7 @@ Blueprint::setup(const IIndexEnvironment &indexEnv, } bool -Blueprint::setup(const IIndexEnvironment &indexEnv, - const ParameterList &params) +Blueprint::setup(const IIndexEnvironment &indexEnv, const ParameterList &params) { (void) indexEnv; (void) params; LOG(error, "The setup function using a typed parameter list does not have a default implementation. " @@ -69,6 +67,11 @@ Blueprint::setup(const IIndexEnvironment &indexEnv, return false; } +void +Blueprint::prepareSharedState(const IQueryEnvironment & queryEnv, IObjectStore & objectStore) const { + (void) queryEnv; (void) objectStore; +} + const attribute::IAttributeVector * Blueprint::lookupAndStoreAttribute(const vespalib::string & key, vespalib::stringref attrName, const IQueryEnvironment & env, IObjectStore & store) diff --git a/searchlib/src/vespa/searchlib/fef/blueprint.h b/searchlib/src/vespa/searchlib/fef/blueprint.h index dd622ea36d9..5d7eb6eb2c0 100644 --- a/searchlib/src/vespa/searchlib/fef/blueprint.h +++ b/searchlib/src/vespa/searchlib/fef/blueprint.h @@ -45,7 +45,7 @@ public: struct DependencyHandler { virtual const FeatureType &resolve_input(const vespalib::string &feature_name, AcceptInput accept_type) = 0; virtual void define_output(const vespalib::string &output_name, const FeatureType &type) = 0; - virtual ~DependencyHandler() {} + virtual ~DependencyHandler() = default; }; /** @@ -62,14 +62,19 @@ public: typedef std::vector<string> StringVector; private: - Blueprint(const Blueprint &); - Blueprint &operator=(const Blueprint &); - string _baseName; string _name; DependencyHandler *_dependency_handler; protected: + /** + * Create an empty blueprint. Blueprints in their initial state + * are used as prototypes to create other instances of the same + * class. The @ref setup method is used to tailor a blueprint + * object for a specific set of parameters. 
+ **/ + Blueprint(vespalib::stringref baseName); + using IAttributeVector = attribute::IAttributeVector; /** * Define an input feature for this blueprint. This method should @@ -115,13 +120,8 @@ protected: lookupAttribute(const vespalib::string & key, vespalib::stringref attrName, const IQueryEnvironment & env); static vespalib::string createAttributeKey(vespalib::stringref attrName); public: - /** - * Create an empty blueprint. Blueprints in their initial state - * are used as prototypes to create other instances of the same - * class. The @ref setup method is used to tailor a blueprint - * object for a specific set of parameters. - **/ - Blueprint(vespalib::stringref baseName); + Blueprint(const Blueprint &) = delete; + Blueprint &operator=(const Blueprint &) = delete; /** * Obtain the base name of this blueprint. This method will @@ -239,10 +239,7 @@ public: * This is called before creating multiple execution threads. * @param queryEnv The query environment. */ - virtual void prepareSharedState(const IQueryEnvironment & queryEnv, IObjectStore & objectStore) const { - (void) queryEnv; - (void) objectStore; - } + virtual void prepareSharedState(const IQueryEnvironment & queryEnv, IObjectStore & objectStore) const; /** * Create a feature executor based on this blueprint. Failure to |