// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. #pragma once #include "utils.h" #include #include #include #include #include #include namespace search::fef { class Property; } namespace vespalib { class nbostream; } namespace search::features { namespace dotproduct { struct ConstCharComparator { bool operator()(const char * lhs, const char * rhs) const { return strcmp(lhs, rhs) == 0; } }; template struct Converter { Dst convert(const Src & value) const { return value; } }; template <> struct Converter { const char * convert(const vespalib::string & value) const { return value.c_str(); } }; template struct ArrayParam : public fef::Anything { ArrayParam(const fef::Property & prop); ArrayParam(vespalib::nbostream & stream); ArrayParam(std::vector v) : values(std::move(v)) {} ~ArrayParam() override; std::vector values; std::vector indexes; }; namespace wset { template > class VectorBase : public fef::Anything { public: using Element = std::pair; // using Vector = std::vector; using HashMap = vespalib::hash_map, HashMapComparator, vespalib::hashtable_base::and_modulator>; protected: VectorBase(); Vector _vector; HashMap _dimMap; // dimension -> component public: VectorBase(VectorBase && rhs) = default; VectorBase & operator = (VectorBase && rhs) = default; ~VectorBase(); const Vector & getVector() const { return _vector; } VectorBase & syncMap(); const HashMap & getDimMap() const { return _dimMap; } bool empty() const { return _vector.empty(); } }; template using NumericVectorBaseT = VectorBase; /** * Represents a vector where the dimensions are integers. 
// NOTE(review): the line above is an entire header section collapsed onto one physical line. Because it
// starts with `//`, everything after the license sentence (#pragma once, the includes, the forward
// declarations, ConstCharComparator, Converter, ArrayParam, wset::VectorBase and NumericVectorBaseT) is
// comment text as far as the compiler is concerned, including the `namespace ... {` openers.
// NOTE(review): the text inside angle brackets is missing throughout (bare `#include` directives,
// `template struct Converter`, `std::vector values`, `template > class VectorBase`). Restore this section
// from the upstream header instead of reconstructing the include targets and template lists by hand.
**/
// NOTE(review): the template parameter list and the base-class template arguments are missing in this copy.
template class IntegerVectorT : public NumericVectorBaseT {
public:
    // Passes both label and value through util::strToNum and appends the resulting
    // pair to the inherited _vector. Only _vector is modified here; _dimMap is not touched.
    void insert(vespalib::stringref label, vespalib::stringref value) {
        this->_vector.emplace_back(util::strToNum(label), util::strToNum(value));
    }
};

// Explicit instantiation declarations.
// NOTE(review): the template argument lists are missing in this copy, which is why the
// first two lines read as duplicates.
extern template class VectorBase;
extern template class VectorBase;
extern template class IntegerVectorT;

using IntegerVector = IntegerVectorT;
using StringVectorBase = VectorBase;

/**
 * Represents a vector where the dimensions are string values.
 **/
class StringVector : public StringVectorBase {
public:
    StringVector();
    // Move construction and move assignment are explicitly defaulted.
    StringVector(StringVector &&) = default;
    StringVector & operator = (StringVector &&) = default;
    ~StringVector();
    // Appends (label, util::strToNum(value)) to _vector: the label is stored as given,
    // only the value is converted. _dimMap is not touched here.
    void insert(vespalib::stringref label, vespalib::stringref value) {
        _vector.emplace_back(label, util::strToNum(value));
    }
};

/**
 * Represents a vector where the dimensions are enum values for strings.
 **/
class EnumVector : public VectorBase {
private:
    // Attribute used to resolve labels into enum handles. It is dereferenced in insert()
    // without a null check, so callers must pass a valid pointer to the constructor.
    const attribute::IAttributeVector * _attribute;
public:
    // Stores the attribute pointer; nothing else is initialised here.
    EnumVector(const attribute::IAttributeVector * attribute) : _attribute(attribute) {}
    // Resolves label to an enum handle via _attribute->findEnum(). The pair
    // (handle, util::strToNum(value)) is appended to _vector only when the lookup succeeds;
    // when findEnum() returns false the entry is dropped without any error being reported.
    void insert(vespalib::stringref label, vespalib::stringref value) {
        attribute::EnumHandle e;
        // NOTE(review): label.data() is passed without a length, so this presumably relies on
        // the stringref being NUL-terminated - confirm against findEnum()'s contract.
        if (_attribute->findEnum(label.data(), e)) {
            _vector.emplace_back(e, util::strToNum(value));
        }
    }
};

/**
 * Common base for handling execution for all wset dot product executors.
 * Only cares about the underlying value type, not the concrete type of the
 * attribute vector itself.
*/
// NOTE(review): the template parameter list and the template arguments of the aliases below
// are missing in this copy (note the unbalanced `>` in the std::conditional_t alias).
template class DotProductExecutorBase : public fef::FeatureExecutor {
public:
    using AT = multivalue::WeightedValue;
    // Selects between StringVectorBase and NumericVectorBaseT; the condition itself is not visible here.
    using V = std::conditional_t,StringVectorBase,NumericVectorBaseT>;
private:
    // Held by reference, not copied: the referenced query vector has to outlive this executor.
    const V & _queryVector;
    // NOTE(review): presumably the cached end() iterator of the query vector's dimension map -
    // confirm in the implementation file.
    const typename V::HashMap::const_iterator _end;
    // Pure virtual hook implemented by subclasses; takes the document id.
    virtual vespalib::ConstArrayRef getAttributeValues(uint32_t docid) = 0;
public:
    DotProductExecutorBase(const V & queryVector);
    ~DotProductExecutorBase() override;
    void execute(uint32_t docId) override;
};

// Final subclass of the wset DotProductExecutorBase that supplies getAttributeValues()
// and carries a pointer to a weighted set read view.
template class DotProductByWeightedSetReadViewExecutor final : public DotProductExecutorBase {
public:
    using WeightedSetReadView = attribute::IWeightedSetReadView;
    using AT = typename DotProductExecutorBase::AT;
    using V = typename DotProductExecutorBase::V;
protected:
    // The class is final, so `protected` grants nothing beyond `private` here.
    // Raw pointer; ownership is not visible in this header.
    const WeightedSetReadView * _weighted_set_read_view;
private:
    // NOTE(review): presumably keeps the query vector alive when the unique_ptr constructor
    // overload is used - confirm in the implementation file.
    std::unique_ptr _backing;
    vespalib::ConstArrayRef getAttributeValues(uint32_t docid) override;
public:
    // Two overloads: one takes the query vector by const reference, the other by std::unique_ptr.
    DotProductByWeightedSetReadViewExecutor(const WeightedSetReadView* weighted_set_read_view, const V & queryVector);
    DotProductByWeightedSetReadViewExecutor(const WeightedSetReadView * weighted_set_read_view, std::unique_ptr queryVector);
    ~DotProductByWeightedSetReadViewExecutor();
};

} // closes namespace wset

namespace array {

/**
 * Common base for handling execution for all array dot product executors.
 * Only cares about the underlying value type, not the concrete type of the
 * attribute vector itself.
 */
template class DotProductExecutorBase : public fef::FeatureExecutor {
public:
    using V = std::vector;
private:
    // Reference to a vespalib::hwaccelrated::IAccelrated implementation; where it is obtained
    // is not visible in this header.
    const vespalib::hwaccelrated::IAccelrated & _multiplier;
    // Stored by value, unlike the wset base above, which keeps a reference.
    V _queryVector;
    // Pure virtual hook implemented by subclasses; takes the document id.
    virtual vespalib::ConstArrayRef getAttributeValues(uint32_t docid) = 0;
public:
    DotProductExecutorBase(const V & queryVector);
    ~DotProductExecutorBase() override;
    // Declared final: subclasses customise behaviour through getAttributeValues() only.
    void execute(uint32_t docId) final override;
};

/**
 * Implements the executor for the dotproduct feature using array read view.
*/
// NOTE(review): the template parameter lists and template arguments of the classes in this
// section are missing in this copy.
template class DotProductByArrayReadViewExecutor : public DotProductExecutorBase {
public:
    using V = typename DotProductExecutorBase::V;
    using ArrayReadView = attribute::IArrayReadView;
protected:
    // Raw pointer to the array read view; ownership is not visible in this header.
    const ArrayReadView* _array_read_view;
private:
    vespalib::ConstArrayRef getAttributeValues(uint32_t docid) override;
public:
    DotProductByArrayReadViewExecutor(const ArrayReadView* array_read_view, const V & queryVector);
    ~DotProductByArrayReadViewExecutor();
};

/**
 * Implements the executor for the dotproduct feature.
 */
// NOTE(review): no getAttributeValues() override is declared in this class, while the array
// DotProductExecutorBase declares it pure virtual - confirm whether this class is still
// instantiated anywhere.
template class DotProductExecutor : public DotProductExecutorBase {
public:
    using V = typename DotProductExecutorBase::V;
protected:
    // Raw pointer to the attribute; ownership is not visible in this header.
    const A * _attribute;
public:
    DotProductExecutor(const A * attribute, const V & queryVector);
    ~DotProductExecutor();
};

// Intermediate base that adds a list of query indexes and a scratch buffer on top of
// DotProductExecutorBase. It declares no getAttributeValues() override itself.
template class SparseDotProductExecutorBase : public DotProductExecutorBase {
public:
    using IV = std::vector;
    using V = typename DotProductExecutorBase::V;
    SparseDotProductExecutorBase(const V & queryVector, const IV & queryIndexes);
    ~SparseDotProductExecutorBase();
protected:
    // Stored by value; the constructor receives it by const reference.
    IV _queryIndexes;
    // NOTE(review): presumably a reusable buffer for gathered attribute values - confirm in the
    // implementation file.
    std::vector _scratch;
};

// Subclass of SparseDotProductExecutorBase that supplies getAttributeValues() and carries a
// pointer to an array read view.
template class SparseDotProductByArrayReadViewExecutor : public SparseDotProductExecutorBase {
public:
    // NOTE(review): these using-declarations sit under `public:`, which re-exposes the protected
    // base members as public members of this class; likely only meant to avoid `this->`
    // qualification for names from the dependent base.
    using SparseDotProductExecutorBase::_queryIndexes;
    using SparseDotProductExecutorBase::_scratch;
    using IV = std::vector;
    using V = typename SparseDotProductExecutorBase::V;
    using ArrayReadView = attribute::IArrayReadView;
    SparseDotProductByArrayReadViewExecutor(const ArrayReadView* array_read_view, const V & queryVector, const IV & queryIndexes);
    ~SparseDotProductByArrayReadViewExecutor();
private:
    vespalib::ConstArrayRef getAttributeValues(uint32_t docid) override;
    // Raw pointer to the array read view; ownership is not visible in this header.
    const ArrayReadView* _array_read_view;
};

} // closes namespace array
} // closes namespace dotproduct

/**
 * Implements the blueprint for the dotproduct feature.
 */
class DotProductBlueprint : public fef::Blueprint {
private:
    using IAttributeVector = attribute::IAttributeVector;
    // String state of the blueprint. Where each field is assigned is not visible in this header.
    vespalib::string _defaultAttribute;
    vespalib::string _attributeOverride;
    vespalib::string _queryVector;
    vespalib::string _attrKey;
    vespalib::string _queryVectorKey;

    // Returns a reference to a string, so the result must refer to storage that outlives the call.
    // NOTE(review): presumably chooses between _attributeOverride and _defaultAttribute - confirm
    // in the implementation file.
    const vespalib::string & getAttribute(const fef::IQueryEnvironment & env) const;
    // Takes an attribute pointer and returns an attribute pointer; the conditions under which a
    // different attribute is returned are not visible in this header.
    const IAttributeVector * upgradeIfNecessary(const IAttributeVector * attribute, const fef::IQueryEnvironment & env) const;

public:
    DotProductBlueprint();
    ~DotProductBlueprint() override;
    // Overrides of the fef::Blueprint interface.
    void visitDumpFeatures(const fef::IIndexEnvironment & env, fef::IDumpFeatureVisitor & visitor) const override;
    fef::Blueprint::UP createInstance() const override;
    fef::ParameterDescriptions getDescriptions() const override;
    // Non-const: the only override here that may modify the blueprint's members.
    bool setup(const fef::IIndexEnvironment & env, const fef::ParameterList & params) override;
    void prepareSharedState(const fef::IQueryEnvironment & queryEnv, fef::IObjectStore & objectStore) const override;
    // Returns the executor by reference rather than by owning pointer.
    // NOTE(review): presumably the executor is allocated in the supplied stash - confirm in the
    // implementation file.
    fef::FeatureExecutor &createExecutor(const fef::IQueryEnvironment &env, vespalib::Stash &stash) const override;
};

} // closes namespace search::features