diff options
| field | value | date |
|---|---|---|
| author | Henning Baldersheim <balder@yahoo-inc.com> | 2019-05-07 18:11:57 +0200 |
| committer | GitHub <noreply@github.com> | 2019-05-07 18:11:57 +0200 |
| commit | 6903dc2689554a60702d6fb02f9558c265fe76a2 (patch) | |
| tree | a41ec4b21ab2250e93ed4021599b4ab7f337a681 /searchlib | |
| parent | 1177ebc58e982149f4513a3ed007aab39538ad85 (diff) | |
| parent | c4488c26da55c9db6198bae10b1bad90821f6a1b (diff) | |
Merge pull request #9297 from vespa-engine/balder/use-raw-values
Also use a specialized implementation for wset. For now only non enum…
Diffstat (limited to 'searchlib')
4 files changed, 183 insertions, 71 deletions
diff --git a/searchlib/src/tests/features/prod_features.cpp b/searchlib/src/tests/features/prod_features.cpp index f08cb0855af..1d9433c739f 100644 --- a/searchlib/src/tests/features/prod_features.cpp +++ b/searchlib/src/tests/features/prod_features.cpp @@ -1217,8 +1217,8 @@ Test::testDotProduct() vespalib::Stash stash; FeatureExecutor &exc = bp.createExecutor(ft.getQueryEnv(), stash); // check that we have the optimized enum version - dotproduct::wset::DotProductExecutor<dotproduct::wset::EnumVector, WeightedEnumContent> * myExc = - dynamic_cast<dotproduct::wset::DotProductExecutor<dotproduct::wset::EnumVector, WeightedEnumContent> *>(&exc); + dotproduct::wset::DotProductExecutorByCopy<dotproduct::wset::EnumVector, WeightedEnumContent> * myExc = + dynamic_cast<dotproduct::wset::DotProductExecutorByCopy<dotproduct::wset::EnumVector, WeightedEnumContent> *>(&exc); EXPECT_TRUE(myExc != nullptr); EXPECT_EQUAL(1u, deps.output.size()); } diff --git a/searchlib/src/vespa/searchlib/features/dotproductfeature.cpp b/searchlib/src/vespa/searchlib/features/dotproductfeature.cpp index 1dcd3e35580..55a550837e1 100644 --- a/searchlib/src/vespa/searchlib/features/dotproductfeature.cpp +++ b/searchlib/src/vespa/searchlib/features/dotproductfeature.cpp @@ -30,27 +30,38 @@ VectorBase<DimensionVType, DimensionHType, ComponentType, HashMapComparator>::Ve template <typename DimensionVType, typename DimensionHType, typename ComponentType, typename HashMapComparator> VectorBase<DimensionVType, DimensionHType, ComponentType, HashMapComparator>::~VectorBase() = default; -template <typename V> -V copyAndSync(const V & v) { - V tmp(v); - tmp.syncMap(); - return tmp; +template <typename DimensionVType, typename DimensionHType, typename ComponentType, typename HashMapComparator> +VectorBase<DimensionVType, DimensionHType, ComponentType, HashMapComparator> & +VectorBase<DimensionVType, DimensionHType, ComponentType, HashMapComparator>::syncMap() { + Converter<DimensionVType, DimensionHType> 
conv; + _dimMap.clear(); + _dimMap.resize(_vector.size()*2); + for (size_t i = 0; i < _vector.size(); ++i) { + _dimMap.insert(std::make_pair(conv.convert(_vector[i].first), _vector[i].second)); + } + return *this; } +template VectorBase<int64_t, int64_t, double> & VectorBase<int64_t, int64_t, double>::syncMap(); + + template <typename Vector, typename Buffer> -DotProductExecutor<Vector, Buffer>::DotProductExecutor(const IAttributeVector * attribute, const Vector & queryVector) : +DotProductExecutorByCopy<Vector, Buffer>::DotProductExecutorByCopy(const IAttributeVector * attribute, Vector queryVector) : FeatureExecutor(), _attribute(attribute), - _queryVector(copyAndSync(queryVector)), - _end(_queryVector.getDimMap().end()), + _queryVector(std::move(queryVector)), + _end(_queryVector.syncMap().getDimMap().end()), _buffer() { _buffer.allocate(_attribute->getMaxValueCount()); } template <typename Vector, typename Buffer> +DotProductExecutorByCopy<Vector, Buffer>::~DotProductExecutorByCopy() = default; + +template <typename Vector, typename Buffer> void -DotProductExecutor<Vector, Buffer>::execute(uint32_t docId) +DotProductExecutorByCopy<Vector, Buffer>::execute(uint32_t docId) { feature_t val = 0; if (!_queryVector.getDimMap().empty()) { @@ -69,6 +80,50 @@ StringVector::StringVector() = default; StringVector::~StringVector() = default; +template <typename BaseType> +DotProductExecutorBase<BaseType>::DotProductExecutorBase(V queryVector) + : FeatureExecutor(), + _queryVector(std::move(queryVector)), + _end(_queryVector.syncMap().getDimMap().end()) +{ +} + +template <typename BaseType> +DotProductExecutorBase<BaseType>::~DotProductExecutorBase() = default; + +template <typename BaseType> +void DotProductExecutorBase<BaseType>::execute(uint32_t docId) { + feature_t val = 0; + if (!_queryVector.getDimMap().empty()) { + const AT * values(nullptr); + uint32_t sz = getAttributeValues(docId, values); + for (size_t i = 0; i < sz; ++i) { + typename V::HashMap::const_iterator 
itr = _queryVector.getDimMap().find(values[i].value()); + if (itr != _end) { + val += values[i].weight() * itr->second; + } + } + } + outputs().set_number(0, val); +} + +template <typename A> +DotProductExecutor<A>::DotProductExecutor(const A * attribute, V queryVector) : + DotProductExecutorBase<typename A::BaseType>(std::move(queryVector)), + _attribute(attribute) +{ +} + +template <typename A> +DotProductExecutor<A>::~DotProductExecutor() = default; + +template <typename A> +size_t +DotProductExecutor<A>::getAttributeValues(uint32_t docId, const AT * & values) +{ + return _attribute->getRawValues(docId, values); +} + } namespace dotproduct::array { @@ -507,9 +562,8 @@ createFromObject(const IAttributeVector * attribute, const fef::Anything & objec return stash.create<SingleZeroValueExecutor>(); } -FeatureExecutor * createTypedArrayExecutor(const IAttributeVector * attribute, - const Property & prop, - vespalib::Stash & stash) { +FeatureExecutor * +createTypedArrayExecutor(const IAttributeVector * attribute, const Property & prop, vespalib::Stash & stash) { if (!attribute->isImported()) { switch (attribute->getBasicType()) { case BasicType::INT32: @@ -542,29 +596,55 @@ FeatureExecutor * createTypedArrayExecutor(const IAttributeVector * attribute, return nullptr; } -FeatureExecutor * createTypedWsetExecutor(const IAttributeVector * attribute, - const Property & prop, - vespalib::Stash & stash) { - if (attribute->isStringType()) { - if (attribute->hasEnum()) { - dotproduct::wset::EnumVector vector(attribute); - WeightedSetParser::parse(prop.get(), vector); - return &stash.create<dotproduct::wset::DotProductExecutor<dotproduct::wset::EnumVector, WeightedEnumContent>>(attribute, vector); - } else { - dotproduct::wset::StringVector vector; - WeightedSetParser::parse(prop.get(), vector); - return &stash.create<dotproduct::wset::DotProductExecutor<dotproduct::wset::StringVector, WeightedConstCharContent>>(attribute, vector); +template <typename A, typename V> 
+FeatureExecutor * +createForDirectWSetImpl(const IAttributeVector * attribute, V vector, vespalib::Stash & stash) +{ + using namespace dotproduct::wset; + using T = typename A::BaseType; + const A * iattr = dynamic_cast<const A *>(attribute); + if (!attribute->isImported() && (iattr != nullptr) && supportsGetRawValues(*iattr)) { + using VT = multivalue::WeightedValue<T>; + using ExactA = MultiValueNumericAttribute<A, VT>; + + const ExactA * exactA = dynamic_cast<const ExactA *>(iattr); + if (exactA != nullptr) { + return &stash.create<DotProductExecutor<ExactA>>(exactA, std::move(vector)); } - } else if (attribute->isIntegerType()) { - if (attribute->hasEnum()) { - dotproduct::wset::EnumVector vector(attribute); - WeightedSetParser::parse(prop.get(), vector); - return &stash.create<dotproduct::wset::DotProductExecutor<dotproduct::wset::EnumVector, WeightedEnumContent>>(attribute, vector); + return &stash.create<DotProductExecutor<A>>(iattr, std::move(vector)); + } + return &stash.create<DotProductExecutorByCopy<IntegerVectorT<T>, WeightedIntegerContent>>(attribute, std::move(vector)); +} - } else { - dotproduct::wset::IntegerVector vector; +template <typename T> +FeatureExecutor * +createForDirectIntegerWSet(const IAttributeVector * attribute, const Property & prop, vespalib::Stash & stash) +{ + using namespace dotproduct::wset; + IntegerVectorT<T> vector; + WeightedSetParser::parse(prop.get(), vector); + return createForDirectWSetImpl<IntegerAttributeTemplate<T>>(attribute, std::move(vector), stash); +} + + +FeatureExecutor * +createTypedWsetExecutor(const IAttributeVector * attribute, const Property & prop, vespalib::Stash & stash) { + using namespace dotproduct::wset; + if (attribute->hasEnum()) { + EnumVector vector(attribute); + WeightedSetParser::parse(prop.get(), vector); + return &stash.create<DotProductExecutorByCopy<EnumVector, WeightedEnumContent>>(attribute, std::move(vector)); + } else { + if (attribute->isStringType()) { + StringVector vector; 
WeightedSetParser::parse(prop.get(), vector); - return &stash.create<dotproduct::wset::DotProductExecutor<dotproduct::wset::IntegerVector, WeightedIntegerContent>>(attribute, vector); + return &stash.create<DotProductExecutorByCopy<StringVector, WeightedConstCharContent>>(attribute, std::move(vector)); + } else if (attribute->isIntegerType()) { + if (attribute->getBasicType() == BasicType::INT32) { + return createForDirectIntegerWSet<int32_t>(attribute, prop, stash); + } else if (attribute->getBasicType() == BasicType::INT64) { + return createForDirectIntegerWSet<int64_t>(attribute, prop, stash); + } } } return nullptr; diff --git a/searchlib/src/vespa/searchlib/features/dotproductfeature.h b/searchlib/src/vespa/searchlib/features/dotproductfeature.h index 089066cb5f6..38dcdd54929 100644 --- a/searchlib/src/vespa/searchlib/features/dotproductfeature.h +++ b/searchlib/src/vespa/searchlib/features/dotproductfeature.h @@ -55,71 +55,106 @@ protected: Vector _vector; HashMap _dimMap; // dimension -> component public: + VectorBase(VectorBase && rhs) = default; + VectorBase & operator = (VectorBase && rhs) = default; ~VectorBase(); const Vector & getVector() const { return _vector; } - void syncMap() { - Converter<DimensionVType, DimensionHType> conv; - _dimMap.clear(); - _dimMap.resize(_vector.size()*2); - for (size_t i = 0; i < _vector.size(); ++i) { - _dimMap.insert(std::make_pair(conv.convert(_vector[i].first), _vector[i].second)); - } - } + VectorBase & syncMap(); const HashMap & getDimMap() const { return _dimMap; } }; /** * Represents a vector where the dimensions are integers. 
**/ -class IntegerVector : public VectorBase<int64_t, int64_t, feature_t> { +template<typename T> +class IntegerVectorT : public VectorBase<T, T, feature_t> { public: void insert(vespalib::stringref label, vespalib::stringref value) { - _vector.push_back(std::make_pair(util::strToNum<int64_t>(label), util::strToNum<feature_t>(value))); + this->_vector.emplace_back(util::strToNum<T>(label), util::strToNum<feature_t>(value)); } }; +using IntegerVector = IntegerVectorT<int64_t>; + /** * Represents a vector where the dimensions are string values. **/ class StringVector : public VectorBase<vespalib::string, const char *, feature_t, ConstCharComparator> { public: StringVector(); + StringVector(StringVector &&) = default; + StringVector & operator = (StringVector &&) = default; ~StringVector(); void insert(vespalib::stringref label, vespalib::stringref value) { - _vector.push_back(std::make_pair(label, util::strToNum<feature_t>(value))); + _vector.emplace_back(label, util::strToNum<feature_t>(value)); } }; /** * Represents a vector where the dimensions are enum values for strings. **/ -class EnumVector : public VectorBase<search::attribute::EnumHandle, search::attribute::EnumHandle, feature_t> { +class EnumVector : public VectorBase<attribute::EnumHandle, attribute::EnumHandle, feature_t> { private: const attribute::IAttributeVector * _attribute; public: EnumVector(const attribute::IAttributeVector * attribute) : _attribute(attribute) {} void insert(vespalib::stringref label, vespalib::stringref value) { - search::attribute::EnumHandle e; + attribute::EnumHandle e; if (_attribute->findEnum(label.data(), e)) { - _vector.push_back(std::make_pair(e, util::strToNum<feature_t>(value))); + _vector.emplace_back(e, util::strToNum<feature_t>(value)); } } }; +/** + * Common base for handling execution for all wset dot product executors. + * Only cares about the underlying value type, not the concrete type of the + * attribute vector itself. 
+ */ +template <typename BaseType> +class DotProductExecutorBase : public fef::FeatureExecutor { +public: + using AT = multivalue::WeightedValue<BaseType>; + using V = VectorBase<BaseType, BaseType, feature_t>; +private: + V _queryVector; + const typename V::HashMap::const_iterator _end; + virtual size_t getAttributeValues(uint32_t docid, const AT * & count) = 0; +public: + DotProductExecutorBase(V queryVector); + ~DotProductExecutorBase() override; + void execute(uint32_t docId) override; +}; + +template <typename A> +class DotProductExecutor final : public DotProductExecutorBase<typename A::BaseType> { +public: + using AT = typename DotProductExecutorBase<typename A::BaseType>::AT; + using V = typename DotProductExecutorBase<typename A::BaseType>::V; +protected: + const A * _attribute; +private: + size_t getAttributeValues(uint32_t docid, const AT * & count) override; +public: + DotProductExecutor(const A * attribute, V queryVector); + ~DotProductExecutor(); +}; + /** * Implements the executor for the dotproduct feature. 
*/ template <typename Vector, typename Buffer> -class DotProductExecutor : public fef::FeatureExecutor { +class DotProductExecutorByCopy final : public fef::FeatureExecutor { private: const attribute::IAttributeVector * _attribute; - const Vector _queryVector; + Vector _queryVector; const typename Vector::HashMap::const_iterator _end; Buffer _buffer; public: - DotProductExecutor(const attribute::IAttributeVector * attribute, const Vector & queryVector); + DotProductExecutorByCopy(const attribute::IAttributeVector * attribute, Vector queryVector); + ~DotProductExecutorByCopy() override; void execute(uint32_t docId) override; }; @@ -143,7 +178,7 @@ private: virtual size_t getAttributeValues(uint32_t docid, const AT * & count) = 0; public: DotProductExecutorBase(const V & queryVector); - ~DotProductExecutorBase(); + ~DotProductExecutorBase() override; void execute(uint32_t docId) final override; }; diff --git a/searchlib/src/vespa/searchlib/features/internal_max_reduce_prod_join_feature.cpp b/searchlib/src/vespa/searchlib/features/internal_max_reduce_prod_join_feature.cpp index 9d383e5a03a..eb73cef1f4c 100644 --- a/searchlib/src/vespa/searchlib/features/internal_max_reduce_prod_join_feature.cpp +++ b/searchlib/src/vespa/searchlib/features/internal_max_reduce_prod_join_feature.cpp @@ -32,15 +32,15 @@ protected: IntegerVector _queryVector; public: - RawExecutor(const IAttributeVector *attribute, const IntegerVector &queryVector); + RawExecutor(const IAttributeVector *attribute, IntegerVector queryVector); void execute(uint32_t docId) override; }; template <typename BaseType> -RawExecutor<BaseType>::RawExecutor(const IAttributeVector *attribute, const IntegerVector &queryVector) : +RawExecutor<BaseType>::RawExecutor(const IAttributeVector *attribute, IntegerVector queryVector) : FeatureExecutor(), _attribute(attribute), - _queryVector(queryVector) + _queryVector(std::move(queryVector)) { _queryVector.syncMap(); } @@ -81,13 +81,13 @@ private: WeightedIntegerContent 
_buffer; public: - BufferedExecutor(const IAttributeVector *attribute, const IntegerVector &queryVector); + BufferedExecutor(const IAttributeVector *attribute, IntegerVector queryVector); void execute(uint32_t docId) override; }; template <typename BaseType> -BufferedExecutor<BaseType>::BufferedExecutor(const IAttributeVector *attribute, const IntegerVector &queryVector) : - RawExecutor<BaseType>(attribute, queryVector), +BufferedExecutor<BaseType>::BufferedExecutor(const IAttributeVector *attribute, IntegerVector queryVector) : + RawExecutor<BaseType>(attribute, std::move(queryVector)), _buffer() { } @@ -109,20 +109,17 @@ InternalMaxReduceProdJoinBlueprint::InternalMaxReduceProdJoinBlueprint() : { } -InternalMaxReduceProdJoinBlueprint::~InternalMaxReduceProdJoinBlueprint() -{ -} +InternalMaxReduceProdJoinBlueprint::~InternalMaxReduceProdJoinBlueprint() = default; void -InternalMaxReduceProdJoinBlueprint::visitDumpFeatures(const IIndexEnvironment &, - IDumpFeatureVisitor &) const +InternalMaxReduceProdJoinBlueprint::visitDumpFeatures(const IIndexEnvironment &, IDumpFeatureVisitor &) const { } Blueprint::UP InternalMaxReduceProdJoinBlueprint::createInstance() const { - return Blueprint::UP(new InternalMaxReduceProdJoinBlueprint()); + return std::make_unique<InternalMaxReduceProdJoinBlueprint>(); } ParameterDescriptions @@ -155,7 +152,7 @@ bool supportsGetRawValues(const A &attr) noexcept { template <typename BaseType> FeatureExecutor & -selectTypedExecutor(const IAttributeVector *attribute, const IntegerVector &vector, vespalib::Stash &stash) +selectTypedExecutor(const IAttributeVector *attribute, IntegerVector vector, vespalib::Stash &stash) { if (!attribute->isImported()) { using A = IntegerAttributeTemplate<BaseType>; @@ -166,22 +163,22 @@ selectTypedExecutor(const IAttributeVector *attribute, const IntegerVector &vect if (supportsGetRawValues(*iattr)) { const ExactA *exactA = dynamic_cast<const ExactA *>(iattr); if (exactA != nullptr) { - return 
stash.create<RawExecutor<BaseType>>(attribute, vector); + return stash.create<RawExecutor<BaseType>>(attribute, std::move(vector)); } } } - return stash.create<BufferedExecutor<BaseType>>(attribute, vector); + return stash.create<BufferedExecutor<BaseType>>(attribute, std::move(vector)); } FeatureExecutor & -selectExecutor(const IAttributeVector *attribute, const IntegerVector &vector, vespalib::Stash &stash) +selectExecutor(const IAttributeVector *attribute, IntegerVector vector, vespalib::Stash &stash) { if (attribute->getCollectionType() == CollectionType::ARRAY) { switch (attribute->getBasicType()) { case BasicType::INT32: - return selectTypedExecutor<int32_t>(attribute, vector, stash); + return selectTypedExecutor<int32_t>(attribute, std::move(vector), stash); case BasicType::INT64: - return selectTypedExecutor<int64_t>(attribute, vector, stash); + return selectTypedExecutor<int64_t>(attribute, std::move(vector), stash); default: break; } @@ -207,7 +204,7 @@ InternalMaxReduceProdJoinBlueprint::createExecutor(const IQueryEnvironment &env, IntegerVector vector; WeightedSetParser::parse(prop.get(), vector); if (!vector.getVector().empty()) { - return selectExecutor(attribute, vector, stash); + return selectExecutor(attribute, std::move(vector), stash); } } return stash.create<SingleZeroValueExecutor>(); |