From dfbaedb2ac336035ddc5dc259a6af5f2567f061f Mon Sep 17 00:00:00 2001 From: Henning Baldersheim Date: Tue, 7 May 2019 15:58:11 +0000 Subject: Also make a faster DotproductExecutor when using enumerated values. --- searchlib/src/tests/features/prod_features.cpp | 4 +- .../vespa/searchlib/attribute/attributevector.h | 1 + .../searchlib/attribute/multienumattribute.cpp | 6 +++ .../vespa/searchlib/attribute/multienumattribute.h | 22 +++++++- .../searchlib/attribute/multienumattribute.hpp | 24 +++++++++ .../src/vespa/searchlib/attribute/multivalue.h | 12 ++--- .../vespa/searchlib/features/dotproductfeature.cpp | 61 +++++++++++++++++++++- .../src/vespa/searchlib/fef/featureexecutor.cpp | 16 +++--- .../src/vespa/searchlib/fef/featureexecutor.h | 7 +++ 9 files changed, 133 insertions(+), 20 deletions(-) (limited to 'searchlib') diff --git a/searchlib/src/tests/features/prod_features.cpp b/searchlib/src/tests/features/prod_features.cpp index 1d9433c739f..ce2260e1681 100644 --- a/searchlib/src/tests/features/prod_features.cpp +++ b/searchlib/src/tests/features/prod_features.cpp @@ -1217,9 +1217,7 @@ Test::testDotProduct() vespalib::Stash stash; FeatureExecutor &exc = bp.createExecutor(ft.getQueryEnv(), stash); // check that we have the optimized enum version - dotproduct::wset::DotProductExecutorByCopy * myExc = - dynamic_cast *>(&exc); - EXPECT_TRUE(myExc != nullptr); + EXPECT_EQUAL("search::features::dotproduct::wset::(anonymous namespace)::DotProductExecutorByEnum", exc.getClassName()); EXPECT_EQUAL(1u, deps.output.size()); } } diff --git a/searchlib/src/vespa/searchlib/attribute/attributevector.h b/searchlib/src/vespa/searchlib/attribute/attributevector.h index af308044292..58779a17e00 100644 --- a/searchlib/src/vespa/searchlib/attribute/attributevector.h +++ b/searchlib/src/vespa/searchlib/attribute/attributevector.h @@ -552,6 +552,7 @@ public: virtual SearchContext::UP getSearch(QueryTermSimpleUP term, const attribute::SearchContextParams &params) const = 0; virtual const 
EnumStoreBase *getEnumStoreBase() const; virtual const attribute::MultiValueMappingBase *getMultiValueBase() const; + private: /** * This is called before adding docs will commence. diff --git a/searchlib/src/vespa/searchlib/attribute/multienumattribute.cpp b/searchlib/src/vespa/searchlib/attribute/multienumattribute.cpp index e7704cf19c1..05e83012421 100644 --- a/searchlib/src/vespa/searchlib/attribute/multienumattribute.cpp +++ b/searchlib/src/vespa/searchlib/attribute/multienumattribute.cpp @@ -2,8 +2,14 @@ #include "multienumattribute.h" #include "multienumattribute.hpp" +#include namespace search { +uint32_t +IWeightedIndexVector::getEnumHandles(uint32_t, const WeightedIndex * &) const { + throw std::runtime_error("IWeightedIndexVector::getEnumHandles() not implmented"); +} + } // namespace search diff --git a/searchlib/src/vespa/searchlib/attribute/multienumattribute.h b/searchlib/src/vespa/searchlib/attribute/multienumattribute.h index 9300d93168b..ee77baf778f 100644 --- a/searchlib/src/vespa/searchlib/attribute/multienumattribute.h +++ b/searchlib/src/vespa/searchlib/attribute/multienumattribute.h @@ -5,9 +5,25 @@ #include "multivalueattribute.h" #include "enumstorebase.h" #include "loadedenumvalue.h" +#include "multivalue.h" namespace search { +class IWeightedIndexVector { +public: + virtual ~IWeightedIndexVector() = default; + using WeightedIndex = multivalue::WeightedValue; + /** + * Provides a reference to the underlying enum/weight pairs. + * This method should only be invoked if @ref getCollectionType(docId) returns CollectionType::WEIGHTED_SET. 
+ * + * @param doc document identifier + * @param values Reference to values and weights + * @return the number of values for this document + **/ + virtual uint32_t getEnumHandles(uint32_t doc, const WeightedIndex * & values) const; +}; + class ReaderBase; /* @@ -18,7 +34,8 @@ class ReaderBase; * M: MultiValueType */ template -class MultiValueEnumAttribute : public MultiValueAttribute +class MultiValueEnumAttribute : public MultiValueAttribute, + public IWeightedIndexVector { protected: typedef typename B::UniqueSet UniqueSet; @@ -67,6 +84,8 @@ protected: public: MultiValueEnumAttribute(const vespalib::string & baseFileName, const AttributeVector::Config & cfg); + uint32_t getEnumHandles(DocId doc, const IWeightedIndexVector::WeightedIndex * & values) const override final; + void onCommit() override; void onUpdateStat() override; @@ -84,6 +103,7 @@ public: return indices[0].value().ref(); } } + uint32_t get(DocId doc, EnumHandle * e, uint32_t sz) const override { WeightedIndexArrayRef indices(this->_mvMapping.get(doc)); uint32_t valueCount = indices.size(); diff --git a/searchlib/src/vespa/searchlib/attribute/multienumattribute.hpp b/searchlib/src/vespa/searchlib/attribute/multienumattribute.hpp index f8f8e84b41e..cb31dbb4b14 100644 --- a/searchlib/src/vespa/searchlib/attribute/multienumattribute.hpp +++ b/searchlib/src/vespa/searchlib/attribute/multienumattribute.hpp @@ -132,6 +132,30 @@ MultiValueEnumAttribute(const vespalib::string &baseFileName, { } +namespace { + +template +const IWeightedIndexVector::WeightedIndex * +extract(const T *) { + throw std::runtime_error("IWeightedIndexVector::getEnumHandles not implemented"); +} + +template <> +inline const IWeightedIndexVector::WeightedIndex * +extract(const IWeightedIndexVector::WeightedIndex * values) { + return values; +} + +} + +template +uint32_t +MultiValueEnumAttribute::getEnumHandles(DocId doc, const IWeightedIndexVector::WeightedIndex * & values) const { + WeightedIndexArrayRef 
indices(this->_mvMapping.get(doc)); + values = extract(&indices[0]); + return indices.size(); +} + template void MultiValueEnumAttribute::onCommit() diff --git a/searchlib/src/vespa/searchlib/attribute/multivalue.h b/searchlib/src/vespa/searchlib/attribute/multivalue.h index 330e69a534e..c59f975e00a 100644 --- a/searchlib/src/vespa/searchlib/attribute/multivalue.h +++ b/searchlib/src/vespa/searchlib/attribute/multivalue.h @@ -2,11 +2,9 @@ #pragma once -#include +#include -namespace search { - -namespace multivalue { +namespace search::multivalue { template class Value { @@ -29,7 +27,7 @@ public: bool operator >(const Value & rhs) const { return _v > rhs._v; } static bool hasWeight() { return false; } - static const bool _hasWeight = false; + static constexpr bool _hasWeight = false; private: T _v; }; @@ -52,12 +50,10 @@ public: bool operator >(const WeightedValue & rhs) const { return _v > rhs._v; } static bool hasWeight() { return true; } - static const bool _hasWeight = true; + static constexpr bool _hasWeight = true; private: T _v; int32_t _w; }; } -} - diff --git a/searchlib/src/vespa/searchlib/features/dotproductfeature.cpp b/searchlib/src/vespa/searchlib/features/dotproductfeature.cpp index 55a550837e1..60a8fb372d7 100644 --- a/searchlib/src/vespa/searchlib/features/dotproductfeature.cpp +++ b/searchlib/src/vespa/searchlib/features/dotproductfeature.cpp @@ -9,6 +9,7 @@ #include #include #include +#include #include #include @@ -77,7 +78,6 @@ DotProductExecutorByCopy::execute(uint32_t docId) } StringVector::StringVector() = default; - StringVector::~StringVector() = default; template @@ -124,6 +124,48 @@ DotProductExecutor::getAttributeValues(uint32_t docId, const AT * & values) return _attribute->getRawValues(docId, values); } +namespace { + +class DotProductExecutorByEnum : public fef::FeatureExecutor { +public: + using V = VectorBase; +private: + const IWeightedIndexVector * _attribute; + V _queryVector; + const typename V::HashMap::const_iterator _end; 
+public: + DotProductExecutorByEnum(const IWeightedIndexVector * attribute, V queryVector); + ~DotProductExecutorByEnum() override; + void execute(uint32_t docId) override; +}; + +DotProductExecutorByEnum::DotProductExecutorByEnum(const IWeightedIndexVector * attribute, V queryVector) + : FeatureExecutor(), + _attribute(attribute), + _queryVector(std::move(queryVector)), + _end(_queryVector.syncMap().getDimMap().end()) +{ +} + +DotProductExecutorByEnum::~DotProductExecutorByEnum() = default; + +void DotProductExecutorByEnum::execute(uint32_t docId) { + feature_t val = 0; + if (!_queryVector.getDimMap().empty()) { + const IWeightedIndexVector::WeightedIndex *values(nullptr); + uint32_t sz = _attribute->getEnumHandles(docId, values); + for (size_t i = 0; i < sz; ++i) { + typename V::HashMap::const_iterator itr = _queryVector.getDimMap().find(values[i].value().ref()); + if (itr != _end) { + val += values[i].weight() * itr->second; + } + } + } + outputs().set_number(0, val); +} + +} + } namespace dotproduct::array { @@ -431,6 +473,19 @@ bool supportsGetRawValues(const A & attr) noexcept { } } +bool supportsGetEnumHandles(const IWeightedIndexVector * attr) noexcept { + if (attr == nullptr) return false; + try { + const IWeightedIndexVector::WeightedIndex * tmp = nullptr; + attr->getEnumHandles(0, tmp); // Throws if unsupported + return true; + } catch (const std::runtime_error & e) { + (void) e; + return false; + } +} + + // Precondition: attribute->isImported() == false template FeatureExecutor & @@ -633,6 +688,10 @@ createTypedWsetExecutor(const IAttributeVector * attribute, const Property & pro if (attribute->hasEnum()) { EnumVector vector(attribute); WeightedSetParser::parse(prop.get(), vector); + const IWeightedIndexVector * getEnumHandles = dynamic_cast(attribute); + if (supportsGetEnumHandles(getEnumHandles)) { + return &stash.create(getEnumHandles, std::move(vector)); + } return &stash.create>(attribute, std::move(vector)); } else { if 
(attribute->isStringType()) { diff --git a/searchlib/src/vespa/searchlib/fef/featureexecutor.cpp b/searchlib/src/vespa/searchlib/fef/featureexecutor.cpp index 02d69fbc5ca..1ea3da3939c 100644 --- a/searchlib/src/vespa/searchlib/fef/featureexecutor.cpp +++ b/searchlib/src/vespa/searchlib/fef/featureexecutor.cpp @@ -1,14 +1,17 @@ // Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. #include "featureexecutor.h" +#include -namespace search { -namespace fef { +namespace search::fef { -FeatureExecutor::FeatureExecutor() - : _inputs(), - _outputs() +FeatureExecutor::FeatureExecutor() = default; + + +vespalib::string +FeatureExecutor::getClassName() const { + return vespalib::getClassName(*this); } bool @@ -52,5 +55,4 @@ FeatureExecutor::bind_match_data(const MatchData &md) handle_bind_match_data(md); } -} // namespace fef -} // namespace search +} diff --git a/searchlib/src/vespa/searchlib/fef/featureexecutor.h b/searchlib/src/vespa/searchlib/fef/featureexecutor.h index dfc46230e18..dc8a4ba6075 100644 --- a/searchlib/src/vespa/searchlib/fef/featureexecutor.h +++ b/searchlib/src/vespa/searchlib/fef/featureexecutor.h @@ -112,6 +112,13 @@ public: **/ FeatureExecutor(); + /** + * Obtain the fully qualified name of the concrete class for this object. + * + * @return fully qualified class name + **/ + vespalib::string getClassName() const; + // bind order per executor: inputs, outputs, match_data void bind_inputs(vespalib::ConstArrayRef inputs); void bind_outputs(vespalib::ArrayRef outputs); -- cgit v1.2.3