summary refs log tree commit diff stats
path: root/searchlib
diff options
context:
space:
mode:
author Henning Baldersheim <balder@yahoo-inc.com> 2019-05-07 18:11:57 +0200
committer GitHub <noreply@github.com> 2019-05-07 18:11:57 +0200
commit 6903dc2689554a60702d6fb02f9558c265fe76a2 (patch)
tree a41ec4b21ab2250e93ed4021599b4ab7f337a681 /searchlib
parent 1177ebc58e982149f4513a3ed007aab39538ad85 (diff)
parent c4488c26da55c9db6198bae10b1bad90821f6a1b (diff)
Merge pull request #9297 from vespa-engine/balder/use-raw-values
Also use a specialized implementation for wset. For now only non enum…
Diffstat (limited to 'searchlib')
-rw-r--r-- searchlib/src/tests/features/prod_features.cpp 4
-rw-r--r-- searchlib/src/vespa/searchlib/features/dotproductfeature.cpp 144
-rw-r--r-- searchlib/src/vespa/searchlib/features/dotproductfeature.h 71
-rw-r--r-- searchlib/src/vespa/searchlib/features/internal_max_reduce_prod_join_feature.cpp 35
4 files changed, 183 insertions, 71 deletions
diff --git a/searchlib/src/tests/features/prod_features.cpp b/searchlib/src/tests/features/prod_features.cpp
index f08cb0855af..1d9433c739f 100644
--- a/searchlib/src/tests/features/prod_features.cpp
+++ b/searchlib/src/tests/features/prod_features.cpp
@@ -1217,8 +1217,8 @@ Test::testDotProduct()
vespalib::Stash stash;
FeatureExecutor &exc = bp.createExecutor(ft.getQueryEnv(), stash);
// check that we have the optimized enum version
- dotproduct::wset::DotProductExecutor<dotproduct::wset::EnumVector, WeightedEnumContent> * myExc =
- dynamic_cast<dotproduct::wset::DotProductExecutor<dotproduct::wset::EnumVector, WeightedEnumContent> *>(&exc);
+ dotproduct::wset::DotProductExecutorByCopy<dotproduct::wset::EnumVector, WeightedEnumContent> * myExc =
+ dynamic_cast<dotproduct::wset::DotProductExecutorByCopy<dotproduct::wset::EnumVector, WeightedEnumContent> *>(&exc);
EXPECT_TRUE(myExc != nullptr);
EXPECT_EQUAL(1u, deps.output.size());
}
diff --git a/searchlib/src/vespa/searchlib/features/dotproductfeature.cpp b/searchlib/src/vespa/searchlib/features/dotproductfeature.cpp
index 1dcd3e35580..55a550837e1 100644
--- a/searchlib/src/vespa/searchlib/features/dotproductfeature.cpp
+++ b/searchlib/src/vespa/searchlib/features/dotproductfeature.cpp
@@ -30,27 +30,38 @@ VectorBase<DimensionVType, DimensionHType, ComponentType, HashMapComparator>::Ve
template <typename DimensionVType, typename DimensionHType, typename ComponentType, typename HashMapComparator>
VectorBase<DimensionVType, DimensionHType, ComponentType, HashMapComparator>::~VectorBase() = default;
-template <typename V>
-V copyAndSync(const V & v) {
- V tmp(v);
- tmp.syncMap();
- return tmp;
+template <typename DimensionVType, typename DimensionHType, typename ComponentType, typename HashMapComparator>
+VectorBase<DimensionVType, DimensionHType, ComponentType, HashMapComparator> &
+VectorBase<DimensionVType, DimensionHType, ComponentType, HashMapComparator>::syncMap() {
+ Converter<DimensionVType, DimensionHType> conv;
+ _dimMap.clear();
+ _dimMap.resize(_vector.size()*2);
+ for (size_t i = 0; i < _vector.size(); ++i) {
+ _dimMap.insert(std::make_pair(conv.convert(_vector[i].first), _vector[i].second));
+ }
+ return *this;
}
+template VectorBase<int64_t, int64_t, double> & VectorBase<int64_t, int64_t, double>::syncMap();
+
+
template <typename Vector, typename Buffer>
-DotProductExecutor<Vector, Buffer>::DotProductExecutor(const IAttributeVector * attribute, const Vector & queryVector) :
+DotProductExecutorByCopy<Vector, Buffer>::DotProductExecutorByCopy(const IAttributeVector * attribute, Vector queryVector) :
FeatureExecutor(),
_attribute(attribute),
- _queryVector(copyAndSync(queryVector)),
- _end(_queryVector.getDimMap().end()),
+ _queryVector(std::move(queryVector)),
+ _end(_queryVector.syncMap().getDimMap().end()),
_buffer()
{
_buffer.allocate(_attribute->getMaxValueCount());
}
template <typename Vector, typename Buffer>
+DotProductExecutorByCopy<Vector, Buffer>::~DotProductExecutorByCopy() = default;
+
+template <typename Vector, typename Buffer>
void
-DotProductExecutor<Vector, Buffer>::execute(uint32_t docId)
+DotProductExecutorByCopy<Vector, Buffer>::execute(uint32_t docId)
{
feature_t val = 0;
if (!_queryVector.getDimMap().empty()) {
@@ -69,6 +80,50 @@ StringVector::StringVector() = default;
StringVector::~StringVector() = default;
+template <typename BaseType>
+DotProductExecutorBase<BaseType>::DotProductExecutorBase(V queryVector)
+ : FeatureExecutor(),
+ _queryVector(std::move(queryVector)),
+ _end(_queryVector.syncMap().getDimMap().end())
+{
+}
+
+template <typename BaseType>
+DotProductExecutorBase<BaseType>::~DotProductExecutorBase() = default;
+
+template <typename BaseType>
+void DotProductExecutorBase<BaseType>::execute(uint32_t docId) {
+ feature_t val = 0;
+ if (!_queryVector.getDimMap().empty()) {
+ const AT * values(nullptr);
+ uint32_t sz = getAttributeValues(docId, values);
+ for (size_t i = 0; i < sz; ++i) {
+ typename V::HashMap::const_iterator itr = _queryVector.getDimMap().find(values[i].value());
+ if (itr != _end) {
+ val += values[i].weight() * itr->second;
+ }
+ }
+ }
+ outputs().set_number(0, val);
+}
+
+template <typename A>
+DotProductExecutor<A>::DotProductExecutor(const A * attribute, V queryVector) :
+ DotProductExecutorBase<typename A::BaseType>(std::move(queryVector)),
+ _attribute(attribute)
+{
+}
+
+template <typename A>
+DotProductExecutor<A>::~DotProductExecutor() = default;
+
+template <typename A>
+size_t
+DotProductExecutor<A>::getAttributeValues(uint32_t docId, const AT * & values)
+{
+ return _attribute->getRawValues(docId, values);
+}
+
}
namespace dotproduct::array {
@@ -507,9 +562,8 @@ createFromObject(const IAttributeVector * attribute, const fef::Anything & objec
return stash.create<SingleZeroValueExecutor>();
}
-FeatureExecutor * createTypedArrayExecutor(const IAttributeVector * attribute,
- const Property & prop,
- vespalib::Stash & stash) {
+FeatureExecutor *
+createTypedArrayExecutor(const IAttributeVector * attribute, const Property & prop, vespalib::Stash & stash) {
if (!attribute->isImported()) {
switch (attribute->getBasicType()) {
case BasicType::INT32:
@@ -542,29 +596,55 @@ FeatureExecutor * createTypedArrayExecutor(const IAttributeVector * attribute,
return nullptr;
}
-FeatureExecutor * createTypedWsetExecutor(const IAttributeVector * attribute,
- const Property & prop,
- vespalib::Stash & stash) {
- if (attribute->isStringType()) {
- if (attribute->hasEnum()) {
- dotproduct::wset::EnumVector vector(attribute);
- WeightedSetParser::parse(prop.get(), vector);
- return &stash.create<dotproduct::wset::DotProductExecutor<dotproduct::wset::EnumVector, WeightedEnumContent>>(attribute, vector);
- } else {
- dotproduct::wset::StringVector vector;
- WeightedSetParser::parse(prop.get(), vector);
- return &stash.create<dotproduct::wset::DotProductExecutor<dotproduct::wset::StringVector, WeightedConstCharContent>>(attribute, vector);
+template <typename A, typename V>
+FeatureExecutor *
+createForDirectWSetImpl(const IAttributeVector * attribute, V vector, vespalib::Stash & stash)
+{
+ using namespace dotproduct::wset;
+ using T = typename A::BaseType;
+ const A * iattr = dynamic_cast<const A *>(attribute);
+ if (!attribute->isImported() && (iattr != nullptr) && supportsGetRawValues(*iattr)) {
+ using VT = multivalue::WeightedValue<T>;
+ using ExactA = MultiValueNumericAttribute<A, VT>;
+
+ const ExactA * exactA = dynamic_cast<const ExactA *>(iattr);
+ if (exactA != nullptr) {
+ return &stash.create<DotProductExecutor<ExactA>>(exactA, std::move(vector));
}
- } else if (attribute->isIntegerType()) {
- if (attribute->hasEnum()) {
- dotproduct::wset::EnumVector vector(attribute);
- WeightedSetParser::parse(prop.get(), vector);
- return &stash.create<dotproduct::wset::DotProductExecutor<dotproduct::wset::EnumVector, WeightedEnumContent>>(attribute, vector);
+ return &stash.create<DotProductExecutor<A>>(iattr, std::move(vector));
+ }
+ return &stash.create<DotProductExecutorByCopy<IntegerVectorT<T>, WeightedIntegerContent>>(attribute, std::move(vector));
+}
- } else {
- dotproduct::wset::IntegerVector vector;
+template <typename T>
+FeatureExecutor *
+createForDirectIntegerWSet(const IAttributeVector * attribute, const Property & prop, vespalib::Stash & stash)
+{
+ using namespace dotproduct::wset;
+ IntegerVectorT<T> vector;
+ WeightedSetParser::parse(prop.get(), vector);
+ return createForDirectWSetImpl<IntegerAttributeTemplate<T>>(attribute, std::move(vector), stash);
+}
+
+
+FeatureExecutor *
+createTypedWsetExecutor(const IAttributeVector * attribute, const Property & prop, vespalib::Stash & stash) {
+ using namespace dotproduct::wset;
+ if (attribute->hasEnum()) {
+ EnumVector vector(attribute);
+ WeightedSetParser::parse(prop.get(), vector);
+ return &stash.create<DotProductExecutorByCopy<EnumVector, WeightedEnumContent>>(attribute, std::move(vector));
+ } else {
+ if (attribute->isStringType()) {
+ StringVector vector;
WeightedSetParser::parse(prop.get(), vector);
- return &stash.create<dotproduct::wset::DotProductExecutor<dotproduct::wset::IntegerVector, WeightedIntegerContent>>(attribute, vector);
+ return &stash.create<DotProductExecutorByCopy<StringVector, WeightedConstCharContent>>(attribute, std::move(vector));
+ } else if (attribute->isIntegerType()) {
+ if (attribute->getBasicType() == BasicType::INT32) {
+ return createForDirectIntegerWSet<int32_t>(attribute, prop, stash);
+ } else if (attribute->getBasicType() == BasicType::INT64) {
+ return createForDirectIntegerWSet<int64_t>(attribute, prop, stash);
+ }
}
}
return nullptr;
diff --git a/searchlib/src/vespa/searchlib/features/dotproductfeature.h b/searchlib/src/vespa/searchlib/features/dotproductfeature.h
index 089066cb5f6..38dcdd54929 100644
--- a/searchlib/src/vespa/searchlib/features/dotproductfeature.h
+++ b/searchlib/src/vespa/searchlib/features/dotproductfeature.h
@@ -55,71 +55,106 @@ protected:
Vector _vector;
HashMap _dimMap; // dimension -> component
public:
+ VectorBase(VectorBase && rhs) = default;
+ VectorBase & operator = (VectorBase && rhs) = default;
~VectorBase();
const Vector & getVector() const { return _vector; }
- void syncMap() {
- Converter<DimensionVType, DimensionHType> conv;
- _dimMap.clear();
- _dimMap.resize(_vector.size()*2);
- for (size_t i = 0; i < _vector.size(); ++i) {
- _dimMap.insert(std::make_pair(conv.convert(_vector[i].first), _vector[i].second));
- }
- }
+ VectorBase & syncMap();
const HashMap & getDimMap() const { return _dimMap; }
};
/**
* Represents a vector where the dimensions are integers.
**/
-class IntegerVector : public VectorBase<int64_t, int64_t, feature_t> {
+template<typename T>
+class IntegerVectorT : public VectorBase<T, T, feature_t> {
public:
void insert(vespalib::stringref label, vespalib::stringref value) {
- _vector.push_back(std::make_pair(util::strToNum<int64_t>(label), util::strToNum<feature_t>(value)));
+ this->_vector.emplace_back(util::strToNum<T>(label), util::strToNum<feature_t>(value));
}
};
+using IntegerVector = IntegerVectorT<int64_t>;
+
/**
* Represents a vector where the dimensions are string values.
**/
class StringVector : public VectorBase<vespalib::string, const char *, feature_t, ConstCharComparator> {
public:
StringVector();
+ StringVector(StringVector &&) = default;
+ StringVector & operator = (StringVector &&) = default;
~StringVector();
void insert(vespalib::stringref label, vespalib::stringref value) {
- _vector.push_back(std::make_pair(label, util::strToNum<feature_t>(value)));
+ _vector.emplace_back(label, util::strToNum<feature_t>(value));
}
};
/**
* Represents a vector where the dimensions are enum values for strings.
**/
-class EnumVector : public VectorBase<search::attribute::EnumHandle, search::attribute::EnumHandle, feature_t> {
+class EnumVector : public VectorBase<attribute::EnumHandle, attribute::EnumHandle, feature_t> {
private:
const attribute::IAttributeVector * _attribute;
public:
EnumVector(const attribute::IAttributeVector * attribute) : _attribute(attribute) {}
void insert(vespalib::stringref label, vespalib::stringref value) {
- search::attribute::EnumHandle e;
+ attribute::EnumHandle e;
if (_attribute->findEnum(label.data(), e)) {
- _vector.push_back(std::make_pair(e, util::strToNum<feature_t>(value)));
+ _vector.emplace_back(e, util::strToNum<feature_t>(value));
}
}
};
+/**
+ * Common base for handling execution for all wset dot product executors.
+ * Only cares about the underlying value type, not the concrete type of the
+ * attribute vector itself.
+ */
+template <typename BaseType>
+class DotProductExecutorBase : public fef::FeatureExecutor {
+public:
+ using AT = multivalue::WeightedValue<BaseType>;
+ using V = VectorBase<BaseType, BaseType, feature_t>;
+private:
+ V _queryVector;
+ const typename V::HashMap::const_iterator _end;
+ virtual size_t getAttributeValues(uint32_t docid, const AT * & count) = 0;
+public:
+ DotProductExecutorBase(V queryVector);
+ ~DotProductExecutorBase() override;
+ void execute(uint32_t docId) override;
+};
+
+template <typename A>
+class DotProductExecutor final : public DotProductExecutorBase<typename A::BaseType> {
+public:
+ using AT = typename DotProductExecutorBase<typename A::BaseType>::AT;
+ using V = typename DotProductExecutorBase<typename A::BaseType>::V;
+protected:
+ const A * _attribute;
+private:
+ size_t getAttributeValues(uint32_t docid, const AT * & count) override;
+public:
+ DotProductExecutor(const A * attribute, V queryVector);
+ ~DotProductExecutor();
+};
+
/**
* Implements the executor for the dotproduct feature.
*/
template <typename Vector, typename Buffer>
-class DotProductExecutor : public fef::FeatureExecutor {
+class DotProductExecutorByCopy final : public fef::FeatureExecutor {
private:
const attribute::IAttributeVector * _attribute;
- const Vector _queryVector;
+ Vector _queryVector;
const typename Vector::HashMap::const_iterator _end;
Buffer _buffer;
public:
- DotProductExecutor(const attribute::IAttributeVector * attribute, const Vector & queryVector);
+ DotProductExecutorByCopy(const attribute::IAttributeVector * attribute, Vector queryVector);
+ ~DotProductExecutorByCopy() override;
void execute(uint32_t docId) override;
};
@@ -143,7 +178,7 @@ private:
virtual size_t getAttributeValues(uint32_t docid, const AT * & count) = 0;
public:
DotProductExecutorBase(const V & queryVector);
- ~DotProductExecutorBase();
+ ~DotProductExecutorBase() override;
void execute(uint32_t docId) final override;
};
diff --git a/searchlib/src/vespa/searchlib/features/internal_max_reduce_prod_join_feature.cpp b/searchlib/src/vespa/searchlib/features/internal_max_reduce_prod_join_feature.cpp
index 9d383e5a03a..eb73cef1f4c 100644
--- a/searchlib/src/vespa/searchlib/features/internal_max_reduce_prod_join_feature.cpp
+++ b/searchlib/src/vespa/searchlib/features/internal_max_reduce_prod_join_feature.cpp
@@ -32,15 +32,15 @@ protected:
IntegerVector _queryVector;
public:
- RawExecutor(const IAttributeVector *attribute, const IntegerVector &queryVector);
+ RawExecutor(const IAttributeVector *attribute, IntegerVector queryVector);
void execute(uint32_t docId) override;
};
template <typename BaseType>
-RawExecutor<BaseType>::RawExecutor(const IAttributeVector *attribute, const IntegerVector &queryVector) :
+RawExecutor<BaseType>::RawExecutor(const IAttributeVector *attribute, IntegerVector queryVector) :
FeatureExecutor(),
_attribute(attribute),
- _queryVector(queryVector)
+ _queryVector(std::move(queryVector))
{
_queryVector.syncMap();
}
@@ -81,13 +81,13 @@ private:
WeightedIntegerContent _buffer;
public:
- BufferedExecutor(const IAttributeVector *attribute, const IntegerVector &queryVector);
+ BufferedExecutor(const IAttributeVector *attribute, IntegerVector queryVector);
void execute(uint32_t docId) override;
};
template <typename BaseType>
-BufferedExecutor<BaseType>::BufferedExecutor(const IAttributeVector *attribute, const IntegerVector &queryVector) :
- RawExecutor<BaseType>(attribute, queryVector),
+BufferedExecutor<BaseType>::BufferedExecutor(const IAttributeVector *attribute, IntegerVector queryVector) :
+ RawExecutor<BaseType>(attribute, std::move(queryVector)),
_buffer()
{
}
@@ -109,20 +109,17 @@ InternalMaxReduceProdJoinBlueprint::InternalMaxReduceProdJoinBlueprint() :
{
}
-InternalMaxReduceProdJoinBlueprint::~InternalMaxReduceProdJoinBlueprint()
-{
-}
+InternalMaxReduceProdJoinBlueprint::~InternalMaxReduceProdJoinBlueprint() = default;
void
-InternalMaxReduceProdJoinBlueprint::visitDumpFeatures(const IIndexEnvironment &,
- IDumpFeatureVisitor &) const
+InternalMaxReduceProdJoinBlueprint::visitDumpFeatures(const IIndexEnvironment &, IDumpFeatureVisitor &) const
{
}
Blueprint::UP
InternalMaxReduceProdJoinBlueprint::createInstance() const
{
- return Blueprint::UP(new InternalMaxReduceProdJoinBlueprint());
+ return std::make_unique<InternalMaxReduceProdJoinBlueprint>();
}
ParameterDescriptions
@@ -155,7 +152,7 @@ bool supportsGetRawValues(const A &attr) noexcept {
template <typename BaseType>
FeatureExecutor &
-selectTypedExecutor(const IAttributeVector *attribute, const IntegerVector &vector, vespalib::Stash &stash)
+selectTypedExecutor(const IAttributeVector *attribute, IntegerVector vector, vespalib::Stash &stash)
{
if (!attribute->isImported()) {
using A = IntegerAttributeTemplate<BaseType>;
@@ -166,22 +163,22 @@ selectTypedExecutor(const IAttributeVector *attribute, const IntegerVector &vect
if (supportsGetRawValues(*iattr)) {
const ExactA *exactA = dynamic_cast<const ExactA *>(iattr);
if (exactA != nullptr) {
- return stash.create<RawExecutor<BaseType>>(attribute, vector);
+ return stash.create<RawExecutor<BaseType>>(attribute, std::move(vector));
}
}
}
- return stash.create<BufferedExecutor<BaseType>>(attribute, vector);
+ return stash.create<BufferedExecutor<BaseType>>(attribute, std::move(vector));
}
FeatureExecutor &
-selectExecutor(const IAttributeVector *attribute, const IntegerVector &vector, vespalib::Stash &stash)
+selectExecutor(const IAttributeVector *attribute, IntegerVector vector, vespalib::Stash &stash)
{
if (attribute->getCollectionType() == CollectionType::ARRAY) {
switch (attribute->getBasicType()) {
case BasicType::INT32:
- return selectTypedExecutor<int32_t>(attribute, vector, stash);
+ return selectTypedExecutor<int32_t>(attribute, std::move(vector), stash);
case BasicType::INT64:
- return selectTypedExecutor<int64_t>(attribute, vector, stash);
+ return selectTypedExecutor<int64_t>(attribute, std::move(vector), stash);
default:
break;
}
@@ -207,7 +204,7 @@ InternalMaxReduceProdJoinBlueprint::createExecutor(const IQueryEnvironment &env,
IntegerVector vector;
WeightedSetParser::parse(prop.get(), vector);
if (!vector.getVector().empty()) {
- return selectExecutor(attribute, vector, stash);
+ return selectExecutor(attribute, std::move(vector), stash);
}
}
return stash.create<SingleZeroValueExecutor>();