summary | refs | log | tree | commit | diff | stats
path: root/searchlib
diff options
context:
space:
mode:
author Henning Baldersheim <balder@yahoo-inc.com> 2019-05-07 15:58:11 +0000
committer Henning Baldersheim <balder@yahoo-inc.com> 2019-05-07 16:22:34 +0000
commit dfbaedb2ac336035ddc5dc259a6af5f2567f061f (patch)
tree b0dbc5eba871ff8df82a55d8a9b2b8e5fcc4ca5c /searchlib
parent 6903dc2689554a60702d6fb02f9558c265fe76a2 (diff)
Also make a faster DotproductExecutor when using enumerated values.
Diffstat (limited to 'searchlib')
-rw-r--r-- searchlib/src/tests/features/prod_features.cpp 4
-rw-r--r-- searchlib/src/vespa/searchlib/attribute/attributevector.h 1
-rw-r--r-- searchlib/src/vespa/searchlib/attribute/multienumattribute.cpp 6
-rw-r--r-- searchlib/src/vespa/searchlib/attribute/multienumattribute.h 22
-rw-r--r-- searchlib/src/vespa/searchlib/attribute/multienumattribute.hpp 24
-rw-r--r-- searchlib/src/vespa/searchlib/attribute/multivalue.h 12
-rw-r--r-- searchlib/src/vespa/searchlib/features/dotproductfeature.cpp 61
-rw-r--r-- searchlib/src/vespa/searchlib/fef/featureexecutor.cpp 16
-rw-r--r-- searchlib/src/vespa/searchlib/fef/featureexecutor.h 7
9 files changed, 133 insertions, 20 deletions
diff --git a/searchlib/src/tests/features/prod_features.cpp b/searchlib/src/tests/features/prod_features.cpp
index 1d9433c739f..ce2260e1681 100644
--- a/searchlib/src/tests/features/prod_features.cpp
+++ b/searchlib/src/tests/features/prod_features.cpp
@@ -1217,9 +1217,7 @@ Test::testDotProduct()
vespalib::Stash stash;
FeatureExecutor &exc = bp.createExecutor(ft.getQueryEnv(), stash);
// check that we have the optimized enum version
- dotproduct::wset::DotProductExecutorByCopy<dotproduct::wset::EnumVector, WeightedEnumContent> * myExc =
- dynamic_cast<dotproduct::wset::DotProductExecutorByCopy<dotproduct::wset::EnumVector, WeightedEnumContent> *>(&exc);
- EXPECT_TRUE(myExc != nullptr);
+ EXPECT_EQUAL("search::features::dotproduct::wset::(anonymous namespace)::DotProductExecutorByEnum", exc.getClassName());
EXPECT_EQUAL(1u, deps.output.size());
}
}
diff --git a/searchlib/src/vespa/searchlib/attribute/attributevector.h b/searchlib/src/vespa/searchlib/attribute/attributevector.h
index af308044292..58779a17e00 100644
--- a/searchlib/src/vespa/searchlib/attribute/attributevector.h
+++ b/searchlib/src/vespa/searchlib/attribute/attributevector.h
@@ -552,6 +552,7 @@ public:
virtual SearchContext::UP getSearch(QueryTermSimpleUP term, const attribute::SearchContextParams &params) const = 0;
virtual const EnumStoreBase *getEnumStoreBase() const;
virtual const attribute::MultiValueMappingBase *getMultiValueBase() const;
+
private:
/**
* This is called before adding docs will commence.
diff --git a/searchlib/src/vespa/searchlib/attribute/multienumattribute.cpp b/searchlib/src/vespa/searchlib/attribute/multienumattribute.cpp
index e7704cf19c1..05e83012421 100644
--- a/searchlib/src/vespa/searchlib/attribute/multienumattribute.cpp
+++ b/searchlib/src/vespa/searchlib/attribute/multienumattribute.cpp
@@ -2,8 +2,14 @@
#include "multienumattribute.h"
#include "multienumattribute.hpp"
+#include <stdexcept>
namespace search {
+/**
+ * Default implementation used when a subclass does not override
+ * getEnumHandles(); it never returns and always throws std::runtime_error.
+ * Both arguments are ignored.
+ */
+uint32_t
+IWeightedIndexVector::getEnumHandles(uint32_t, const WeightedIndex * &) const {
+    throw std::runtime_error("IWeightedIndexVector::getEnumHandles() not implemented");
+}
+
} // namespace search
diff --git a/searchlib/src/vespa/searchlib/attribute/multienumattribute.h b/searchlib/src/vespa/searchlib/attribute/multienumattribute.h
index 9300d93168b..ee77baf778f 100644
--- a/searchlib/src/vespa/searchlib/attribute/multienumattribute.h
+++ b/searchlib/src/vespa/searchlib/attribute/multienumattribute.h
@@ -5,9 +5,25 @@
#include "multivalueattribute.h"
#include "enumstorebase.h"
#include "loadedenumvalue.h"
+#include "multivalue.h"
namespace search {
+/**
+ * Interface exposing the enum-index/weight pairs stored for a document
+ * by reference.
+ */
+class IWeightedIndexVector {
+public:
+    /** Element type handed out by getEnumHandles(): an enum store index plus a weight. */
+    using WeightedIndex = multivalue::WeightedValue<EnumStoreBase::Index>;
+
+    virtual ~IWeightedIndexVector() = default;
+
+    /**
+     * Hands out a reference to the underlying enum/weight pairs of a document.
+     * Only invoke this when @ref getCollectionType(docId) returns CollectionType::WEIGHTED_SET.
+     *
+     * @param doc    document identifier
+     * @param values set to refer to the values and weights of the document
+     * @return the number of values for this document
+     **/
+    virtual uint32_t getEnumHandles(uint32_t doc, const WeightedIndex * & values) const;
+};
+
class ReaderBase;
/*
@@ -18,7 +34,8 @@ class ReaderBase;
* M: MultiValueType
*/
template <typename B, typename M>
-class MultiValueEnumAttribute : public MultiValueAttribute<B, M>
+class MultiValueEnumAttribute : public MultiValueAttribute<B, M>,
+ public IWeightedIndexVector
{
protected:
typedef typename B::UniqueSet UniqueSet;
@@ -67,6 +84,8 @@ protected:
public:
MultiValueEnumAttribute(const vespalib::string & baseFileName, const AttributeVector::Config & cfg);
+ uint32_t getEnumHandles(DocId doc, const IWeightedIndexVector::WeightedIndex * & values) const override final;
+
void onCommit() override;
void onUpdateStat() override;
@@ -84,6 +103,7 @@ public:
return indices[0].value().ref();
}
}
+
uint32_t get(DocId doc, EnumHandle * e, uint32_t sz) const override {
WeightedIndexArrayRef indices(this->_mvMapping.get(doc));
uint32_t valueCount = indices.size();
diff --git a/searchlib/src/vespa/searchlib/attribute/multienumattribute.hpp b/searchlib/src/vespa/searchlib/attribute/multienumattribute.hpp
index f8f8e84b41e..cb31dbb4b14 100644
--- a/searchlib/src/vespa/searchlib/attribute/multienumattribute.hpp
+++ b/searchlib/src/vespa/searchlib/attribute/multienumattribute.hpp
@@ -132,6 +132,30 @@ MultiValueEnumAttribute(const vespalib::string &baseFileName,
{
}
+// Helpers live in a named namespace rather than an anonymous one: this file is
+// included from several translation units, and an anonymous namespace would give
+// each of them its own distinct 'extract', which the externally linked template
+// member below would then refer to differently per translation unit.
+namespace multienum_detail {
+
+/**
+ * Fallback chosen for every element type other than
+ * IWeightedIndexVector::WeightedIndex; always throws std::runtime_error.
+ */
+template<typename T>
+const IWeightedIndexVector::WeightedIndex *
+extract(const T *) {
+    throw std::runtime_error("IWeightedIndexVector::getEnumHandles not implemented");
+}
+
+/**
+ * Specialization for elements that already are WeightedIndex:
+ * the pointer is passed through unchanged.
+ */
+template <>
+inline const IWeightedIndexVector::WeightedIndex *
+extract(const IWeightedIndexVector::WeightedIndex * values) {
+    return values;
+}
+
+}
+
+/**
+ * Looks up the multi-value entries for a document and hands them out by pointer.
+ *
+ * @param doc    document identifier
+ * @param values set to point at the first enum/weight pair of the document
+ * @return the number of entries stored for the document
+ * @throws std::runtime_error when the element type is not WeightedIndex
+ */
+template <typename B, typename M>
+uint32_t
+MultiValueEnumAttribute<B, M>::getEnumHandles(DocId doc, const IWeightedIndexVector::WeightedIndex * & values) const {
+    WeightedIndexArrayRef indices(this->_mvMapping.get(doc));
+    values = multienum_detail::extract(&indices[0]);
+    return indices.size();
+}
+
template <typename B, typename M>
void
MultiValueEnumAttribute<B, M>::onCommit()
diff --git a/searchlib/src/vespa/searchlib/attribute/multivalue.h b/searchlib/src/vespa/searchlib/attribute/multivalue.h
index 330e69a534e..c59f975e00a 100644
--- a/searchlib/src/vespa/searchlib/attribute/multivalue.h
+++ b/searchlib/src/vespa/searchlib/attribute/multivalue.h
@@ -2,11 +2,9 @@
#pragma once
-#include <stdint.h>
+#include <cstdint>
-namespace search {
-
-namespace multivalue {
+namespace search::multivalue {
template <typename T>
class Value {
@@ -29,7 +27,7 @@ public:
bool operator >(const Value<T> & rhs) const { return _v > rhs._v; }
static bool hasWeight() { return false; }
- static const bool _hasWeight = false;
+ static constexpr bool _hasWeight = false;
private:
T _v;
};
@@ -52,12 +50,10 @@ public:
bool operator >(const WeightedValue<T> & rhs) const { return _v > rhs._v; }
static bool hasWeight() { return true; }
- static const bool _hasWeight = true;
+ static constexpr bool _hasWeight = true;
private:
T _v;
int32_t _w;
};
}
-}
-
diff --git a/searchlib/src/vespa/searchlib/features/dotproductfeature.cpp b/searchlib/src/vespa/searchlib/features/dotproductfeature.cpp
index 55a550837e1..60a8fb372d7 100644
--- a/searchlib/src/vespa/searchlib/features/dotproductfeature.cpp
+++ b/searchlib/src/vespa/searchlib/features/dotproductfeature.cpp
@@ -9,6 +9,7 @@
#include <vespa/searchlib/attribute/imported_attribute_vector_read_guard.h>
#include <vespa/searchlib/attribute/floatbase.h>
#include <vespa/searchlib/attribute/multinumericattribute.h>
+#include <vespa/searchlib/attribute/multienumattribute.h>
#include <type_traits>
#include <vespa/log/log.h>
@@ -77,7 +78,6 @@ DotProductExecutorByCopy<Vector, Buffer>::execute(uint32_t docId)
}
StringVector::StringVector() = default;
-
StringVector::~StringVector() = default;
template <typename BaseType>
@@ -124,6 +124,48 @@ DotProductExecutor<A>::getAttributeValues(uint32_t docId, const AT * & values)
return _attribute->getRawValues(docId, values);
}
+namespace {
+
+/**
+ * Feature executor computing a dot product between a query vector keyed by
+ * enum handle and the enum/weight pairs an IWeightedIndexVector hands out
+ * for each document.
+ */
+class DotProductExecutorByEnum : public fef::FeatureExecutor {
+public:
+    using V = VectorBase<EnumHandle, EnumHandle, feature_t>;
+
+    DotProductExecutorByEnum(const IWeightedIndexVector * attribute, V queryVector);
+    ~DotProductExecutorByEnum() override;
+    void execute(uint32_t docId) override;
+
+private:
+    using ConstIterator = V::HashMap::const_iterator;
+
+    const IWeightedIndexVector * _attribute;
+    V                            _queryVector;
+    // Initialized from _queryVector in the constructor, so it must stay declared after it.
+    const ConstIterator          _end;
+};
+
+// Takes over the query vector, then caches the end iterator of its dimension
+// map (obtained after syncMap()) for use by the per-document lookups in execute().
+DotProductExecutorByEnum::DotProductExecutorByEnum(const IWeightedIndexVector * attribute, V queryVector)
+    : _attribute(attribute),
+      _queryVector(std::move(queryVector)),
+      _end(_queryVector.syncMap().getDimMap().end())
+{ }
+
+DotProductExecutorByEnum::~DotProductExecutorByEnum() = default;
+
+// Sums weight * query-value over every enum handle of the document that is also
+// present in the query vector, and writes the sum to output 0.
+void DotProductExecutorByEnum::execute(uint32_t docId) {
+    feature_t sum = 0;
+    const auto & dimMap = _queryVector.getDimMap();
+    // With an empty query vector the attribute is not consulted at all.
+    if (!dimMap.empty()) {
+        const IWeightedIndexVector::WeightedIndex * elems(nullptr);
+        const uint32_t count = _attribute->getEnumHandles(docId, elems);
+        for (size_t idx = 0; idx < count; ++idx) {
+            const IWeightedIndexVector::WeightedIndex & elem = elems[idx];
+            V::HashMap::const_iterator hit = dimMap.find(elem.value().ref());
+            if (hit != _end) {
+                sum += elem.weight() * hit->second;
+            }
+        }
+    }
+    outputs().set_number(0, sum);
+}
+
+}
+
}
namespace dotproduct::array {
@@ -431,6 +473,19 @@ bool supportsGetRawValues(const A & attr) noexcept {
}
}
+// Probes whether getEnumHandles() can be used on the given attribute by calling
+// it once for document 0. Returns false for a null pointer or when the call
+// throws std::runtime_error, true otherwise.
+bool supportsGetEnumHandles(const IWeightedIndexVector * attr) noexcept {
+    if (attr != nullptr) {
+        try {
+            const IWeightedIndexVector::WeightedIndex * probe = nullptr;
+            attr->getEnumHandles(0, probe); // Throws if unsupported
+            return true;
+        } catch (const std::runtime_error &) {
+            // Unsupported: fall through to the negative answer below.
+        }
+    }
+    return false;
+}
+
+
// Precondition: attribute->isImported() == false
template <typename A>
FeatureExecutor &
@@ -633,6 +688,10 @@ createTypedWsetExecutor(const IAttributeVector * attribute, const Property & pro
if (attribute->hasEnum()) {
EnumVector vector(attribute);
WeightedSetParser::parse(prop.get(), vector);
+ const IWeightedIndexVector * getEnumHandles = dynamic_cast<const IWeightedIndexVector *>(attribute);
+ if (supportsGetEnumHandles(getEnumHandles)) {
+ return &stash.create<DotProductExecutorByEnum>(getEnumHandles, std::move(vector));
+ }
return &stash.create<DotProductExecutorByCopy<EnumVector, WeightedEnumContent>>(attribute, std::move(vector));
} else {
if (attribute->isStringType()) {
diff --git a/searchlib/src/vespa/searchlib/fef/featureexecutor.cpp b/searchlib/src/vespa/searchlib/fef/featureexecutor.cpp
index 02d69fbc5ca..1ea3da3939c 100644
--- a/searchlib/src/vespa/searchlib/fef/featureexecutor.cpp
+++ b/searchlib/src/vespa/searchlib/fef/featureexecutor.cpp
@@ -1,14 +1,17 @@
// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
#include "featureexecutor.h"
+#include <vespa/vespalib/util/classname.h>
-namespace search {
-namespace fef {
+namespace search::fef {
-FeatureExecutor::FeatureExecutor()
- : _inputs(),
- _outputs()
+FeatureExecutor::FeatureExecutor() = default;
+
+
+// Returns whatever vespalib::getClassName() reports for this object.
+vespalib::string
+FeatureExecutor::getClassName() const
{
+ return vespalib::getClassName(*this);
}
bool
@@ -52,5 +55,4 @@ FeatureExecutor::bind_match_data(const MatchData &md)
handle_bind_match_data(md);
}
-} // namespace fef
-} // namespace search
+}
diff --git a/searchlib/src/vespa/searchlib/fef/featureexecutor.h b/searchlib/src/vespa/searchlib/fef/featureexecutor.h
index dfc46230e18..dc8a4ba6075 100644
--- a/searchlib/src/vespa/searchlib/fef/featureexecutor.h
+++ b/searchlib/src/vespa/searchlib/fef/featureexecutor.h
@@ -112,6 +112,13 @@ public:
**/
FeatureExecutor();
+ /**
+ * Obtain the fully qualified name of the concrete class for this object.
+ *
+ * @return fully qualified class name
+ **/
+ vespalib::string getClassName() const;
+
// bind order per executor: inputs, outputs, match_data
void bind_inputs(vespalib::ConstArrayRef<LazyValue> inputs);
void bind_outputs(vespalib::ArrayRef<NumberOrObject> outputs);