summaryrefslogtreecommitdiffstats
path: root/searchlib
diff options
context:
space:
mode:
authorLester Solbakken <lesters@yahoo-inc.com>2017-08-17 13:40:50 +0000
committerLester Solbakken <lesters@yahoo-inc.com>2017-08-17 13:40:50 +0000
commitd11233f5c87a8af79a4b86807d334aaece372ca4 (patch)
treed91c09a66dc0e90191a91252a805579dd8a9bd31 /searchlib
parent3985c66b1b52117fb56bb0b3dbd4fe3d85bf91e5 (diff)
Add internal feature for replacement of tensor expression
Diffstat (limited to 'searchlib')
-rw-r--r--searchlib/CMakeLists.txt1
-rw-r--r--searchlib/src/tests/features/expression_replacement_features/.gitignore1
-rw-r--r--searchlib/src/tests/features/expression_replacement_features/CMakeLists.txt8
-rw-r--r--searchlib/src/tests/features/expression_replacement_features/expression_replacement_features_test.cpp142
-rw-r--r--searchlib/src/vespa/searchlib/features/CMakeLists.txt1
-rw-r--r--searchlib/src/vespa/searchlib/features/max_reduce_prod_join_feature.cpp229
-rw-r--r--searchlib/src/vespa/searchlib/features/max_reduce_prod_join_feature.h40
7 files changed, 422 insertions, 0 deletions
diff --git a/searchlib/CMakeLists.txt b/searchlib/CMakeLists.txt
index c389cbdc8c6..a64c92f4b6b 100644
--- a/searchlib/CMakeLists.txt
+++ b/searchlib/CMakeLists.txt
@@ -140,6 +140,7 @@ vespa_define_module(
src/tests/features/tensor_from_labels
src/tests/features/tensor_from_weighted_set
src/tests/features/text_similarity_feature
+ src/tests/features/expression_replacement_features
src/tests/features/util
src/tests/fef
src/tests/fef/attributecontent
diff --git a/searchlib/src/tests/features/expression_replacement_features/.gitignore b/searchlib/src/tests/features/expression_replacement_features/.gitignore
new file mode 100644
index 00000000000..b0f9c973af5
--- /dev/null
+++ b/searchlib/src/tests/features/expression_replacement_features/.gitignore
@@ -0,0 +1 @@
+searchlib_expression_replacement_features_test_app
diff --git a/searchlib/src/tests/features/expression_replacement_features/CMakeLists.txt b/searchlib/src/tests/features/expression_replacement_features/CMakeLists.txt
new file mode 100644
index 00000000000..9ce6a1c9dc9
--- /dev/null
+++ b/searchlib/src/tests/features/expression_replacement_features/CMakeLists.txt
@@ -0,0 +1,8 @@
+# Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+# Test executable built from expression_replacement_features_test.cpp, depending on searchlib.
+vespa_add_executable(searchlib_expression_replacement_features_test_app TEST
+ SOURCES
+ expression_replacement_features_test.cpp
+ DEPENDS
+ searchlib
+)
+# Register the executable above as a test with the same name.
+vespa_add_test(NAME searchlib_expression_replacement_features_test_app COMMAND searchlib_expression_replacement_features_test_app)
diff --git a/searchlib/src/tests/features/expression_replacement_features/expression_replacement_features_test.cpp b/searchlib/src/tests/features/expression_replacement_features/expression_replacement_features_test.cpp
new file mode 100644
index 00000000000..11591e5ffc1
--- /dev/null
+++ b/searchlib/src/tests/features/expression_replacement_features/expression_replacement_features_test.cpp
@@ -0,0 +1,142 @@
+// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/vespalib/testkit/test_kit.h>
+
+#include <vespa/searchlib/attribute/attributefactory.h>
+#include <vespa/searchlib/fef/test/ftlib.h>
+#include <vespa/searchlib/features/max_reduce_prod_join_feature.h>
+#include <vespa/searchlib/attribute/attribute.h>
+
+using search::feature_t;
+using namespace search::fef;
+using namespace search::fef::test;
+using namespace search::features;
+using search::AttributeFactory;
+using search::IntegerAttribute;
+using CollectionType = FieldInfo::CollectionType;
+
+typedef search::attribute::Config AVC;
+typedef search::attribute::BasicType AVBT;
+typedef search::attribute::CollectionType AVCT;
+typedef search::AttributeVector::SP AttributePtr;
+typedef FtTestApp FTA;
+
+/**
+ * Fixture for the blueprint setup tests: a blueprint instance plus an index
+ * environment holding a single array attribute field named by the caller.
+ */
+struct SetupFixture
+{
+    InternalMaxReduceProdJoinBlueprint blueprint;
+    IndexEnvironment indexEnv;
+
+    SetupFixture(const vespalib::string &attr)
+        : blueprint(),
+          indexEnv()
+    {
+        // Register 'attr' as an array attribute field in the index environment.
+        indexEnv.getFields().push_back(FieldInfo(FieldType::ATTRIBUTE, CollectionType::ARRAY, attr, 0));
+    }
+};
+
+// The blueprint can be instantiated under its registered base name.
+TEST_F("require that blueprint can be created", SetupFixture("attribute(foo)"))
+{
+    EXPECT_TRUE(FTA::assertCreateInstance(f.blueprint, "internalMaxReduceProdJoin"));
+}
+
+// A single parameter that is not a known attribute must be rejected.
+TEST_F("require that setup fails if source spec is invalid", SetupFixture("attribute(foo)"))
+{
+    StringList params;
+    params.add("source(foo)");
+    FTA::FT_SETUP_FAIL(f.blueprint, f.indexEnv, params);
+}
+
+// The fixture only knows "attribute(foo)", so "attribute(bar)" must be rejected.
+TEST_F("require that setup fails if attribute does not exist", SetupFixture("attribute(foo)"))
+{
+    StringList params;
+    params.add("attribute(bar)");
+    params.add("query(baz)");
+    FTA::FT_SETUP_FAIL(f.blueprint, f.indexEnv, params);
+}
+
+// Known attribute plus a query parameter: no expected inputs, one output named "scalar".
+TEST_F("require that setup succeeds with attribute and query parameters", SetupFixture("attribute(foo)"))
+{
+    StringList params;
+    params.add("attribute(foo)");
+    params.add("query(bar)");
+    StringList expectedInputs;
+    StringList expectedOutputs;
+    expectedOutputs.add("scalar");
+    FTA::FT_SETUP_OK(f.blueprint, f.indexEnv, params, expectedInputs, expectedOutputs);
+}
+
+/**
+ * Fixture for the executor tests. It registers the blueprint prototype,
+ * creates two array attributes with values, fills in the query properties
+ * and then sets up the feature named by the constructor argument.
+ */
+struct ExecFixture
+{
+ BlueprintFactory factory;
+ FtFeatureTest test;
+ vespalib::string feature;
+ ExecFixture(const vespalib::string &f)
+ : factory(),
+ test(factory, f),
+ feature(f)
+ {
+ // The prototype, attributes and query properties are all in place before test.setup() runs.
+ factory.addPrototype(std::make_shared<InternalMaxReduceProdJoinBlueprint>());
+ setupAttributeVectors();
+ setupQueryEnvironment();
+ ASSERT_TRUE(test.setup());
+ }
+
+ // Creates an int64 array attribute and an int32 array attribute, registers
+ // both with the index environment and appends the test values.
+ void setupAttributeVectors() {
+ vespalib::string attrIntArray = "attribute(intarray)";
+ vespalib::string attrLongArray = "attribute(longarray)";
+
+ test.getIndexEnv().getBuilder().addField(FieldType::ATTRIBUTE, CollectionType::ARRAY, attrLongArray);
+ test.getIndexEnv().getBuilder().addField(FieldType::ATTRIBUTE, CollectionType::ARRAY, attrIntArray);
+
+ // Order matters: attrs[0] is the long array and attrs[1] is the int array (see the casts below).
+ std::vector<AttributePtr> attrs;
+ attrs.push_back(AttributeFactory::createAttribute(attrLongArray, AVC(AVBT::INT64, AVCT::ARRAY)));
+ attrs.push_back(AttributeFactory::createAttribute(attrIntArray, AVC(AVBT::INT32, AVCT::ARRAY)));
+ for (const auto &attr : attrs) {
+ attr->addReservedDoc();
+ attr->addDocs(1);
+ test.getIndexEnv().getAttributeMap().add(attr);
+ }
+
+ // NOTE(review): append() arguments are presumably (docId, value, weight) - confirm against IntegerAttribute.
+ IntegerAttribute *longArray = static_cast<IntegerAttribute *>(attrs[0].get());
+ longArray->append(1, 1111, 0);
+ longArray->append(1, 2222, 0);
+ longArray->append(1, 78, 0);
+
+ IntegerAttribute *intArray = static_cast<IntegerAttribute *>(attrs[1].get());
+ intArray->append(1, 78, 0);
+ intArray->append(1, 1111, 0);
+
+ for (const auto &attr : attrs) {
+ attr->commit();
+ }
+ }
+
+ // Adds the query properties referenced by the tests: two in weighted set
+ // syntax ("wset", "wsetnomatch") and one in array syntax ("array").
+ void setupQueryEnvironment() {
+ test.getQueryEnv().getProperties().add("query(wset)", "{1111:1234, 2222:2245}");
+ test.getQueryEnv().getProperties().add("query(wsetnomatch)", "{1:1000, 2:2000}");
+ test.getQueryEnv().getProperties().add("query(array)", "[1111,2222]");
+ }
+
+ // Executes the feature via test.execute(expected) and returns its result.
+ bool evaluatesTo(feature_t expected) {
+ return test.execute(expected);
+ }
+
+};
+
+// longarray holds 1111, 2222 and 78; wset maps 1111->1234 and 2222->2245, so the max is 2245.
+TEST_F("require that executor returns correct result for long array",
+ ExecFixture("internalMaxReduceProdJoin(attribute(longarray),query(wset))"))
+{
+ EXPECT_TRUE(f.evaluatesTo(2245));
+}
+
+// intarray holds 78 and 1111; only 1111 is in wset, so the result is its weight 1234.
+TEST_F("require that executor returns correct result for int array",
+ ExecFixture("internalMaxReduceProdJoin(attribute(intarray),query(wset))"))
+{
+ EXPECT_TRUE(f.evaluatesTo(1234));
+}
+
+// wsetnomatch only has keys 1 and 2, none of which are in longarray.
+TEST_F("require that executor returns 0 if no items match",
+ ExecFixture("internalMaxReduceProdJoin(attribute(longarray),query(wsetnomatch))"))
+{
+ EXPECT_TRUE(f.evaluatesTo(0.0));
+}
+
+// query(array) uses array syntax ("[1111,2222]") rather than weighted set syntax.
+TEST_F("require that executor return 0 if query is not a weighted set",
+ ExecFixture("internalMaxReduceProdJoin(attribute(longarray),query(array))"))
+{
+ EXPECT_TRUE(f.evaluatesTo(0.0));
+}
+
+// Test program entry point: runs every test defined above.
+TEST_MAIN() { TEST_RUN_ALL(); }
diff --git a/searchlib/src/vespa/searchlib/features/CMakeLists.txt b/searchlib/src/vespa/searchlib/features/CMakeLists.txt
index 6b212bdee0f..339195774ff 100644
--- a/searchlib/src/vespa/searchlib/features/CMakeLists.txt
+++ b/searchlib/src/vespa/searchlib/features/CMakeLists.txt
@@ -62,5 +62,6 @@ vespa_add_library(searchlib_features OBJECT
utils.cpp
valuefeature.cpp
weighted_set_parser.cpp
+ max_reduce_prod_join_feature.cpp
DEPENDS
)
diff --git a/searchlib/src/vespa/searchlib/features/max_reduce_prod_join_feature.cpp b/searchlib/src/vespa/searchlib/features/max_reduce_prod_join_feature.cpp
new file mode 100644
index 00000000000..b2c1759527a
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/features/max_reduce_prod_join_feature.cpp
@@ -0,0 +1,229 @@
+// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include "max_reduce_prod_join_feature.h"
+#include "valuefeature.h"
+#include "weighted_set_parser.h"
+
+#include <vespa/log/log.h>
+#include <vespa/searchlib/attribute/attribute.h>
+#include <vespa/searchlib/attribute/imported_attribute_vector.h>
+#include <vespa/searchlib/attribute/multinumericattribute.h>
+#include <vespa/searchlib/features/dotproductfeature.h>
+#include <vespa/searchlib/fef/properties.h>
+#include <vespa/searchlib/fef/featureexecutor.h>
+
+LOG_SETUP(".features.internalmaxreduceprodjoin");
+
+using namespace search::attribute;
+using namespace search::fef;
+
+using search::features::dotproduct::wset::IntegerVector;
+
+namespace search {
+namespace features {
+
+/**
+ * Executor used when the array values of a document can be accessed directly
+ * through getRawValues() on the typed integer attribute.
+ */
+template <typename BaseType>
+class RawExecutor : public FeatureExecutor {
+public:
+    using A = IntegerAttributeTemplate<BaseType>;
+    using AT = multivalue::Value<BaseType>;
+
+    RawExecutor(const IAttributeVector * attribute, const IntegerVector & queryVector);
+    void execute(uint32_t docId) override;
+
+protected:
+    // Attribute the document values are read from.
+    const IAttributeVector * _attribute;
+    // Copy of the parsed query vector; its dimension map is used for lookups.
+    IntegerVector _queryVector;
+};
+
+/**
+ * Stores the attribute pointer and a copy of the query vector, then calls
+ * syncMap() on the copy.
+ */
+template <typename BaseType>
+RawExecutor<BaseType>::RawExecutor(const IAttributeVector *attribute, const IntegerVector &queryVector)
+    : FeatureExecutor(),
+      _attribute(attribute),
+      _queryVector(queryVector)
+{
+    // NOTE(review): presumably builds the map returned by getDimMap() - confirm in IntegerVector.
+    _queryVector.syncMap();
+}
+
+/**
+ * Outputs the largest query weight among the document's array values that
+ * are present in the query vector, or 0.0 when nothing matches.
+ *
+ * @param docId document whose raw array values are inspected
+ */
+template <typename BaseType>
+void
+RawExecutor<BaseType>::execute(uint32_t docId)
+{
+    feature_t val = -DBL_MAX; // sentinel meaning "no match seen yet"
+    // getRawValues() lives on the typed attribute. dynamic_cast yields nullptr
+    // for any other type, so guard it instead of dereferencing blindly; the
+    // default value 0.0 is output in that case.
+    const A *iattr = dynamic_cast<const A *>(_attribute);
+    if (iattr != nullptr) {
+        const AT *values(nullptr);
+        const size_t count = iattr->getRawValues(docId, values);
+        const auto &dimMap = _queryVector.getDimMap();
+        if (!dimMap.empty()) {
+            for (size_t i = 0; i < count; ++i) {
+                const auto itr = dimMap.find(values[i].value());
+                if (itr != dimMap.end()) {
+                    const feature_t v = itr->second; // weight from attribute is assumed to be 1.0
+                    if (v > val) val = v;
+                }
+            }
+        }
+    }
+    outputs().set_number(0, val == -DBL_MAX ? 0.0 : val);
+}
+
+/**
+ * Executor used when the array can't be accessed directly; the document's
+ * values are first copied into a WeightedIntegerContent buffer.
+ */
+template <typename BaseType>
+class BufferedExecutor : public RawExecutor<BaseType> {
+public:
+    BufferedExecutor(const IAttributeVector * attribute, const IntegerVector & queryVector);
+    void execute(uint32_t docId) override;
+
+private:
+    // Buffer that execute() fills from the attribute on every call.
+    WeightedIntegerContent _buffer;
+};
+
+/**
+ * Forwards the attribute and query vector to RawExecutor and starts with a
+ * default-constructed buffer.
+ */
+template <typename BaseType>
+BufferedExecutor<BaseType>::BufferedExecutor(const IAttributeVector *attribute, const IntegerVector &queryVector)
+    : RawExecutor<BaseType>(attribute, queryVector),
+      _buffer()
+{
+}
+
+/**
+ * Fills the buffer with the document's values and outputs the largest query
+ * weight among the values present in the query vector, or 0.0 when nothing
+ * matches.
+ */
+template <typename BaseType>
+void
+BufferedExecutor<BaseType>::execute(uint32_t docId)
+{
+    const auto &dimMap = this->_queryVector.getDimMap();
+    _buffer.fill(*this->_attribute, docId);
+
+    feature_t best = -DBL_MAX; // sentinel meaning "no match seen yet"
+    for (size_t idx = 0; idx < _buffer.size(); ++idx) {
+        const auto hit = dimMap.find(_buffer[idx].getValue());
+        if (hit == dimMap.end()) {
+            continue;
+        }
+        const feature_t weight = hit->second; // weight from attribute is assumed to be 1.0
+        if (weight > best) {
+            best = weight;
+        }
+    }
+    this->outputs().set_number(0, (best == -DBL_MAX) ? 0.0 : best);
+}
+
+/**
+ * Blueprint, registered under the base name "internalMaxReduceProdJoin".
+ */
+InternalMaxReduceProdJoinBlueprint::InternalMaxReduceProdJoinBlueprint()
+    : Blueprint("internalMaxReduceProdJoin")
+{
+}
+
+// Nothing to release beyond what the members clean up themselves.
+InternalMaxReduceProdJoinBlueprint::~InternalMaxReduceProdJoinBlueprint() = default;
+
+// Reports nothing to the dump feature visitor.
+void
+InternalMaxReduceProdJoinBlueprint::visitDumpFeatures(const IIndexEnvironment &, IDumpFeatureVisitor &) const
+{
+}
+
+// Creates a fresh, not yet set up, blueprint of this type.
+Blueprint::UP
+InternalMaxReduceProdJoinBlueprint::createInstance() const
+{
+    Blueprint::UP instance(new InternalMaxReduceProdJoinBlueprint());
+    return instance;
+}
+
+// Accepted parameter list: an attribute of any collection type, followed by a string.
+ParameterDescriptions
+InternalMaxReduceProdJoinBlueprint::getDescriptions() const
+{
+    ParameterDescriptions descriptions =
+        ParameterDescriptions().desc().attribute(ParameterCollection::ANY).string();
+    return descriptions;
+}
+
+/**
+ * Stores the attribute name (first parameter) and the query property name
+ * (second parameter), describes the single output "scalar" and hints the
+ * environment about the attribute access.
+ *
+ * @return always true
+ */
+bool
+InternalMaxReduceProdJoinBlueprint::setup(const IIndexEnvironment &env, const ParameterList &params)
+{
+    _attribute = params[0].getValue();
+    _query = params[1].getValue();
+
+    describeOutput("scalar", "Internal executor for optimized execution of reduce(join(A,Q,f(x,y)(x*y)),max)");
+
+    env.hintAttributeAccess(_attribute);
+    return true;
+}
+
+// True when the attribute's dynamic type is ImportedAttributeVector (or derived from it).
+bool isImportedAttribute(const IAttributeVector& attribute) noexcept {
+    const ImportedAttributeVector *imported = dynamic_cast<const ImportedAttributeVector*>(&attribute);
+    return imported != nullptr;
+}
+
+/**
+ * Probes whether getRawValues() is usable on the attribute by calling it for
+ * docid 0; a std::runtime_error from the call is taken to mean "unsupported".
+ */
+template<typename A>
+bool supportsGetRawValues(const A &attr) noexcept {
+    const multivalue::Value<typename A::BaseType> *probe = nullptr;
+    try {
+        attr.getRawValues(0, probe); // Throws if unsupported
+    } catch (const std::runtime_error &) {
+        return false;
+    }
+    return true;
+}
+
+/**
+ * Chooses the executor for an attribute with element type BaseType.
+ *
+ * RawExecutor is used only when the attribute is not imported, can be cast
+ * to IntegerAttributeTemplate<BaseType>, supports getRawValues() and is a
+ * MultiValueNumericAttribute of that type. Every other case falls back to
+ * BufferedExecutor.
+ *
+ * @param attribute attribute to read from; must not be null
+ * @param vector    parsed query vector handed to the executor
+ * @param stash     stash in which the executor is created
+ * @return the executor created in the stash
+ */
+template <typename BaseType>
+FeatureExecutor &
+selectTypedExecutor(const IAttributeVector* attribute, const IntegerVector& vector, vespalib::Stash &stash)
+{
+    if (!isImportedAttribute(*attribute)) {
+        using A = IntegerAttributeTemplate<BaseType>;
+        using VT = multivalue::Value<BaseType>;
+        using ExactA = MultiValueNumericAttribute<A, VT>;
+
+        const A *iattr = dynamic_cast<const A *>(attribute);
+        // dynamic_cast yields nullptr when the attribute is not of type A;
+        // check before dereferencing so such attributes use the buffered path.
+        if ((iattr != nullptr) && supportsGetRawValues(*iattr)) {
+            const ExactA *exactA = dynamic_cast<const ExactA *>(iattr);
+            if (exactA != nullptr) {
+                return stash.create<RawExecutor<BaseType>>(attribute, vector);
+            }
+        }
+    }
+    return stash.create<BufferedExecutor<BaseType>>(attribute, vector);
+}
+
+/**
+ * Dispatches on the attribute's collection type and basic type. Arrays of
+ * INT32 or INT64 get a typed executor; anything else is logged and gets an
+ * executor that outputs the default value.
+ */
+FeatureExecutor &
+selectExecutor(const IAttributeVector* attribute, const IntegerVector& vector, vespalib::Stash &stash)
+{
+    if (attribute->getCollectionType() == CollectionType::ARRAY) {
+        const auto basicType = attribute->getBasicType();
+        if (basicType == BasicType::INT32) {
+            return selectTypedExecutor<int32_t>(attribute, vector, stash);
+        }
+        if (basicType == BasicType::INT64) {
+            return selectTypedExecutor<int64_t>(attribute, vector, stash);
+        }
+    }
+    LOG(warning, "The attribute vector '%s' is not of type "
+        "array<int/long>, returning executor with default value.", attribute->getName().c_str());
+    return stash.create<SingleZeroValueExecutor>();
+}
+
+
+/**
+ * Creates the executor for one query.
+ *
+ * A SingleZeroValueExecutor is returned when the attribute is not found
+ * (with a warning), when the query property is missing or empty, or when
+ * parsing it gives an empty vector. Otherwise the choice is delegated to
+ * selectExecutor().
+ */
+FeatureExecutor &
+InternalMaxReduceProdJoinBlueprint::createExecutor(const IQueryEnvironment &env, vespalib::Stash &stash) const
+{
+    const IAttributeVector *attribute = env.getAttributeContext().getAttribute(_attribute);
+    if (attribute == nullptr) {
+        LOG(warning, "The attribute vector '%s' was not found in the attribute manager, "
+            "returning executor with default value.",
+            _attribute.c_str());
+        return stash.create<SingleZeroValueExecutor>();
+    }
+
+    Property prop = env.getProperties().lookup(_query);
+    if (!prop.found() || prop.get().empty()) {
+        return stash.create<SingleZeroValueExecutor>();
+    }
+
+    IntegerVector vector;
+    WeightedSetParser::parse(prop.get(), vector);
+    if (vector.getVector().empty()) {
+        return stash.create<SingleZeroValueExecutor>();
+    }
+    return selectExecutor(attribute, vector, stash);
+}
+
+
+}
+}
+
+
diff --git a/searchlib/src/vespa/searchlib/features/max_reduce_prod_join_feature.h b/searchlib/src/vespa/searchlib/features/max_reduce_prod_join_feature.h
new file mode 100644
index 00000000000..5650b91092e
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/features/max_reduce_prod_join_feature.h
@@ -0,0 +1,40 @@
+// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <vespa/searchlib/fef/blueprint.h>
+
+namespace search {
+namespace features {
+
+/**
+ * Feature for the specific replacement of the expression:
+ *
+ *   reduce(join(tensorFromLabels(A),tensorFromWeightedset(Q),f(x,y)(x*y)),max)
+ *
+ * A is an array attribute of int or long type, and Q is a query that parses
+ * as a weighted set. The expression is replaced with this feature to avoid
+ * incurring the cost of creating temporary tensors.
+ */
+class InternalMaxReduceProdJoinBlueprint : public fef::Blueprint {
+public:
+    InternalMaxReduceProdJoinBlueprint();
+    ~InternalMaxReduceProdJoinBlueprint();
+
+    // fef::Blueprint overrides
+    fef::Blueprint::UP createInstance() const override;
+    fef::ParameterDescriptions getDescriptions() const override;
+    bool setup(const fef::IIndexEnvironment & env, const fef::ParameterList & params) override;
+    fef::FeatureExecutor &createExecutor(const fef::IQueryEnvironment &env, vespalib::Stash &stash) const override;
+    void visitDumpFeatures(const fef::IIndexEnvironment & env, fef::IDumpFeatureVisitor & visitor) const override;
+
+private:
+    vespalib::string _attribute; // attribute parameter of the feature
+    vespalib::string _query;     // query parameter of the feature
+};
+
+}
+}
+
+
+