diff options
author | Lester Solbakken <lesters@yahoo-inc.com> | 2017-08-17 13:40:50 +0000 |
---|---|---|
committer | Lester Solbakken <lesters@yahoo-inc.com> | 2017-08-17 13:40:50 +0000 |
commit | d11233f5c87a8af79a4b86807d334aaece372ca4 (patch) | |
tree | d91c09a66dc0e90191a91252a805579dd8a9bd31 /searchlib | |
parent | 3985c66b1b52117fb56bb0b3dbd4fe3d85bf91e5 (diff) |
Add internal feature for replacement of tensor expression
Diffstat (limited to 'searchlib')
7 files changed, 422 insertions, 0 deletions
diff --git a/searchlib/CMakeLists.txt b/searchlib/CMakeLists.txt index c389cbdc8c6..a64c92f4b6b 100644 --- a/searchlib/CMakeLists.txt +++ b/searchlib/CMakeLists.txt @@ -140,6 +140,7 @@ vespa_define_module( src/tests/features/tensor_from_labels src/tests/features/tensor_from_weighted_set src/tests/features/text_similarity_feature + src/tests/features/expression_replacement_features src/tests/features/util src/tests/fef src/tests/fef/attributecontent diff --git a/searchlib/src/tests/features/expression_replacement_features/.gitignore b/searchlib/src/tests/features/expression_replacement_features/.gitignore new file mode 100644 index 00000000000..b0f9c973af5 --- /dev/null +++ b/searchlib/src/tests/features/expression_replacement_features/.gitignore @@ -0,0 +1 @@ +searchlib_expression_replacement_features_test_app diff --git a/searchlib/src/tests/features/expression_replacement_features/CMakeLists.txt b/searchlib/src/tests/features/expression_replacement_features/CMakeLists.txt new file mode 100644 index 00000000000..9ce6a1c9dc9 --- /dev/null +++ b/searchlib/src/tests/features/expression_replacement_features/CMakeLists.txt @@ -0,0 +1,8 @@ +# Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+vespa_add_executable(searchlib_expression_replacement_features_test_app TEST + SOURCES + expression_replacement_features_test.cpp + DEPENDS + searchlib +) +vespa_add_test(NAME searchlib_expression_replacement_features_test_app COMMAND searchlib_expression_replacement_features_test_app) diff --git a/searchlib/src/tests/features/expression_replacement_features/expression_replacement_features_test.cpp b/searchlib/src/tests/features/expression_replacement_features/expression_replacement_features_test.cpp new file mode 100644 index 00000000000..11591e5ffc1 --- /dev/null +++ b/searchlib/src/tests/features/expression_replacement_features/expression_replacement_features_test.cpp @@ -0,0 +1,142 @@ +// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include <vespa/vespalib/testkit/test_kit.h> + +#include <vespa/searchlib/attribute/attributefactory.h> +#include <vespa/searchlib/fef/test/ftlib.h> +#include <vespa/searchlib/features/max_reduce_prod_join_feature.h> +#include <vespa/searchlib/attribute/attribute.h> + +using search::feature_t; +using namespace search::fef; +using namespace search::fef::test; +using namespace search::features; +using search::AttributeFactory; +using search::IntegerAttribute; +using CollectionType = FieldInfo::CollectionType; + +typedef search::attribute::Config AVC; +typedef search::attribute::BasicType AVBT; +typedef search::attribute::CollectionType AVCT; +typedef search::AttributeVector::SP AttributePtr; +typedef FtTestApp FTA; + +struct SetupFixture +{ + InternalMaxReduceProdJoinBlueprint blueprint; + IndexEnvironment indexEnv; + SetupFixture(const vespalib::string &attr) + : blueprint(), + indexEnv() + { + FieldInfo attr_info(FieldType::ATTRIBUTE, CollectionType::ARRAY, attr, 0); + indexEnv.getFields().push_back(attr_info); + } +}; + +TEST_F("require that blueprint can be created", SetupFixture("attribute(foo)")) +{ + EXPECT_TRUE(FTA::assertCreateInstance(f.blueprint, 
"internalMaxReduceProdJoin")); +} + +TEST_F("require that setup fails if source spec is invalid", SetupFixture("attribute(foo)")) +{ + FTA::FT_SETUP_FAIL(f.blueprint, f.indexEnv, StringList().add("source(foo)")); +} + +TEST_F("require that setup fails if attribute does not exist", SetupFixture("attribute(foo)")) +{ + FTA::FT_SETUP_FAIL(f.blueprint, f.indexEnv, StringList().add("attribute(bar)").add("query(baz)")); +} + +TEST_F("require that setup succeeds with attribute and query parameters", SetupFixture("attribute(foo)")) +{ + FTA::FT_SETUP_OK(f.blueprint, f.indexEnv, + StringList().add("attribute(foo)").add("query(bar)"), + StringList(), + StringList().add("scalar")); +} + +struct ExecFixture +{ + BlueprintFactory factory; + FtFeatureTest test; + vespalib::string feature; + ExecFixture(const vespalib::string &f) + : factory(), + test(factory, f), + feature(f) + { + factory.addPrototype(std::make_shared<InternalMaxReduceProdJoinBlueprint>()); + setupAttributeVectors(); + setupQueryEnvironment(); + ASSERT_TRUE(test.setup()); + } + + void setupAttributeVectors() { + vespalib::string attrIntArray = "attribute(intarray)"; + vespalib::string attrLongArray = "attribute(longarray)"; + + test.getIndexEnv().getBuilder().addField(FieldType::ATTRIBUTE, CollectionType::ARRAY, attrLongArray); + test.getIndexEnv().getBuilder().addField(FieldType::ATTRIBUTE, CollectionType::ARRAY, attrIntArray); + + std::vector<AttributePtr> attrs; + attrs.push_back(AttributeFactory::createAttribute(attrLongArray, AVC(AVBT::INT64, AVCT::ARRAY))); + attrs.push_back(AttributeFactory::createAttribute(attrIntArray, AVC(AVBT::INT32, AVCT::ARRAY))); + for (const auto &attr : attrs) { + attr->addReservedDoc(); + attr->addDocs(1); + test.getIndexEnv().getAttributeMap().add(attr); + } + + IntegerAttribute *longArray = static_cast<IntegerAttribute *>(attrs[0].get()); + longArray->append(1, 1111, 0); + longArray->append(1, 2222, 0); + longArray->append(1, 78, 0); + + IntegerAttribute *intArray = 
static_cast<IntegerAttribute *>(attrs[1].get()); + intArray->append(1, 78, 0); + intArray->append(1, 1111, 0); + + for (const auto &attr : attrs) { + attr->commit(); + } + } + + void setupQueryEnvironment() { + test.getQueryEnv().getProperties().add("query(wset)", "{1111:1234, 2222:2245}"); + test.getQueryEnv().getProperties().add("query(wsetnomatch)", "{1:1000, 2:2000}"); + test.getQueryEnv().getProperties().add("query(array)", "[1111,2222]"); + } + + bool evaluatesTo(feature_t expected) { + return test.execute(expected); + } + +}; + +TEST_F("require that executor returns correct result for long array", + ExecFixture("internalMaxReduceProdJoin(attribute(longarray),query(wset))")) +{ + EXPECT_TRUE(f.evaluatesTo(2245)); +} + +TEST_F("require that executor returns correct result for int array", + ExecFixture("internalMaxReduceProdJoin(attribute(intarray),query(wset))")) +{ + EXPECT_TRUE(f.evaluatesTo(1234)); +} + +TEST_F("require that executor returns 0 if no items match", + ExecFixture("internalMaxReduceProdJoin(attribute(longarray),query(wsetnomatch))")) +{ + EXPECT_TRUE(f.evaluatesTo(0.0)); +} + +TEST_F("require that executor return 0 if query is not a weighted set", + ExecFixture("internalMaxReduceProdJoin(attribute(longarray),query(array))")) +{ + EXPECT_TRUE(f.evaluatesTo(0.0)); +} + +TEST_MAIN() { TEST_RUN_ALL(); } diff --git a/searchlib/src/vespa/searchlib/features/CMakeLists.txt b/searchlib/src/vespa/searchlib/features/CMakeLists.txt index 6b212bdee0f..339195774ff 100644 --- a/searchlib/src/vespa/searchlib/features/CMakeLists.txt +++ b/searchlib/src/vespa/searchlib/features/CMakeLists.txt @@ -62,5 +62,6 @@ vespa_add_library(searchlib_features OBJECT utils.cpp valuefeature.cpp weighted_set_parser.cpp + max_reduce_prod_join_feature.cpp DEPENDS ) diff --git a/searchlib/src/vespa/searchlib/features/max_reduce_prod_join_feature.cpp b/searchlib/src/vespa/searchlib/features/max_reduce_prod_join_feature.cpp new file mode 100644 index 00000000000..b2c1759527a --- 
/dev/null +++ b/searchlib/src/vespa/searchlib/features/max_reduce_prod_join_feature.cpp @@ -0,0 +1,229 @@ +// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include "max_reduce_prod_join_feature.h" +#include "valuefeature.h" +#include "weighted_set_parser.h" + +#include <vespa/log/log.h> +#include <vespa/searchlib/attribute/attribute.h> +#include <vespa/searchlib/attribute/imported_attribute_vector.h> +#include <vespa/searchlib/attribute/multinumericattribute.h> +#include <vespa/searchlib/features/dotproductfeature.h> +#include <vespa/searchlib/fef/properties.h> +#include <vespa/searchlib/fef/featureexecutor.h> + +LOG_SETUP(".features.internalmaxreduceprodjoin"); + +using namespace search::attribute; +using namespace search::fef; + +using search::features::dotproduct::wset::IntegerVector; + +namespace search { +namespace features { + +/** + * Executor used when array can be accessed directly + */ +template <typename BaseType> +class RawExecutor : public FeatureExecutor { +public: + using A = IntegerAttributeTemplate<BaseType>; + using AT = multivalue::Value<BaseType>; +protected: + const IAttributeVector * _attribute; + IntegerVector _queryVector; + +public: + RawExecutor(const IAttributeVector * attribute, const IntegerVector & queryVector); + void execute(uint32_t docId) override; +}; + +template <typename BaseType> +RawExecutor<BaseType>::RawExecutor(const IAttributeVector *attribute, + const IntegerVector &queryVector) : + FeatureExecutor(), + _attribute(attribute), + _queryVector(queryVector) +{ + _queryVector.syncMap(); +} + +template <typename BaseType> +void +RawExecutor<BaseType>::execute(uint32_t docId) +{ + const AT *values(nullptr); + const A *iattr = dynamic_cast<const A *>(_attribute); + size_t count = iattr->getRawValues(docId, values); + + feature_t val = -DBL_MAX; + if (!_queryVector.getDimMap().empty()) { + for (size_t i = 0; i < count; ++i) { + typename 
IntegerVector::HashMap::const_iterator itr = _queryVector.getDimMap().find(values[i].value()); + if (itr != _queryVector.getDimMap().end()) { + feature_t v = itr->second; // weight from attribute is assumed to be 1.0 + if (v > val) val = v; + } + } + } + outputs().set_number(0, val == -DBL_MAX ? 0.0 : val); +} + +/** + * Executor when array can't be accessed directly + */ +template <typename BaseType> +class BufferedExecutor : public RawExecutor<BaseType> { +private: + WeightedIntegerContent _buffer; + +public: + BufferedExecutor(const IAttributeVector * attribute, const IntegerVector & queryVector); + void execute(uint32_t docId) override; +}; + +template <typename BaseType> +BufferedExecutor<BaseType>::BufferedExecutor(const IAttributeVector *attribute, const IntegerVector &queryVector) : + RawExecutor<BaseType>(attribute, queryVector), + _buffer() +{ +} + +template <typename BaseType> +void +BufferedExecutor<BaseType>::execute(uint32_t docId) +{ + feature_t val = -DBL_MAX; + _buffer.fill(*RawExecutor<BaseType>::_attribute, docId); + for (size_t i = 0; i < _buffer.size(); ++i) { + typename IntegerVector::HashMap::const_iterator itr = RawExecutor<BaseType>::_queryVector.getDimMap().find(_buffer[i].getValue()); + if (itr != RawExecutor<BaseType>::_queryVector.getDimMap().end()) { + feature_t v = itr->second; // weight from attribute is assumed to be 1.0 + if (v > val) val = v; + } + } + RawExecutor<BaseType>::outputs().set_number(0, val == -DBL_MAX ? 
0.0 : val); +} + +/** + * Blueprint + */ +InternalMaxReduceProdJoinBlueprint::InternalMaxReduceProdJoinBlueprint() : + Blueprint("internalMaxReduceProdJoin") +{ +} + +InternalMaxReduceProdJoinBlueprint::~InternalMaxReduceProdJoinBlueprint() +{ +} + +void +InternalMaxReduceProdJoinBlueprint::visitDumpFeatures(const IIndexEnvironment &, + IDumpFeatureVisitor &) const +{ +} + +Blueprint::UP +InternalMaxReduceProdJoinBlueprint::createInstance() const +{ + return Blueprint::UP(new InternalMaxReduceProdJoinBlueprint()); +} + +ParameterDescriptions +InternalMaxReduceProdJoinBlueprint::getDescriptions() const +{ + return ParameterDescriptions().desc().attribute(ParameterCollection::ANY).string(); +} + +bool +InternalMaxReduceProdJoinBlueprint::setup(const IIndexEnvironment &env, const ParameterList &params) +{ + _attribute = params[0].getValue(); + _query = params[1].getValue(); + describeOutput("scalar", "Internal executor for optimized execution of reduce(join(A,Q,f(x,y)(x*y)),max)"); + env.hintAttributeAccess(_attribute); + return true; +} + +bool isImportedAttribute(const IAttributeVector& attribute) noexcept { + return dynamic_cast<const ImportedAttributeVector*>(&attribute) != nullptr; +} + +template<typename A> +bool supportsGetRawValues(const A &attr) noexcept { + try { + const multivalue::Value<typename A::BaseType> *tmp = nullptr; + attr.getRawValues(0, tmp); // Throws if unsupported + return true; + } catch (const std::runtime_error &e) { + (void) e; + return false; + } +} + +template <typename BaseType> +FeatureExecutor & +selectTypedExecutor(const IAttributeVector* attribute, const IntegerVector& vector, vespalib::Stash &stash) +{ + if (!isImportedAttribute(*attribute)) { + using A = IntegerAttributeTemplate<BaseType>; + using VT = multivalue::Value<BaseType>; + using ExactA = MultiValueNumericAttribute<A, VT>; + + const A *iattr = dynamic_cast<const A *>(attribute); + if (supportsGetRawValues(*iattr)) { + const ExactA *exactA = dynamic_cast<const ExactA 
*>(iattr); + if (exactA != nullptr) { + return stash.create<RawExecutor<BaseType>>(attribute, vector); + } + } + } + return stash.create<BufferedExecutor<BaseType>>(attribute, vector); +} + +FeatureExecutor & +selectExecutor(const IAttributeVector* attribute, const IntegerVector& vector, vespalib::Stash &stash) +{ + if (attribute->getCollectionType() == CollectionType::ARRAY) { + switch (attribute->getBasicType()) { + case BasicType::INT32: + return selectTypedExecutor<int32_t>(attribute, vector, stash); + case BasicType::INT64: + return selectTypedExecutor<int64_t>(attribute, vector, stash); + default: + break; + } + } + LOG(warning, "The attribute vector '%s' is not of type " + "array<int/long>, returning executor with default value.", attribute->getName().c_str()); + return stash.create<SingleZeroValueExecutor>(); +} + + +FeatureExecutor & +InternalMaxReduceProdJoinBlueprint::createExecutor(const IQueryEnvironment &env, vespalib::Stash &stash) const +{ + const IAttributeVector *attribute = env.getAttributeContext().getAttribute(_attribute); + if (attribute == nullptr) { + LOG(warning, "The attribute vector '%s' was not found in the attribute manager, " + "returning executor with default value.", + _attribute.c_str()); + return stash.create<SingleZeroValueExecutor>(); + } + Property prop = env.getProperties().lookup(_query); + if (prop.found() && !prop.get().empty()) { + IntegerVector vector; + WeightedSetParser::parse(prop.get(), vector); + if (!vector.getVector().empty()) { + return selectExecutor(attribute, vector, stash); + } + } + return stash.create<SingleZeroValueExecutor>(); +} + + +} +} + + diff --git a/searchlib/src/vespa/searchlib/features/max_reduce_prod_join_feature.h b/searchlib/src/vespa/searchlib/features/max_reduce_prod_join_feature.h new file mode 100644 index 00000000000..5650b91092e --- /dev/null +++ b/searchlib/src/vespa/searchlib/features/max_reduce_prod_join_feature.h @@ -0,0 +1,40 @@ +// Copyright 2017 Yahoo Holdings. 
Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include <vespa/searchlib/fef/blueprint.h> + +namespace search { +namespace features { + +/** + * Feature for the specific replacement of the expression: + * + * reduce(join(tensorFromLabels(A),tensorFromWeightedset(Q),f(x,y)(x*y)),max) + * + * where A is an array attribute of int or long type and Q is a query that parses as + * a weighted set. This expression is replaced with this feature to avoid incurring + * the cost of creating temporary tensors. + */ +class InternalMaxReduceProdJoinBlueprint : public fef::Blueprint { +private: + vespalib::string _attribute; + vespalib::string _query; + +public: + InternalMaxReduceProdJoinBlueprint(); + ~InternalMaxReduceProdJoinBlueprint(); + + fef::ParameterDescriptions getDescriptions() const override; + fef::Blueprint::UP createInstance() const override; + bool setup(const fef::IIndexEnvironment & env, const fef::ParameterList & params) override; + fef::FeatureExecutor &createExecutor(const fef::IQueryEnvironment &env, vespalib::Stash &stash) const override; + void visitDumpFeatures(const fef::IIndexEnvironment & env, fef::IDumpFeatureVisitor & visitor) const override; + +}; + +} +} + + + |