From a4e3de1bc975ba00ce38c9e4909bff5bef51156f Mon Sep 17 00:00:00 2001 From: Håvard Pettersen Date: Tue, 22 Aug 2017 13:31:02 +0000 Subject: max reduce prod join replacer with test --- searchlib/CMakeLists.txt | 5 +- .../max_reduce_prod_join_replacer/CMakeLists.txt | 8 ++ .../max_reduce_prod_join_replacer_test.cpp | 103 ++++++++++++++ .../src/vespa/searchlib/features/CMakeLists.txt | 3 +- .../features/max_reduce_prod_join_replacer.cpp | 151 +++++++++++++++++++++ .../features/max_reduce_prod_join_replacer.h | 33 +++++ 6 files changed, 300 insertions(+), 3 deletions(-) create mode 100644 searchlib/src/tests/features/max_reduce_prod_join_replacer/CMakeLists.txt create mode 100644 searchlib/src/tests/features/max_reduce_prod_join_replacer/max_reduce_prod_join_replacer_test.cpp create mode 100644 searchlib/src/vespa/searchlib/features/max_reduce_prod_join_replacer.cpp create mode 100644 searchlib/src/vespa/searchlib/features/max_reduce_prod_join_replacer.h (limited to 'searchlib') diff --git a/searchlib/CMakeLists.txt b/searchlib/CMakeLists.txt index 0bd70592c3d..4158343af22 100644 --- a/searchlib/CMakeLists.txt +++ b/searchlib/CMakeLists.txt @@ -81,6 +81,8 @@ vespa_define_module( src/tests/attribute/enumstore src/tests/attribute/extendattributes src/tests/attribute/guard + src/tests/attribute/imported_attribute_vector + src/tests/attribute/imported_search_context src/tests/attribute/multi_value_mapping src/tests/attribute/postinglist src/tests/attribute/postinglistattribute @@ -90,8 +92,6 @@ vespa_define_module( src/tests/attribute/sourceselector src/tests/attribute/stringattribute src/tests/attribute/tensorattribute - src/tests/attribute/imported_attribute_vector - src/tests/attribute/imported_search_context src/tests/bitcompression/expgolomb src/tests/bitvector src/tests/btree @@ -133,6 +133,7 @@ vespa_define_module( src/tests/features/imported_dot_product src/tests/features/internal_max_reduce_prod_join_feature src/tests/features/item_raw_score + 
src/tests/features/max_reduce_prod_join_replacer src/tests/features/native_dot_product src/tests/features/ranking_expression src/tests/features/raw_score diff --git a/searchlib/src/tests/features/max_reduce_prod_join_replacer/CMakeLists.txt b/searchlib/src/tests/features/max_reduce_prod_join_replacer/CMakeLists.txt new file mode 100644 index 00000000000..5a1ac74aed0 --- /dev/null +++ b/searchlib/src/tests/features/max_reduce_prod_join_replacer/CMakeLists.txt @@ -0,0 +1,8 @@ +# Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +vespa_add_executable(searchlib_max_reduce_prod_join_replacer_test_app TEST + SOURCES + max_reduce_prod_join_replacer_test.cpp + DEPENDS + searchlib +) +vespa_add_test(NAME searchlib_max_reduce_prod_join_replacer_test_app COMMAND searchlib_max_reduce_prod_join_replacer_test_app) diff --git a/searchlib/src/tests/features/max_reduce_prod_join_replacer/max_reduce_prod_join_replacer_test.cpp b/searchlib/src/tests/features/max_reduce_prod_join_replacer/max_reduce_prod_join_replacer_test.cpp new file mode 100644 index 00000000000..1c6c224cc79 --- /dev/null +++ b/searchlib/src/tests/features/max_reduce_prod_join_replacer/max_reduce_prod_join_replacer_test.cpp @@ -0,0 +1,103 @@ +// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+ +#include + +#include +#include +#include +#include +#include + +using search::features::MaxReduceProdJoinReplacer; +using search::features::rankingexpression::ExpressionReplacer; +using search::features::rankingexpression::FeatureNameExtractor; +using search::fef::Blueprint; +using search::fef::FeatureExecutor; +using search::fef::FeatureType; +using search::fef::IDumpFeatureVisitor; +using search::fef::IIndexEnvironment; +using search::fef::IQueryEnvironment; +using search::fef::test::IndexEnvironment; +using vespalib::Stash; +using vespalib::eval::Function; + +struct MyBlueprint : Blueprint { + bool &was_used; + MyBlueprint(bool &was_used_out) : Blueprint("my_bp"), was_used(was_used_out) {} + void visitDumpFeatures(const IIndexEnvironment &, IDumpFeatureVisitor &) const override {} + Blueprint::UP createInstance() const override { return std::make_unique(was_used); } + bool setup(const IIndexEnvironment &, const std::vector &params) override { + EXPECT_EQUAL(getName(), "my_bp(foo,bar)"); + ASSERT_TRUE(params.size() == 2); + EXPECT_EQUAL(params[0], "foo"); + EXPECT_EQUAL(params[1], "bar"); + describeOutput("out", "my output", FeatureType::number()); + was_used = true; + return true; + } + FeatureExecutor &createExecutor(const IQueryEnvironment &, vespalib::Stash &) const override { + abort(); + } +}; + +bool replaced(const vespalib::string &expr) { + bool was_used = false; + ExpressionReplacer::UP replacer = MaxReduceProdJoinReplacer::create(std::make_unique(was_used)); + Function rank_function = Function::parse(expr, FeatureNameExtractor()); + if (!EXPECT_TRUE(!rank_function.has_error())) { + fprintf(stderr, "parse error: %s\n", rank_function.dump().c_str()); + } + auto result = replacer->maybe_replace(rank_function, IndexEnvironment()); + EXPECT_EQUAL(bool(result), was_used); + return was_used; +} + +TEST("require that matching expression with appropriate inputs is replaced") { + 
EXPECT_TRUE(replaced("reduce(tensorFromLabels(attribute(foo),dim)*tensorFromWeightedSet(query(bar),dim),max)")); +} + +TEST("require that matching expression with unrelated inputs is not replaced") { + EXPECT_TRUE(!replaced("reduce(foo*bar,max)")); +} + +TEST("require that input feature parameter lists have flexible matching") { + EXPECT_TRUE(replaced("reduce(tensorFromLabels( attribute ( foo ) , dim )*tensorFromWeightedSet( query ( bar ) , dim ),max)")); +} + +TEST("require that reduce dimension can be specified explicitly") { + EXPECT_TRUE(replaced("reduce(tensorFromLabels(attribute(foo),dim)*tensorFromWeightedSet(query(bar),dim),max,dim)")); +} + +TEST("require that expression using tensor join with lambda can also be replaced") { + EXPECT_TRUE(replaced("reduce(join(tensorFromLabels(attribute(foo),dim),tensorFromWeightedSet(query(bar),dim),f(x,y)(x*y)),max)")); +} + +TEST("require that parameter ordering does not matter") { + EXPECT_TRUE(replaced("reduce(tensorFromWeightedSet(query(bar),dim)*tensorFromLabels(attribute(foo),dim),max)")); + EXPECT_TRUE(replaced("reduce(join(tensorFromWeightedSet(query(bar),dim),tensorFromLabels(attribute(foo),dim),f(x,y)(x*y)),max)")); + EXPECT_TRUE(replaced("reduce(join(tensorFromLabels(attribute(foo),dim),tensorFromWeightedSet(query(bar),dim),f(x,y)(y*x)),max)")); +} + +TEST("require that source specifiers must match") { + EXPECT_TRUE(!replaced("reduce(tensorFromLabels(query(foo),dim)*tensorFromWeightedSet(attribute(bar),dim),max)")); +} + +TEST("require that reduce operation must match") { + EXPECT_TRUE(!replaced("reduce(tensorFromLabels(attribute(foo),dim)*tensorFromWeightedSet(query(bar),dim),min)")); +} + +TEST("require that join operation must match") { + EXPECT_TRUE(!replaced("reduce(tensorFromLabels(attribute(foo),dim)+tensorFromWeightedSet(query(bar),dim),max)")); + EXPECT_TRUE(!replaced("reduce(join(tensorFromLabels(attribute(foo),dim),tensorFromWeightedSet(query(bar),dim),f(x,y)(x+y)),max)")); + 
EXPECT_TRUE(!replaced("reduce(join(tensorFromLabels(attribute(foo),dim),tensorFromWeightedSet(query(bar),dim),f(x,y)(x*x)),max)")); + EXPECT_TRUE(!replaced("reduce(join(tensorFromLabels(attribute(foo),dim),tensorFromWeightedSet(query(bar),dim),f(x,y)(y*y)),max)")); + EXPECT_TRUE(!replaced("reduce(join(tensorFromLabels(attribute(foo),dim),tensorFromWeightedSet(query(bar),dim),f(x,y)(x*y*1)),max)")); +} + +TEST("require that reduce dimension must match") { + EXPECT_TRUE(!replaced("reduce(tensorFromLabels(attribute(foo),x)*tensorFromWeightedSet(query(bar),x),max,y)")); + EXPECT_TRUE(!replaced("reduce(tensorFromLabels(attribute(foo),x)*tensorFromWeightedSet(query(bar),y),max)")); + EXPECT_TRUE(!replaced("reduce(tensorFromLabels(attribute(foo),x)*tensorFromWeightedSet(query(bar),x),max,x,y)")); +} + +TEST_MAIN() { TEST_RUN_ALL(); } diff --git a/searchlib/src/vespa/searchlib/features/CMakeLists.txt b/searchlib/src/vespa/searchlib/features/CMakeLists.txt index 9227b845966..bd847fe35b5 100644 --- a/searchlib/src/vespa/searchlib/features/CMakeLists.txt +++ b/searchlib/src/vespa/searchlib/features/CMakeLists.txt @@ -31,6 +31,7 @@ vespa_add_library(searchlib_features OBJECT matchcountfeature.cpp matchesfeature.cpp matchfeature.cpp + max_reduce_prod_join_replacer.cpp native_dot_product_feature.cpp nativeattributematchfeature.cpp nativefieldmatchfeature.cpp @@ -42,8 +43,8 @@ vespa_add_library(searchlib_features OBJECT queryfeature.cpp queryterm.cpp querytermcountfeature.cpp - randomfeature.cpp random_normal_feature.cpp + randomfeature.cpp rankingexpressionfeature.cpp raw_score_feature.cpp reverseproximityfeature.cpp diff --git a/searchlib/src/vespa/searchlib/features/max_reduce_prod_join_replacer.cpp b/searchlib/src/vespa/searchlib/features/max_reduce_prod_join_replacer.cpp new file mode 100644 index 00000000000..f69bc064327 --- /dev/null +++ b/searchlib/src/vespa/searchlib/features/max_reduce_prod_join_replacer.cpp @@ -0,0 +1,151 @@ +// Copyright 2017 Yahoo Holdings. 
Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include "max_reduce_prod_join_replacer.h" +#include +#include +#include +#include +#include +#include + +#include +LOG_SETUP(".features.max_reduce_prod_join_replacer"); + +namespace search::features { + +using fef::Blueprint; +using fef::FeatureNameParser; +using fef::IIndexEnvironment; +using rankingexpression::ExpressionReplacer; +using rankingexpression::IntrinsicBlueprintAdapter; +using rankingexpression::IntrinsicExpression; +using vespalib::eval::Aggr; +using vespalib::eval::Function; +using vespalib::eval::nodes::Mul; +using vespalib::eval::nodes::Node; +using vespalib::eval::nodes::Symbol; +using vespalib::eval::nodes::TensorJoin; +using vespalib::eval::nodes::TensorReduce; +using vespalib::eval::nodes::as; + +namespace { + +bool match_params(const Node &a, const Node &b) { + bool first = false; + bool second = false; + for (int i = 0; i < 2; ++i) { + const Node &node = (i == 0) ? a : b; + if (auto symbol = as(node)) { + if (symbol->id() == 0) { + first = true; + } else if (symbol->id() == 1) { + second = true; + } + } + } + return (first && second); +}; + +bool match_prod_join(const Node &node) { + if (auto join = as(node)) { + const Node &root = join->lambda().root(); + if (as(root)) { + return match_params(root.get_child(0), root.get_child(1)); + } + } + return false; +} + +bool match_max_reduce(const Node &node, vespalib::string &reduce_dim) { + auto reduce = as(node); + if (!reduce || (reduce->aggr() != Aggr::MAX) || (reduce->dimensions().size() > 1)) { + return false; + } + if (reduce->dimensions().size() == 1) { + reduce_dim = reduce->dimensions()[0]; + } + return true; +} + +bool match_function(const Function &function, vespalib::string &reduce_dim) { + const Node &expect_max = function.root(); + if ((function.num_params() == 2) && match_max_reduce(expect_max, reduce_dim)) { + const Node &expect_mul = expect_max.get_child(0); + if (as(expect_mul) || 
match_prod_join(expect_mul)) { + return match_params(expect_mul.get_child(0), expect_mul.get_child(1)); + } + } + return false; +} + +void try_extract_param(const vespalib::string &feature, const vespalib::string &wanted_wrapper, + vespalib::string &param, vespalib::string &dim) +{ + FeatureNameParser parser(feature); + if (parser.valid() && + (parser.parameters().size() >= 1) && + (parser.parameters().size() <= 2)) + { + vespalib::string wrapper; + vespalib::string body; + vespalib::string error; + if (Function::unwrap(parser.parameters()[0], wrapper, body, error) && + (wrapper == wanted_wrapper)) + { + param = body; + if (parser.parameters().size() == 2) { + dim = parser.parameters()[1]; + } else { + dim = param; + } + } + } +} + +struct MatchInputs { + vespalib::string attribute; + vespalib::string attribute_dim; + vespalib::string query; + vespalib::string query_dim; + MatchInputs() : attribute(), attribute_dim(), query(), query_dim() {} + void process(const vespalib::string &param) { + if (starts_with(param, "tensorFromLabels")) { + try_extract_param(param, "attribute", attribute, attribute_dim); + } else if (starts_with(param, "tensorFromWeightedSet")) { + try_extract_param(param, "query", query, query_dim); + } + } + bool matched() const { + return (!attribute.empty() && !query.empty() && (attribute_dim == query_dim)); + } +}; + +struct MaxReduceProdJoinReplacerImpl : ExpressionReplacer { + Blueprint::UP proto; + MaxReduceProdJoinReplacerImpl(Blueprint::UP proto_in) + : proto(std::move(proto_in)) {} + IntrinsicExpression::UP maybe_replace(const Function &function, + const IIndexEnvironment &env) const override + { + vespalib::string reduce_dim; + if (match_function(function, reduce_dim)) { + MatchInputs match_inputs; + match_inputs.process(function.param_name(0)); + match_inputs.process(function.param_name(1)); + if (match_inputs.matched() && (reduce_dim.empty() || (reduce_dim == match_inputs.attribute_dim))) { + return IntrinsicBlueprintAdapter::try_create(*proto, 
env, {match_inputs.attribute, match_inputs.query}); + } + } + return IntrinsicExpression::UP(nullptr); + } +}; + +} // namespace search::features:: + +ExpressionReplacer::UP +MaxReduceProdJoinReplacer::create(Blueprint::UP proto) +{ + return std::make_unique(std::move(proto)); +} + +} // namespace search::features diff --git a/searchlib/src/vespa/searchlib/features/max_reduce_prod_join_replacer.h b/searchlib/src/vespa/searchlib/features/max_reduce_prod_join_replacer.h new file mode 100644 index 00000000000..e0d39fe93e0 --- /dev/null +++ b/searchlib/src/vespa/searchlib/features/max_reduce_prod_join_replacer.h @@ -0,0 +1,33 @@ +// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include "internal_max_reduce_prod_join_feature.h" +#include + +namespace search::features { + +/** + * ExpressionReplacer that will replace expressions of the form: + * + * reduce( + * join( + * tensorFromLabels(attribute(A), dim), + * tensorFromWeightedSet(query(Q), dim), + * f(x,y)(x*y) + * ), + * max + * ) + * + * with a parameterized (A, Q) adaptation of the given blueprint + * (default: InternalMaxReduceProdJoinBlueprint). + **/ +struct MaxReduceProdJoinReplacer { + using ExpressionReplacer = rankingexpression::ExpressionReplacer; + static ExpressionReplacer::UP create(fef::Blueprint::UP proto); + static ExpressionReplacer::UP create() { + return create(std::make_unique()); + } +}; + +} // namespace search::features -- cgit v1.2.3