aboutsummaryrefslogtreecommitdiffstats
path: root/searchlib
diff options
context:
space:
mode:
authorHåvard Pettersen <havardpe@oath.com>2017-08-22 13:31:02 +0000
committerHåvard Pettersen <havardpe@oath.com>2017-08-23 13:17:53 +0000
commita4e3de1bc975ba00ce38c9e4909bff5bef51156f (patch)
treeba42543fe83973a82b491675509c1728eb6dc360 /searchlib
parentc9714fd5f0da37b73c075b81da8fe55c007afcc8 (diff)
max reduce prod join replacer with test
Diffstat (limited to 'searchlib')
-rw-r--r--searchlib/CMakeLists.txt5
-rw-r--r--searchlib/src/tests/features/max_reduce_prod_join_replacer/CMakeLists.txt8
-rw-r--r--searchlib/src/tests/features/max_reduce_prod_join_replacer/max_reduce_prod_join_replacer_test.cpp103
-rw-r--r--searchlib/src/vespa/searchlib/features/CMakeLists.txt3
-rw-r--r--searchlib/src/vespa/searchlib/features/max_reduce_prod_join_replacer.cpp151
-rw-r--r--searchlib/src/vespa/searchlib/features/max_reduce_prod_join_replacer.h33
6 files changed, 300 insertions, 3 deletions
diff --git a/searchlib/CMakeLists.txt b/searchlib/CMakeLists.txt
index 0bd70592c3d..4158343af22 100644
--- a/searchlib/CMakeLists.txt
+++ b/searchlib/CMakeLists.txt
@@ -81,6 +81,8 @@ vespa_define_module(
src/tests/attribute/enumstore
src/tests/attribute/extendattributes
src/tests/attribute/guard
+ src/tests/attribute/imported_attribute_vector
+ src/tests/attribute/imported_search_context
src/tests/attribute/multi_value_mapping
src/tests/attribute/postinglist
src/tests/attribute/postinglistattribute
@@ -90,8 +92,6 @@ vespa_define_module(
src/tests/attribute/sourceselector
src/tests/attribute/stringattribute
src/tests/attribute/tensorattribute
- src/tests/attribute/imported_attribute_vector
- src/tests/attribute/imported_search_context
src/tests/bitcompression/expgolomb
src/tests/bitvector
src/tests/btree
@@ -133,6 +133,7 @@ vespa_define_module(
src/tests/features/imported_dot_product
src/tests/features/internal_max_reduce_prod_join_feature
src/tests/features/item_raw_score
+ src/tests/features/max_reduce_prod_join_replacer
src/tests/features/native_dot_product
src/tests/features/ranking_expression
src/tests/features/raw_score
diff --git a/searchlib/src/tests/features/max_reduce_prod_join_replacer/CMakeLists.txt b/searchlib/src/tests/features/max_reduce_prod_join_replacer/CMakeLists.txt
new file mode 100644
index 00000000000..5a1ac74aed0
--- /dev/null
+++ b/searchlib/src/tests/features/max_reduce_prod_join_replacer/CMakeLists.txt
@@ -0,0 +1,8 @@
+# Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+vespa_add_executable(searchlib_max_reduce_prod_join_replacer_test_app TEST
+ SOURCES
+ max_reduce_prod_join_replacer_test.cpp
+ DEPENDS
+ searchlib
+)
+vespa_add_test(NAME searchlib_max_reduce_prod_join_replacer_test_app COMMAND searchlib_max_reduce_prod_join_replacer_test_app)
diff --git a/searchlib/src/tests/features/max_reduce_prod_join_replacer/max_reduce_prod_join_replacer_test.cpp b/searchlib/src/tests/features/max_reduce_prod_join_replacer/max_reduce_prod_join_replacer_test.cpp
new file mode 100644
index 00000000000..1c6c224cc79
--- /dev/null
+++ b/searchlib/src/tests/features/max_reduce_prod_join_replacer/max_reduce_prod_join_replacer_test.cpp
@@ -0,0 +1,103 @@
+// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/vespalib/testkit/test_kit.h>
+
+#include <vespa/eval/eval/function.h>
+#include <vespa/searchlib/features/max_reduce_prod_join_replacer.h>
+#include <vespa/searchlib/features/rankingexpression/feature_name_extractor.h>
+#include <vespa/searchlib/fef/test/indexenvironment.h>
+#include <vespa/searchlib/fef/blueprint.h>
+
+using search::features::MaxReduceProdJoinReplacer;
+using search::features::rankingexpression::ExpressionReplacer;
+using search::features::rankingexpression::FeatureNameExtractor;
+using search::fef::Blueprint;
+using search::fef::FeatureExecutor;
+using search::fef::FeatureType;
+using search::fef::IDumpFeatureVisitor;
+using search::fef::IIndexEnvironment;
+using search::fef::IQueryEnvironment;
+using search::fef::test::IndexEnvironment;
+using vespalib::Stash;
+using vespalib::eval::Function;
+
+/**
+ * Stub blueprint handed to the replacer as prototype. When set up it
+ * checks the name and parameters it was given and raises the shared
+ * 'was_used' flag, which lets the test observe whether the prototype
+ * was actually put to use.
+ **/
+struct MyBlueprint : Blueprint {
+    bool &was_used;
+    MyBlueprint(bool &was_used_out)
+        : Blueprint("my_bp"),
+          was_used(was_used_out)
+    {
+    }
+    void visitDumpFeatures(const IIndexEnvironment &, IDumpFeatureVisitor &) const override {
+        // this blueprint has nothing to dump
+    }
+    Blueprint::UP createInstance() const override {
+        // new instances refer to the same flag as the instance they were created from
+        return std::make_unique<MyBlueprint>(was_used);
+    }
+    bool setup(const IIndexEnvironment &, const std::vector<vespalib::string> &params) override {
+        EXPECT_EQUAL(getName(), "my_bp(foo,bar)");
+        ASSERT_TRUE(params.size() == 2);
+        EXPECT_EQUAL(params[0], "foo");
+        EXPECT_EQUAL(params[1], "bar");
+        describeOutput("out", "my output", FeatureType::number());
+        was_used = true;
+        return true;
+    }
+    FeatureExecutor &createExecutor(const IQueryEnvironment &, Stash &) const override {
+        // no executor is provided; terminate if one is ever requested
+        abort();
+    }
+};
+
+/**
+ * Parse 'expr' and offer it to a replacer created with a MyBlueprint
+ * prototype. Returns whether the blueprint was set up, and checks that
+ * this agrees with whether the replacer produced a result.
+ **/
+bool replaced(const vespalib::string &expr) {
+    bool was_used = false;
+    auto proto = std::make_unique<MyBlueprint>(was_used);
+    ExpressionReplacer::UP replacer = MaxReduceProdJoinReplacer::create(std::move(proto));
+    Function rank_function = Function::parse(expr, FeatureNameExtractor());
+    if (!EXPECT_TRUE(!rank_function.has_error())) {
+        fprintf(stderr, "parse error: %s\n", rank_function.dump().c_str());
+    }
+    IndexEnvironment index_env;
+    auto result = replacer->maybe_replace(rank_function, index_env);
+    EXPECT_EQUAL(bool(result), was_used);
+    return was_used;
+}
+
+TEST("require that matching expression with appropriate inputs is replaced") { // canonical form: product of the two input features, max-reduced
+ EXPECT_TRUE(replaced("reduce(tensorFromLabels(attribute(foo),dim)*tensorFromWeightedSet(query(bar),dim),max)"));
+}
+
+TEST("require that matching expression with unrelated inputs is not replaced") { // same expression shape, but the inputs are plain features
+ EXPECT_TRUE(!replaced("reduce(foo*bar,max)"));
+}
+
+TEST("require that input feature parameter lists have flexible matching") { // extra whitespace inside the feature parameter lists
+ EXPECT_TRUE(replaced("reduce(tensorFromLabels( attribute ( foo ) , dim )*tensorFromWeightedSet( query ( bar ) , dim ),max)"));
+}
+
+TEST("require that reduce dimension can be specified explicitly") { // reduce names the same dimension as the inputs
+ EXPECT_TRUE(replaced("reduce(tensorFromLabels(attribute(foo),dim)*tensorFromWeightedSet(query(bar),dim),max,dim)"));
+}
+
+TEST("require that expression using tensor join with lambda can also be replaced") { // join(a,b,f(x,y)(x*y)) instead of a*b
+ EXPECT_TRUE(replaced("reduce(join(tensorFromLabels(attribute(foo),dim),tensorFromWeightedSet(query(bar),dim),f(x,y)(x*y)),max)"));
+}
+
+TEST("require that parameter ordering does not matter") { // swapped inputs and swapped lambda operands
+ EXPECT_TRUE(replaced("reduce(tensorFromWeightedSet(query(bar),dim)*tensorFromLabels(attribute(foo),dim),max)"));
+ EXPECT_TRUE(replaced("reduce(join(tensorFromWeightedSet(query(bar),dim),tensorFromLabels(attribute(foo),dim),f(x,y)(x*y)),max)"));
+ EXPECT_TRUE(replaced("reduce(join(tensorFromLabels(attribute(foo),dim),tensorFromWeightedSet(query(bar),dim),f(x,y)(y*x)),max)"));
+}
+
+TEST("require that source specifiers must match") { // query(...) and attribute(...) wrappers are swapped between the two features
+ EXPECT_TRUE(!replaced("reduce(tensorFromLabels(query(foo),dim)*tensorFromWeightedSet(attribute(bar),dim),max)"));
+}
+
+TEST("require that reduce operation must match") { // min instead of max
+ EXPECT_TRUE(!replaced("reduce(tensorFromLabels(attribute(foo),dim)*tensorFromWeightedSet(query(bar),dim),min)"));
+}
+
+TEST("require that join operation must match") { // anything but a plain product of both parameters is rejected
+ EXPECT_TRUE(!replaced("reduce(tensorFromLabels(attribute(foo),dim)+tensorFromWeightedSet(query(bar),dim),max)"));
+ EXPECT_TRUE(!replaced("reduce(join(tensorFromLabels(attribute(foo),dim),tensorFromWeightedSet(query(bar),dim),f(x,y)(x+y)),max)"));
+ EXPECT_TRUE(!replaced("reduce(join(tensorFromLabels(attribute(foo),dim),tensorFromWeightedSet(query(bar),dim),f(x,y)(x*x)),max)")); // only the first lambda parameter is used
+ EXPECT_TRUE(!replaced("reduce(join(tensorFromLabels(attribute(foo),dim),tensorFromWeightedSet(query(bar),dim),f(x,y)(y*y)),max)")); // only the second lambda parameter is used
+ EXPECT_TRUE(!replaced("reduce(join(tensorFromLabels(attribute(foo),dim),tensorFromWeightedSet(query(bar),dim),f(x,y)(x*y*1)),max)")); // product with an extra factor
+}
+
+TEST("require that reduce dimension must match") {
+ EXPECT_TRUE(!replaced("reduce(tensorFromLabels(attribute(foo),x)*tensorFromWeightedSet(query(bar),x),max,y)")); // reduce dimension differs from the input dimension
+ EXPECT_TRUE(!replaced("reduce(tensorFromLabels(attribute(foo),x)*tensorFromWeightedSet(query(bar),y),max)")); // the two inputs use different dimensions
+ EXPECT_TRUE(!replaced("reduce(tensorFromLabels(attribute(foo),x)*tensorFromWeightedSet(query(bar),x),max,x,y)")); // more than one reduce dimension
+}
+
+TEST_MAIN() { TEST_RUN_ALL(); }
diff --git a/searchlib/src/vespa/searchlib/features/CMakeLists.txt b/searchlib/src/vespa/searchlib/features/CMakeLists.txt
index 9227b845966..bd847fe35b5 100644
--- a/searchlib/src/vespa/searchlib/features/CMakeLists.txt
+++ b/searchlib/src/vespa/searchlib/features/CMakeLists.txt
@@ -31,6 +31,7 @@ vespa_add_library(searchlib_features OBJECT
matchcountfeature.cpp
matchesfeature.cpp
matchfeature.cpp
+ max_reduce_prod_join_replacer.cpp
native_dot_product_feature.cpp
nativeattributematchfeature.cpp
nativefieldmatchfeature.cpp
@@ -42,8 +43,8 @@ vespa_add_library(searchlib_features OBJECT
queryfeature.cpp
queryterm.cpp
querytermcountfeature.cpp
- randomfeature.cpp
random_normal_feature.cpp
+ randomfeature.cpp
rankingexpressionfeature.cpp
raw_score_feature.cpp
reverseproximityfeature.cpp
diff --git a/searchlib/src/vespa/searchlib/features/max_reduce_prod_join_replacer.cpp b/searchlib/src/vespa/searchlib/features/max_reduce_prod_join_replacer.cpp
new file mode 100644
index 00000000000..f69bc064327
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/features/max_reduce_prod_join_replacer.cpp
@@ -0,0 +1,151 @@
+// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include "max_reduce_prod_join_replacer.h"
+#include <vespa/eval/eval/function.h>
+#include <vespa/eval/eval/basic_nodes.h>
+#include <vespa/eval/eval/operator_nodes.h>
+#include <vespa/eval/eval/tensor_nodes.h>
+#include <vespa/searchlib/features/rankingexpression/intrinsic_blueprint_adapter.h>
+#include <vespa/searchlib/fef/featurenameparser.h>
+
+#include <vespa/log/log.h>
+LOG_SETUP(".features.max_reduce_prod_join_replacer");
+
+namespace search::features {
+
+using fef::Blueprint;
+using fef::FeatureNameParser;
+using fef::IIndexEnvironment;
+using rankingexpression::ExpressionReplacer;
+using rankingexpression::IntrinsicBlueprintAdapter;
+using rankingexpression::IntrinsicExpression;
+using vespalib::eval::Aggr;
+using vespalib::eval::Function;
+using vespalib::eval::nodes::Mul;
+using vespalib::eval::nodes::Node;
+using vespalib::eval::nodes::Symbol;
+using vespalib::eval::nodes::TensorJoin;
+using vespalib::eval::nodes::TensorReduce;
+using vespalib::eval::nodes::as;
+
+namespace {
+
+/**
+ * Check that 'a' and 'b' are both symbols and that together they cover
+ * symbol id 0 and symbol id 1, in either order (presumably the first
+ * and second parameter of the enclosing function or lambda).
+ **/
+bool match_params(const Node &a, const Node &b) {
+    auto lhs = as<Symbol>(a);
+    auto rhs = as<Symbol>(b);
+    if (!lhs || !rhs) {
+        // a single symbol cannot cover both ids
+        return false;
+    }
+    const bool in_order = (lhs->id() == 0) && (rhs->id() == 1);
+    const bool swapped = (lhs->id() == 1) && (rhs->id() == 0);
+    return (in_order || swapped);
+}
+
+/**
+ * Check that 'node' is a tensor join whose lambda is a plain
+ * multiplication of its two parameters (in either order).
+ **/
+bool match_prod_join(const Node &node) {
+    auto join = as<TensorJoin>(node);
+    if (!join) {
+        return false;
+    }
+    const Node &lambda_root = join->lambda().root();
+    if (!as<Mul>(lambda_root)) {
+        return false;
+    }
+    return match_params(lambda_root.get_child(0), lambda_root.get_child(1));
+}
+
+/**
+ * Check that 'node' is a tensor reduce using the max aggregator over at
+ * most one explicitly listed dimension. If exactly one dimension is
+ * listed it is stored in 'reduce_dim'; otherwise 'reduce_dim' is left
+ * untouched.
+ **/
+bool match_max_reduce(const Node &node, vespalib::string &reduce_dim) {
+    auto reduce = as<TensorReduce>(node);
+    if (!reduce) {
+        return false;
+    }
+    if (reduce->aggr() != Aggr::MAX) {
+        return false;
+    }
+    const auto &dims = reduce->dimensions();
+    if (dims.size() > 1) {
+        return false;
+    }
+    if (!dims.empty()) {
+        reduce_dim = dims[0];
+    }
+    return true;
+}
+
+/**
+ * Check that 'function' takes exactly two parameters and has the shape
+ * of a max-reduce applied to the product of those two parameters. The
+ * product may be a plain multiplication or a tensor join with a
+ * multiplying lambda. An explicitly given reduce dimension is passed
+ * back through 'reduce_dim'.
+ **/
+bool match_function(const Function &function, vespalib::string &reduce_dim) {
+    if (function.num_params() != 2) {
+        return false;
+    }
+    const Node &reduce_node = function.root();
+    if (!match_max_reduce(reduce_node, reduce_dim)) {
+        return false;
+    }
+    const Node &product = reduce_node.get_child(0);
+    if (!as<Mul>(product) && !match_prod_join(product)) {
+        return false;
+    }
+    // for both Mul and TensorJoin the first two children are the operands
+    return match_params(product.get_child(0), product.get_child(1));
+}
+
+/**
+ * Try to pick apart a feature name of the form
+ * 'name(wanted_wrapper(body)[,dim])'. On success 'param' is set to the
+ * wrapped body and 'dim' to the second feature parameter, or to the
+ * body itself when no second parameter is given. On any mismatch both
+ * outputs are left untouched.
+ **/
+void try_extract_param(const vespalib::string &feature, const vespalib::string &wanted_wrapper,
+                       vespalib::string &param, vespalib::string &dim)
+{
+    FeatureNameParser parser(feature);
+    if (!parser.valid()) {
+        return;
+    }
+    const auto &feature_params = parser.parameters();
+    if (feature_params.empty() || (feature_params.size() > 2)) {
+        return;
+    }
+    vespalib::string wrapper;
+    vespalib::string body;
+    vespalib::string error;
+    if (!Function::unwrap(feature_params[0], wrapper, body, error)) {
+        return;
+    }
+    if (!(wrapper == wanted_wrapper)) {
+        return;
+    }
+    param = body;
+    if (feature_params.size() == 2) {
+        dim = feature_params[1];
+    } else {
+        // no explicit dimension given; fall back to the wrapped name
+        dim = param;
+    }
+}
+
+/**
+ * Collects the attribute and query names (and their dimensions) found
+ * in the input features of a candidate expression.
+ **/
+struct MatchInputs {
+    vespalib::string attribute;
+    vespalib::string attribute_dim;
+    vespalib::string query;
+    vespalib::string query_dim;
+    MatchInputs()
+        : attribute(),
+          attribute_dim(),
+          query(),
+          query_dim()
+    {
+    }
+    // Inspect a single input feature name; features other than the two
+    // handled below are ignored.
+    void process(const vespalib::string &param) {
+        if (starts_with(param, "tensorFromLabels")) {
+            try_extract_param(param, "attribute", attribute, attribute_dim);
+            return;
+        }
+        if (starts_with(param, "tensorFromWeightedSet")) {
+            try_extract_param(param, "query", query, query_dim);
+        }
+    }
+    // Both inputs must have been found and they must share a dimension.
+    bool matched() const {
+        if (attribute.empty() || query.empty()) {
+            return false;
+        }
+        return (attribute_dim == query_dim);
+    }
+};
+
+/**
+ * Replacer implementation: recognizes the max-reduce/product-join
+ * pattern and, when its inputs qualify, tries to create an intrinsic
+ * expression from the prototype blueprint using the attribute and
+ * query names as parameters.
+ **/
+struct MaxReduceProdJoinReplacerImpl : ExpressionReplacer {
+    Blueprint::UP proto;
+    MaxReduceProdJoinReplacerImpl(Blueprint::UP proto_in)
+        : proto(std::move(proto_in))
+    {
+    }
+    IntrinsicExpression::UP maybe_replace(const Function &function,
+                                          const IIndexEnvironment &env) const override
+    {
+        vespalib::string reduce_dim;
+        if (!match_function(function, reduce_dim)) {
+            return IntrinsicExpression::UP(nullptr);
+        }
+        MatchInputs match_inputs;
+        match_inputs.process(function.param_name(0));
+        match_inputs.process(function.param_name(1));
+        if (!match_inputs.matched()) {
+            return IntrinsicExpression::UP(nullptr);
+        }
+        // an explicitly given reduce dimension must be the one the inputs use
+        if (!reduce_dim.empty() && !(reduce_dim == match_inputs.attribute_dim)) {
+            return IntrinsicExpression::UP(nullptr);
+        }
+        return IntrinsicBlueprintAdapter::try_create(*proto, env, {match_inputs.attribute, match_inputs.query});
+    }
+};
+
+} // namespace search::features::<unnamed>
+
+/**
+ * Create a replacer that uses 'proto' as the prototype blueprint for
+ * the expressions it replaces.
+ **/
+ExpressionReplacer::UP
+MaxReduceProdJoinReplacer::create(Blueprint::UP proto)
+{
+    ExpressionReplacer::UP replacer = std::make_unique<MaxReduceProdJoinReplacerImpl>(std::move(proto));
+    return replacer;
+}
+
+} // namespace search::features
diff --git a/searchlib/src/vespa/searchlib/features/max_reduce_prod_join_replacer.h b/searchlib/src/vespa/searchlib/features/max_reduce_prod_join_replacer.h
new file mode 100644
index 00000000000..e0d39fe93e0
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/features/max_reduce_prod_join_replacer.h
@@ -0,0 +1,33 @@
+// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include "internal_max_reduce_prod_join_feature.h"
+#include <vespa/searchlib/features/rankingexpression/expression_replacer.h>
+
+namespace search::features {
+
+/**
+ * ExpressionReplacer that will replace expressions of the form:
+ *
+ *   reduce(
+ *     join(
+ *       tensorFromLabels(attribute(A), dim),
+ *       tensorFromWeightedSet(query(Q), dim),
+ *       f(x,y)(x*y)
+ *     ),
+ *     max
+ *   )
+ *
+ * with a parameterized (A, Q) adaptation of the given blueprint
+ * (default: InternalMaxReduceProdJoinBlueprint).
+ **/
+struct MaxReduceProdJoinReplacer {
+ using ExpressionReplacer = rankingexpression::ExpressionReplacer;
+ static ExpressionReplacer::UP create(fef::Blueprint::UP proto);
+ static ExpressionReplacer::UP create() {
+ return create(std::make_unique<InternalMaxReduceProdJoinBlueprint>());
+ }
+};
+
+} // namespace search::features