summary | refs | log | tree | commit | diff | stats
path: root/searchlib
diff options
context:
space:
mode:
author: Håvard Pettersen <havardpe@oath.com> 2019-10-04 14:15:47 +0000
committer: Håvard Pettersen <havardpe@oath.com> 2019-10-04 14:15:47 +0000
commit: 6b54f52fbaf2a108818033f6060d74e77618961d (patch)
tree: 18c8fc25cdf5a48a176ca1a350eebb7f782e08f4 /searchlib
parent: 1c0f26aa1793c2fcfee88bc95220e3cd63db2b8c (diff)
enable use of fast forest gbdt evaluation for ranking
Diffstat (limited to 'searchlib')
-rw-r--r-- searchlib/src/tests/fef/properties/properties_test.cpp | 8
-rw-r--r-- searchlib/src/tests/fef/rank_program/rank_program_test.cpp | 31
-rw-r--r-- searchlib/src/vespa/searchlib/features/rankingexpressionfeature.cpp | 70
-rw-r--r-- searchlib/src/vespa/searchlib/features/rankingexpressionfeature.h | 2
-rw-r--r-- searchlib/src/vespa/searchlib/fef/indexproperties.cpp | 4
-rw-r--r-- searchlib/src/vespa/searchlib/fef/indexproperties.h | 7
-rw-r--r-- searchlib/src/vespa/searchlib/fef/rank_program.h | 1
7 files changed, 110 insertions, 13 deletions
diff --git a/searchlib/src/tests/fef/properties/properties_test.cpp b/searchlib/src/tests/fef/properties/properties_test.cpp
index df868de3a97..b7478da3f71 100644
--- a/searchlib/src/tests/fef/properties/properties_test.cpp
+++ b/searchlib/src/tests/fef/properties/properties_test.cpp
@@ -226,6 +226,14 @@ TEST("test stuff") {
EXPECT_TRUE(!eval::LazyExpressions::check(p, true));
EXPECT_TRUE(!eval::LazyExpressions::check(p, false));
}
+ { // vespa.eval.use_fast_forest
+ EXPECT_EQUAL(eval::UseFastForest::NAME, vespalib::string("vespa.eval.use_fast_forest"));
+ EXPECT_EQUAL(eval::UseFastForest::DEFAULT_VALUE, false);
+ Properties p;
+ EXPECT_EQUAL(eval::UseFastForest::check(p), false);
+ p.add("vespa.eval.use_fast_forest", "true");
+ EXPECT_EQUAL(eval::UseFastForest::check(p), true);
+ }
{ // vespa.rank.firstphase
EXPECT_EQUAL(rank::FirstPhase::NAME, vespalib::string("vespa.rank.firstphase"));
EXPECT_EQUAL(rank::FirstPhase::DEFAULT_VALUE, vespalib::string("nativeRank"));
diff --git a/searchlib/src/tests/fef/rank_program/rank_program_test.cpp b/searchlib/src/tests/fef/rank_program/rank_program_test.cpp
index 7e28178e5f7..d1b0f8112f3 100644
--- a/searchlib/src/tests/fef/rank_program/rank_program_test.cpp
+++ b/searchlib/src/tests/fef/rank_program/rank_program_test.cpp
@@ -90,6 +90,10 @@ struct Fixture {
value ? "true" : "false");
return *this;
}
+ Fixture &use_fast_forest() {
+ indexEnv.getProperties().add(indexproperties::eval::UseFastForest::NAME, "true");
+ return *this;
+ }
Fixture &add_expr(const vespalib::string &name, const vespalib::string &expr) {
vespalib::string feature_name = expr_feature(name);
vespalib::string expr_name = feature_name + ".rankingScript";
@@ -113,6 +117,11 @@ struct Fixture {
program.setup(*match_data, queryEnv, overrides);
return *this;
}
+ vespalib::string final_executor_name() const {
+ size_t n = program.num_executors();
+ ASSERT_TRUE(n > 0);
+ return program.get_executor(n-1).getClassName();
+ }
double get(uint32_t docid = default_docid) {
auto result = program.get_seeds();
EXPECT_EQUAL(1u, result.num_features());
@@ -360,4 +369,26 @@ TEST_F("require that interpreted ranking expressions are pure", Fixture()) {
EXPECT_EQUAL(f1.get(), 7.0);
}
+const vespalib::string tree_expr = "if(value(1)<2,1,2)+if(value(2)<1,10,20)";
+
+TEST_F("require that fast-forest gbdt evaluation can be enabled", Fixture()) {
+ f1.use_fast_forest().add_expr("rank", tree_expr).compile();
+ EXPECT_EQUAL(f1.get(), 21.0);
+ EXPECT_EQUAL(f1.final_executor_name(), "search::features::FastForestExecutor");
+}
+
+TEST_F("require that fast-forest gbdt evaluation is disabled by default", Fixture()) {
+ f1.add_expr("rank", tree_expr).compile();
+ EXPECT_EQUAL(f1.get(), 21.0);
+ EXPECT_EQUAL(f1.final_executor_name(), "search::features::CompiledRankingExpressionExecutor");
+}
+
+TEST_F("require that fast-forest gbdt evaluation is pure", Fixture()) {
+ f1.use_fast_forest().add_expr("rank", tree_expr).compile();
+ EXPECT_EQUAL(3u, count_features(f1.program));
+ EXPECT_EQUAL(3u, count_const_features(f1.program));
+ EXPECT_EQUAL(f1.get(), 21.0);
+ EXPECT_EQUAL(f1.final_executor_name(), "search::features::FastForestExecutor");
+}
+
TEST_MAIN() { TEST_RUN_ALL(); }
diff --git a/searchlib/src/vespa/searchlib/features/rankingexpressionfeature.cpp b/searchlib/src/vespa/searchlib/features/rankingexpressionfeature.cpp
index 2733ec62105..a4b2280fa57 100644
--- a/searchlib/src/vespa/searchlib/features/rankingexpressionfeature.cpp
+++ b/searchlib/src/vespa/searchlib/features/rankingexpressionfeature.cpp
@@ -11,20 +11,21 @@
#include <vespa/log/log.h>
LOG_SETUP(".features.rankingexpression");
-using vespalib::eval::Function;
-using vespalib::eval::PassParams;
+using search::fef::FeatureType;
+using vespalib::ArrayRef;
+using vespalib::ConstArrayRef;
using vespalib::eval::CompileCache;
using vespalib::eval::CompiledFunction;
+using vespalib::eval::DoubleValue;
+using vespalib::eval::Function;
using vespalib::eval::InterpretedFunction;
using vespalib::eval::LazyParams;
-using vespalib::eval::ValueType;
-using vespalib::eval::Value;
-using vespalib::eval::DoubleValue;
using vespalib::eval::NodeTypes;
+using vespalib::eval::PassParams;
+using vespalib::eval::Value;
+using vespalib::eval::ValueType;
+using vespalib::eval::gbdt::FastForest;
using vespalib::tensor::DefaultTensorEngine;
-using search::fef::FeatureType;
-using vespalib::ArrayRef;
-using vespalib::ConstArrayRef;
namespace search::features {
@@ -43,6 +44,23 @@ vespalib::string list_issues(const std::vector<vespalib::string> &issues) {
//-----------------------------------------------------------------------------
/**
+ * Implements the executor for fast forest gbdt evaluation
+ **/
+class FastForestExecutor : public fef::FeatureExecutor
+{
+private:
+ const FastForest &_forest;
+ FastForest::Context _ctx;
+
+public:
+ FastForestExecutor(const FastForest &forest);
+ bool isPure() override { return true; }
+ void execute(uint32_t docId) override;
+};
+
+//-----------------------------------------------------------------------------
+
+/**
* Implements the executor for compiled ranking expressions
**/
class CompiledRankingExpressionExecutor : public fef::FeatureExecutor
@@ -110,6 +128,22 @@ public:
//-----------------------------------------------------------------------------
+FastForestExecutor::FastForestExecutor(const FastForest &forest)
+ : _forest(forest),
+ _ctx(_forest)
+{
+}
+
+void
+FastForestExecutor::execute(uint32_t)
+{
+ const auto &params = inputs();
+ double result = _forest.eval(_ctx, [&params](size_t p){ return params.get_number(p); });
+ outputs().set_number(0, result);
+}
+
+//-----------------------------------------------------------------------------
+
CompiledRankingExpressionExecutor::CompiledRankingExpressionExecutor(const CompiledFunction &compiled_function)
: _ranking_function(compiled_function.get_function()),
_params(compiled_function.num_params(), 0.0)
@@ -178,6 +212,7 @@ RankingExpressionBlueprint::RankingExpressionBlueprint(rankingexpression::Expres
: fef::Blueprint("rankingExpression"),
_expression_replacer(std::move(replacer)),
_intrinsic_expression(),
+ _fast_forest(),
_interpreted_function(),
_compile_token(),
_input_is_object()
@@ -259,11 +294,17 @@ RankingExpressionBlueprint::setup(const fef::IIndexEnvironment &env,
// avoid costly compilation when only verifying setup
if (env.getFeatureMotivation() != env.FeatureMotivation::VERIFY_SETUP) {
if (do_compile) {
- bool suggest_lazy = CompiledFunction::should_use_lazy_params(rank_function);
- if (fef::indexproperties::eval::LazyExpressions::check(env.getProperties(), suggest_lazy)) {
- _compile_token = CompileCache::compile(rank_function, PassParams::LAZY);
- } else {
- _compile_token = CompileCache::compile(rank_function, PassParams::ARRAY);
+ // fast forest evaluation is a possible replacement for compiled tree models
+ if (fef::indexproperties::eval::UseFastForest::check(env.getProperties())) {
+ _fast_forest = FastForest::try_convert(rank_function);
+ }
+ if (!_fast_forest) {
+ bool suggest_lazy = CompiledFunction::should_use_lazy_params(rank_function);
+ if (fef::indexproperties::eval::LazyExpressions::check(env.getProperties(), suggest_lazy)) {
+ _compile_token = CompileCache::compile(rank_function, PassParams::LAZY);
+ } else {
+ _compile_token = CompileCache::compile(rank_function, PassParams::ARRAY);
+ }
}
} else {
_interpreted_function.reset(new InterpretedFunction(DefaultTensorEngine::ref(), rank_function, node_types));
@@ -300,6 +341,9 @@ RankingExpressionBlueprint::createExecutor(const fef::IQueryEnvironment &env, ve
ConstArrayRef<char> input_is_object = stash.copy_array<char>(_input_is_object);
return stash.create<InterpretedRankingExpressionExecutor>(*_interpreted_function, input_is_object);
}
+ if (_fast_forest) {
+ return stash.create<FastForestExecutor>(*_fast_forest);
+ }
assert(_compile_token.get() != nullptr); // will be nullptr for VERIFY_SETUP feature motivation
if (_compile_token->get().pass_params() == PassParams::ARRAY) {
return stash.create<CompiledRankingExpressionExecutor>(_compile_token->get());
diff --git a/searchlib/src/vespa/searchlib/features/rankingexpressionfeature.h b/searchlib/src/vespa/searchlib/features/rankingexpressionfeature.h
index 104e8d63a70..579c8cf91a7 100644
--- a/searchlib/src/vespa/searchlib/features/rankingexpressionfeature.h
+++ b/searchlib/src/vespa/searchlib/features/rankingexpressionfeature.h
@@ -2,6 +2,7 @@
#pragma once
#include <vespa/searchlib/fef/blueprint.h>
+#include <vespa/eval/eval/fast_forest.h>
#include <vespa/eval/eval/interpreted_function.h>
#include <vespa/eval/eval/llvm/compile_cache.h>
#include <vespa/searchlib/features/rankingexpression/expression_replacer.h>
@@ -19,6 +20,7 @@ class RankingExpressionBlueprint : public fef::Blueprint
private:
rankingexpression::ExpressionReplacer::SP _expression_replacer;
rankingexpression::IntrinsicExpression::UP _intrinsic_expression;
+ vespalib::eval::gbdt::FastForest::UP _fast_forest;
vespalib::eval::InterpretedFunction::UP _interpreted_function;
vespalib::eval::CompileCache::Token::UP _compile_token;
std::vector<char> _input_is_object;
diff --git a/searchlib/src/vespa/searchlib/fef/indexproperties.cpp b/searchlib/src/vespa/searchlib/fef/indexproperties.cpp
index a7df39faf2f..ce1bd69cc4c 100644
--- a/searchlib/src/vespa/searchlib/fef/indexproperties.cpp
+++ b/searchlib/src/vespa/searchlib/fef/indexproperties.cpp
@@ -84,6 +84,10 @@ LazyExpressions::check(const Properties &props, bool default_value)
return lookupBool(props, NAME, default_value);
}
+const vespalib::string UseFastForest::NAME("vespa.eval.use_fast_forest");
+const bool UseFastForest::DEFAULT_VALUE(false);
+bool UseFastForest::check(const Properties &props) { return lookupBool(props, NAME, DEFAULT_VALUE); }
+
} // namespace eval
namespace rank {
diff --git a/searchlib/src/vespa/searchlib/fef/indexproperties.h b/searchlib/src/vespa/searchlib/fef/indexproperties.h
index 9adf4487ec5..57aa24222a3 100644
--- a/searchlib/src/vespa/searchlib/fef/indexproperties.h
+++ b/searchlib/src/vespa/searchlib/fef/indexproperties.h
@@ -26,6 +26,13 @@ struct LazyExpressions {
static bool check(const Properties &props, bool default_value);
};
+// use fast-forest evaluation for gbdt expressions. affects rank/summary/dump
+struct UseFastForest {
+ static const vespalib::string NAME;
+ static const bool DEFAULT_VALUE;
+ static bool check(const Properties &props);
+};
+
} // namespace eval
namespace rank {
diff --git a/searchlib/src/vespa/searchlib/fef/rank_program.h b/searchlib/src/vespa/searchlib/fef/rank_program.h
index 3a92fc874a4..e1014df5ee5 100644
--- a/searchlib/src/vespa/searchlib/fef/rank_program.h
+++ b/searchlib/src/vespa/searchlib/fef/rank_program.h
@@ -59,6 +59,7 @@ public:
~RankProgram();
size_t num_executors() const { return _executors.size(); }
+ const FeatureExecutor &get_executor(size_t i) const { return *_executors[i]; }
/**
* Set up this rank program by creating the needed feature