diff options
author | Håvard Pettersen <havardpe@oath.com> | 2019-10-04 14:15:47 +0000 |
---|---|---|
committer | Håvard Pettersen <havardpe@oath.com> | 2019-10-04 14:15:47 +0000 |
commit | 6b54f52fbaf2a108818033f6060d74e77618961d (patch) | |
tree | 18c8fc25cdf5a48a176ca1a350eebb7f782e08f4 /searchlib/src | |
parent | 1c0f26aa1793c2fcfee88bc95220e3cd63db2b8c (diff) |
enable use of fast forest gbdt evaluation for ranking
Diffstat (limited to 'searchlib/src')
7 files changed, 110 insertions, 13 deletions
diff --git a/searchlib/src/tests/fef/properties/properties_test.cpp b/searchlib/src/tests/fef/properties/properties_test.cpp index df868de3a97..b7478da3f71 100644 --- a/searchlib/src/tests/fef/properties/properties_test.cpp +++ b/searchlib/src/tests/fef/properties/properties_test.cpp @@ -226,6 +226,14 @@ TEST("test stuff") { EXPECT_TRUE(!eval::LazyExpressions::check(p, true)); EXPECT_TRUE(!eval::LazyExpressions::check(p, false)); } + { // vespa.eval.use_fast_forest + EXPECT_EQUAL(eval::UseFastForest::NAME, vespalib::string("vespa.eval.use_fast_forest")); + EXPECT_EQUAL(eval::UseFastForest::DEFAULT_VALUE, false); + Properties p; + EXPECT_EQUAL(eval::UseFastForest::check(p), false); + p.add("vespa.eval.use_fast_forest", "true"); + EXPECT_EQUAL(eval::UseFastForest::check(p), true); + } { // vespa.rank.firstphase EXPECT_EQUAL(rank::FirstPhase::NAME, vespalib::string("vespa.rank.firstphase")); EXPECT_EQUAL(rank::FirstPhase::DEFAULT_VALUE, vespalib::string("nativeRank")); diff --git a/searchlib/src/tests/fef/rank_program/rank_program_test.cpp b/searchlib/src/tests/fef/rank_program/rank_program_test.cpp index 7e28178e5f7..d1b0f8112f3 100644 --- a/searchlib/src/tests/fef/rank_program/rank_program_test.cpp +++ b/searchlib/src/tests/fef/rank_program/rank_program_test.cpp @@ -90,6 +90,10 @@ struct Fixture { value ? 
"true" : "false"); return *this; } + Fixture &use_fast_forest() { + indexEnv.getProperties().add(indexproperties::eval::UseFastForest::NAME, "true"); + return *this; + } Fixture &add_expr(const vespalib::string &name, const vespalib::string &expr) { vespalib::string feature_name = expr_feature(name); vespalib::string expr_name = feature_name + ".rankingScript"; @@ -113,6 +117,11 @@ struct Fixture { program.setup(*match_data, queryEnv, overrides); return *this; } + vespalib::string final_executor_name() const { + size_t n = program.num_executors(); + ASSERT_TRUE(n > 0); + return program.get_executor(n-1).getClassName(); + } double get(uint32_t docid = default_docid) { auto result = program.get_seeds(); EXPECT_EQUAL(1u, result.num_features()); @@ -360,4 +369,26 @@ TEST_F("require that interpreted ranking expressions are pure", Fixture()) { EXPECT_EQUAL(f1.get(), 7.0); } +const vespalib::string tree_expr = "if(value(1)<2,1,2)+if(value(2)<1,10,20)"; + +TEST_F("require that fast-forest gbdt evaluation can be enabled", Fixture()) { + f1.use_fast_forest().add_expr("rank", tree_expr).compile(); + EXPECT_EQUAL(f1.get(), 21.0); + EXPECT_EQUAL(f1.final_executor_name(), "search::features::FastForestExecutor"); +} + +TEST_F("require that fast-forest gbdt evaluation is disabled by default", Fixture()) { + f1.add_expr("rank", tree_expr).compile(); + EXPECT_EQUAL(f1.get(), 21.0); + EXPECT_EQUAL(f1.final_executor_name(), "search::features::CompiledRankingExpressionExecutor"); +} + +TEST_F("require that fast-forest gbdt evaluation is pure", Fixture()) { + f1.use_fast_forest().add_expr("rank", tree_expr).compile(); + EXPECT_EQUAL(3u, count_features(f1.program)); + EXPECT_EQUAL(3u, count_const_features(f1.program)); + EXPECT_EQUAL(f1.get(), 21.0); + EXPECT_EQUAL(f1.final_executor_name(), "search::features::FastForestExecutor"); +} + TEST_MAIN() { TEST_RUN_ALL(); } diff --git a/searchlib/src/vespa/searchlib/features/rankingexpressionfeature.cpp 
b/searchlib/src/vespa/searchlib/features/rankingexpressionfeature.cpp index 2733ec62105..a4b2280fa57 100644 --- a/searchlib/src/vespa/searchlib/features/rankingexpressionfeature.cpp +++ b/searchlib/src/vespa/searchlib/features/rankingexpressionfeature.cpp @@ -11,20 +11,21 @@ #include <vespa/log/log.h> LOG_SETUP(".features.rankingexpression"); -using vespalib::eval::Function; -using vespalib::eval::PassParams; +using search::fef::FeatureType; +using vespalib::ArrayRef; +using vespalib::ConstArrayRef; using vespalib::eval::CompileCache; using vespalib::eval::CompiledFunction; +using vespalib::eval::DoubleValue; +using vespalib::eval::Function; using vespalib::eval::InterpretedFunction; using vespalib::eval::LazyParams; -using vespalib::eval::ValueType; -using vespalib::eval::Value; -using vespalib::eval::DoubleValue; using vespalib::eval::NodeTypes; +using vespalib::eval::PassParams; +using vespalib::eval::Value; +using vespalib::eval::ValueType; +using vespalib::eval::gbdt::FastForest; using vespalib::tensor::DefaultTensorEngine; -using search::fef::FeatureType; -using vespalib::ArrayRef; -using vespalib::ConstArrayRef; namespace search::features { @@ -43,6 +44,23 @@ vespalib::string list_issues(const std::vector<vespalib::string> &issues) { //----------------------------------------------------------------------------- /** + * Implements the executor for fast forest gbdt evaluation + **/ +class FastForestExecutor : public fef::FeatureExecutor +{ +private: + const FastForest &_forest; + FastForest::Context _ctx; + +public: + FastForestExecutor(const FastForest &forest); + bool isPure() override { return true; } + void execute(uint32_t docId) override; +}; + +//----------------------------------------------------------------------------- + +/** * Implements the executor for compiled ranking expressions **/ class CompiledRankingExpressionExecutor : public fef::FeatureExecutor @@ -110,6 +128,22 @@ public: 
//----------------------------------------------------------------------------- +FastForestExecutor::FastForestExecutor(const FastForest &forest) + : _forest(forest), + _ctx(_forest) +{ +} + +void +FastForestExecutor::execute(uint32_t) +{ + const auto &params = inputs(); + double result = _forest.eval(_ctx, [&params](size_t p){ return params.get_number(p); }); + outputs().set_number(0, result); +} + +//----------------------------------------------------------------------------- + CompiledRankingExpressionExecutor::CompiledRankingExpressionExecutor(const CompiledFunction &compiled_function) : _ranking_function(compiled_function.get_function()), _params(compiled_function.num_params(), 0.0) @@ -178,6 +212,7 @@ RankingExpressionBlueprint::RankingExpressionBlueprint(rankingexpression::Expres : fef::Blueprint("rankingExpression"), _expression_replacer(std::move(replacer)), _intrinsic_expression(), + _fast_forest(), _interpreted_function(), _compile_token(), _input_is_object() @@ -259,11 +294,17 @@ RankingExpressionBlueprint::setup(const fef::IIndexEnvironment &env, // avoid costly compilation when only verifying setup if (env.getFeatureMotivation() != env.FeatureMotivation::VERIFY_SETUP) { if (do_compile) { - bool suggest_lazy = CompiledFunction::should_use_lazy_params(rank_function); - if (fef::indexproperties::eval::LazyExpressions::check(env.getProperties(), suggest_lazy)) { - _compile_token = CompileCache::compile(rank_function, PassParams::LAZY); - } else { - _compile_token = CompileCache::compile(rank_function, PassParams::ARRAY); + // fast forest evaluation is a possible replacement for compiled tree models + if (fef::indexproperties::eval::UseFastForest::check(env.getProperties())) { + _fast_forest = FastForest::try_convert(rank_function); + } + if (!_fast_forest) { + bool suggest_lazy = CompiledFunction::should_use_lazy_params(rank_function); + if (fef::indexproperties::eval::LazyExpressions::check(env.getProperties(), suggest_lazy)) { + _compile_token = 
CompileCache::compile(rank_function, PassParams::LAZY); + } else { + _compile_token = CompileCache::compile(rank_function, PassParams::ARRAY); + } } } else { _interpreted_function.reset(new InterpretedFunction(DefaultTensorEngine::ref(), rank_function, node_types)); @@ -300,6 +341,9 @@ RankingExpressionBlueprint::createExecutor(const fef::IQueryEnvironment &env, ve ConstArrayRef<char> input_is_object = stash.copy_array<char>(_input_is_object); return stash.create<InterpretedRankingExpressionExecutor>(*_interpreted_function, input_is_object); } + if (_fast_forest) { + return stash.create<FastForestExecutor>(*_fast_forest); + } assert(_compile_token.get() != nullptr); // will be nullptr for VERIFY_SETUP feature motivation if (_compile_token->get().pass_params() == PassParams::ARRAY) { return stash.create<CompiledRankingExpressionExecutor>(_compile_token->get()); diff --git a/searchlib/src/vespa/searchlib/features/rankingexpressionfeature.h b/searchlib/src/vespa/searchlib/features/rankingexpressionfeature.h index 104e8d63a70..579c8cf91a7 100644 --- a/searchlib/src/vespa/searchlib/features/rankingexpressionfeature.h +++ b/searchlib/src/vespa/searchlib/features/rankingexpressionfeature.h @@ -2,6 +2,7 @@ #pragma once #include <vespa/searchlib/fef/blueprint.h> +#include <vespa/eval/eval/fast_forest.h> #include <vespa/eval/eval/interpreted_function.h> #include <vespa/eval/eval/llvm/compile_cache.h> #include <vespa/searchlib/features/rankingexpression/expression_replacer.h> @@ -19,6 +20,7 @@ class RankingExpressionBlueprint : public fef::Blueprint private: rankingexpression::ExpressionReplacer::SP _expression_replacer; rankingexpression::IntrinsicExpression::UP _intrinsic_expression; + vespalib::eval::gbdt::FastForest::UP _fast_forest; vespalib::eval::InterpretedFunction::UP _interpreted_function; vespalib::eval::CompileCache::Token::UP _compile_token; std::vector<char> _input_is_object; diff --git a/searchlib/src/vespa/searchlib/fef/indexproperties.cpp 
b/searchlib/src/vespa/searchlib/fef/indexproperties.cpp index a7df39faf2f..ce1bd69cc4c 100644 --- a/searchlib/src/vespa/searchlib/fef/indexproperties.cpp +++ b/searchlib/src/vespa/searchlib/fef/indexproperties.cpp @@ -84,6 +84,10 @@ LazyExpressions::check(const Properties &props, bool default_value) return lookupBool(props, NAME, default_value); } +const vespalib::string UseFastForest::NAME("vespa.eval.use_fast_forest"); +const bool UseFastForest::DEFAULT_VALUE(false); +bool UseFastForest::check(const Properties &props) { return lookupBool(props, NAME, DEFAULT_VALUE); } + } // namespace eval namespace rank { diff --git a/searchlib/src/vespa/searchlib/fef/indexproperties.h b/searchlib/src/vespa/searchlib/fef/indexproperties.h index 9adf4487ec5..57aa24222a3 100644 --- a/searchlib/src/vespa/searchlib/fef/indexproperties.h +++ b/searchlib/src/vespa/searchlib/fef/indexproperties.h @@ -26,6 +26,13 @@ struct LazyExpressions { static bool check(const Properties &props, bool default_value); }; +// use fast-forest evaluation for gbdt expressions. affects rank/summary/dump +struct UseFastForest { + static const vespalib::string NAME; + static const bool DEFAULT_VALUE; + static bool check(const Properties &props); +}; + } // namespace eval namespace rank { diff --git a/searchlib/src/vespa/searchlib/fef/rank_program.h b/searchlib/src/vespa/searchlib/fef/rank_program.h index 3a92fc874a4..e1014df5ee5 100644 --- a/searchlib/src/vespa/searchlib/fef/rank_program.h +++ b/searchlib/src/vespa/searchlib/fef/rank_program.h @@ -59,6 +59,7 @@ public: ~RankProgram(); size_t num_executors() const { return _executors.size(); } + const FeatureExecutor &get_executor(size_t i) const { return *_executors[i]; } /** * Set up this rank program by creating the needed feature |