summaryrefslogtreecommitdiffstats
path: root/eval
diff options
context:
space:
mode:
authorGeir Storli <geirst@verizonmedia.com>2019-10-02 13:42:57 +0200
committerGitHub <noreply@github.com>2019-10-02 13:42:57 +0200
commit87dabd85693ee4d607c67c1d1433a80f9bfb256f (patch)
tree894d24f5b0d48966fac4de92d295244e8df9e3a7 /eval
parent1a7a7468485c84b61b5b919dd75bb01220faa913 (diff)
parent65d64069597882cf273d72fe872ee1fe6bd9de23 (diff)
Merge pull request #10840 from vespa-engine/havardpe/faster-boosted-models
faster gbdt forest evaluation
Diffstat (limited to 'eval')
-rw-r--r--eval/src/tests/eval/gbdt/gbdt_test.cpp33
-rw-r--r--eval/src/tests/eval/gbdt/model.cpp17
-rw-r--r--eval/src/vespa/eval/eval/CMakeLists.txt1
-rw-r--r--eval/src/vespa/eval/eval/fast_forest.cpp232
-rw-r--r--eval/src/vespa/eval/eval/fast_forest.h141
5 files changed, 417 insertions, 7 deletions
diff --git a/eval/src/tests/eval/gbdt/gbdt_test.cpp b/eval/src/tests/eval/gbdt/gbdt_test.cpp
index 865b01b861b..14fa4510f4d 100644
--- a/eval/src/tests/eval/gbdt/gbdt_test.cpp
+++ b/eval/src/tests/eval/gbdt/gbdt_test.cpp
@@ -3,6 +3,7 @@
#include <vespa/eval/eval/gbdt.h>
#include <vespa/eval/eval/vm_forest.h>
#include <vespa/eval/eval/function.h>
+#include <vespa/eval/eval/fast_forest.h>
#include <vespa/eval/eval/llvm/deinline_forest.h>
#include <vespa/eval/eval/llvm/compiled_function.h>
#include <vespa/eval/eval/interpreted_function.h>
@@ -295,6 +296,24 @@ TEST("require that models with too large sets are rejected by general vm optimiz
EXPECT_TRUE(!Optimize::apply_chain(general_vm_chain, stats, trees).valid());
}
+TEST("require that FastForest model evaluation works") {
+ Function function = Function::parse("if((a<2),1.0,if((b<2),if((c<2),2.0,3.0),4.0))+"
+ "if(!(c>=1),10.0,if((a<1),if((b<1),20.0,30.0),40.0))");
+ CompiledFunction compiled(function, PassParams::ARRAY, Optimize::none);
+ auto f = compiled.get_function();
+ EXPECT_TRUE(compiled.get_forests().empty());
+ auto forest = FastForest::try_convert(function);
+ ASSERT_TRUE(forest);
+ FastForest::Context ctx(*forest);
+ std::vector<double> p1({0.5, 0.5, 0.5}); // all true: 1.0 + 10.0
+ std::vector<double> p2({2.5, 2.5, 2.5}); // all false: 4.0 + 40.0
+ std::vector<double> pn(3, std::numeric_limits<double>::quiet_NaN()); // default: 4.0 + 10.0
+ EXPECT_EQUAL(forest->eval(ctx, [&p1](size_t i){return p1[i];}), f(&p1[0]));
+ EXPECT_EQUAL(forest->eval(ctx, [&p2](size_t i){return p2[i];}), f(&p2[0]));
+ EXPECT_EQUAL(forest->eval(ctx, [&pn](size_t i){return pn[i];}), f(&pn[0]));
+ EXPECT_EQUAL(forest->eval(ctx, [&p1](size_t i){return p1[i];}), f(&p1[0]));
+}
+
//-----------------------------------------------------------------------------
double eval_compiled(const CompiledFunction &cfun, std::vector<double> &params) {
@@ -311,11 +330,13 @@ double eval_compiled(const CompiledFunction &cfun, std::vector<double> &params)
TEST("require that forests evaluate to approximately the same for all evaluation options") {
for (PassParams pass_params: {PassParams::ARRAY, PassParams::LAZY}) {
for (size_t tree_size: std::vector<size_t>({20})) {
- for (size_t num_trees: std::vector<size_t>({10, 60})) {
+ for (size_t num_trees: std::vector<size_t>({60})) {
for (size_t less_percent: std::vector<size_t>({100, 80})) {
for (size_t invert_percent: std::vector<size_t>({0, 50})) {
vespalib::string expression = Model().less_percent(less_percent).invert_percent(invert_percent).make_forest(num_trees, tree_size);
Function function = Function::parse(expression);
+ auto forest = FastForest::try_convert(function);
+ EXPECT_EQUAL(bool(forest), bool(less_percent == 100));
CompiledFunction none(function, pass_params, Optimize::none);
CompiledFunction deinline(function, pass_params, DeinlineForest::optimize_chain);
CompiledFunction vm_forest(function, pass_params, VMForest::optimize_chain);
@@ -325,10 +346,20 @@ TEST("require that forests evaluate to approximately the same for all evaluation
ASSERT_EQUAL(1u, vm_forest.get_forests().size());
EXPECT_TRUE(dynamic_cast<VMForest*>(vm_forest.get_forests()[0].get()) != nullptr);
std::vector<double> inputs(function.num_params(), 0.5);
+ std::vector<double> inputs_nan(function.num_params(), std::numeric_limits<double>::quiet_NaN());
double expected = eval_double(function, inputs);
+ double expected_nan = eval_double(function, inputs_nan);
EXPECT_EQUAL(expected, eval_compiled(none, inputs));
EXPECT_EQUAL(expected, eval_compiled(deinline, inputs));
EXPECT_EQUAL(expected, eval_compiled(vm_forest, inputs));
+ EXPECT_EQUAL(expected_nan, eval_compiled(none, inputs_nan));
+ EXPECT_EQUAL(expected_nan, eval_compiled(deinline, inputs_nan));
+ EXPECT_EQUAL(expected_nan, eval_compiled(vm_forest, inputs_nan));
+ if (forest) {
+ FastForest::Context ctx(*forest);
+ EXPECT_EQUAL(expected, forest->eval(ctx, [&inputs](size_t i){return inputs[i];}));
+ EXPECT_EQUAL(expected_nan, forest->eval(ctx, [&inputs_nan](size_t i){return inputs_nan[i];}));
+ }
}
}
}
diff --git a/eval/src/tests/eval/gbdt/model.cpp b/eval/src/tests/eval/gbdt/model.cpp
index e531b327e89..ae1c9bea437 100644
--- a/eval/src/tests/eval/gbdt/model.cpp
+++ b/eval/src/tests/eval/gbdt/model.cpp
@@ -21,9 +21,14 @@ private:
return dist(_gen);
}
- double get_real(double min, double max) {
- std::uniform_real_distribution<double> dist(min, max);
- return dist(_gen);
+ double get_real() {
+ std::uniform_real_distribution<double> dist(0.0, 1.0);
+ double result = dist(_gen);
+ // avoid different decisions based on using float vs. double split values
+ while (float(result) == 0.5) {
+ result = dist(_gen);
+ }
+ return result;
}
std::string make_feature_name() {
@@ -45,11 +50,11 @@ private:
if (get_int(1,100) > _invert_percent) {
return make_string("(%s<%g)",
make_feature_name().c_str(),
- get_real(0.0, 1.0));
+ get_real());
} else {
return make_string("(!(%s>=%g))",
make_feature_name().c_str(),
- get_real(0.0, 1.0));
+ get_real());
}
}
}
@@ -70,7 +75,7 @@ public:
std::string make_tree(size_t size) {
assert(size > 0);
if (size == 1) {
- return make_string("%g", get_real(0.0, 1.0));
+ return make_string("%g", get_real());
}
size_t pivot = get_int(1, size - 1);
return make_string("if(%s,%s,%s)",
diff --git a/eval/src/vespa/eval/eval/CMakeLists.txt b/eval/src/vespa/eval/eval/CMakeLists.txt
index 1baa1b3b4bf..23aa504f31e 100644
--- a/eval/src/vespa/eval/eval/CMakeLists.txt
+++ b/eval/src/vespa/eval/eval/CMakeLists.txt
@@ -6,6 +6,7 @@ vespa_add_library(eval_eval OBJECT
call_nodes.cpp
compile_tensor_function.cpp
delete_node.cpp
+ fast_forest.cpp
function.cpp
gbdt.cpp
interpreted_function.cpp
diff --git a/eval/src/vespa/eval/eval/fast_forest.cpp b/eval/src/vespa/eval/eval/fast_forest.cpp
new file mode 100644
index 00000000000..927c5355e6a
--- /dev/null
+++ b/eval/src/vespa/eval/eval/fast_forest.cpp
@@ -0,0 +1,232 @@
+// Copyright 2019 Oath Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include "fast_forest.h"
+#include "gbdt.h"
+#include <vespa/eval/eval/basic_nodes.h>
+#include <vespa/eval/eval/call_nodes.h>
+#include <vespa/eval/eval/operator_nodes.h>
+#include <vespa/vespalib/util/benchmark_timer.h>
+#include <algorithm>
+#include <cassert>
+
+namespace vespalib::eval::gbdt {
+
+namespace {
+
+struct BitRange {
+ uint32_t first;
+ uint32_t last;
+ BitRange(uint32_t bit) : first(bit), last(bit) {}
+ BitRange(uint32_t a, uint32_t b) : first(a), last(b) {}
+ static BitRange join(const BitRange &a, const BitRange &b) {
+ assert((a.last + 1) == b.first);
+ return BitRange(a.first, b.last);
+ }
+ ~BitRange() = default;
+};
+
+struct CmpNode {
+ float value;
+ uint32_t tree_id;
+ BitRange false_mask;
+ bool false_is_default;
+ CmpNode(float v, uint32_t t, BitRange m, bool f_def)
+ : value(v), tree_id(t), false_mask(m), false_is_default(f_def) {}
+ bool operator<(const CmpNode &rhs) const {
+ return (value < rhs.value);
+ }
+ ~CmpNode() = default;
+};
+
+struct State {
+ using Leafs = std::vector<float>;
+ using CmpNodes = std::vector<CmpNode>;
+ std::vector<CmpNodes> cmp_nodes;
+ std::vector<Leafs> leafs;
+ BitRange encode_node(uint32_t tree_id, const nodes::Node &node);
+ State(size_t num_params, const std::vector<const nodes::Node *> &trees);
+ ~State() = default;
+};
+
+BitRange
+State::encode_node(uint32_t tree_id, const nodes::Node &node)
+{
+ auto if_node = nodes::as<nodes::If>(node);
+ if (if_node) {
+ BitRange true_leafs = encode_node(tree_id, if_node->true_expr());
+ BitRange false_leafs = encode_node(tree_id, if_node->false_expr());
+ auto less = nodes::as<nodes::Less>(if_node->cond());
+ auto inverted = nodes::as<nodes::Not>(if_node->cond());
+ if (less) {
+ auto symbol = nodes::as<nodes::Symbol>(less->lhs());
+ assert(symbol);
+ assert(less->rhs().is_const());
+ size_t feature = symbol->id();
+ assert(feature < cmp_nodes.size());
+ cmp_nodes[feature].emplace_back(less->rhs().get_const_value(), tree_id, true_leafs, true);
+ } else {
+ assert(inverted);
+ auto ge = nodes::as<nodes::GreaterEqual>(inverted->child());
+ assert(ge);
+ auto symbol = nodes::as<nodes::Symbol>(ge->lhs());
+ assert(symbol);
+ assert(ge->rhs().is_const());
+ size_t feature = symbol->id();
+ assert(feature < cmp_nodes.size());
+ cmp_nodes[feature].emplace_back(ge->rhs().get_const_value(), tree_id, true_leafs, false);
+ }
+ return BitRange::join(true_leafs, false_leafs);
+ } else {
+ assert(node.is_const());
+ BitRange leaf_range(leafs[tree_id].size());
+ leafs[tree_id].push_back(node.get_const_value());
+ return leaf_range;
+ }
+}
+
+State::State(size_t num_params, const std::vector<const nodes::Node *> &trees)
+ : cmp_nodes(num_params),
+ leafs(trees.size())
+{
+ for (uint32_t tree_id = 0; tree_id < trees.size(); ++tree_id) {
+ BitRange leaf_range = encode_node(tree_id, *trees[tree_id]);
+ assert(leaf_range.first == 0);
+ assert((leaf_range.last + 1) == leafs[tree_id].size());
+ }
+ for (CmpNodes &cmp_range: cmp_nodes) {
+ assert(!cmp_range.empty());
+ std::sort(cmp_range.begin(), cmp_range.end());
+ }
+}
+
+}
+
+struct FastForestBuilder {
+
+ static FastForest::MaskType get_mask_type(uint32_t idx1, uint32_t idx2) {
+ assert(idx1 <= idx2);
+ if (idx1 == idx2) {
+ return FastForest::MaskType::ONE;
+ } else if ((idx1 + 1) == idx2) {
+ return FastForest::MaskType::TWO;
+ } else {
+ return FastForest::MaskType::MANY;
+ }
+ }
+
+ static FastForest::Mask make_mask(const CmpNode &cmp_node) {
+ BitRange range = cmp_node.false_mask;
+ assert(range.last < (8 * 256));
+ assert(range.first <= range.last);
+ uint32_t idx1 = (range.first / 8);
+ uint32_t idx2 = (range.last / 8);
+ uint8_t bits1 = 0;
+ uint8_t bits2 = 0;
+ for (uint32_t i = 0; i < 8; ++i) {
+ uint32_t bit1 = (idx1 * 8) + i;
+ if ((bit1 < range.first) || (bit1 > range.last)) {
+ bits1 |= (1 << i);
+ }
+ uint32_t bit2 = (idx2 * 8) + i;
+ if ((bit2 < range.first) || (bit2 > range.last)) {
+ bits2 |= (1 << i);
+ }
+ }
+ assert(cmp_node.tree_id < (256 * 256));
+ return FastForest::Mask(cmp_node.tree_id, get_mask_type(idx1, idx2), cmp_node.false_is_default,
+ idx1, bits1, idx2, bits2);
+ }
+
+ static void build(State &state, FastForest &ff) {
+ for (const auto &cmp_nodes: state.cmp_nodes) {
+ ff._feature_sizes.push_back(cmp_nodes.size());
+ for (const CmpNode &cmp_node: cmp_nodes) {
+ ff._values.push_back(cmp_node.value);
+ ff._masks.push_back(make_mask(cmp_node));
+ }
+ }
+ for (const auto &leafs: state.leafs) {
+ ff._tree_sizes.push_back(leafs.size());
+ for (float leaf: leafs) {
+ ff._leafs.push_back(leaf);
+ }
+ }
+ }
+};
+
+FastForest::Context::Context(const FastForest &ff)
+ : _forest(&ff),
+ _bytes_per_tree((ff.max_leafs() + 7) / 8),
+ _bits(_bytes_per_tree * ff.num_trees()) {}
+
+FastForest::Context::~Context() = default;
+
+FastForest::FastForest() = default;
+FastForest::~FastForest() = default;
+
+size_t
+FastForest::num_params() const
+{
+ return _feature_sizes.size();
+}
+
+size_t
+FastForest::num_trees() const
+{
+ return _tree_sizes.size();
+}
+
+size_t
+FastForest::max_leafs() const
+{
+ size_t res = 0;
+ size_t sum = 0;
+ for (size_t sz: _tree_sizes) {
+ res = std::max(res, sz);
+ sum += sz;
+ }
+ assert(res <= (8 * 256));
+ assert(sum == _leafs.size());
+ return res;
+}
+
+FastForest::UP
+FastForest::try_convert(const Function &fun)
+{
+ const auto &root = fun.root();
+ if (!root.is_forest()) {
+ // must be only forest
+ return FastForest::UP();
+ }
+ auto trees = gbdt::extract_trees(root);
+ if (trees.size() > (256 * 256)) {
+ // too many trees
+ return FastForest::UP();
+ }
+ gbdt::ForestStats stats(trees);
+ if (stats.total_in_checks > 0) {
+ // set membership not supported
+ return FastForest::UP();
+ }
+ if (stats.tree_sizes.back().size > (8 * 256)) {
+ // too many leaf nodes per tree
+ return FastForest::UP();
+ }
+ State state(fun.num_params(), trees);
+ FastForest::UP res = FastForest::UP(new FastForest());
+ FastForestBuilder::build(state, *res);
+ assert(fun.num_params() == res->num_params());
+ assert(trees.size() == res->num_trees());
+ return res;
+}
+
+double
+FastForest::estimate_cost_us(const std::vector<double> &params, double budget) const
+{
+ Context ctx(*this);
+ auto get_param = [&params](size_t i)->float{ return params[i]; };
+ auto self_eval = [&](){ this->eval(ctx, get_param); };
+ return BenchmarkTimer::benchmark(self_eval, budget) * 1000.0 * 1000.0;
+}
+
+}
diff --git a/eval/src/vespa/eval/eval/fast_forest.h b/eval/src/vespa/eval/eval/fast_forest.h
new file mode 100644
index 00000000000..df3be2c4aef
--- /dev/null
+++ b/eval/src/vespa/eval/eval/fast_forest.h
@@ -0,0 +1,141 @@
+// Copyright 2019 Oath Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include "function.h"
+#include <vespa/vespalib/util/optimized.h>
+#include <memory>
+#include <cassert>
+#include <cmath>
+
+namespace vespalib::eval::gbdt {
+
+/**
+ * Use modern optimization strategies to improve evaluation
+ * performance of GBDT forests.
+ *
+ * This model evaluation supports up to 65536 trees with up to 2048
+ * leaf nodes in each tree. Comparisons must be on the form 'feature <
+ * const' or '!(feature >= const)'. The inverted form is used to
+ * signal that the true branch should be selected when the feature
+ * values is missing (NaN).
+ **/
+class FastForest
+{
+public:
+ class Context {
+ friend class FastForest;
+ private:
+ const FastForest *_forest;
+ size_t _bytes_per_tree;
+ std::vector<uint8_t> _bits;
+ public:
+ explicit Context(const FastForest &ff);
+ ~Context();
+ };
+
+private:
+ friend struct FastForestBuilder;
+
+ enum class MaskType : uint8_t { ONE, TWO, MANY };
+
+ struct Mask {
+ uint16_t tree_id;
+ MaskType type;
+ uint8_t false_is_default;
+ uint8_t first_idx;
+ uint8_t first_bits;
+ uint8_t last_idx;
+ uint8_t last_bits;
+ Mask(uint16_t tree, MaskType mt, bool def_false,
+ uint8_t idx1, uint8_t bits1, uint8_t idx2, uint8_t bits2)
+ : tree_id(tree), type(mt), false_is_default(def_false),
+ first_idx(idx1), first_bits(bits1), last_idx(idx2), last_bits(bits2) {}
+ };
+
+ std::vector<uint32_t> _feature_sizes;
+ std::vector<float> _values;
+ std::vector<Mask> _masks;
+ std::vector<uint32_t> _tree_sizes;
+ std::vector<float> _leafs;
+
+ FastForest();
+
+ static void apply_mask(uint8_t *bits, size_t bytes_per_tree, const Mask &mask) {
+ uint8_t *dst = (bits + (mask.tree_id * bytes_per_tree) + mask.first_idx);
+ *dst &= mask.first_bits;
+ if (__builtin_expect(mask.type != MaskType::ONE, false)) {
+ if (__builtin_expect(mask.type == MaskType::MANY, false)) {
+ size_t n = (mask.last_idx - mask.first_idx - 1);
+ while (n-- > 0) {
+ *++dst = 0x00;
+ }
+ }
+ *++dst &= mask.last_bits;
+ }
+ }
+
+ static float find_leaf(const uint8_t *bits, const float *leafs) {
+ while (__builtin_expect(*bits == 0, true)) {
+ ++bits;
+ leafs += 8;
+ }
+ return leafs[vespalib::Optimized::lsbIdx(uint32_t(*bits))];
+ }
+
+public:
+ ~FastForest();
+ size_t num_params() const;
+ size_t num_trees() const;
+ size_t max_leafs() const;
+ using UP = std::unique_ptr<FastForest>;
+ static UP try_convert(const Function &fun);
+
+ template <typename F>
+ double eval(Context &ctx, F &&f) const {
+ assert(ctx._forest == this);
+ size_t bytes_per_tree = ctx._bytes_per_tree;
+ uint8_t *bits = &ctx._bits[0];
+ const float *value_ptr = &_values[0];
+ const Mask *mask_ptr = &_masks[0];
+ memset(bits, 0xff, ctx._bits.size());
+ for (size_t f_idx = 0; f_idx < _feature_sizes.size(); ++f_idx) {
+ size_t feature_size = _feature_sizes[f_idx];
+ float feature_value = f(f_idx); // get param
+ if (__builtin_expect(std::isnan(feature_value), false)) {
+ // handle 'missing' input feature
+ for (size_t i = 0; i < feature_size; ++i) {
+ const Mask &mask = mask_ptr[i];
+ if (mask.false_is_default) {
+ apply_mask(bits, bytes_per_tree, mask);
+ }
+ }
+ } else {
+ for (size_t i = 0; i < feature_size; ++i) {
+ if (__builtin_expect(feature_value < value_ptr[i], false)) {
+ break;
+ } else {
+ apply_mask(bits, bytes_per_tree, mask_ptr[i]);
+ }
+ }
+ }
+ value_ptr += feature_size;
+ mask_ptr += feature_size;
+ }
+ assert(value_ptr == &*_values.end());
+ assert(mask_ptr == &*_masks.end());
+ const float *leafs = &_leafs[0];
+ double result = 0.0;
+ for (size_t tree_size: _tree_sizes) {
+ result += find_leaf(bits, leafs);
+ bits += bytes_per_tree;
+ leafs += tree_size;
+ }
+ assert(bits == &*ctx._bits.end());
+ assert(leafs == &*_leafs.end());
+ return result;
+ }
+ double estimate_cost_us(const std::vector<double> &params, double budget = 5.0) const;
+};
+
+}