5 files changed, 132 insertions, 23 deletions
diff --git a/eval/src/tests/eval/gbdt/.gitignore b/eval/src/tests/eval/gbdt/.gitignore
index d0ee762745c..952736e3543 100644
--- a/eval/src/tests/eval/gbdt/.gitignore
+++ b/eval/src/tests/eval/gbdt/.gitignore
@@ -1 +1,2 @@
 /eval_gbdt_benchmark_app
+/eval_fast_forest_bench_app
diff --git a/eval/src/tests/eval/gbdt/CMakeLists.txt b/eval/src/tests/eval/gbdt/CMakeLists.txt
index edbe56e3143..874a2d7bd02 100644
--- a/eval/src/tests/eval/gbdt/CMakeLists.txt
+++ b/eval/src/tests/eval/gbdt/CMakeLists.txt
@@ -13,3 +13,9 @@ vespa_add_executable(eval_gbdt_benchmark_app
     vespaeval
 )
 vespa_add_test(NAME eval_gbdt_benchmark_app COMMAND eval_gbdt_benchmark_app BENCHMARK)
+vespa_add_executable(eval_fast_forest_bench_app # stand-alone benchmark binary built from fast_forest_bench.cpp
+    SOURCES
+    fast_forest_bench.cpp
+    DEPENDS
+    vespaeval
+) # NOTE(review): no vespa_add_test() is added for this target in this hunk, so it is presumably run by hand - confirm
diff --git a/eval/src/tests/eval/gbdt/fast_forest_bench.cpp b/eval/src/tests/eval/gbdt/fast_forest_bench.cpp
new file mode 100644
index 00000000000..76a56bec50c
--- /dev/null
+++ b/eval/src/tests/eval/gbdt/fast_forest_bench.cpp
@@ -0,0 +1,56 @@
+// Copyright 2019 Oath Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/eval/eval/function.h>
+#include <vespa/eval/eval/fast_forest.h>
+#include <vespa/eval/eval/vm_forest.h>
+#include <vespa/eval/eval/llvm/compiled_function.h>
+#include "model.cpp"
+
+using namespace vespalib::eval;
+using namespace vespalib::eval::gbdt;
+
+template <typename T> // T must provide estimate_cost_us(inputs, 5.0); used below with a FastForest and a CompiledFunction
+void estimate_cost(size_t num_params, const char *label, const T &impl) { // times impl on four uniform input vectors and prints one report line to stderr
+    std::vector<double> inputs_min(num_params, 0.25); // every parameter 0.25, reported as "low values"
+    std::vector<double> inputs_med(num_params, 0.50); // every parameter 0.50, reported as "medium values"
+    std::vector<double> inputs_max(num_params, 0.75); // every parameter 0.75, reported as "high values"
+    std::vector<double> inputs_nan(num_params, std::numeric_limits<double>::quiet_NaN()); // every parameter NaN, reported as "nan values"
+    double us_min = impl.estimate_cost_us(inputs_min, 5.0); // NOTE(review): 5.0 is presumably a time budget - confirm against estimate_cost_us
+    double us_med = impl.estimate_cost_us(inputs_med, 5.0);
+    double us_max = impl.estimate_cost_us(inputs_max, 5.0);
+    double us_nan = impl.estimate_cost_us(inputs_nan, 5.0);
+    fprintf(stderr, "[%12s] (per 100 eval): [low values] %6.3f ms, [medium values] %6.3f ms, [high values] %6.3f ms, [nan values] %6.3f ms\n", // label is right-aligned in a 12 character field
+            label, (us_min / 10.0), (us_med / 10.0), (us_max / 10.0), (us_nan / 10.0)); // us / 10.0 is ms per 100 evaluations, assuming the estimate is microseconds per single evaluation
+}
+
+void run_fast_forest_bench() { // generates forests over a grid of sizes and prints the estimated cost of each FastForest variant and of VMForest
+    for (size_t tree_size: std::vector<size_t>({8,16,32,64,128,256})) { // printed as "num leafs" in the section header
+        for (size_t num_trees: std::vector<size_t>({100, 500, 2500, 5000, 10000})) {
+            for (size_t max_features: std::vector<size_t>({200})) { // single-value loops: these knobs are fixed for now but keep the sweep shape
+                for (size_t less_percent: std::vector<size_t>({100})) {
+                    for (size_t invert_percent: std::vector<size_t>({50})) {
+                        fprintf(stderr, "\n=== features: %zu, num leafs: %zu, num trees: %zu\n", max_features, tree_size, num_trees);
+                        vespalib::string expression = Model().max_features(max_features).less_percent(less_percent).invert_percent(invert_percent).make_forest(num_trees, tree_size); // default-seeded Model, so the generated forest is the same on every run
+                        Function function = Function::parse(expression);
+                        for (size_t min_bits = std::max(size_t(8), tree_size); true; min_bits *= 2) { // starts at max(8, tree_size) and doubles; the only exit is the break below
+                            auto forest = FastForest::try_convert(function, min_bits, 64); // NOTE(review): min_bits and 64 presumably bound the mask width of the chosen implementation - confirm against FastForest::try_convert
+                            if (forest) { // an empty result is skipped without any output
+                                estimate_cost(function.num_params(), forest->impl_name().c_str(), *forest); // report line is labelled with the implementation name
+                            }
+                            if (min_bits > 64) { // checked after the attempt, so exactly one attempt is made with min_bits above 64
+                                break;
+                            }
+                        }
+                        estimate_cost(function.num_params(), "vm forest", CompiledFunction(function, PassParams::ARRAY, VMForest::optimize_chain)); // same function compiled with the VMForest optimizer chain, for comparison
+                    }
+                }
+            }
+        }
+    }
+    fprintf(stderr, "\n");
+}
+
+int main(int, char **) { // command line arguments are ignored; the benchmark grid is hard-coded
+    run_fast_forest_bench();
+    return 0;
+}
diff --git a/eval/src/tests/eval/gbdt/gbdt_test.cpp b/eval/src/tests/eval/gbdt/gbdt_test.cpp
index 14fa4510f4d..adb3d22847a 100644
--- a/eval/src/tests/eval/gbdt/gbdt_test.cpp
+++ b/eval/src/tests/eval/gbdt/gbdt_test.cpp
@@ -17,6 +17,14 @@ using namespace vespalib::eval::gbdt;
 
 //-----------------------------------------------------------------------------
 
+bool is_little_endian() { // true when the byte at the lowest address of a uint32_t is its least significant byte
+    uint32_t value = 0;
+    uint8_t bytes[4] = {0, 1, 2, 3}; // listed in increasing address order
+    static_assert(sizeof(bytes) == sizeof(value));
+    memcpy(&value, bytes, sizeof(bytes)); // copy the raw bytes into the integer instead of casting pointers
+    return (value == 0x03020100); // byte 0 landed in the low 8 bits and byte 3 in the high 8 bits
+}
+
 double eval_double(const Function &function, const std::vector<double> &params) {
     InterpretedFunction ifun(SimpleTensorEngine::ref(), function, NodeTypes());
     InterpretedFunction::Context ctx(ifun);
@@ -26,6 +34,22 @@ double eval_double(const Function &function, const std::vector<double> &params)
 
 double my_resolve(void *ctx, size_t idx) { return ((double*)ctx)[idx]; }
 
+double eval_compiled(const CompiledFunction &cfun, std::vector<double> &params) { // evaluates cfun with params using whichever calling convention it was compiled for
+    ASSERT_EQUAL(params.size(), cfun.num_params()); // the parameter count must match the compiled function
+    if (cfun.pass_params() == PassParams::ARRAY) {
+        return cfun.get_function()(params.data()); // data() is well-defined for an empty vector, unlike &params[0]
+    }
+    if (cfun.pass_params() == PassParams::LAZY) {
+        return cfun.get_lazy_function()(my_resolve, params.data()); // my_resolve reads parameter idx from the array passed as context
+    }
+    return 31212.0; // sentinel returned for any parameter passing mode this helper does not handle
+}
+
+double eval_ff(const FastForest &ff, FastForest::Context &ctx, const std::vector<double> &params) { // evaluates ff on params after converting them to float
+    std::vector<float> my_params(params.begin(), params.end()); // element-wise double to float conversion; eval is given a float array
+    return ff.eval(ctx, my_params.data()); // data() is well-defined for an empty vector, unlike &my_params[0]
+}
+
 //-----------------------------------------------------------------------------
 
 TEST("require that tree stats can be calculated") {
@@ -304,29 +328,18 @@ TEST("require that FastForest model evaluation works") {
     EXPECT_TRUE(compiled.get_forests().empty());
     auto forest = FastForest::try_convert(function);
     ASSERT_TRUE(forest);
-    FastForest::Context ctx(*forest);
+    auto ctx = forest->create_context();
     std::vector<double> p1({0.5, 0.5, 0.5}); // all true: 1.0 + 10.0
     std::vector<double> p2({2.5, 2.5, 2.5}); // all false: 4.0 + 40.0
     std::vector<double> pn(3, std::numeric_limits<double>::quiet_NaN()); // default: 4.0 + 10.0
-    EXPECT_EQUAL(forest->eval(ctx, [&p1](size_t i){return p1[i];}), f(&p1[0]));
-    EXPECT_EQUAL(forest->eval(ctx, [&p2](size_t i){return p2[i];}), f(&p2[0]));
-    EXPECT_EQUAL(forest->eval(ctx, [&pn](size_t i){return pn[i];}), f(&pn[0]));
-    EXPECT_EQUAL(forest->eval(ctx, [&p1](size_t i){return p1[i];}), f(&p1[0]));
+    EXPECT_EQUAL(eval_ff(*forest, *ctx, p1), f(&p1[0]));
+    EXPECT_EQUAL(eval_ff(*forest, *ctx, p2), f(&p2[0]));
+    EXPECT_EQUAL(eval_ff(*forest, *ctx, pn), f(&pn[0]));
+    EXPECT_EQUAL(eval_ff(*forest, *ctx, p1), f(&p1[0]));
 }
 
 //-----------------------------------------------------------------------------
 
-double eval_compiled(const CompiledFunction &cfun, std::vector<double> &params) {
-    ASSERT_EQUAL(params.size(), cfun.num_params());
-    if (cfun.pass_params() == PassParams::ARRAY) {
-        return cfun.get_function()(&params[0]);
-    }
-    if (cfun.pass_params() == PassParams::LAZY) {
-        return cfun.get_lazy_function()(my_resolve, &params[0]);
-    }
-    return 31212.0;
-}
-
 TEST("require that forests evaluate to approximately the same for all evaluation options") {
     for (PassParams pass_params: {PassParams::ARRAY, PassParams::LAZY}) {
         for (size_t tree_size: std::vector<size_t>({20})) {
@@ -356,9 +369,36 @@ TEST("require that forests evaluate to approximately the same for all evaluation
                         EXPECT_EQUAL(expected_nan, eval_compiled(deinline, inputs_nan));
                         EXPECT_EQUAL(expected_nan, eval_compiled(vm_forest, inputs_nan));
                         if (forest) {
-                            FastForest::Context ctx(*forest);
-                            EXPECT_EQUAL(expected, forest->eval(ctx, [&inputs](size_t i){return inputs[i];}));
-                            EXPECT_EQUAL(expected_nan, forest->eval(ctx, [&inputs_nan](size_t i){return inputs_nan[i];}));
+                            auto ctx = forest->create_context();
+                            EXPECT_EQUAL(expected, eval_ff(*forest, *ctx, inputs));
+                            EXPECT_EQUAL(expected_nan, eval_ff(*forest, *ctx, inputs_nan));
+                        }
+                    }
+                }
+            }
+        }
+    }
+}
+
+TEST("require that fast forest evaluation is correct for all tree size categories") {
+    for (size_t tree_size: std::vector<size_t>({7,15,30,61,127})) {
+        for (size_t num_trees: std::vector<size_t>({127})) {
+            for (size_t num_features: std::vector<size_t>({35})) {
+                for (size_t less_percent: std::vector<size_t>({100})) {
+                    for (size_t invert_percent: std::vector<size_t>({50})) {
+                        vespalib::string expression = Model().max_features(num_features).less_percent(less_percent).invert_percent(invert_percent).make_forest(num_trees, tree_size);
+                        Function function = Function::parse(expression);
+                        auto forest = FastForest::try_convert(function);
+                        if ((tree_size <= 64) || is_little_endian()) {
+                            ASSERT_TRUE(forest);
+                            TEST_STATE(forest->impl_name().c_str());
+                            std::vector<double> inputs(function.num_params(), 0.5);
+                            std::vector<double> inputs_nan(function.num_params(), std::numeric_limits<double>::quiet_NaN());
+                            double expected = eval_double(function, inputs);
+                            double expected_nan = eval_double(function, inputs_nan);
+                            auto ctx = forest->create_context();
+                            EXPECT_EQUAL(expected, eval_ff(*forest, *ctx, inputs));
+                            EXPECT_EQUAL(expected_nan, eval_ff(*forest, *ctx, inputs_nan));
                         }
                     }
                 }
diff --git a/eval/src/tests/eval/gbdt/model.cpp b/eval/src/tests/eval/gbdt/model.cpp
index ae1c9bea437..8f0d87a4020 100644
--- a/eval/src/tests/eval/gbdt/model.cpp
+++ b/eval/src/tests/eval/gbdt/model.cpp
@@ -13,6 +13,7 @@ class Model
 {
 private:
     std::mt19937 _gen;
+    size_t _max_features;
     size_t _less_percent;
     size_t _invert_percent;
 
@@ -32,9 +33,9 @@ private:
     }
 
     std::string make_feature_name() {
-        size_t max_feature = 2;
-        while ((max_feature < 1024) && (get_int(0, 99) < 55)) {
-            max_feature *= 2;
+        size_t max_feature = std::min(size_t(7), std::max(size_t(1), _max_features)); // start range [1,7], clamped so a limit below 7 is honored; floor of 1 keeps the range passed to get_int non-empty
+        while ((max_feature < _max_features) && (get_int(0, 99) < 55)) { // keep widening while get_int(0, 99) draws below 55 and the limit is not reached
+            max_feature = std::min(max_feature * 2, _max_features); // double the range, never beyond the configured limit
         }
         return make_string("feature_%zu", get_int(1, max_feature));
     }
@@ -60,7 +61,12 @@ private:
     }
 
 public:
-    explicit Model(size_t seed = 5489u) : _gen(seed), _less_percent(80), _invert_percent(0) {}
+    explicit Model(size_t seed = 5489u) : _gen(seed), _max_features(1024), _less_percent(80), _invert_percent(0) {} // 1024 is the limit that make_feature_name previously hard-coded
+
+    Model &max_features(size_t value) { // sets the limit make_feature_name uses when widening its feature index range; returns *this so setters can be chained
+        _max_features = value;
+        return *this;
+    }
 
     Model &less_percent(size_t value) {
         _less_percent = value;