summary refs log tree commit diff stats
path: root/eval/src/tests
diff options
context:
space:
mode:
Diffstat (limited to 'eval/src/tests')
-rw-r--r-- eval/src/tests/eval/gbdt/.gitignore 1
-rw-r--r-- eval/src/tests/eval/gbdt/CMakeLists.txt 6
-rw-r--r-- eval/src/tests/eval/gbdt/fast_forest_bench.cpp 56
-rw-r--r-- eval/src/tests/eval/gbdt/gbdt_test.cpp 78
-rw-r--r-- eval/src/tests/eval/gbdt/model.cpp 14
5 files changed, 132 insertions, 23 deletions
diff --git a/eval/src/tests/eval/gbdt/.gitignore b/eval/src/tests/eval/gbdt/.gitignore
index d0ee762745c..952736e3543 100644
--- a/eval/src/tests/eval/gbdt/.gitignore
+++ b/eval/src/tests/eval/gbdt/.gitignore
@@ -1 +1,2 @@
/eval_gbdt_benchmark_app
+/eval_fast_forest_bench_app
diff --git a/eval/src/tests/eval/gbdt/CMakeLists.txt b/eval/src/tests/eval/gbdt/CMakeLists.txt
index edbe56e3143..874a2d7bd02 100644
--- a/eval/src/tests/eval/gbdt/CMakeLists.txt
+++ b/eval/src/tests/eval/gbdt/CMakeLists.txt
@@ -13,3 +13,9 @@ vespa_add_executable(eval_gbdt_benchmark_app
vespaeval
)
vespa_add_test(NAME eval_gbdt_benchmark_app COMMAND eval_gbdt_benchmark_app BENCHMARK)
+vespa_add_executable(eval_fast_forest_bench_app
+ SOURCES
+ fast_forest_bench.cpp
+ DEPENDS
+ vespaeval
+)
diff --git a/eval/src/tests/eval/gbdt/fast_forest_bench.cpp b/eval/src/tests/eval/gbdt/fast_forest_bench.cpp
new file mode 100644
index 00000000000..76a56bec50c
--- /dev/null
+++ b/eval/src/tests/eval/gbdt/fast_forest_bench.cpp
@@ -0,0 +1,56 @@
+// Copyright 2019 Oath Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/eval/eval/function.h>
+#include <vespa/eval/eval/fast_forest.h>
+#include <vespa/eval/eval/vm_forest.h>
+#include <vespa/eval/eval/llvm/compiled_function.h>
+#include "model.cpp"
+
+using namespace vespalib::eval;
+using namespace vespalib::eval::gbdt;
+
+template <typename T> // T: any type offering estimate_cost_us(std::vector<double>, double)
+void estimate_cost(size_t num_params, const char *label, const T &impl) { // time 'impl' on four uniform input vectors and print one report line to stderr
+ std::vector<double> inputs_min(num_params, 0.25); // every parameter 0.25 (reported as "low values")
+ std::vector<double> inputs_med(num_params, 0.50); // every parameter 0.50 (reported as "medium values")
+ std::vector<double> inputs_max(num_params, 0.75); // every parameter 0.75 (reported as "high values")
+ std::vector<double> inputs_nan(num_params, std::numeric_limits<double>::quiet_NaN()); // every parameter NaN (reported as "nan values")
+ double us_min = impl.estimate_cost_us(inputs_min, 5.0); // 5.0 is presumably a time budget for the measurement -- TODO confirm against estimate_cost_us
+ double us_med = impl.estimate_cost_us(inputs_med, 5.0);
+ double us_max = impl.estimate_cost_us(inputs_max, 5.0);
+ double us_nan = impl.estimate_cost_us(inputs_nan, 5.0);
+ fprintf(stderr, "[%12s] (per 100 eval): [low values] %6.3f ms, [medium values] %6.3f ms, [high values] %6.3f ms, [nan values] %6.3f ms\n", // label is right-aligned in a 12-character field
+ label, (us_min / 10.0), (us_med / 10.0), (us_max / 10.0), (us_nan / 10.0)); // assuming microseconds per evaluation, /10 gives milliseconds per 100 evaluations
+}
+
+void run_fast_forest_bench() { // sweep generated forest models and print cost estimates for FastForest variants and the VM forest
+ for (size_t tree_size: std::vector<size_t>({8,16,32,64,128,256})) { // printed as "num leafs" in the section header below
+ for (size_t num_trees: std::vector<size_t>({100, 500, 2500, 5000, 10000})) {
+ for (size_t max_features: std::vector<size_t>({200})) { // only one value is swept here
+ for (size_t less_percent: std::vector<size_t>({100})) { // only one value is swept here
+ for (size_t invert_percent: std::vector<size_t>({50})) { // only one value is swept here
+ fprintf(stderr, "\n=== features: %zu, num leafs: %zu, num trees: %zu\n", max_features, tree_size, num_trees); // section header for this model shape
+ vespalib::string expression = Model().max_features(max_features).less_percent(less_percent).invert_percent(invert_percent).make_forest(num_trees, tree_size); // default-seeded Model builds the forest expression text
+ Function function = Function::parse(expression);
+ for (size_t min_bits = std::max(size_t(8), tree_size); true; min_bits *= 2) { // start at max(8, tree_size) and double each pass; the only exit is the break below
+ auto forest = FastForest::try_convert(function, min_bits, 64);
+ if (forest) { // an empty result is skipped silently (presumably no implementation fits these bounds -- TODO confirm)
+ estimate_cost(function.num_params(), forest->impl_name().c_str(), *forest); // report row labelled with the implementation name
+ }
+ if (min_bits > 64) { // tested after the attempt, so exactly one min_bits value above 64 is tried before stopping
+ break;
+ }
+ }
+ estimate_cost(function.num_params(), "vm forest", CompiledFunction(function, PassParams::ARRAY, VMForest::optimize_chain)); // comparison row: compiled function built with the VMForest optimize chain
+ }
+ }
+ }
+ }
+ }
+ fprintf(stderr, "\n"); // trailing blank line after the last section
+}
+
+int main(int, char **) { // benchmark entry point; command-line arguments are ignored
+ run_fast_forest_bench(); // all output goes to stderr
+ return 0;
+}
diff --git a/eval/src/tests/eval/gbdt/gbdt_test.cpp b/eval/src/tests/eval/gbdt/gbdt_test.cpp
index 14fa4510f4d..adb3d22847a 100644
--- a/eval/src/tests/eval/gbdt/gbdt_test.cpp
+++ b/eval/src/tests/eval/gbdt/gbdt_test.cpp
@@ -17,6 +17,14 @@ using namespace vespalib::eval::gbdt;
//-----------------------------------------------------------------------------
+bool is_little_endian() { // runtime check of the host byte order
+ uint32_t value = 0;
+ uint8_t bytes[4] = {0, 1, 2, 3}; // byte values listed in increasing memory-address order
+ static_assert(sizeof(bytes) == sizeof(value)); // the copy below must cover the integer exactly
+ memcpy(&value, bytes, sizeof(bytes)); // overwrite the integer's storage with the raw bytes
+ return (value == 0x03020100); // true when the byte at the lowest address (0) is the least significant one
+}
+
double eval_double(const Function &function, const std::vector<double> &params) {
InterpretedFunction ifun(SimpleTensorEngine::ref(), function, NodeTypes());
InterpretedFunction::Context ctx(ifun);
@@ -26,6 +34,22 @@ double eval_double(const Function &function, const std::vector<double> &params)
double my_resolve(void *ctx, size_t idx) { return ((double*)ctx)[idx]; } // treats ctx as an array of double and returns element idx; used as the resolver callback for lazy functions in eval_compiled
+double eval_compiled(const CompiledFunction &cfun, std::vector<double> &params) { // evaluate cfun on params using the parameter-passing mode reported by cfun.pass_params()
+ ASSERT_EQUAL(params.size(), cfun.num_params()); // the caller must supply exactly one value per function parameter
+ if (cfun.pass_params() == PassParams::ARRAY) {
+ return cfun.get_function()(&params[0]); // array mode: hand over a pointer to the first parameter value
+ }
+ if (cfun.pass_params() == PassParams::LAZY) {
+ return cfun.get_lazy_function()(my_resolve, &params[0]); // lazy mode: my_resolve reads values out of params by index
+ }
+ return 31212.0; // reached only for any other PassParams value; looks like an arbitrary sentinel -- TODO confirm
+}
+
+double eval_ff(const FastForest &ff, FastForest::Context &ctx, const std::vector<double> &params) { // evaluate ff with ctx on double-valued params
+ std::vector<float> my_params(params.begin(), params.end()); // element-wise double -> float copy, since eval is handed a float pointer
+ return ff.eval(ctx, &my_params[0]); // NOTE(review): &my_params[0] assumes params is non-empty
+}
+
//-----------------------------------------------------------------------------
TEST("require that tree stats can be calculated") {
@@ -304,29 +328,18 @@ TEST("require that FastForest model evaluation works") {
EXPECT_TRUE(compiled.get_forests().empty());
auto forest = FastForest::try_convert(function);
ASSERT_TRUE(forest);
- FastForest::Context ctx(*forest);
+ auto ctx = forest->create_context();
std::vector<double> p1({0.5, 0.5, 0.5}); // all true: 1.0 + 10.0
std::vector<double> p2({2.5, 2.5, 2.5}); // all false: 4.0 + 40.0
std::vector<double> pn(3, std::numeric_limits<double>::quiet_NaN()); // default: 4.0 + 10.0
- EXPECT_EQUAL(forest->eval(ctx, [&p1](size_t i){return p1[i];}), f(&p1[0]));
- EXPECT_EQUAL(forest->eval(ctx, [&p2](size_t i){return p2[i];}), f(&p2[0]));
- EXPECT_EQUAL(forest->eval(ctx, [&pn](size_t i){return pn[i];}), f(&pn[0]));
- EXPECT_EQUAL(forest->eval(ctx, [&p1](size_t i){return p1[i];}), f(&p1[0]));
+ EXPECT_EQUAL(eval_ff(*forest, *ctx, p1), f(&p1[0]));
+ EXPECT_EQUAL(eval_ff(*forest, *ctx, p2), f(&p2[0]));
+ EXPECT_EQUAL(eval_ff(*forest, *ctx, pn), f(&pn[0]));
+ EXPECT_EQUAL(eval_ff(*forest, *ctx, p1), f(&p1[0]));
}
//-----------------------------------------------------------------------------
-double eval_compiled(const CompiledFunction &cfun, std::vector<double> &params) {
- ASSERT_EQUAL(params.size(), cfun.num_params());
- if (cfun.pass_params() == PassParams::ARRAY) {
- return cfun.get_function()(&params[0]);
- }
- if (cfun.pass_params() == PassParams::LAZY) {
- return cfun.get_lazy_function()(my_resolve, &params[0]);
- }
- return 31212.0;
-}
-
TEST("require that forests evaluate to approximately the same for all evaluation options") {
for (PassParams pass_params: {PassParams::ARRAY, PassParams::LAZY}) {
for (size_t tree_size: std::vector<size_t>({20})) {
@@ -356,9 +369,36 @@ TEST("require that forests evaluate to approximately the same for all evaluation
EXPECT_EQUAL(expected_nan, eval_compiled(deinline, inputs_nan));
EXPECT_EQUAL(expected_nan, eval_compiled(vm_forest, inputs_nan));
if (forest) {
- FastForest::Context ctx(*forest);
- EXPECT_EQUAL(expected, forest->eval(ctx, [&inputs](size_t i){return inputs[i];}));
- EXPECT_EQUAL(expected_nan, forest->eval(ctx, [&inputs_nan](size_t i){return inputs_nan[i];}));
+ auto ctx = forest->create_context();
+ EXPECT_EQUAL(expected, eval_ff(*forest, *ctx, inputs));
+ EXPECT_EQUAL(expected_nan, eval_ff(*forest, *ctx, inputs_nan));
+ }
+ }
+ }
+ }
+ }
+ }
+}
+
+TEST("require that fast forest evaluation is correct for all tree size categories") {
+ for (size_t tree_size: std::vector<size_t>({7,15,30,61,127})) {
+ for (size_t num_trees: std::vector<size_t>({127})) {
+ for (size_t num_features: std::vector<size_t>({35})) {
+ for (size_t less_percent: std::vector<size_t>({100})) {
+ for (size_t invert_percent: std::vector<size_t>({50})) {
+ vespalib::string expression = Model().max_features(num_features).less_percent(less_percent).invert_percent(invert_percent).make_forest(num_trees, tree_size);
+ Function function = Function::parse(expression);
+ auto forest = FastForest::try_convert(function);
+ if ((tree_size <= 64) || is_little_endian()) {
+ ASSERT_TRUE(forest);
+ TEST_STATE(forest->impl_name().c_str());
+ std::vector<double> inputs(function.num_params(), 0.5);
+ std::vector<double> inputs_nan(function.num_params(), std::numeric_limits<double>::quiet_NaN());
+ double expected = eval_double(function, inputs);
+ double expected_nan = eval_double(function, inputs_nan);
+ auto ctx = forest->create_context();
+ EXPECT_EQUAL(expected, eval_ff(*forest, *ctx, inputs));
+ EXPECT_EQUAL(expected_nan, eval_ff(*forest, *ctx, inputs_nan));
}
}
}
diff --git a/eval/src/tests/eval/gbdt/model.cpp b/eval/src/tests/eval/gbdt/model.cpp
index ae1c9bea437..8f0d87a4020 100644
--- a/eval/src/tests/eval/gbdt/model.cpp
+++ b/eval/src/tests/eval/gbdt/model.cpp
@@ -13,6 +13,7 @@ class Model
{
private:
std::mt19937 _gen;
+ size_t _max_features;
size_t _less_percent;
size_t _invert_percent;
@@ -32,9 +33,9 @@ private:
}
std::string make_feature_name() {
- size_t max_feature = 2;
- while ((max_feature < 1024) && (get_int(0, 99) < 55)) {
- max_feature *= 2;
+ size_t max_feature = 7;
+ while ((max_feature < _max_features) && (get_int(0, 99) < 55)) {
+ max_feature = std::min(max_feature * 2, _max_features);
}
return make_string("feature_%zu", get_int(1, max_feature));
}
@@ -60,7 +61,12 @@ private:
}
public:
- explicit Model(size_t seed = 5489u) : _gen(seed), _less_percent(80), _invert_percent(0) {}
+ explicit Model(size_t seed = 5489u) : _gen(seed), _max_features(1024), _less_percent(80), _invert_percent(0) {}
+
+ Model &max_features(size_t value) { // set the upper bound that make_feature_name applies when choosing a feature index
+ _max_features = value;
+ return *this; // returning *this lets setter calls be chained
+ }
Model &less_percent(size_t value) {
_less_percent = value;