diff options
Diffstat (limited to 'eval/src/tests')
-rw-r--r-- | eval/src/tests/eval/gbdt/.gitignore | 1 | ||||
-rw-r--r-- | eval/src/tests/eval/gbdt/CMakeLists.txt | 6 | ||||
-rw-r--r-- | eval/src/tests/eval/gbdt/fast_forest_bench.cpp | 56 | ||||
-rw-r--r-- | eval/src/tests/eval/gbdt/gbdt_test.cpp | 78 | ||||
-rw-r--r-- | eval/src/tests/eval/gbdt/model.cpp | 14 |
5 files changed, 132 insertions, 23 deletions
diff --git a/eval/src/tests/eval/gbdt/.gitignore b/eval/src/tests/eval/gbdt/.gitignore index d0ee762745c..952736e3543 100644 --- a/eval/src/tests/eval/gbdt/.gitignore +++ b/eval/src/tests/eval/gbdt/.gitignore @@ -1 +1,2 @@ /eval_gbdt_benchmark_app +/eval_fast_forest_bench_app diff --git a/eval/src/tests/eval/gbdt/CMakeLists.txt b/eval/src/tests/eval/gbdt/CMakeLists.txt index edbe56e3143..874a2d7bd02 100644 --- a/eval/src/tests/eval/gbdt/CMakeLists.txt +++ b/eval/src/tests/eval/gbdt/CMakeLists.txt @@ -13,3 +13,9 @@ vespa_add_executable(eval_gbdt_benchmark_app vespaeval ) vespa_add_test(NAME eval_gbdt_benchmark_app COMMAND eval_gbdt_benchmark_app BENCHMARK) +vespa_add_executable(eval_fast_forest_bench_app + SOURCES + fast_forest_bench.cpp + DEPENDS + vespaeval +) diff --git a/eval/src/tests/eval/gbdt/fast_forest_bench.cpp b/eval/src/tests/eval/gbdt/fast_forest_bench.cpp new file mode 100644 index 00000000000..76a56bec50c --- /dev/null +++ b/eval/src/tests/eval/gbdt/fast_forest_bench.cpp @@ -0,0 +1,56 @@ +// Copyright 2019 Oath Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+ +#include <vespa/eval/eval/function.h> +#include <vespa/eval/eval/fast_forest.h> +#include <vespa/eval/eval/vm_forest.h> +#include <vespa/eval/eval/llvm/compiled_function.h> +#include "model.cpp" + +using namespace vespalib::eval; +using namespace vespalib::eval::gbdt; + +template <typename T> +void estimate_cost(size_t num_params, const char *label, const T &impl) { + std::vector<double> inputs_min(num_params, 0.25); + std::vector<double> inputs_med(num_params, 0.50); + std::vector<double> inputs_max(num_params, 0.75); + std::vector<double> inputs_nan(num_params, std::numeric_limits<double>::quiet_NaN()); + double us_min = impl.estimate_cost_us(inputs_min, 5.0); + double us_med = impl.estimate_cost_us(inputs_med, 5.0); + double us_max = impl.estimate_cost_us(inputs_max, 5.0); + double us_nan = impl.estimate_cost_us(inputs_nan, 5.0); + fprintf(stderr, "[%12s] (per 100 eval): [low values] %6.3f ms, [medium values] %6.3f ms, [high values] %6.3f ms, [nan values] %6.3f ms\n", + label, (us_min / 10.0), (us_med / 10.0), (us_max / 10.0), (us_nan / 10.0)); +} + +void run_fast_forest_bench() { + for (size_t tree_size: std::vector<size_t>({8,16,32,64,128,256})) { + for (size_t num_trees: std::vector<size_t>({100, 500, 2500, 5000, 10000})) { + for (size_t max_features: std::vector<size_t>({200})) { + for (size_t less_percent: std::vector<size_t>({100})) { + for (size_t invert_percent: std::vector<size_t>({50})) { + fprintf(stderr, "\n=== features: %zu, num leafs: %zu, num trees: %zu\n", max_features, tree_size, num_trees); + vespalib::string expression = Model().max_features(max_features).less_percent(less_percent).invert_percent(invert_percent).make_forest(num_trees, tree_size); + Function function = Function::parse(expression); + for (size_t min_bits = std::max(size_t(8), tree_size); true; min_bits *= 2) { + auto forest = FastForest::try_convert(function, min_bits, 64); + if (forest) { + estimate_cost(function.num_params(), forest->impl_name().c_str(), *forest); + } + if 
(min_bits > 64) { + break; + } + } + estimate_cost(function.num_params(), "vm forest", CompiledFunction(function, PassParams::ARRAY, VMForest::optimize_chain)); + } + } + } + } + } + fprintf(stderr, "\n"); +} + +int main(int, char **) { + run_fast_forest_bench(); + return 0; +} diff --git a/eval/src/tests/eval/gbdt/gbdt_test.cpp b/eval/src/tests/eval/gbdt/gbdt_test.cpp index 14fa4510f4d..adb3d22847a 100644 --- a/eval/src/tests/eval/gbdt/gbdt_test.cpp +++ b/eval/src/tests/eval/gbdt/gbdt_test.cpp @@ -17,6 +17,14 @@ using namespace vespalib::eval::gbdt; //----------------------------------------------------------------------------- +bool is_little_endian() { + uint32_t value = 0; + uint8_t bytes[4] = {0, 1, 2, 3}; + static_assert(sizeof(bytes) == sizeof(value)); + memcpy(&value, bytes, sizeof(bytes)); + return (value == 0x03020100); +} + double eval_double(const Function &function, const std::vector<double> &params) { InterpretedFunction ifun(SimpleTensorEngine::ref(), function, NodeTypes()); InterpretedFunction::Context ctx(ifun); @@ -26,6 +34,22 @@ double eval_double(const Function &function, const std::vector<double> &params) double my_resolve(void *ctx, size_t idx) { return ((double*)ctx)[idx]; } +double eval_compiled(const CompiledFunction &cfun, std::vector<double> &params) { + ASSERT_EQUAL(params.size(), cfun.num_params()); + if (cfun.pass_params() == PassParams::ARRAY) { + return cfun.get_function()(&params[0]); + } + if (cfun.pass_params() == PassParams::LAZY) { + return cfun.get_lazy_function()(my_resolve, &params[0]); + } + return 31212.0; +} + +double eval_ff(const FastForest &ff, FastForest::Context &ctx, const std::vector<double> &params) { + std::vector<float> my_params(params.begin(), params.end()); + return ff.eval(ctx, &my_params[0]); +} + //----------------------------------------------------------------------------- TEST("require that tree stats can be calculated") { @@ -304,29 +328,18 @@ TEST("require that FastForest model evaluation works") { 
EXPECT_TRUE(compiled.get_forests().empty()); auto forest = FastForest::try_convert(function); ASSERT_TRUE(forest); - FastForest::Context ctx(*forest); + auto ctx = forest->create_context(); std::vector<double> p1({0.5, 0.5, 0.5}); // all true: 1.0 + 10.0 std::vector<double> p2({2.5, 2.5, 2.5}); // all false: 4.0 + 40.0 std::vector<double> pn(3, std::numeric_limits<double>::quiet_NaN()); // default: 4.0 + 10.0 - EXPECT_EQUAL(forest->eval(ctx, [&p1](size_t i){return p1[i];}), f(&p1[0])); - EXPECT_EQUAL(forest->eval(ctx, [&p2](size_t i){return p2[i];}), f(&p2[0])); - EXPECT_EQUAL(forest->eval(ctx, [&pn](size_t i){return pn[i];}), f(&pn[0])); - EXPECT_EQUAL(forest->eval(ctx, [&p1](size_t i){return p1[i];}), f(&p1[0])); + EXPECT_EQUAL(eval_ff(*forest, *ctx, p1), f(&p1[0])); + EXPECT_EQUAL(eval_ff(*forest, *ctx, p2), f(&p2[0])); + EXPECT_EQUAL(eval_ff(*forest, *ctx, pn), f(&pn[0])); + EXPECT_EQUAL(eval_ff(*forest, *ctx, p1), f(&p1[0])); } //----------------------------------------------------------------------------- -double eval_compiled(const CompiledFunction &cfun, std::vector<double> &params) { - ASSERT_EQUAL(params.size(), cfun.num_params()); - if (cfun.pass_params() == PassParams::ARRAY) { - return cfun.get_function()(&params[0]); - } - if (cfun.pass_params() == PassParams::LAZY) { - return cfun.get_lazy_function()(my_resolve, &params[0]); - } - return 31212.0; -} - TEST("require that forests evaluate to approximately the same for all evaluation options") { for (PassParams pass_params: {PassParams::ARRAY, PassParams::LAZY}) { for (size_t tree_size: std::vector<size_t>({20})) { @@ -356,9 +369,36 @@ TEST("require that forests evaluate to approximately the same for all evaluation EXPECT_EQUAL(expected_nan, eval_compiled(deinline, inputs_nan)); EXPECT_EQUAL(expected_nan, eval_compiled(vm_forest, inputs_nan)); if (forest) { - FastForest::Context ctx(*forest); - EXPECT_EQUAL(expected, forest->eval(ctx, [&inputs](size_t i){return inputs[i];})); - EXPECT_EQUAL(expected_nan, 
forest->eval(ctx, [&inputs_nan](size_t i){return inputs_nan[i];})); + auto ctx = forest->create_context(); + EXPECT_EQUAL(expected, eval_ff(*forest, *ctx, inputs)); + EXPECT_EQUAL(expected_nan, eval_ff(*forest, *ctx, inputs_nan)); + } + } + } + } + } + } +} + +TEST("require that fast forest evaluation is correct for all tree size categories") { + for (size_t tree_size: std::vector<size_t>({7,15,30,61,127})) { + for (size_t num_trees: std::vector<size_t>({127})) { + for (size_t num_features: std::vector<size_t>({35})) { + for (size_t less_percent: std::vector<size_t>({100})) { + for (size_t invert_percent: std::vector<size_t>({50})) { + vespalib::string expression = Model().max_features(num_features).less_percent(less_percent).invert_percent(invert_percent).make_forest(num_trees, tree_size); + Function function = Function::parse(expression); + auto forest = FastForest::try_convert(function); + if ((tree_size <= 64) || is_little_endian()) { + ASSERT_TRUE(forest); + TEST_STATE(forest->impl_name().c_str()); + std::vector<double> inputs(function.num_params(), 0.5); + std::vector<double> inputs_nan(function.num_params(), std::numeric_limits<double>::quiet_NaN()); + double expected = eval_double(function, inputs); + double expected_nan = eval_double(function, inputs_nan); + auto ctx = forest->create_context(); + EXPECT_EQUAL(expected, eval_ff(*forest, *ctx, inputs)); + EXPECT_EQUAL(expected_nan, eval_ff(*forest, *ctx, inputs_nan)); } } } diff --git a/eval/src/tests/eval/gbdt/model.cpp b/eval/src/tests/eval/gbdt/model.cpp index ae1c9bea437..8f0d87a4020 100644 --- a/eval/src/tests/eval/gbdt/model.cpp +++ b/eval/src/tests/eval/gbdt/model.cpp @@ -13,6 +13,7 @@ class Model { private: std::mt19937 _gen; + size_t _max_features; size_t _less_percent; size_t _invert_percent; @@ -32,9 +33,9 @@ private: } std::string make_feature_name() { - size_t max_feature = 2; - while ((max_feature < 1024) && (get_int(0, 99) < 55)) { - max_feature *= 2; + size_t max_feature = 7; + while 
((max_feature < _max_features) && (get_int(0, 99) < 55)) { + max_feature = std::min(max_feature * 2, _max_features); } return make_string("feature_%zu", get_int(1, max_feature)); } @@ -60,7 +61,12 @@ private: } public: - explicit Model(size_t seed = 5489u) : _gen(seed), _less_percent(80), _invert_percent(0) {} + explicit Model(size_t seed = 5489u) : _gen(seed), _max_features(1024), _less_percent(80), _invert_percent(0) {} + + Model &max_features(size_t value) { + _max_features = value; + return *this; + } Model &less_percent(size_t value) { _less_percent = value; |