From b4f91197f2e3e3a7c767aa223825684337ab2cee Mon Sep 17 00:00:00 2001 From: Geir Storli Date: Mon, 4 Mar 2024 16:12:44 +0000 Subject: Prepare benchmarks for analyzing term search in attributes and disk index. --- .../iterator_benchmark/iterator_benchmark_test.cpp | 72 +++++++++++++--------- 1 file changed, 43 insertions(+), 29 deletions(-) (limited to 'searchlib') diff --git a/searchlib/src/tests/queryeval/iterator_benchmark/iterator_benchmark_test.cpp b/searchlib/src/tests/queryeval/iterator_benchmark/iterator_benchmark_test.cpp index 65fa55174d4..5134ca575ca 100644 --- a/searchlib/src/tests/queryeval/iterator_benchmark/iterator_benchmark_test.cpp +++ b/searchlib/src/tests/queryeval/iterator_benchmark/iterator_benchmark_test.cpp @@ -578,7 +578,7 @@ struct BenchmarkCase { {} vespalib::string to_string() const { return "op=" + ::to_string(query_op) + ", cfg=" + field_cfg.to_string() + - ", strict_context=" + ::to_string(strict_context) + ", force_strict=" + ::to_string(force_strict); + ", strict_context=" + ::to_string(strict_context) + (force_strict ? (", force_strict=" + ::to_string(force_strict)) : ""); } }; @@ -622,6 +622,7 @@ public: } } const std::vector& cases() const { return _cases; } + bool empty() const { return _cases.empty(); } }; void @@ -632,7 +633,6 @@ print_summary(const BenchmarkSummary& summary) std::cout << std::fixed << std::setprecision(3) << "" << std::setw(50) << std::left << c.bcase.to_string() << ": " << "ms_per_act_cost=" << std::setw(7) << std::right << c.result.ms_per_actual_cost_stats().to_string() - << ", ms_per_alt_cost=" << std::setw(7) << std::right << c.result.ms_per_alt_cost_stats().to_string() << ", scaled_cost=" << std::setw(7) << c.scaled_cost << std::endl; } } @@ -754,9 +754,8 @@ run_benchmark_case(const BenchmarkCaseSetup& setup) } void -run_benchmarks(const BenchmarkSetup& setup) +run_benchmarks(const BenchmarkSetup& setup, BenchmarkSummary& summary) { - BenchmarkSummary summary; for (const auto& field_cfg : setup.field_cfgs) { for (auto query_op : setup.query_ops) { for (bool strict : setup.strictness) { @@ -767,6 +766,13 @@ run_benchmarks(const BenchmarkSetup& setup) } } } +} + +void +run_benchmarks(const BenchmarkSetup& setup) +{ + BenchmarkSummary summary; + run_benchmarks(setup, summary); summary.calc_scaled_costs(); print_summary(summary); } @@ -788,7 +794,8 @@ make_index_config() } constexpr uint32_t num_docs = 10'000'000; -const std::vector base_hit_ratios = {0.001, 0.01, 0.1, 0.5}; +const std::vector base_hit_ratios = {0.0001, 0.001, 0.01, 0.1, 0.5, 1.0}; +const std::vector filter_hit_ratios = {0.00001, 0.00005, 0.0001, 0.0005, 0.001, 0.005, 0.01, 0.05, 0.1, 0.2, 0.5, 1.0}; const auto int32 = make_attr_config(BasicType::INT32, CollectionType::SINGLE, false); const auto int32_fs = make_attr_config(BasicType::INT32, CollectionType::SINGLE, true); const auto int32_array = make_attr_config(BasicType::INT32, CollectionType::ARRAY, false); @@ -802,43 +809,42 @@ const auto str_array_fs = make_attr_config(BasicType::STRING, CollectionType::AR const auto str_wset = make_attr_config(BasicType::STRING, CollectionType::WSET, false); const auto str_index = make_index_config(); +BenchmarkSummary global_summary; + TEST(IteratorBenchmark, analyze_term_search_in_disk_index) { - const std::vector hit_ratios = {0.001, 0.01, 0.1, 0.5, 1.0}; - BenchmarkSetup setup(num_docs, {str_index}, {QueryOperator::Term}, {true, false}, hit_ratios); - setup.filter_hit_ratios = {0.00001, 0.00005, 0.0001, 0.0005, 0.001, 0.005, 0.01, 0.05, 0.1, 0.2, 0.5, 1.0}; - run_benchmarks(setup); + BenchmarkSetup setup(num_docs, {str_index}, {QueryOperator::Term}, {true, false}, base_hit_ratios); + setup.filter_hit_ratios = filter_hit_ratios; + setup.filter_crossover_factor = 1.0; + run_benchmarks(setup, global_summary); } -TEST(IteratorBenchmark, analyze_term_search_in_attributes_without_fast_search) +TEST(IteratorBenchmark, analyze_term_search_in_attributes_non_strict) { std::vector field_cfgs = {int32, int32_array, int32_wset, str, str_array, str_wset}; - const std::vector hit_ratios = {0.001, 0.01, 0.1, 0.5, 1.0}; - BenchmarkSetup setup(num_docs, field_cfgs, {QueryOperator::Term}, {true, false}, hit_ratios); + BenchmarkSetup setup(num_docs, field_cfgs, {QueryOperator::Term}, {false}, base_hit_ratios); setup.default_values_per_document = 1; - setup.filter_hit_ratios = {0.00001, 0.00005, 0.0001, 0.0005, 0.001, 0.005, 0.01, 0.05, 0.1, 0.2, 0.5, 1.0}; - run_benchmarks(setup); + setup.filter_hit_ratios = filter_hit_ratios; + setup.filter_crossover_factor = 1.0; + run_benchmarks(setup, global_summary); } -TEST(IteratorBenchmark, analyze_term_search_in_attributes_with_fast_search) +TEST(IteratorBenchmark, analyze_term_search_in_attributes_strict) { - std::vector field_cfgs = {int32_fs, int32_array_fs, str_fs, str_array_fs}; - const std::vector hit_ratios = {0.001, 0.01, 0.1, 0.5, 1.0}; - BenchmarkSetup setup(num_docs, field_cfgs, {QueryOperator::Term}, {true, false}, hit_ratios); - setup.filter_hit_ratios = {0.00001, 0.00005, 0.0001, 0.0005, 0.001, 0.005, 0.01, 0.05, 0.1, 0.2, 0.5, 1.0}; - run_benchmarks(setup); + std::vector field_cfgs = {int32, int32_array, int32_wset, str, str_array, str_wset}; + // Note: This hit ratio matches the estimate of such attributes (0.5). + BenchmarkSetup setup(num_docs, field_cfgs, {QueryOperator::Term}, {true}, {0.5}); + setup.default_values_per_document = 1; + run_benchmarks(setup, global_summary); } -TEST(IteratorBenchmark, analyze_term_search_in_attributes_combined) +TEST(IteratorBenchmark, analyze_term_search_in_fast_search_attributes) { - // Note: all fast-search attributes has similar performance, so only needed to include one. - std::vector field_cfgs = {int32_fs, int32, int32_array, int32_wset, str, str_array, str_wset}; - const std::vector hit_ratios = {0.001, 0.01, 0.1, 0.5, 1.0}; - BenchmarkSetup setup(num_docs, field_cfgs, {QueryOperator::Term}, {true, false}, hit_ratios); - setup.default_values_per_document = 1; - setup.filter_hit_ratios = {0.00001, 0.00005, 0.0001, 0.0005, 0.001, 0.005, 0.01, 0.05, 0.1, 0.2, 0.5, 1.0}; + std::vector field_cfgs = {int32_fs, int32_array_fs, str_fs, str_array_fs}; + BenchmarkSetup setup(num_docs, field_cfgs, {QueryOperator::Term}, {true, false}, base_hit_ratios); + setup.filter_hit_ratios = filter_hit_ratios; setup.filter_crossover_factor = 1.0; - run_benchmarks(setup); + run_benchmarks(setup, global_summary); } TEST(IteratorBenchmark, analyze_complex_leaf_operators) @@ -868,4 +874,12 @@ TEST(IteratorBenchmark, or_benchmark) run_benchmarks(setup); } -GTEST_MAIN_RUN_ALL_TESTS() +int main(int argc, char **argv) { + ::testing::InitGoogleTest(&argc, argv); + int res = RUN_ALL_TESTS(); + if (!global_summary.empty()) { + global_summary.calc_scaled_costs(); + print_summary(global_summary); + } + return res; +} -- cgit v1.2.3