diff options
author | HÃ¥vard Pettersen <3535158+havardpe@users.noreply.github.com> | 2024-04-19 15:19:44 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2024-04-19 15:19:44 +0200 |
commit | 3809a09b0e33ea2203f5165430631d4f1aa74adc (patch) | |
tree | ecb6acffec03107e46c1d8794720b8de95291280 /searchlib | |
parent | 433cb01e19f6bb51d6a2d029482a6e16431cb055 (diff) | |
parent | 4d66b9a6ac8c6991bed43ae195820a8c7f8a74d6 (diff) |
Merge pull request #30973 from vespa-engine/geirst/better-non-strict-cost-estimates-for-btree-iterators
Use better non-strict cost estimates for btree and disk index iterators.
Diffstat (limited to 'searchlib')
6 files changed, 154 insertions, 90 deletions
diff --git a/searchlib/src/tests/queryeval/iterator_benchmark/iterator_benchmark_test.cpp b/searchlib/src/tests/queryeval/iterator_benchmark/iterator_benchmark_test.cpp index f7a358efb26..f4a1ade8a66 100644 --- a/searchlib/src/tests/queryeval/iterator_benchmark/iterator_benchmark_test.cpp +++ b/searchlib/src/tests/queryeval/iterator_benchmark/iterator_benchmark_test.cpp @@ -750,19 +750,36 @@ void run_intermediate_blueprint_benchmark(const BlueprintFactorySetup& a, const BlueprintFactorySetup& b, size_t num_docs) { print_intermediate_blueprint_result_header(2); + double max_speedup = 0.0; + double min_speedup = std::numeric_limits<double>::max(); for (double b_hit_ratio: b.op_hit_ratios) { auto b_factory = b.make_factory_shared(num_docs, b_hit_ratio); for (double a_hit_ratio : a.op_hit_ratios) { IntermediateBlueprintFactoryType factory; factory.add_child(a.make_factory(num_docs, a_hit_ratio)); factory.add_child(b_factory); - for (auto algo: {PlanningAlgo::Order, PlanningAlgo::Estimate, PlanningAlgo::Cost, PlanningAlgo::CostForceStrict}) { + double time_ms_esti = 0.0; + for (auto algo: {PlanningAlgo::Order, PlanningAlgo::Estimate, PlanningAlgo::Cost, + PlanningAlgo::CostForceStrict}) { auto res = benchmark_search(factory, num_docs + 1, true, false, false, 1.0, algo); print_intermediate_blueprint_result(res, {a_hit_ratio, b_hit_ratio}, algo, num_docs); + if (algo == PlanningAlgo::Estimate) { + time_ms_esti = res.time_ms; + } + if (algo == PlanningAlgo::CostForceStrict) { + double speedup = time_ms_esti / res.time_ms; + if (speedup > max_speedup) { + max_speedup = speedup; + } + if (speedup < min_speedup) { + min_speedup = speedup; + } + std::cout << "speedup (esti/forc)=" << std::setprecision(4) << speedup << std::endl; + } } - std::cout << std::endl; } } + std::cout << "max_speedup=" << max_speedup << ", min_speedup=" << min_speedup << std::endl << std::endl; } void @@ -787,6 +804,12 @@ gen_ratios(double middle, double range_multiplier, size_t num_samples) for (size_t i = 0; i < num_samples; ++i) { res.push_back(ratio); ratio *= factor; + if (ratio > 1.0) { + if (res.size() < num_samples) { + res.push_back(1.0); + } + break; + } } return res; } @@ -831,7 +854,6 @@ TEST(IteratorBenchmark, analyze_term_search_in_disk_index) { BenchmarkSetup setup(num_docs, {str_index}, {QueryOperator::Term}, {true, false}, base_hit_ratios); setup.filter_hit_ratios = filter_hit_ratios; - setup.filter_crossover_factor = 1.0; run_benchmarks(setup, global_summary); } @@ -863,31 +885,24 @@ TEST(IteratorBenchmark, analyze_term_search_in_fast_search_attributes) run_benchmarks(setup, global_summary); } -TEST(IteratorBenchmark, analyze_in_operator_non_strict) +TEST(IteratorBenchmark, analyze_IN_non_strict) { - const std::vector<double> hit_ratios = {0.001, 0.01, 0.1, 0.2, 0.4, 0.6, 0.8}; - BenchmarkSetup setup(num_docs, {int32_fs}, {QueryOperator::In}, {false}, hit_ratios, {5, 9, 10, 100, 1000, 10000}); - setup.disjunct_children = true; - run_benchmarks(setup); + for (auto in_hit_ratio : {0.01, 0.1, 0.5}) { + BenchmarkSetup setup(num_docs, {int32_fs}, {QueryOperator::In}, {false}, {in_hit_ratio}, {2, 5, 9, 10, 100, 1000, 10000}); + setup.filter_hit_ratios = gen_ratios(in_hit_ratio, 10.0, 13); + setup.disjunct_children = true; + run_benchmarks(setup); + } } -TEST(IteratorBenchmark, analyze_in_operator_strict) +TEST(IteratorBenchmark, analyze_IN_strict) { const std::vector<double> hit_ratios = {0.001, 0.01, 0.1, 0.2, 0.4, 0.6, 0.8}; - BenchmarkSetup setup(num_docs, {int32_fs}, {QueryOperator::In}, {true}, hit_ratios, {5, 9, 10, 100, 1000, 10000}); + BenchmarkSetup setup(num_docs, {int32_fs}, {QueryOperator::In}, {true}, hit_ratios, {2, 5, 9, 10, 100, 1000, 10000}); setup.disjunct_children = true; run_benchmarks(setup); } -TEST(IteratorBenchmark, analyze_complex_leaf_operators) -{ - std::vector<FieldConfig> field_cfgs = {int32_array_fs}; - std::vector<QueryOperator> query_ops = {QueryOperator::In, QueryOperator::DotProduct}; - const std::vector<double> hit_ratios = {0.001, 0.01, 0.1, 0.2, 0.4, 0.6, 0.8}; - BenchmarkSetup setup(num_docs, field_cfgs, query_ops, {true, false}, hit_ratios, {1, 2, 10, 100}); - run_benchmarks(setup); -} - TEST(IteratorBenchmark, analyze_weak_and_operators) { std::vector<FieldConfig> field_cfgs = {int32_wset_fs}; @@ -897,24 +912,6 @@ TEST(IteratorBenchmark, analyze_weak_and_operators) run_benchmarks(setup); } -TEST(IteratorBenchmark, term_benchmark) -{ - BenchmarkSetup setup(num_docs, {int32_fs}, {QueryOperator::Term}, {true, false}, base_hit_ratios); - run_benchmarks(setup); -} - -TEST(IteratorBenchmark, and_benchmark) -{ - BenchmarkSetup setup(num_docs, {int32_array_fs}, {QueryOperator::And}, {true, false}, base_hit_ratios, {1, 2, 4, 8}); - run_benchmarks(setup); -} - -TEST(IteratorBenchmark, or_benchmark) -{ - BenchmarkSetup setup(num_docs, {int32_array_fs}, {QueryOperator::Or}, {true, false}, base_hit_ratios, {1, 10, 100, 1000}); - run_benchmarks(setup); -} - TEST(IteratorBenchmark, or_vs_filter_crossover) { auto fixed_or = make_blueprint_factory(int32_array_fs, QueryOperator::Or, num_docs, 0, 0.1, 100, false); @@ -933,16 +930,38 @@ TEST(IteratorBenchmark, or_vs_filter_crossover_with_allow_force_strict) analyze_crossover(*fixed_or, variable_term, num_docs + 1, true, 0.0001); } -TEST(IteratorBenchmark, analyze_and_with_filter_vs_in) +TEST(IteratorBenchmark, analyze_AND_filter_vs_IN) +{ + for (auto in_filter_ratio : {0.01, 0.1, 0.5}) { + for (uint32_t children: {2, 10, 100, 1000}) { + run_and_benchmark({int32_fs, QueryOperator::Term, gen_ratios(in_filter_ratio, 10.0, 13)}, + {int32_fs, QueryOperator::In, {in_filter_ratio}, children, false}, + num_docs); + } + } +} + +TEST(IteratorBenchmark, analyze_AND_filter_vs_OR) +{ + for (auto or_filter_ratio : {0.01, 0.1, 0.5}) { + for (uint32_t children: {2, 10, 100, 1000}) { + run_and_benchmark({int32_fs, QueryOperator::Term, gen_ratios(or_filter_ratio, 10, 13)}, + {int32_fs, QueryOperator::Or, {or_filter_ratio}, children, false}, + num_docs); + } + } +} + +TEST(IteratorBenchmark, analyze_AND_filter_vs_IN_array) { - for (uint32_t children: {10, 100, 1000}) { - run_and_benchmark({int32_fs, QueryOperator::Term, gen_ratios(0.1, 8.0, 15)}, - {int32_fs, QueryOperator::In, {0.1}, children, false}, + for (uint32_t children: {2, 10, 100, 1000}) { + run_and_benchmark({int32_fs, QueryOperator::Term, gen_ratios(0.1, 10.0, 13)}, + {int32_array_fs, QueryOperator::In, {0.1}, children, false}, num_docs); } } -TEST(IteratorBenchmark, analyze_and_with_bitvector_vs_in) +TEST(IteratorBenchmark, analyze_AND_bitvector_vs_IN) { for (uint32_t children: {10, 100, 1000, 10000}) { run_and_benchmark({int32_fs, QueryOperator::In, {0.05, 0.1, 0.15, 0.2, 0.25, 0.3, 0.35, 0.40, 0.45, 0.50, 0.55, 0.60}, children, true}, @@ -951,21 +970,35 @@ TEST(IteratorBenchmark, analyze_and_with_bitvector_vs_in) } } -TEST(IteratorBenchmark, analyze_and_with_filter_vs_in_array) +TEST(IteratorBenchmark, analyze_OR_non_strict_fs) { - for (uint32_t children: {10, 100, 1000}) { - run_and_benchmark({int32_fs, QueryOperator::Term, gen_ratios(0.1, 8.0, 15)}, - {int32_array_fs, QueryOperator::In, {0.1}, children, false}, - num_docs); + for (auto or_hit_ratio : {0.01, 0.1, 0.5}) { + BenchmarkSetup setup(num_docs, {int32_fs}, {QueryOperator::Or}, {false}, {or_hit_ratio}, + {2, 4, 6, 8, 10, 100, 1000}); + setup.filter_hit_ratios = gen_ratios(or_hit_ratio, 10.0, 13); + run_benchmarks(setup); } } -TEST(IteratorBenchmark, analyze_and_with_filter_vs_or) +TEST(IteratorBenchmark, analyze_OR_non_strict_non_fs) { - for (uint32_t children: {10, 100, 1000}) { - run_and_benchmark({int32_fs, QueryOperator::Term, gen_ratios(0.1, 8.0, 15)}, - {int32_fs, QueryOperator::Or, {0.1}, children, false}, - num_docs); + BenchmarkSetup setup(num_docs, {int32}, {QueryOperator::Or}, {false}, {0.1}, {2, 4, 6, 8, 10}); + setup.filter_hit_ratios = gen_ratios(0.1, 10.0, 13); + run_benchmarks(setup); +} + +TEST(IteratorBenchmark, analyze_OR_strict) +{ + BenchmarkSetup setup(num_docs, {int32_fs}, {QueryOperator::Or}, {true}, {0.01, 0.1, 0.5}, {2, 4, 6, 8, 10, 100, 1000}); + run_benchmarks(setup); +} + +TEST(IteratorBenchmark, analyze_btree_iterator_non_strict) +{ + for (auto term_ratio : {0.01, 0.1, 0.5, 1.0}) { + BenchmarkSetup setup(num_docs, {int32_fs}, {QueryOperator::Term}, {false}, {term_ratio}, {1}); + setup.filter_hit_ratios = gen_ratios(term_ratio, 10.0, 15); + run_benchmarks(setup); } } diff --git a/searchlib/src/vespa/searchlib/attribute/attribute_blueprint_factory.cpp b/searchlib/src/vespa/searchlib/attribute/attribute_blueprint_factory.cpp index aebfda8fffd..569edc0a4ab 100644 --- a/searchlib/src/vespa/searchlib/attribute/attribute_blueprint_factory.cpp +++ b/searchlib/src/vespa/searchlib/attribute/attribute_blueprint_factory.cpp @@ -166,7 +166,7 @@ public: return {0.5, lookup_cost(indirections), lookup_strict_cost(indirections)}; } else { double rel_est = abs_to_rel_est(_hit_estimate.est_hits(), docid_limit); - return {rel_est, btree_cost(), btree_strict_cost(rel_est)}; + return {rel_est, btree_cost(rel_est), btree_strict_cost(rel_est)}; } } @@ -519,8 +519,9 @@ public: double estimate(const IDirectPostingStore::LookupResult &term) const noexcept { return abs_to_rel_est(term.posting_size, docid_limit); } - double cost(const IDirectPostingStore::LookupResult &) const noexcept { - return btree_cost(); + double cost(const IDirectPostingStore::LookupResult &term) const noexcept { + double rel_est = abs_to_rel_est(term.posting_size, docid_limit); + return btree_cost(rel_est); } double strict_cost(const IDirectPostingStore::LookupResult &term) const noexcept { double rel_est = abs_to_rel_est(term.posting_size, docid_limit); diff --git a/searchlib/src/vespa/searchlib/attribute/direct_multi_term_blueprint.hpp b/searchlib/src/vespa/searchlib/attribute/direct_multi_term_blueprint.hpp index 160bb199fb8..4ede8957f4e 100644 --- a/searchlib/src/vespa/searchlib/attribute/direct_multi_term_blueprint.hpp +++ b/searchlib/src/vespa/searchlib/attribute/direct_multi_term_blueprint.hpp @@ -192,8 +192,9 @@ DirectMultiTermBlueprint<PostingStoreType, SearchType>::calculate_flow_stats(uin double estimate(const IDirectPostingStore::LookupResult &term) const noexcept { return abs_to_rel_est(term.posting_size, docid_limit); } - double cost(const IDirectPostingStore::LookupResult &) const noexcept { - return search::queryeval::flow::btree_cost(); + double cost(const IDirectPostingStore::LookupResult &term) const noexcept { + double rel_est = abs_to_rel_est(term.posting_size, docid_limit); + return search::queryeval::flow::btree_cost(rel_est); } double strict_cost(const IDirectPostingStore::LookupResult &term) const noexcept { double rel_est = abs_to_rel_est(term.posting_size, docid_limit); diff --git a/searchlib/src/vespa/searchlib/diskindex/disktermblueprint.cpp b/searchlib/src/vespa/searchlib/diskindex/disktermblueprint.cpp index c0d0b1baa8c..c30b02a0c37 100644 --- a/searchlib/src/vespa/searchlib/diskindex/disktermblueprint.cpp +++ b/searchlib/src/vespa/searchlib/diskindex/disktermblueprint.cpp @@ -72,7 +72,7 @@ queryeval::FlowStats DiskTermBlueprint::calculate_flow_stats(uint32_t docid_limit) const { double rel_est = abs_to_rel_est(_lookupRes->counts._numDocs, docid_limit); - return {rel_est, disk_index_cost(), disk_index_strict_cost(rel_est)}; + return {rel_est, disk_index_cost(rel_est), disk_index_strict_cost(rel_est)}; } SearchIterator::UP diff --git a/searchlib/src/vespa/searchlib/memoryindex/field_index.cpp b/searchlib/src/vespa/searchlib/memoryindex/field_index.cpp index eba135d8519..2bc94073c92 100644 --- a/searchlib/src/vespa/searchlib/memoryindex/field_index.cpp +++ b/searchlib/src/vespa/searchlib/memoryindex/field_index.cpp @@ -261,7 +261,7 @@ public: queryeval::FlowStats calculate_flow_stats(uint32_t docid_limit) const override { double rel_est = abs_to_rel_est(_posting_itr.size(), docid_limit); - return {rel_est, btree_cost(), btree_strict_cost(rel_est)}; + return {rel_est, btree_cost(rel_est), btree_strict_cost(rel_est)}; } SearchIterator::UP createLeafSearch(const TermFieldMatchDataArray& tfmda) const override { diff --git a/searchlib/src/vespa/searchlib/queryeval/flow_tuning.h b/searchlib/src/vespa/searchlib/queryeval/flow_tuning.h index 356ecd4c992..e8a58bba9dc 100644 --- a/searchlib/src/vespa/searchlib/queryeval/flow_tuning.h +++ b/searchlib/src/vespa/searchlib/queryeval/flow_tuning.h @@ -7,60 +7,87 @@ namespace search::queryeval::flow { /** - * This function is used when calculating the strict cost of - * intermediate and complex leaf blueprints that use a heap for their strict iterator implementation. - * - * Iterator benchmarking has shown the need to increase the strict cost - * of complex blueprints, to avoid that they are forced strict too early. - * The 5.0 multiplier reflects this. - * - * Program used: searchlib/src/tests/queryeval/iterator_benchmark - * Tests used: analyze_and_with_filter_vs_* - */ -inline double heap_cost(double my_est, size_t num_children) { - return 5.0 * my_est * std::log2(std::max(size_t(1), num_children)); -} - -/** - * The following constants and formulas were derived after analyzing term search over attributes - * (with and without fast-search) and disk index by using the iterator benchmark program: + * The following constants and formulas were derived after benchmarking and analyzing + * using the following benchmark program: * searchlib/src/tests/queryeval/iterator_benchmark * - * The following tests were executed on a machine with an Intel Xeon 2.5 GHz CPU with 48 cores and 256 Gb of memory: - * ./searchlib_iterator_benchmark_test_app --gtest_filter='*analyze_term_search*' - * - * TODO: Add details on OR benchmarking. + * The tests were executed on: + * - Machine with an Intel Xeon 2.5 GHz CPU with 48 cores and 256 Gb of memory. + * - Apple M1 MacBook Pro (2021) 32 GB. * * The benchmark summary shows the 'average ms per cost' of the different benchmark cases. - * The following constants and formulas were derived to balance 'average ms per cost' to be similar + * The constants and formulas are adjusted to balance 'average ms per cost' to be similar * across the different benchmark cases. + * + * The AND benchmark cases outputs the ratio (esti/forc) of the time_ms used by two query planning algorithms: + * 'estimate' (legacy) and 'cost with allowed force strict' (new). + * 'max_speedup' indicates the gain of using the new cost model, while 'min_speedup' indicates the loss. + * The constants and formulas are also adjusted to maximize speedup, while reducing loss. + * Tests used: + * - IteratorBenchmark::analyze_AND_filter_vs_IN + * - IteratorBenchmark::analyze_AND_filter_vs_OR + * - IteratorBenchmark::analyze_AND_filter_vs_IN_array + * - IteratorBenchmark::analyze_AND_bitvector_vs_IN */ +/** + * This function is used when calculating the strict cost of + * intermediate and complex leaf blueprints that use a heap for their strict iterator implementation. + * Tests used: + * - IteratorBenchmark::analyze_IN_strict + * - IteratorBenchmark::analyze_OR_strict + */ +inline double heap_cost(double my_est, size_t num_children) { + return my_est * std::log2(std::max(size_t(1), num_children)); +} + // Non-strict cost of lookup based matching in an attribute (not fast-search). +// Test used: IteratorBenchmark::analyze_term_search_in_attributes_non_strict inline double lookup_cost(size_t num_indirections) { return 1.0 + (num_indirections * 1.0); } // Non-strict cost of reverse lookup into a hash table (containing terms from a multi-term operator). +// Test used: IteratorBenchmark::analyze_IN_non_strict inline double reverse_hash_lookup() { - return 5.0; + return 1.0; } // Strict cost of lookup based matching in an attribute (not fast-search). +// IteratorBenchmark::analyze_term_search_in_attributes_strict inline double lookup_strict_cost(size_t num_indirections) { return lookup_cost(num_indirections); } -// Non-strict cost of matching in a btree posting list (e.g. fast-search attribute or memory index field). -inline double btree_cost() { - return 1.0; +/** + * Estimates the cost of evaluating an always strict iterator (e.g. btree) in a non-strict context. + * + * When the estimate and strict cost is low, this models the cost of checking whether + * the seek docid matches the docid the iterator is already positioned at (the 0.2 factor). + * + * The resulting non-strict cost is most accurate when the inflow is 1.0. + * The resulting non-strict cost is highly underestimated when the inflow goes to 0.0. + * It is important to have a better estimate at higher inflows, + * as the latency (time) penalty is higher if choosing wrong. + * + * Note: This formula is equal to forced_strict_cost() in flow.h. + */ +inline double non_strict_cost_of_strict_iterator(double estimate, double strict_cost) { + return 0.2 * (1.0 - estimate) + strict_cost; } // Strict cost of matching in a btree posting list (e.g. fast-search attribute or memory index field). +// Test used: IteratorBenchmark::analyze_term_search_in_fast_search_attributes inline double btree_strict_cost(double my_est) { return my_est; } +// Non-strict cost of matching in a btree posting list (e.g. fast-search attribute or memory index field). +// Test used: IteratorBenchmark::analyze_btree_iterator_non_strict +inline double btree_cost(double my_est) { + return non_strict_cost_of_strict_iterator(my_est, btree_strict_cost(my_est)); +} + // Non-strict cost of matching in a bitvector. inline double bitvector_cost() { return 1.0; @@ -72,14 +99,16 @@ inline double bitvector_strict_cost(double my_est) { return 1.5 * my_est; } -// Non-strict cost of matching in a disk index posting list. -inline double disk_index_cost() { - return 1.5; -} - // Strict cost of matching in a disk index posting list. +// Test used: IteratorBenchmark::analyze_term_search_in_disk_index inline double disk_index_strict_cost(double my_est) { return 1.5 * my_est; } +// Non-strict cost of matching in a disk index posting list. +// Test used: IteratorBenchmark::analyze_term_search_in_disk_index +inline double disk_index_cost(double my_est) { + return non_strict_cost_of_strict_iterator(my_est, disk_index_strict_cost(my_est)); +} + } |