aboutsummaryrefslogtreecommitdiffstats
path: root/searchlib
diff options
context:
space:
mode:
authorHÃ¥vard Pettersen <3535158+havardpe@users.noreply.github.com>2024-04-19 15:19:44 +0200
committerGitHub <noreply@github.com>2024-04-19 15:19:44 +0200
commit3809a09b0e33ea2203f5165430631d4f1aa74adc (patch)
treeecb6acffec03107e46c1d8794720b8de95291280 /searchlib
parent433cb01e19f6bb51d6a2d029482a6e16431cb055 (diff)
parent4d66b9a6ac8c6991bed43ae195820a8c7f8a74d6 (diff)
Merge pull request #30973 from vespa-engine/geirst/better-non-strict-cost-estimates-for-btree-iterators
Use better non-strict cost estimates for btree and disk index iterators.
Diffstat (limited to 'searchlib')
-rw-r--r--searchlib/src/tests/queryeval/iterator_benchmark/iterator_benchmark_test.cpp137
-rw-r--r--searchlib/src/vespa/searchlib/attribute/attribute_blueprint_factory.cpp7
-rw-r--r--searchlib/src/vespa/searchlib/attribute/direct_multi_term_blueprint.hpp5
-rw-r--r--searchlib/src/vespa/searchlib/diskindex/disktermblueprint.cpp2
-rw-r--r--searchlib/src/vespa/searchlib/memoryindex/field_index.cpp2
-rw-r--r--searchlib/src/vespa/searchlib/queryeval/flow_tuning.h91
6 files changed, 154 insertions, 90 deletions
diff --git a/searchlib/src/tests/queryeval/iterator_benchmark/iterator_benchmark_test.cpp b/searchlib/src/tests/queryeval/iterator_benchmark/iterator_benchmark_test.cpp
index f7a358efb26..f4a1ade8a66 100644
--- a/searchlib/src/tests/queryeval/iterator_benchmark/iterator_benchmark_test.cpp
+++ b/searchlib/src/tests/queryeval/iterator_benchmark/iterator_benchmark_test.cpp
@@ -750,19 +750,36 @@ void
run_intermediate_blueprint_benchmark(const BlueprintFactorySetup& a, const BlueprintFactorySetup& b, size_t num_docs)
{
print_intermediate_blueprint_result_header(2);
+ double max_speedup = 0.0;
+ double min_speedup = std::numeric_limits<double>::max();
for (double b_hit_ratio: b.op_hit_ratios) {
auto b_factory = b.make_factory_shared(num_docs, b_hit_ratio);
for (double a_hit_ratio : a.op_hit_ratios) {
IntermediateBlueprintFactoryType factory;
factory.add_child(a.make_factory(num_docs, a_hit_ratio));
factory.add_child(b_factory);
- for (auto algo: {PlanningAlgo::Order, PlanningAlgo::Estimate, PlanningAlgo::Cost, PlanningAlgo::CostForceStrict}) {
+ double time_ms_esti = 0.0;
+ for (auto algo: {PlanningAlgo::Order, PlanningAlgo::Estimate, PlanningAlgo::Cost,
+ PlanningAlgo::CostForceStrict}) {
auto res = benchmark_search(factory, num_docs + 1, true, false, false, 1.0, algo);
print_intermediate_blueprint_result(res, {a_hit_ratio, b_hit_ratio}, algo, num_docs);
+ if (algo == PlanningAlgo::Estimate) {
+ time_ms_esti = res.time_ms;
+ }
+ if (algo == PlanningAlgo::CostForceStrict) {
+ double speedup = time_ms_esti / res.time_ms;
+ if (speedup > max_speedup) {
+ max_speedup = speedup;
+ }
+ if (speedup < min_speedup) {
+ min_speedup = speedup;
+ }
+ std::cout << "speedup (esti/forc)=" << std::setprecision(4) << speedup << std::endl;
+ }
}
- std::cout << std::endl;
}
}
+ std::cout << "max_speedup=" << max_speedup << ", min_speedup=" << min_speedup << std::endl << std::endl;
}
void
@@ -787,6 +804,12 @@ gen_ratios(double middle, double range_multiplier, size_t num_samples)
for (size_t i = 0; i < num_samples; ++i) {
res.push_back(ratio);
ratio *= factor;
+ if (ratio > 1.0) {
+ if (res.size() < num_samples) {
+ res.push_back(1.0);
+ }
+ break;
+ }
}
return res;
}
@@ -831,7 +854,6 @@ TEST(IteratorBenchmark, analyze_term_search_in_disk_index)
{
BenchmarkSetup setup(num_docs, {str_index}, {QueryOperator::Term}, {true, false}, base_hit_ratios);
setup.filter_hit_ratios = filter_hit_ratios;
- setup.filter_crossover_factor = 1.0;
run_benchmarks(setup, global_summary);
}
@@ -863,31 +885,24 @@ TEST(IteratorBenchmark, analyze_term_search_in_fast_search_attributes)
run_benchmarks(setup, global_summary);
}
-TEST(IteratorBenchmark, analyze_in_operator_non_strict)
+TEST(IteratorBenchmark, analyze_IN_non_strict)
{
- const std::vector<double> hit_ratios = {0.001, 0.01, 0.1, 0.2, 0.4, 0.6, 0.8};
- BenchmarkSetup setup(num_docs, {int32_fs}, {QueryOperator::In}, {false}, hit_ratios, {5, 9, 10, 100, 1000, 10000});
- setup.disjunct_children = true;
- run_benchmarks(setup);
+ for (auto in_hit_ratio : {0.01, 0.1, 0.5}) {
+ BenchmarkSetup setup(num_docs, {int32_fs}, {QueryOperator::In}, {false}, {in_hit_ratio}, {2, 5, 9, 10, 100, 1000, 10000});
+ setup.filter_hit_ratios = gen_ratios(in_hit_ratio, 10.0, 13);
+ setup.disjunct_children = true;
+ run_benchmarks(setup);
+ }
}
-TEST(IteratorBenchmark, analyze_in_operator_strict)
+TEST(IteratorBenchmark, analyze_IN_strict)
{
const std::vector<double> hit_ratios = {0.001, 0.01, 0.1, 0.2, 0.4, 0.6, 0.8};
- BenchmarkSetup setup(num_docs, {int32_fs}, {QueryOperator::In}, {true}, hit_ratios, {5, 9, 10, 100, 1000, 10000});
+ BenchmarkSetup setup(num_docs, {int32_fs}, {QueryOperator::In}, {true}, hit_ratios, {2, 5, 9, 10, 100, 1000, 10000});
setup.disjunct_children = true;
run_benchmarks(setup);
}
-TEST(IteratorBenchmark, analyze_complex_leaf_operators)
-{
- std::vector<FieldConfig> field_cfgs = {int32_array_fs};
- std::vector<QueryOperator> query_ops = {QueryOperator::In, QueryOperator::DotProduct};
- const std::vector<double> hit_ratios = {0.001, 0.01, 0.1, 0.2, 0.4, 0.6, 0.8};
- BenchmarkSetup setup(num_docs, field_cfgs, query_ops, {true, false}, hit_ratios, {1, 2, 10, 100});
- run_benchmarks(setup);
-}
-
TEST(IteratorBenchmark, analyze_weak_and_operators)
{
std::vector<FieldConfig> field_cfgs = {int32_wset_fs};
@@ -897,24 +912,6 @@ TEST(IteratorBenchmark, analyze_weak_and_operators)
run_benchmarks(setup);
}
-TEST(IteratorBenchmark, term_benchmark)
-{
- BenchmarkSetup setup(num_docs, {int32_fs}, {QueryOperator::Term}, {true, false}, base_hit_ratios);
- run_benchmarks(setup);
-}
-
-TEST(IteratorBenchmark, and_benchmark)
-{
- BenchmarkSetup setup(num_docs, {int32_array_fs}, {QueryOperator::And}, {true, false}, base_hit_ratios, {1, 2, 4, 8});
- run_benchmarks(setup);
-}
-
-TEST(IteratorBenchmark, or_benchmark)
-{
- BenchmarkSetup setup(num_docs, {int32_array_fs}, {QueryOperator::Or}, {true, false}, base_hit_ratios, {1, 10, 100, 1000});
- run_benchmarks(setup);
-}
-
TEST(IteratorBenchmark, or_vs_filter_crossover)
{
auto fixed_or = make_blueprint_factory(int32_array_fs, QueryOperator::Or, num_docs, 0, 0.1, 100, false);
@@ -933,16 +930,38 @@ TEST(IteratorBenchmark, or_vs_filter_crossover_with_allow_force_strict)
analyze_crossover(*fixed_or, variable_term, num_docs + 1, true, 0.0001);
}
-TEST(IteratorBenchmark, analyze_and_with_filter_vs_in)
+TEST(IteratorBenchmark, analyze_AND_filter_vs_IN)
+{
+ for (auto in_filter_ratio : {0.01, 0.1, 0.5}) {
+ for (uint32_t children: {2, 10, 100, 1000}) {
+ run_and_benchmark({int32_fs, QueryOperator::Term, gen_ratios(in_filter_ratio, 10.0, 13)},
+ {int32_fs, QueryOperator::In, {in_filter_ratio}, children, false},
+ num_docs);
+ }
+ }
+}
+
+TEST(IteratorBenchmark, analyze_AND_filter_vs_OR)
+{
+ for (auto or_filter_ratio : {0.01, 0.1, 0.5}) {
+ for (uint32_t children: {2, 10, 100, 1000}) {
+ run_and_benchmark({int32_fs, QueryOperator::Term, gen_ratios(or_filter_ratio, 10, 13)},
+ {int32_fs, QueryOperator::Or, {or_filter_ratio}, children, false},
+ num_docs);
+ }
+ }
+}
+
+TEST(IteratorBenchmark, analyze_AND_filter_vs_IN_array)
{
- for (uint32_t children: {10, 100, 1000}) {
- run_and_benchmark({int32_fs, QueryOperator::Term, gen_ratios(0.1, 8.0, 15)},
- {int32_fs, QueryOperator::In, {0.1}, children, false},
+ for (uint32_t children: {2, 10, 100, 1000}) {
+ run_and_benchmark({int32_fs, QueryOperator::Term, gen_ratios(0.1, 10.0, 13)},
+ {int32_array_fs, QueryOperator::In, {0.1}, children, false},
num_docs);
}
}
-TEST(IteratorBenchmark, analyze_and_with_bitvector_vs_in)
+TEST(IteratorBenchmark, analyze_AND_bitvector_vs_IN)
{
for (uint32_t children: {10, 100, 1000, 10000}) {
run_and_benchmark({int32_fs, QueryOperator::In, {0.05, 0.1, 0.15, 0.2, 0.25, 0.3, 0.35, 0.40, 0.45, 0.50, 0.55, 0.60}, children, true},
@@ -951,21 +970,35 @@ TEST(IteratorBenchmark, analyze_and_with_bitvector_vs_in)
}
}
-TEST(IteratorBenchmark, analyze_and_with_filter_vs_in_array)
+TEST(IteratorBenchmark, analyze_OR_non_strict_fs)
{
- for (uint32_t children: {10, 100, 1000}) {
- run_and_benchmark({int32_fs, QueryOperator::Term, gen_ratios(0.1, 8.0, 15)},
- {int32_array_fs, QueryOperator::In, {0.1}, children, false},
- num_docs);
+ for (auto or_hit_ratio : {0.01, 0.1, 0.5}) {
+ BenchmarkSetup setup(num_docs, {int32_fs}, {QueryOperator::Or}, {false}, {or_hit_ratio},
+ {2, 4, 6, 8, 10, 100, 1000});
+ setup.filter_hit_ratios = gen_ratios(or_hit_ratio, 10.0, 13);
+ run_benchmarks(setup);
}
}
-TEST(IteratorBenchmark, analyze_and_with_filter_vs_or)
+TEST(IteratorBenchmark, analyze_OR_non_strict_non_fs)
{
- for (uint32_t children: {10, 100, 1000}) {
- run_and_benchmark({int32_fs, QueryOperator::Term, gen_ratios(0.1, 8.0, 15)},
- {int32_fs, QueryOperator::Or, {0.1}, children, false},
- num_docs);
+ BenchmarkSetup setup(num_docs, {int32}, {QueryOperator::Or}, {false}, {0.1}, {2, 4, 6, 8, 10});
+ setup.filter_hit_ratios = gen_ratios(0.1, 10.0, 13);
+ run_benchmarks(setup);
+}
+
+TEST(IteratorBenchmark, analyze_OR_strict)
+{
+ BenchmarkSetup setup(num_docs, {int32_fs}, {QueryOperator::Or}, {true}, {0.01, 0.1, 0.5}, {2, 4, 6, 8, 10, 100, 1000});
+ run_benchmarks(setup);
+}
+
+TEST(IteratorBenchmark, analyze_btree_iterator_non_strict)
+{
+ for (auto term_ratio : {0.01, 0.1, 0.5, 1.0}) {
+ BenchmarkSetup setup(num_docs, {int32_fs}, {QueryOperator::Term}, {false}, {term_ratio}, {1});
+ setup.filter_hit_ratios = gen_ratios(term_ratio, 10.0, 15);
+ run_benchmarks(setup);
}
}
diff --git a/searchlib/src/vespa/searchlib/attribute/attribute_blueprint_factory.cpp b/searchlib/src/vespa/searchlib/attribute/attribute_blueprint_factory.cpp
index aebfda8fffd..569edc0a4ab 100644
--- a/searchlib/src/vespa/searchlib/attribute/attribute_blueprint_factory.cpp
+++ b/searchlib/src/vespa/searchlib/attribute/attribute_blueprint_factory.cpp
@@ -166,7 +166,7 @@ public:
return {0.5, lookup_cost(indirections), lookup_strict_cost(indirections)};
} else {
double rel_est = abs_to_rel_est(_hit_estimate.est_hits(), docid_limit);
- return {rel_est, btree_cost(), btree_strict_cost(rel_est)};
+ return {rel_est, btree_cost(rel_est), btree_strict_cost(rel_est)};
}
}
@@ -519,8 +519,9 @@ public:
double estimate(const IDirectPostingStore::LookupResult &term) const noexcept {
return abs_to_rel_est(term.posting_size, docid_limit);
}
- double cost(const IDirectPostingStore::LookupResult &) const noexcept {
- return btree_cost();
+ double cost(const IDirectPostingStore::LookupResult &term) const noexcept {
+ double rel_est = abs_to_rel_est(term.posting_size, docid_limit);
+ return btree_cost(rel_est);
}
double strict_cost(const IDirectPostingStore::LookupResult &term) const noexcept {
double rel_est = abs_to_rel_est(term.posting_size, docid_limit);
diff --git a/searchlib/src/vespa/searchlib/attribute/direct_multi_term_blueprint.hpp b/searchlib/src/vespa/searchlib/attribute/direct_multi_term_blueprint.hpp
index 160bb199fb8..4ede8957f4e 100644
--- a/searchlib/src/vespa/searchlib/attribute/direct_multi_term_blueprint.hpp
+++ b/searchlib/src/vespa/searchlib/attribute/direct_multi_term_blueprint.hpp
@@ -192,8 +192,9 @@ DirectMultiTermBlueprint<PostingStoreType, SearchType>::calculate_flow_stats(uin
double estimate(const IDirectPostingStore::LookupResult &term) const noexcept {
return abs_to_rel_est(term.posting_size, docid_limit);
}
- double cost(const IDirectPostingStore::LookupResult &) const noexcept {
- return search::queryeval::flow::btree_cost();
+ double cost(const IDirectPostingStore::LookupResult &term) const noexcept {
+ double rel_est = abs_to_rel_est(term.posting_size, docid_limit);
+ return search::queryeval::flow::btree_cost(rel_est);
}
double strict_cost(const IDirectPostingStore::LookupResult &term) const noexcept {
double rel_est = abs_to_rel_est(term.posting_size, docid_limit);
diff --git a/searchlib/src/vespa/searchlib/diskindex/disktermblueprint.cpp b/searchlib/src/vespa/searchlib/diskindex/disktermblueprint.cpp
index c0d0b1baa8c..c30b02a0c37 100644
--- a/searchlib/src/vespa/searchlib/diskindex/disktermblueprint.cpp
+++ b/searchlib/src/vespa/searchlib/diskindex/disktermblueprint.cpp
@@ -72,7 +72,7 @@ queryeval::FlowStats
DiskTermBlueprint::calculate_flow_stats(uint32_t docid_limit) const
{
double rel_est = abs_to_rel_est(_lookupRes->counts._numDocs, docid_limit);
- return {rel_est, disk_index_cost(), disk_index_strict_cost(rel_est)};
+ return {rel_est, disk_index_cost(rel_est), disk_index_strict_cost(rel_est)};
}
SearchIterator::UP
diff --git a/searchlib/src/vespa/searchlib/memoryindex/field_index.cpp b/searchlib/src/vespa/searchlib/memoryindex/field_index.cpp
index eba135d8519..2bc94073c92 100644
--- a/searchlib/src/vespa/searchlib/memoryindex/field_index.cpp
+++ b/searchlib/src/vespa/searchlib/memoryindex/field_index.cpp
@@ -261,7 +261,7 @@ public:
queryeval::FlowStats calculate_flow_stats(uint32_t docid_limit) const override {
double rel_est = abs_to_rel_est(_posting_itr.size(), docid_limit);
- return {rel_est, btree_cost(), btree_strict_cost(rel_est)};
+ return {rel_est, btree_cost(rel_est), btree_strict_cost(rel_est)};
}
SearchIterator::UP createLeafSearch(const TermFieldMatchDataArray& tfmda) const override {
diff --git a/searchlib/src/vespa/searchlib/queryeval/flow_tuning.h b/searchlib/src/vespa/searchlib/queryeval/flow_tuning.h
index 356ecd4c992..e8a58bba9dc 100644
--- a/searchlib/src/vespa/searchlib/queryeval/flow_tuning.h
+++ b/searchlib/src/vespa/searchlib/queryeval/flow_tuning.h
@@ -7,60 +7,87 @@
namespace search::queryeval::flow {
/**
- * This function is used when calculating the strict cost of
- * intermediate and complex leaf blueprints that use a heap for their strict iterator implementation.
- *
- * Iterator benchmarking has shown the need to increase the strict cost
- * of complex blueprints, to avoid that they are forced strict too early.
- * The 5.0 multiplier reflects this.
- *
- * Program used: searchlib/src/tests/queryeval/iterator_benchmark
- * Tests used: analyze_and_with_filter_vs_*
- */
-inline double heap_cost(double my_est, size_t num_children) {
- return 5.0 * my_est * std::log2(std::max(size_t(1), num_children));
-}
-
-/**
- * The following constants and formulas were derived after analyzing term search over attributes
- * (with and without fast-search) and disk index by using the iterator benchmark program:
+ * The following constants and formulas were derived after benchmarking and analyzing
+ * using the following benchmark program:
* searchlib/src/tests/queryeval/iterator_benchmark
*
- * The following tests were executed on a machine with an Intel Xeon 2.5 GHz CPU with 48 cores and 256 Gb of memory:
- * ./searchlib_iterator_benchmark_test_app --gtest_filter='*analyze_term_search*'
- *
- * TODO: Add details on OR benchmarking.
+ * The tests were executed on:
+ * - Machine with an Intel Xeon 2.5 GHz CPU with 48 cores and 256 Gb of memory.
+ * - Apple M1 MacBook Pro (2021) 32 GB.
*
* The benchmark summary shows the 'average ms per cost' of the different benchmark cases.
- * The following constants and formulas were derived to balance 'average ms per cost' to be similar
+ * The constants and formulas are adjusted to balance 'average ms per cost' to be similar
* across the different benchmark cases.
+ *
+ * The AND benchmark cases outputs the ratio (esti/forc) of the time_ms used by two query planning algorithms:
+ * 'estimate' (legacy) and 'cost with allowed force strict' (new).
+ * 'max_speedup' indicates the gain of using the new cost model, while 'min_speedup' indicates the loss.
+ * The constants and formulas are also adjusted to maximize speedup, while reducing loss.
+ * Tests used:
+ * - IteratorBenchmark::analyze_AND_filter_vs_IN
+ * - IteratorBenchmark::analyze_AND_filter_vs_OR
+ * - IteratorBenchmark::analyze_AND_filter_vs_IN_array
+ * - IteratorBenchmark::analyze_AND_bitvector_vs_IN
*/
+/**
+ * This function is used when calculating the strict cost of
+ * intermediate and complex leaf blueprints that use a heap for their strict iterator implementation.
+ * Tests used:
+ * - IteratorBenchmark::analyze_IN_strict
+ * - IteratorBenchmark::analyze_OR_strict
+ */
+inline double heap_cost(double my_est, size_t num_children) {
+ return my_est * std::log2(std::max(size_t(1), num_children));
+}
+
// Non-strict cost of lookup based matching in an attribute (not fast-search).
+// Test used: IteratorBenchmark::analyze_term_search_in_attributes_non_strict
inline double lookup_cost(size_t num_indirections) {
return 1.0 + (num_indirections * 1.0);
}
// Non-strict cost of reverse lookup into a hash table (containing terms from a multi-term operator).
+// Test used: IteratorBenchmark::analyze_IN_non_strict
inline double reverse_hash_lookup() {
- return 5.0;
+ return 1.0;
}
// Strict cost of lookup based matching in an attribute (not fast-search).
+// IteratorBenchmark::analyze_term_search_in_attributes_strict
inline double lookup_strict_cost(size_t num_indirections) {
return lookup_cost(num_indirections);
}
-// Non-strict cost of matching in a btree posting list (e.g. fast-search attribute or memory index field).
-inline double btree_cost() {
- return 1.0;
+/**
+ * Estimates the cost of evaluating an always strict iterator (e.g. btree) in a non-strict context.
+ *
+ * When the estimate and strict cost is low, this models the cost of checking whether
+ * the seek docid matches the docid the iterator is already positioned at (the 0.2 factor).
+ *
+ * The resulting non-strict cost is most accurate when the inflow is 1.0.
+ * The resulting non-strict cost is highly underestimated when the inflow goes to 0.0.
+ * It is important to have a better estimate at higher inflows,
+ * as the latency (time) penalty is higher if choosing wrong.
+ *
+ * Note: This formula is equal to forced_strict_cost() in flow.h.
+ */
+inline double non_strict_cost_of_strict_iterator(double estimate, double strict_cost) {
+ return 0.2 * (1.0 - estimate) + strict_cost;
}
// Strict cost of matching in a btree posting list (e.g. fast-search attribute or memory index field).
+// Test used: IteratorBenchmark::analyze_term_search_in_fast_search_attributes
inline double btree_strict_cost(double my_est) {
return my_est;
}
+// Non-strict cost of matching in a btree posting list (e.g. fast-search attribute or memory index field).
+// Test used: IteratorBenchmark::analyze_btree_iterator_non_strict
+inline double btree_cost(double my_est) {
+ return non_strict_cost_of_strict_iterator(my_est, btree_strict_cost(my_est));
+}
+
// Non-strict cost of matching in a bitvector.
inline double bitvector_cost() {
return 1.0;
@@ -72,14 +99,16 @@ inline double bitvector_strict_cost(double my_est) {
return 1.5 * my_est;
}
-// Non-strict cost of matching in a disk index posting list.
-inline double disk_index_cost() {
- return 1.5;
-}
-
// Strict cost of matching in a disk index posting list.
+// Test used: IteratorBenchmark::analyze_term_search_in_disk_index
inline double disk_index_strict_cost(double my_est) {
return 1.5 * my_est;
}
+// Non-strict cost of matching in a disk index posting list.
+// Test used: IteratorBenchmark::analyze_term_search_in_disk_index
+inline double disk_index_cost(double my_est) {
+ return non_strict_cost_of_strict_iterator(my_est, disk_index_strict_cost(my_est));
+}
+
}