diff options
author | Geir Storli <geirst@yahooinc.com> | 2024-04-15 16:17:57 +0000 |
---|---|---|
committer | Geir Storli <geirst@yahooinc.com> | 2024-04-15 16:24:44 +0000 |
commit | e5313f493dc58760b04f11f5eb79e9e2f1f06abf (patch) | |
tree | 93f6ffbfe582f5d7f22661b8b8199e4391dc25bb | |
parent | bc104603db66c900c3d813de6860fd116be3b7dc (diff) |
Adjust strict cost of intermediate / complex leaf blueprints.
This ensures that AND(filter vs complex) makes a better decision
regarding when to force the complex blueprint strict.
Reverse hash filter cost is adjusted accordingly.
3 files changed, 34 insertions, 13 deletions
diff --git a/searchlib/src/tests/queryeval/iterator_benchmark/iterator_benchmark_test.cpp b/searchlib/src/tests/queryeval/iterator_benchmark/iterator_benchmark_test.cpp index 590cf1a26f0..d162ef05b06 100644 --- a/searchlib/src/tests/queryeval/iterator_benchmark/iterator_benchmark_test.cpp +++ b/searchlib/src/tests/queryeval/iterator_benchmark/iterator_benchmark_test.cpp @@ -738,6 +738,9 @@ struct BlueprintFactorySetup { std::shared_ptr<BenchmarkBlueprintFactory> make_factory_shared(size_t num_docs, double op_hit_ratio) const { return std::shared_ptr<BenchmarkBlueprintFactory>(make_factory(num_docs, op_hit_ratio)); } + vespalib::string to_string() const { + return "field=" + field_cfg.to_string() + ", query=" + test::to_string(query_op) + ", children=" + std::to_string(children); + } }; BlueprintFactorySetup::~BlueprintFactorySetup() = default; @@ -765,6 +768,7 @@ run_intermediate_blueprint_benchmark(const BlueprintFactorySetup& a, const Bluep void run_and_benchmark(const BlueprintFactorySetup& a, const BlueprintFactorySetup& b, size_t num_docs) { + std::cout << "AND[A={" << a.to_string() << "},B={" << b.to_string() << "}]" << std::endl; run_intermediate_blueprint_benchmark<AndBlueprintFactory>(a, b, num_docs); } @@ -929,23 +933,29 @@ TEST(IteratorBenchmark, or_vs_filter_crossover_with_allow_force_strict) TEST(IteratorBenchmark, analyze_and_with_filter_vs_in) { - run_and_benchmark({int32_fs, QueryOperator::Term, gen_ratios(0.1, 8.0, 15)}, - {int32_fs, QueryOperator::In, {0.1}, 100, false}, - num_docs); + for (uint32_t children: {10, 100, 1000}) { + run_and_benchmark({int32_fs, QueryOperator::Term, gen_ratios(0.1, 8.0, 15)}, + {int32_fs, QueryOperator::In, {0.1}, children, false}, + num_docs); + } } TEST(IteratorBenchmark, analyze_and_with_filter_vs_in_array) { - run_and_benchmark({int32_fs, QueryOperator::Term, gen_ratios(0.1, 8.0, 15)}, - {int32_array_fs, QueryOperator::In, {0.1}, 100, false}, - num_docs); + for (uint32_t children: {10, 100, 1000}) { + run_and_benchmark({int32_fs, QueryOperator::Term, gen_ratios(0.1, 8.0, 15)}, + {int32_array_fs, QueryOperator::In, {0.1}, children, false}, + num_docs); + } } TEST(IteratorBenchmark, analyze_and_with_filter_vs_or) { - run_and_benchmark({int32_fs, QueryOperator::Term, gen_ratios(0.1, 8.0, 15)}, - {int32_fs, QueryOperator::Or, {0.1}, 100, false}, - num_docs); + for (uint32_t children: {10, 100, 1000}) { + run_and_benchmark({int32_fs, QueryOperator::Term, gen_ratios(0.1, 8.0, 15)}, + {int32_fs, QueryOperator::Or, {0.1}, children, false}, + num_docs); + } } int main(int argc, char **argv) { diff --git a/searchlib/src/vespa/searchlib/attribute/direct_multi_term_blueprint.hpp b/searchlib/src/vespa/searchlib/attribute/direct_multi_term_blueprint.hpp index e506ec55c76..160bb199fb8 100644 --- a/searchlib/src/vespa/searchlib/attribute/direct_multi_term_blueprint.hpp +++ b/searchlib/src/vespa/searchlib/attribute/direct_multi_term_blueprint.hpp @@ -206,8 +206,8 @@ DirectMultiTermBlueprint<PostingStoreType, SearchType>::calculate_flow_stats(uin // Program used: searchlib/src/tests/queryeval/iterator_benchmark // Tests: analyze_and_with_filter_vs_in(), analyze_and_with_filter_vs_in_array() double non_strict_cost = (SearchType::supports_hash_filter && !_iattr.hasMultiValue()) - ? queryeval::flow::reverse_hash_lookup() : - OrFlow::cost_of(MyAdapter(docid_limit), _terms, false); + ? queryeval::flow::reverse_hash_lookup() + : OrFlow::cost_of(MyAdapter(docid_limit), _terms, false); return {est, non_strict_cost, OrFlow::cost_of(MyAdapter(docid_limit), _terms, true) + queryeval::flow::heap_cost(est, _terms.size())}; } diff --git a/searchlib/src/vespa/searchlib/queryeval/flow_tuning.h b/searchlib/src/vespa/searchlib/queryeval/flow_tuning.h index c4df2d99508..dae0bd82cd0 100644 --- a/searchlib/src/vespa/searchlib/queryeval/flow_tuning.h +++ b/searchlib/src/vespa/searchlib/queryeval/flow_tuning.h @@ -6,8 +6,19 @@ namespace search::queryeval::flow { +/** + * This function is used when calculating the strict cost of + * intermediate and complex leaf blueprints that use a heap for their strict iterator implementation. + * + * Iterator benchmarking has shown the need to increase the strict cost + * of complex blueprints, to avoid that they are forced strict too early. + * The 5.0 multiplier reflects this. + * + * Program used: searchlib/src/tests/queryeval/iterator_benchmark + * Tests used: analyze_and_with_filter_vs_* + */ inline double heap_cost(double my_est, size_t num_children) { - return my_est * std::log2(std::max(size_t(1),num_children)); + return 5.0 * my_est * std::log2(std::max(size_t(1), num_children)); } /** @@ -32,7 +43,7 @@ inline double lookup_cost(size_t num_indirections) { // Non-strict cost of reverse lookup into a hash table (containing terms from a multi-term operator). inline double reverse_hash_lookup() { - return 1.0; + return 5.0; } // Strict cost of lookup based matching in an attribute (not fast-search). |