diff options
author | Geir Storli <geirst@yahooinc.com> | 2024-04-15 15:23:55 +0000 |
---|---|---|
committer | Geir Storli <geirst@yahooinc.com> | 2024-04-15 15:26:42 +0000 |
commit | ce0079ffc71b4403947e8515f1b414e06cf8fae7 (patch) | |
tree | 4ab6922975a718395d6a81b4eb6b82ad43e60cfa | |
parent | 171a926fee321261f3aa27eaf5efb5e35eae3470 (diff) |
Re-adjust cost model params for IN/weightedSet/dotProduct.
Only combinations that support reverse hash filter have a cheaper non-strict cost.
4 files changed, 43 insertions, 22 deletions
```diff
diff --git a/searchlib/src/tests/queryeval/iterator_benchmark/iterator_benchmark_test.cpp b/searchlib/src/tests/queryeval/iterator_benchmark/iterator_benchmark_test.cpp
index f82b9804720..590cf1a26f0 100644
--- a/searchlib/src/tests/queryeval/iterator_benchmark/iterator_benchmark_test.cpp
+++ b/searchlib/src/tests/queryeval/iterator_benchmark/iterator_benchmark_test.cpp
@@ -934,6 +934,13 @@ TEST(IteratorBenchmark, analyze_and_with_filter_vs_in)
                       num_docs);
 }
 
+TEST(IteratorBenchmark, analyze_and_with_filter_vs_in_array)
+{
+    run_and_benchmark({int32_fs, QueryOperator::Term, gen_ratios(0.1, 8.0, 15)},
+                      {int32_array_fs, QueryOperator::In, {0.1}, 100, false},
+                      num_docs);
+}
+
 TEST(IteratorBenchmark, analyze_and_with_filter_vs_or)
 {
     run_and_benchmark({int32_fs, QueryOperator::Term, gen_ratios(0.1, 8.0, 15)},
diff --git a/searchlib/src/vespa/searchlib/attribute/direct_multi_term_blueprint.h b/searchlib/src/vespa/searchlib/attribute/direct_multi_term_blueprint.h
index 413d0dd0bf4..51321a56885 100644
--- a/searchlib/src/vespa/searchlib/attribute/direct_multi_term_blueprint.h
+++ b/searchlib/src/vespa/searchlib/attribute/direct_multi_term_blueprint.h
@@ -76,28 +76,7 @@ public:
         resolve_strict(in_flow);
     }
 
-    queryeval::FlowStats calculate_flow_stats(uint32_t docid_limit) const override {
-        using OrFlow = search::queryeval::OrFlow;
-        struct MyAdapter {
-            uint32_t docid_limit;
-            MyAdapter(uint32_t docid_limit_in) noexcept : docid_limit(docid_limit_in) {}
-            double estimate(const IDirectPostingStore::LookupResult &term) const noexcept {
-                return abs_to_rel_est(term.posting_size, docid_limit);
-            }
-            double cost(const IDirectPostingStore::LookupResult &) const noexcept {
-                return search::queryeval::flow::btree_cost();
-            }
-            double strict_cost(const IDirectPostingStore::LookupResult &term) const noexcept {
-                double rel_est = abs_to_rel_est(term.posting_size, docid_limit);
-                return search::queryeval::flow::btree_strict_cost(rel_est);
-            }
-        };
-        double est = OrFlow::estimate_of(MyAdapter(docid_limit), _terms);
-        // Iterator benchmarking has shown that non-strict cost should be 1.0.
-        // Program: searchlib/src/tests/queryeval/iterator_benchmark
-        // TODO: Add more details, and consider moving constant to flow_tuning.h
-        return {est, 1.0, OrFlow::cost_of(MyAdapter(docid_limit), _terms, true) + queryeval::flow::heap_cost(est, _terms.size())};
-    }
+    queryeval::FlowStats calculate_flow_stats(uint32_t docid_limit) const override;
 
     std::unique_ptr<queryeval::SearchIterator> createLeafSearch(const fef::TermFieldMatchDataArray &tfmda) const override;
 
diff --git a/searchlib/src/vespa/searchlib/attribute/direct_multi_term_blueprint.hpp b/searchlib/src/vespa/searchlib/attribute/direct_multi_term_blueprint.hpp
index 817eab3e070..e506ec55c76 100644
--- a/searchlib/src/vespa/searchlib/attribute/direct_multi_term_blueprint.hpp
+++ b/searchlib/src/vespa/searchlib/attribute/direct_multi_term_blueprint.hpp
@@ -181,4 +181,34 @@ DirectMultiTermBlueprint<PostingStoreType, SearchType>::createFilterSearch(Filte
     return wrapper;
 }
 
+template <typename PostingStoreType, typename SearchType>
+queryeval::FlowStats
+DirectMultiTermBlueprint<PostingStoreType, SearchType>::calculate_flow_stats(uint32_t docid_limit) const
+{
+    using OrFlow = search::queryeval::OrFlow;
+    struct MyAdapter {
+        uint32_t docid_limit;
+        MyAdapter(uint32_t docid_limit_in) noexcept : docid_limit(docid_limit_in) {}
+        double estimate(const IDirectPostingStore::LookupResult &term) const noexcept {
+            return abs_to_rel_est(term.posting_size, docid_limit);
+        }
+        double cost(const IDirectPostingStore::LookupResult &) const noexcept {
+            return search::queryeval::flow::btree_cost();
+        }
+        double strict_cost(const IDirectPostingStore::LookupResult &term) const noexcept {
+            double rel_est = abs_to_rel_est(term.posting_size, docid_limit);
+            return search::queryeval::flow::btree_strict_cost(rel_est);
+        }
+    };
+    double est = OrFlow::estimate_of(MyAdapter(docid_limit), _terms);
+    // Iterator benchmarking has shown that non-strict cost is different for attributes
+    // that support using a reverse hash filter (see use_hash_filter()).
+    // Program used: searchlib/src/tests/queryeval/iterator_benchmark
+    // Tests: analyze_and_with_filter_vs_in(), analyze_and_with_filter_vs_in_array()
+    double non_strict_cost = (SearchType::supports_hash_filter && !_iattr.hasMultiValue())
+        ? queryeval::flow::reverse_hash_lookup() :
+        OrFlow::cost_of(MyAdapter(docid_limit), _terms, false);
+    return {est, non_strict_cost, OrFlow::cost_of(MyAdapter(docid_limit), _terms, true) + queryeval::flow::heap_cost(est, _terms.size())};
+}
+
 }
diff --git a/searchlib/src/vespa/searchlib/queryeval/flow_tuning.h b/searchlib/src/vespa/searchlib/queryeval/flow_tuning.h
index 51e544b2e30..c4df2d99508 100644
--- a/searchlib/src/vespa/searchlib/queryeval/flow_tuning.h
+++ b/searchlib/src/vespa/searchlib/queryeval/flow_tuning.h
@@ -30,6 +30,11 @@ inline double lookup_cost(size_t num_indirections) {
     return 1.0 + (num_indirections * 1.0);
 }
 
+// Non-strict cost of reverse lookup into a hash table (containing terms from a multi-term operator).
+inline double reverse_hash_lookup() {
+    return 1.0;
+}
+
 // Strict cost of lookup based matching in an attribute (not fast-search).
 inline double lookup_strict_cost(size_t num_indirections) {
     return lookup_cost(num_indirections);
```