summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Geir Storli <geirst@vespa.ai> 2024-04-15 18:23:54 +0200
committer: GitHub <noreply@github.com> 2024-04-15 18:23:54 +0200
commit: bc104603db66c900c3d813de6860fd116be3b7dc (patch)
tree: 4ab6922975a718395d6a81b4eb6b82ad43e60cfa
parent: 171a926fee321261f3aa27eaf5efb5e35eae3470 (diff)
parent: ce0079ffc71b4403947e8515f1b414e06cf8fae7 (diff)
Merge pull request #30921 from vespa-engine/geirst/in-operator-cost-model-readjustment
Re-adjust cost model params for IN/weightedSet/dotProduct.
-rw-r--r-- searchlib/src/tests/queryeval/iterator_benchmark/iterator_benchmark_test.cpp 7
-rw-r--r-- searchlib/src/vespa/searchlib/attribute/direct_multi_term_blueprint.h 23
-rw-r--r-- searchlib/src/vespa/searchlib/attribute/direct_multi_term_blueprint.hpp 30
-rw-r--r-- searchlib/src/vespa/searchlib/queryeval/flow_tuning.h 5
4 files changed, 43 insertions, 22 deletions
diff --git a/searchlib/src/tests/queryeval/iterator_benchmark/iterator_benchmark_test.cpp b/searchlib/src/tests/queryeval/iterator_benchmark/iterator_benchmark_test.cpp
index f82b9804720..590cf1a26f0 100644
--- a/searchlib/src/tests/queryeval/iterator_benchmark/iterator_benchmark_test.cpp
+++ b/searchlib/src/tests/queryeval/iterator_benchmark/iterator_benchmark_test.cpp
@@ -934,6 +934,13 @@ TEST(IteratorBenchmark, analyze_and_with_filter_vs_in)
num_docs);
}
+TEST(IteratorBenchmark, analyze_and_with_filter_vs_in_array) // Benchmark pairing a Term setup with an In setup over int32_array_fs.
+{
+ run_and_benchmark({int32_fs, QueryOperator::Term, gen_ratios(0.1, 8.0, 15)}, // first setup: Term operator on int32_fs, ratios from gen_ratios(0.1, 8.0, 15)
+ {int32_array_fs, QueryOperator::In, {0.1}, 100, false}, // second setup: In operator on int32_array_fs; meaning of 100 / false is not visible here - TODO confirm against the setup struct
+ num_docs);
+}
+
TEST(IteratorBenchmark, analyze_and_with_filter_vs_or)
{
run_and_benchmark({int32_fs, QueryOperator::Term, gen_ratios(0.1, 8.0, 15)},
diff --git a/searchlib/src/vespa/searchlib/attribute/direct_multi_term_blueprint.h b/searchlib/src/vespa/searchlib/attribute/direct_multi_term_blueprint.h
index 413d0dd0bf4..51321a56885 100644
--- a/searchlib/src/vespa/searchlib/attribute/direct_multi_term_blueprint.h
+++ b/searchlib/src/vespa/searchlib/attribute/direct_multi_term_blueprint.h
@@ -76,28 +76,7 @@ public:
resolve_strict(in_flow);
}
- queryeval::FlowStats calculate_flow_stats(uint32_t docid_limit) const override {
- using OrFlow = search::queryeval::OrFlow;
- struct MyAdapter {
- uint32_t docid_limit;
- MyAdapter(uint32_t docid_limit_in) noexcept : docid_limit(docid_limit_in) {}
- double estimate(const IDirectPostingStore::LookupResult &term) const noexcept {
- return abs_to_rel_est(term.posting_size, docid_limit);
- }
- double cost(const IDirectPostingStore::LookupResult &) const noexcept {
- return search::queryeval::flow::btree_cost();
- }
- double strict_cost(const IDirectPostingStore::LookupResult &term) const noexcept {
- double rel_est = abs_to_rel_est(term.posting_size, docid_limit);
- return search::queryeval::flow::btree_strict_cost(rel_est);
- }
- };
- double est = OrFlow::estimate_of(MyAdapter(docid_limit), _terms);
- // Iterator benchmarking has shown that non-strict cost should be 1.0.
- // Program: searchlib/src/tests/queryeval/iterator_benchmark
- // TODO: Add more details, and consider moving constant to flow_tuning.h
- return {est, 1.0, OrFlow::cost_of(MyAdapter(docid_limit), _terms, true) + queryeval::flow::heap_cost(est, _terms.size())};
- }
+ queryeval::FlowStats calculate_flow_stats(uint32_t docid_limit) const override;
std::unique_ptr<queryeval::SearchIterator> createLeafSearch(const fef::TermFieldMatchDataArray &tfmda) const override;
diff --git a/searchlib/src/vespa/searchlib/attribute/direct_multi_term_blueprint.hpp b/searchlib/src/vespa/searchlib/attribute/direct_multi_term_blueprint.hpp
index 817eab3e070..e506ec55c76 100644
--- a/searchlib/src/vespa/searchlib/attribute/direct_multi_term_blueprint.hpp
+++ b/searchlib/src/vespa/searchlib/attribute/direct_multi_term_blueprint.hpp
@@ -181,4 +181,34 @@ DirectMultiTermBlueprint<PostingStoreType, SearchType>::createFilterSearch(Filte
return wrapper;
}
+template <typename PostingStoreType, typename SearchType>
+queryeval::FlowStats
+DirectMultiTermBlueprint<PostingStoreType, SearchType>::calculate_flow_stats(uint32_t docid_limit) const
+{ // Builds the FlowStats for this blueprint from the looked-up terms in _terms.
+ using OrFlow = search::queryeval::OrFlow;
+ struct MyAdapter { // Adapter passed to OrFlow: turns one posting-store lookup result into estimate/cost values.
+ uint32_t docid_limit;
+ MyAdapter(uint32_t docid_limit_in) noexcept : docid_limit(docid_limit_in) {}
+ double estimate(const IDirectPostingStore::LookupResult &term) const noexcept {
+ return abs_to_rel_est(term.posting_size, docid_limit); // relative estimate from the term's posting size and docid_limit
+ }
+ double cost(const IDirectPostingStore::LookupResult &) const noexcept {
+ return search::queryeval::flow::btree_cost(); // same value for every term; the lookup result is ignored
+ }
+ double strict_cost(const IDirectPostingStore::LookupResult &term) const noexcept {
+ double rel_est = abs_to_rel_est(term.posting_size, docid_limit);
+ return search::queryeval::flow::btree_strict_cost(rel_est);
+ }
+ };
+ double est = OrFlow::estimate_of(MyAdapter(docid_limit), _terms);
+ // Iterator benchmarking has shown that the non-strict cost differs for attributes
+ // that can use a reverse hash filter (see use_hash_filter()) compared to those that cannot.
+ // Benchmark program: searchlib/src/tests/queryeval/iterator_benchmark
+ // Benchmark tests: analyze_and_with_filter_vs_in(), analyze_and_with_filter_vs_in_array()
+ double non_strict_cost = (SearchType::supports_hash_filter && !_iattr.hasMultiValue()) // hash-filter capable SearchType and the attribute is not multi-value
+ ? queryeval::flow::reverse_hash_lookup() :
+ OrFlow::cost_of(MyAdapter(docid_limit), _terms, false); // otherwise: OrFlow cost over all terms (third argument false; presumably selects non-strict - confirm in OrFlow)
+ return {est, non_strict_cost, OrFlow::cost_of(MyAdapter(docid_limit), _terms, true) + queryeval::flow::heap_cost(est, _terms.size())}; // third field: OrFlow cost with third argument true, plus heap_cost for est and the term count
+}
+
}
diff --git a/searchlib/src/vespa/searchlib/queryeval/flow_tuning.h b/searchlib/src/vespa/searchlib/queryeval/flow_tuning.h
index 51e544b2e30..c4df2d99508 100644
--- a/searchlib/src/vespa/searchlib/queryeval/flow_tuning.h
+++ b/searchlib/src/vespa/searchlib/queryeval/flow_tuning.h
@@ -30,6 +30,11 @@ inline double lookup_cost(size_t num_indirections) {
return 1.0 + (num_indirections * 1.0);
}
+// Non-strict cost of a reverse lookup into a hash table that contains the terms of a multi-term operator.
+inline double reverse_hash_lookup() {
+ return 1.0; // fixed cost: takes no input, so it does not vary with the number of terms
+}
+
// Strict cost of lookup based matching in an attribute (not fast-search).
inline double lookup_strict_cost(size_t num_indirections) {
return lookup_cost(num_indirections);