summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorGeir Storli <geirst@vespa.ai>2024-04-15 20:06:08 +0200
committerGitHub <noreply@github.com>2024-04-15 20:06:08 +0200
commit2d75e8ebdd964710b839d3e97969425d2900a7dd (patch)
tree93f6ffbfe582f5d7f22661b8b8199e4391dc25bb
parentbc104603db66c900c3d813de6860fd116be3b7dc (diff)
parente5313f493dc58760b04f11f5eb79e9e2f1f06abf (diff)
Merge pull request #30923 from vespa-engine/geirst/adjust-strict-cost-of-complex-blueprints
Adjust strict cost of intermediate / complex leaf blueprints.
-rw-r--r--searchlib/src/tests/queryeval/iterator_benchmark/iterator_benchmark_test.cpp28
-rw-r--r--searchlib/src/vespa/searchlib/attribute/direct_multi_term_blueprint.hpp4
-rw-r--r--searchlib/src/vespa/searchlib/queryeval/flow_tuning.h15
3 files changed, 34 insertions, 13 deletions
diff --git a/searchlib/src/tests/queryeval/iterator_benchmark/iterator_benchmark_test.cpp b/searchlib/src/tests/queryeval/iterator_benchmark/iterator_benchmark_test.cpp
index 590cf1a26f0..d162ef05b06 100644
--- a/searchlib/src/tests/queryeval/iterator_benchmark/iterator_benchmark_test.cpp
+++ b/searchlib/src/tests/queryeval/iterator_benchmark/iterator_benchmark_test.cpp
@@ -738,6 +738,9 @@ struct BlueprintFactorySetup {
std::shared_ptr<BenchmarkBlueprintFactory> make_factory_shared(size_t num_docs, double op_hit_ratio) const {
return std::shared_ptr<BenchmarkBlueprintFactory>(make_factory(num_docs, op_hit_ratio));
}
+ vespalib::string to_string() const {
+ return "field=" + field_cfg.to_string() + ", query=" + test::to_string(query_op) + ", children=" + std::to_string(children);
+ }
};
BlueprintFactorySetup::~BlueprintFactorySetup() = default;
@@ -765,6 +768,7 @@ run_intermediate_blueprint_benchmark(const BlueprintFactorySetup& a, const Bluep
void
run_and_benchmark(const BlueprintFactorySetup& a, const BlueprintFactorySetup& b, size_t num_docs)
{
+ std::cout << "AND[A={" << a.to_string() << "},B={" << b.to_string() << "}]" << std::endl;
run_intermediate_blueprint_benchmark<AndBlueprintFactory>(a, b, num_docs);
}
@@ -929,23 +933,29 @@ TEST(IteratorBenchmark, or_vs_filter_crossover_with_allow_force_strict)
TEST(IteratorBenchmark, analyze_and_with_filter_vs_in)
{
- run_and_benchmark({int32_fs, QueryOperator::Term, gen_ratios(0.1, 8.0, 15)},
- {int32_fs, QueryOperator::In, {0.1}, 100, false},
- num_docs);
+ for (uint32_t children: {10, 100, 1000}) {
+ run_and_benchmark({int32_fs, QueryOperator::Term, gen_ratios(0.1, 8.0, 15)},
+ {int32_fs, QueryOperator::In, {0.1}, children, false},
+ num_docs);
+ }
}
TEST(IteratorBenchmark, analyze_and_with_filter_vs_in_array)
{
- run_and_benchmark({int32_fs, QueryOperator::Term, gen_ratios(0.1, 8.0, 15)},
- {int32_array_fs, QueryOperator::In, {0.1}, 100, false},
- num_docs);
+ for (uint32_t children: {10, 100, 1000}) {
+ run_and_benchmark({int32_fs, QueryOperator::Term, gen_ratios(0.1, 8.0, 15)},
+ {int32_array_fs, QueryOperator::In, {0.1}, children, false},
+ num_docs);
+ }
}
TEST(IteratorBenchmark, analyze_and_with_filter_vs_or)
{
- run_and_benchmark({int32_fs, QueryOperator::Term, gen_ratios(0.1, 8.0, 15)},
- {int32_fs, QueryOperator::Or, {0.1}, 100, false},
- num_docs);
+ for (uint32_t children: {10, 100, 1000}) {
+ run_and_benchmark({int32_fs, QueryOperator::Term, gen_ratios(0.1, 8.0, 15)},
+ {int32_fs, QueryOperator::Or, {0.1}, children, false},
+ num_docs);
+ }
}
int main(int argc, char **argv) {
diff --git a/searchlib/src/vespa/searchlib/attribute/direct_multi_term_blueprint.hpp b/searchlib/src/vespa/searchlib/attribute/direct_multi_term_blueprint.hpp
index e506ec55c76..160bb199fb8 100644
--- a/searchlib/src/vespa/searchlib/attribute/direct_multi_term_blueprint.hpp
+++ b/searchlib/src/vespa/searchlib/attribute/direct_multi_term_blueprint.hpp
@@ -206,8 +206,8 @@ DirectMultiTermBlueprint<PostingStoreType, SearchType>::calculate_flow_stats(uin
// Program used: searchlib/src/tests/queryeval/iterator_benchmark
// Tests: analyze_and_with_filter_vs_in(), analyze_and_with_filter_vs_in_array()
double non_strict_cost = (SearchType::supports_hash_filter && !_iattr.hasMultiValue())
- ? queryeval::flow::reverse_hash_lookup() :
- OrFlow::cost_of(MyAdapter(docid_limit), _terms, false);
+ ? queryeval::flow::reverse_hash_lookup()
+ : OrFlow::cost_of(MyAdapter(docid_limit), _terms, false);
return {est, non_strict_cost, OrFlow::cost_of(MyAdapter(docid_limit), _terms, true) + queryeval::flow::heap_cost(est, _terms.size())};
}
diff --git a/searchlib/src/vespa/searchlib/queryeval/flow_tuning.h b/searchlib/src/vespa/searchlib/queryeval/flow_tuning.h
index c4df2d99508..dae0bd82cd0 100644
--- a/searchlib/src/vespa/searchlib/queryeval/flow_tuning.h
+++ b/searchlib/src/vespa/searchlib/queryeval/flow_tuning.h
@@ -6,8 +6,19 @@
namespace search::queryeval::flow {
+/**
+ * This function is used when calculating the strict cost of
+ * intermediate and complex leaf blueprints that use a heap for their strict iterator implementation.
+ *
+ * Iterator benchmarking has shown the need to increase the strict cost
+ * of complex blueprints, to avoid that they are forced strict too early.
+ * The 5.0 multiplier reflects this.
+ *
+ * Program used: searchlib/src/tests/queryeval/iterator_benchmark
+ * Tests used: analyze_and_with_filter_vs_*
+ */
inline double heap_cost(double my_est, size_t num_children) {
- return my_est * std::log2(std::max(size_t(1),num_children));
+ return 5.0 * my_est * std::log2(std::max(size_t(1), num_children));
}
/**
@@ -32,7 +43,7 @@ inline double lookup_cost(size_t num_indirections) {
// Non-strict cost of reverse lookup into a hash table (containing terms from a multi-term operator).
inline double reverse_hash_lookup() {
- return 1.0;
+ return 5.0;
}
// Strict cost of lookup based matching in an attribute (not fast-search).