summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Geir Storli <geirst@vespa.ai> 2024-05-06 16:03:22 +0200
committer GitHub <noreply@github.com> 2024-05-06 16:03:22 +0200
commit 75c398d5cc5143819b28d449bf24a2a3880304bf (patch)
tree 02ce96309b04b103e2c48175b9e19edc6dd3ce99
parent dbc231fc5ca223f6698244fe33447d5c9bf236ca (diff)
parent 30b6649d21ba30f4fb77e4c6530926c79414c5fd (diff)
Merge pull request #31127 from vespa-engine/geirst/query-cost-model-adjustments
Adjust down the estimate when it is unknown and avoid WhiteListBlueprin…
-rw-r--r-- searchcore/src/vespa/searchcore/proton/documentmetastore/lid_allocator.cpp 3
-rw-r--r-- searchlib/src/vespa/searchlib/attribute/attribute_blueprint_factory.cpp 4
-rw-r--r-- searchlib/src/vespa/searchlib/queryeval/blueprint.cpp 13
-rw-r--r-- searchlib/src/vespa/searchlib/queryeval/flow_tuning.h 6
4 files changed, 17 insertions, 9 deletions
diff --git a/searchcore/src/vespa/searchcore/proton/documentmetastore/lid_allocator.cpp b/searchcore/src/vespa/searchcore/proton/documentmetastore/lid_allocator.cpp
index 0c986422be6..758d1336399 100644
--- a/searchcore/src/vespa/searchcore/proton/documentmetastore/lid_allocator.cpp
+++ b/searchcore/src/vespa/searchcore/proton/documentmetastore/lid_allocator.cpp
@@ -210,7 +210,8 @@ private:
}
FlowStats calculate_flow_stats(uint32_t docid_limit) const override {
double rel_est = abs_to_rel_est(_activeLids.size(), docid_limit);
- return {rel_est, bitvector_cost(), bitvector_strict_cost(rel_est)};
+ double do_not_make_me_strict = 1000.0;
+ return {rel_est, bitvector_cost(), do_not_make_me_strict * bitvector_strict_cost(rel_est)};
}
SearchIterator::UP
createLeafSearch(const TermFieldMatchDataArray &tfmda) const override
diff --git a/searchlib/src/vespa/searchlib/attribute/attribute_blueprint_factory.cpp b/searchlib/src/vespa/searchlib/attribute/attribute_blueprint_factory.cpp
index 5b17b491a20..70b86bf22a1 100644
--- a/searchlib/src/vespa/searchlib/attribute/attribute_blueprint_factory.cpp
+++ b/searchlib/src/vespa/searchlib/attribute/attribute_blueprint_factory.cpp
@@ -94,6 +94,7 @@ using search::queryeval::StrictHeapOrSearch;
using search::queryeval::WeightedSetTermBlueprint;
using search::queryeval::flow::btree_cost;
using search::queryeval::flow::btree_strict_cost;
+using search::queryeval::flow::estimate_when_unknown;
using search::queryeval::flow::get_num_indirections;
using search::queryeval::flow::lookup_cost;
using search::queryeval::flow::lookup_strict_cost;
@@ -150,10 +151,9 @@ public:
search::queryeval::FlowStats calculate_flow_stats(uint32_t docid_limit) const override {
if (_hit_estimate.is_unknown()) {
// E.g. attributes without fast-search are not able to provide a hit estimate.
- // In this case we just assume matching half of the document corpus.
// In addition, matching is lookup based, and we are not able to skip documents efficiently when being strict.
size_t indirections = get_num_indirections(_attr.getBasicType(), _attr.getCollectionType());
- return {0.5, lookup_cost(indirections), lookup_strict_cost(indirections)};
+ return {estimate_when_unknown(), lookup_cost(indirections), lookup_strict_cost(indirections)};
} else {
double rel_est = abs_to_rel_est(_hit_estimate.est_hits(), docid_limit);
return {rel_est, btree_cost(rel_est), btree_strict_cost(rel_est)};
diff --git a/searchlib/src/vespa/searchlib/queryeval/blueprint.cpp b/searchlib/src/vespa/searchlib/queryeval/blueprint.cpp
index 7334db4b716..cfa165be067 100644
--- a/searchlib/src/vespa/searchlib/queryeval/blueprint.cpp
+++ b/searchlib/src/vespa/searchlib/queryeval/blueprint.cpp
@@ -1,14 +1,15 @@
// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
#include "blueprint.h"
-#include "leaf_blueprints.h"
+#include "andnotsearch.h"
+#include "andsearch.h"
#include "emptysearch.h"
-#include "full_search.h"
#include "field_spec.hpp"
-#include "andsearch.h"
-#include "orsearch.h"
-#include "andnotsearch.h"
+#include "flow_tuning.h"
+#include "full_search.h"
+#include "leaf_blueprints.h"
#include "matching_elements_search.h"
+#include "orsearch.h"
#include <vespa/searchlib/fef/termfieldmatchdataarray.h>
#include <vespa/vespalib/objects/visit.hpp>
#include <vespa/vespalib/objects/objectdumper.h>
@@ -238,7 +239,7 @@ Blueprint::default_flow_stats(uint32_t docid_limit, uint32_t abs_est, size_t chi
FlowStats
Blueprint::default_flow_stats(size_t child_cnt)
{
- return {0.5, 1.0 + child_cnt, 1.0 + child_cnt};
+ return {flow::estimate_when_unknown(), 1.0 + child_cnt, 1.0 + child_cnt};
}
std::unique_ptr<MatchingElementsSearch>
diff --git a/searchlib/src/vespa/searchlib/queryeval/flow_tuning.h b/searchlib/src/vespa/searchlib/queryeval/flow_tuning.h
index 22faa920bc0..5ed61ef9fc8 100644
--- a/searchlib/src/vespa/searchlib/queryeval/flow_tuning.h
+++ b/searchlib/src/vespa/searchlib/queryeval/flow_tuning.h
@@ -60,6 +60,12 @@ inline size_t get_num_indirections(const attribute::BasicType& basic_type,
return res;
}
+// Some blueprints are not able to provide a hit estimate (e.g. attributes without fast-search).
+// In such cases the following estimate is used instead. In most cases this is an overestimate.
+inline double estimate_when_unknown() {
+ return 0.1;
+}
+
// Non-strict cost of lookup based matching in an attribute (not fast-search).
// Test used: IteratorBenchmark::analyze_term_search_in_attributes_non_strict
inline double lookup_cost(size_t num_indirections) {