summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorHenning Baldersheim <balder@yahoo-inc.com>2023-12-06 10:37:45 +0000
committerHenning Baldersheim <balder@yahoo-inc.com>2023-12-06 10:37:45 +0000
commite1aa3c85f97305a8d092e78efbb097bda6d4efdc (patch)
treedbfb2797508479a88d703a114e81aff8d3875cee
parent3896913b204caf6aa2b5aa79892925a98cb604df (diff)
Use the use-estimate-for-fetch-postings flag to select future correct estimate or current hitRatio.
-rw-r--r--searchlib/src/vespa/searchlib/queryeval/blueprint.cpp7
-rw-r--r--searchlib/src/vespa/searchlib/queryeval/blueprint.h6
-rw-r--r--searchlib/src/vespa/searchlib/queryeval/intermediate_blueprints.cpp16
-rw-r--r--searchlib/src/vespa/searchlib/queryeval/intermediate_blueprints.h4
-rw-r--r--searchlib/src/vespa/searchlib/queryeval/same_element_blueprint.cpp8
5 files changed, 25 insertions, 16 deletions
diff --git a/searchlib/src/vespa/searchlib/queryeval/blueprint.cpp b/searchlib/src/vespa/searchlib/queryeval/blueprint.cpp
index 6bc125226de..1088decb8d6 100644
--- a/searchlib/src/vespa/searchlib/queryeval/blueprint.cpp
+++ b/searchlib/src/vespa/searchlib/queryeval/blueprint.cpp
@@ -519,10 +519,11 @@ IntermediateBlueprint::calculateState() const
}
double
-IntermediateBlueprint::computeNextHitRate(const Blueprint & child, double hitRate) const
+IntermediateBlueprint::computeNextHitRate(const Blueprint & child, double hit_rate, bool use_estimate) const
{
(void) child;
- return hitRate;
+ (void) use_estimate;
+ return hit_rate;
}
bool
@@ -625,7 +626,7 @@ IntermediateBlueprint::fetchPostings(const ExecuteInfo &execInfo)
for (size_t i = 0; i < _children.size(); ++i) {
Blueprint & child = *_children[i];
child.fetchPostings(ExecuteInfo::create(execInfo.isStrict() && inheritStrict(i), nextHitRate, execInfo));
- nextHitRate = computeNextHitRate(child, nextHitRate);
+ nextHitRate = computeNextHitRate(child, nextHitRate, execInfo.use_estimate_for_fetch_postings());
}
}
diff --git a/searchlib/src/vespa/searchlib/queryeval/blueprint.h b/searchlib/src/vespa/searchlib/queryeval/blueprint.h
index a02cb7dd17f..a61d435ac25 100644
--- a/searchlib/src/vespa/searchlib/queryeval/blueprint.h
+++ b/searchlib/src/vespa/searchlib/queryeval/blueprint.h
@@ -113,7 +113,9 @@ public:
_estimateHits = est.estHits;
_estimateEmpty = est.empty;
}
+ //TODO replace use of estimate by using empty/estHits directly and then have a real estimate here
HitEstimate estimate() const noexcept { return {_estimateHits, _estimateEmpty}; }
+
double hit_ratio(uint32_t docid_limit) const noexcept {
uint32_t total_hits = _estimateHits;
uint32_t total_docs = std::max(total_hits, docid_limit);
@@ -237,6 +239,8 @@ public:
const Blueprint &root() const;
double hit_ratio() const noexcept { return getState().hit_ratio(_docid_limit); }
+ // TODO Call getState().estimate() when it returns a normalized estimate
+ double estimate() const noexcept { return getState().hit_ratio(_docid_limit); }
virtual void fetchPostings(const ExecuteInfo &execInfo) = 0;
virtual void freeze() = 0;
@@ -318,7 +322,7 @@ private:
bool infer_want_global_filter() const;
size_t count_termwise_nodes(const UnpackInfo &unpack) const;
- virtual double computeNextHitRate(const Blueprint & child, double hitRate) const;
+ virtual double computeNextHitRate(const Blueprint & child, double hit_rate, bool use_estimate) const;
protected:
// returns an empty collection if children have empty or
diff --git a/searchlib/src/vespa/searchlib/queryeval/intermediate_blueprints.cpp b/searchlib/src/vespa/searchlib/queryeval/intermediate_blueprints.cpp
index 4d0656b421c..c4044ba3d00 100644
--- a/searchlib/src/vespa/searchlib/queryeval/intermediate_blueprints.cpp
+++ b/searchlib/src/vespa/searchlib/queryeval/intermediate_blueprints.cpp
@@ -270,19 +270,21 @@ AndBlueprint::createFilterSearch(bool strict, FilterConstraint constraint) const
}
double
-AndBlueprint::computeNextHitRate(const Blueprint & child, double hitRate) const {
- return hitRate * child.hit_ratio();
+AndBlueprint::computeNextHitRate(const Blueprint & child, double hit_rate, bool use_estimate) const {
+ double estimate = use_estimate ? child.estimate() : child.hit_ratio();
+ return hit_rate * estimate;
}
double
-OrBlueprint::computeNextHitRate(const Blueprint & child, double hitRate) const {
+OrBlueprint::computeNextHitRate(const Blueprint & child, double hit_rate, bool use_estimate) const {
// Avoid dropping hitRate to zero when meeting a conservatively high hitrate in a child.
// Happens at least when using non fast-search attributes, and with AND nodes.
constexpr double MIN_INVERSE_HIT_RATIO = 0.10;
- double inverse_child_hit_ratio = 1.0 - child.hit_ratio();
- return (inverse_child_hit_ratio > MIN_INVERSE_HIT_RATIO)
- ? hitRate * inverse_child_hit_ratio
- : hitRate;
+ double estimate = use_estimate ? child.estimate() : child.hit_ratio();
+ double inverse_child_estimate = 1.0 - estimate;
+ return (inverse_child_estimate > MIN_INVERSE_HIT_RATIO)
+ ? hit_rate * inverse_child_estimate
+ : hit_rate;
}
//-----------------------------------------------------------------------------
diff --git a/searchlib/src/vespa/searchlib/queryeval/intermediate_blueprints.h b/searchlib/src/vespa/searchlib/queryeval/intermediate_blueprints.h
index 6d8082b60f6..1d88b3b21eb 100644
--- a/searchlib/src/vespa/searchlib/queryeval/intermediate_blueprints.h
+++ b/searchlib/src/vespa/searchlib/queryeval/intermediate_blueprints.h
@@ -54,7 +54,7 @@ public:
SearchIterator::UP
createFilterSearch(bool strict, FilterConstraint constraint) const override;
private:
- double computeNextHitRate(const Blueprint & child, double hitRate) const override;
+ double computeNextHitRate(const Blueprint & child, double hit_rate, bool use_estimate) const override;
};
//-----------------------------------------------------------------------------
@@ -78,7 +78,7 @@ public:
SearchIterator::UP
createFilterSearch(bool strict, FilterConstraint constraint) const override;
private:
- double computeNextHitRate(const Blueprint & child, double hitRate) const override;
+ double computeNextHitRate(const Blueprint & child, double hit_rate, bool use_estimate) const override;
uint8_t calculate_cost_tier() const override;
};
diff --git a/searchlib/src/vespa/searchlib/queryeval/same_element_blueprint.cpp b/searchlib/src/vespa/searchlib/queryeval/same_element_blueprint.cpp
index c93cef47c27..eb1d217ad6a 100644
--- a/searchlib/src/vespa/searchlib/queryeval/same_element_blueprint.cpp
+++ b/searchlib/src/vespa/searchlib/queryeval/same_element_blueprint.cpp
@@ -28,7 +28,7 @@ SameElementBlueprint::~SameElementBlueprint() = default;
FieldSpec
SameElementBlueprint::getNextChildField(const vespalib::string &field_name, uint32_t field_id)
{
- return FieldSpec(field_name, field_id, _layout.allocTermField(field_id), false);
+ return {field_name, field_id, _layout.allocTermField(field_id), false};
}
void
@@ -60,11 +60,13 @@ SameElementBlueprint::fetchPostings(const ExecuteInfo &execInfo)
{
if (_terms.empty()) return;
_terms[0]->fetchPostings(execInfo);
- double hit_rate = execInfo.hitRate() * _terms[0]->hit_ratio();
+ double estimate = execInfo.use_estimate_for_fetch_postings() ? _terms[0]->estimate() : _terms[0]->hit_ratio();
+ double hit_rate = execInfo.hitRate() * estimate;
for (size_t i = 1; i < _terms.size(); ++i) {
Blueprint & term = *_terms[i];
term.fetchPostings(ExecuteInfo::create(false, hit_rate, execInfo));
- hit_rate = hit_rate * term.hit_ratio();
+ estimate = execInfo.use_estimate_for_fetch_postings() ? term.estimate() : term.hit_ratio();
+ hit_rate = hit_rate * estimate;
}
}