diff options
author | Henning Baldersheim <balder@yahoo-inc.com> | 2023-12-06 10:37:45 +0000 |
---|---|---|
committer | Henning Baldersheim <balder@yahoo-inc.com> | 2023-12-06 10:37:45 +0000 |
commit | e1aa3c85f97305a8d092e78efbb097bda6d4efdc (patch) | |
tree | dbfb2797508479a88d703a114e81aff8d3875cee | |
parent | 3896913b204caf6aa2b5aa79892925a98cb604df (diff) |
Use the use-estimate-for-fetch-postings flag to select between the (future) correct estimate and the current hitRatio.
5 files changed, 25 insertions, 16 deletions
diff --git a/searchlib/src/vespa/searchlib/queryeval/blueprint.cpp b/searchlib/src/vespa/searchlib/queryeval/blueprint.cpp index 6bc125226de..1088decb8d6 100644 --- a/searchlib/src/vespa/searchlib/queryeval/blueprint.cpp +++ b/searchlib/src/vespa/searchlib/queryeval/blueprint.cpp @@ -519,10 +519,11 @@ IntermediateBlueprint::calculateState() const } double -IntermediateBlueprint::computeNextHitRate(const Blueprint & child, double hitRate) const +IntermediateBlueprint::computeNextHitRate(const Blueprint & child, double hit_rate, bool use_estimate) const { (void) child; - return hitRate; + (void) use_estimate; + return hit_rate; } bool @@ -625,7 +626,7 @@ IntermediateBlueprint::fetchPostings(const ExecuteInfo &execInfo) for (size_t i = 0; i < _children.size(); ++i) { Blueprint & child = *_children[i]; child.fetchPostings(ExecuteInfo::create(execInfo.isStrict() && inheritStrict(i), nextHitRate, execInfo)); - nextHitRate = computeNextHitRate(child, nextHitRate); + nextHitRate = computeNextHitRate(child, nextHitRate, execInfo.use_estimate_for_fetch_postings()); } } diff --git a/searchlib/src/vespa/searchlib/queryeval/blueprint.h b/searchlib/src/vespa/searchlib/queryeval/blueprint.h index a02cb7dd17f..a61d435ac25 100644 --- a/searchlib/src/vespa/searchlib/queryeval/blueprint.h +++ b/searchlib/src/vespa/searchlib/queryeval/blueprint.h @@ -113,7 +113,9 @@ public: _estimateHits = est.estHits; _estimateEmpty = est.empty; } + //TODO replace use of estimate by using empty/estHits directly and then have a real estimate here HitEstimate estimate() const noexcept { return {_estimateHits, _estimateEmpty}; } + double hit_ratio(uint32_t docid_limit) const noexcept { uint32_t total_hits = _estimateHits; uint32_t total_docs = std::max(total_hits, docid_limit); @@ -237,6 +239,8 @@ public: const Blueprint &root() const; double hit_ratio() const noexcept { return getState().hit_ratio(_docid_limit); } + // TODO Call getState().estimate() when it return a normalized estimate + double 
estimate() const noexcept { return getState().hit_ratio(_docid_limit); } virtual void fetchPostings(const ExecuteInfo &execInfo) = 0; virtual void freeze() = 0; @@ -318,7 +322,7 @@ private: bool infer_want_global_filter() const; size_t count_termwise_nodes(const UnpackInfo &unpack) const; - virtual double computeNextHitRate(const Blueprint & child, double hitRate) const; + virtual double computeNextHitRate(const Blueprint & child, double hit_rate, bool use_estimate) const; protected: // returns an empty collection if children have empty or diff --git a/searchlib/src/vespa/searchlib/queryeval/intermediate_blueprints.cpp b/searchlib/src/vespa/searchlib/queryeval/intermediate_blueprints.cpp index 4d0656b421c..c4044ba3d00 100644 --- a/searchlib/src/vespa/searchlib/queryeval/intermediate_blueprints.cpp +++ b/searchlib/src/vespa/searchlib/queryeval/intermediate_blueprints.cpp @@ -270,19 +270,21 @@ AndBlueprint::createFilterSearch(bool strict, FilterConstraint constraint) const } double -AndBlueprint::computeNextHitRate(const Blueprint & child, double hitRate) const { - return hitRate * child.hit_ratio(); +AndBlueprint::computeNextHitRate(const Blueprint & child, double hit_rate, bool use_estimate) const { + double estimate = use_estimate ? child.estimate() : child.hit_ratio(); + return hit_rate * estimate; } double -OrBlueprint::computeNextHitRate(const Blueprint & child, double hitRate) const { +OrBlueprint::computeNextHitRate(const Blueprint & child, double hit_rate, bool use_estimate) const { // Avoid dropping hitRate to zero when meeting a conservatively high hitrate in a child. // Happens at least when using non fast-search attributes, and with AND nodes. constexpr double MIN_INVERSE_HIT_RATIO = 0.10; - double inverse_child_hit_ratio = 1.0 - child.hit_ratio(); - return (inverse_child_hit_ratio > MIN_INVERSE_HIT_RATIO) - ? hitRate * inverse_child_hit_ratio - : hitRate; + double estimate = use_estimate ? 
child.estimate() : child.hit_ratio(); + double inverse_child_estimate = 1.0 - estimate; + return (inverse_child_estimate > MIN_INVERSE_HIT_RATIO) + ? hit_rate * inverse_child_estimate + : hit_rate; } //----------------------------------------------------------------------------- diff --git a/searchlib/src/vespa/searchlib/queryeval/intermediate_blueprints.h b/searchlib/src/vespa/searchlib/queryeval/intermediate_blueprints.h index 6d8082b60f6..1d88b3b21eb 100644 --- a/searchlib/src/vespa/searchlib/queryeval/intermediate_blueprints.h +++ b/searchlib/src/vespa/searchlib/queryeval/intermediate_blueprints.h @@ -54,7 +54,7 @@ public: SearchIterator::UP createFilterSearch(bool strict, FilterConstraint constraint) const override; private: - double computeNextHitRate(const Blueprint & child, double hitRate) const override; + double computeNextHitRate(const Blueprint & child, double hit_rate, bool use_estimate) const override; }; //----------------------------------------------------------------------------- @@ -78,7 +78,7 @@ public: SearchIterator::UP createFilterSearch(bool strict, FilterConstraint constraint) const override; private: - double computeNextHitRate(const Blueprint & child, double hitRate) const override; + double computeNextHitRate(const Blueprint & child, double hit_rate, bool use_estimate) const override; uint8_t calculate_cost_tier() const override; }; diff --git a/searchlib/src/vespa/searchlib/queryeval/same_element_blueprint.cpp b/searchlib/src/vespa/searchlib/queryeval/same_element_blueprint.cpp index c93cef47c27..eb1d217ad6a 100644 --- a/searchlib/src/vespa/searchlib/queryeval/same_element_blueprint.cpp +++ b/searchlib/src/vespa/searchlib/queryeval/same_element_blueprint.cpp @@ -28,7 +28,7 @@ SameElementBlueprint::~SameElementBlueprint() = default; FieldSpec SameElementBlueprint::getNextChildField(const vespalib::string &field_name, uint32_t field_id) { - return FieldSpec(field_name, field_id, _layout.allocTermField(field_id), false); + return 
{field_name, field_id, _layout.allocTermField(field_id), false}; } void @@ -60,11 +60,13 @@ SameElementBlueprint::fetchPostings(const ExecuteInfo &execInfo) { if (_terms.empty()) return; _terms[0]->fetchPostings(execInfo); - double hit_rate = execInfo.hitRate() * _terms[0]->hit_ratio(); + double estimate = execInfo.use_estimate_for_fetch_postings() ? _terms[0]->hit_ratio() : _terms[0]->estimate(); + double hit_rate = execInfo.hitRate() * estimate; for (size_t i = 1; i < _terms.size(); ++i) { Blueprint & term = *_terms[i]; term.fetchPostings(ExecuteInfo::create(false, hit_rate, execInfo)); - hit_rate = hit_rate * term.hit_ratio(); + estimate = execInfo.use_estimate_for_fetch_postings() ? _terms[0]->hit_ratio() : _terms[0]->estimate(); + hit_rate = hit_rate * estimate; } } |