aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorHenning Baldersheim <balder@yahoo-inc.com>2023-12-18 12:50:02 +0100
committerGitHub <noreply@github.com>2023-12-18 12:50:02 +0100
commitce02fe1d45877ec7543e35d9a87ac65f9f8744e3 (patch)
tree4976975017dfc6c2329141ccb7f927c2e25e5679
parent3129fe54642666b554bb21c6126984fe2ff6fced (diff)
parent7251d807994bde066a1d05586d8596498657c55a (diff)
Merge pull request #29687 from vespa-engine/balder/improve-quality-of-estimate
- When estimating number of hits in range measure at both ends of ran…
-rw-r--r--searchlib/src/vespa/searchlib/attribute/postinglistsearchcontext.h39
1 files changed, 28 insertions, 11 deletions
diff --git a/searchlib/src/vespa/searchlib/attribute/postinglistsearchcontext.h b/searchlib/src/vespa/searchlib/attribute/postinglistsearchcontext.h
index 50c1e4d2498..83267dc4bfb 100644
--- a/searchlib/src/vespa/searchlib/attribute/postinglistsearchcontext.h
+++ b/searchlib/src/vespa/searchlib/attribute/postinglistsearchcontext.h
@@ -57,6 +57,13 @@ protected:
~PostingListSearchContext() override;
+ double avg_values_per_document() const noexcept {
+ return static_cast<double>(_numValues) / static_cast<double>(_docIdLimit);
+ }
+ double avg_postinglist_size() const noexcept {
+ return static_cast<double>(_numValues) / _dictSize;
+ }
+
void lookupTerm(const vespalib::datastore::EntryComparator &comp);
void lookupRange(const vespalib::datastore::EntryComparator &low, const vespalib::datastore::EntryComparator &high);
void lookupSingle();
@@ -347,12 +354,11 @@ NumericPostingSearchContext<BaseSC, AttrT, DataT>::use_posting_lists_when_non_st
if ( ! info.create_postinglist_when_non_strict()) return false;
- constexpr float lookup_match_constant = 5.0;
- constexpr float posting_list_merge_constant = 1.0;
+ constexpr double lookup_match_constant = 5.0;
+ constexpr double posting_list_merge_constant = 1.0;
uint32_t exp_doc_hits = this->_docIdLimit * info.hit_rate();
- float avg_values_per_document = static_cast<float>(this->_numValues) / static_cast<float>(this->_docIdLimit);
- float lookup_match_cost = exp_doc_hits * avg_values_per_document * lookup_match_constant;
+ float lookup_match_cost = exp_doc_hits * this->avg_values_per_document() * lookup_match_constant;
float posting_list_cost = this->estimated_hits_in_range() * posting_list_merge_constant;
return posting_list_cost < lookup_match_cost;
}
@@ -364,14 +370,25 @@ NumericPostingSearchContext<BaseSC, AttrT, DataT>::calc_estimated_hits_in_range(
size_t exact_sum = 0;
size_t estimated_sum = 0;
- auto it = this->_lowerDictItr;
- for (uint32_t count = 0; (it != this->_upperDictItr) && (count < this->max_posting_lists_to_count); ++it, ++count) {
- exact_sum += this->_posting_store.frozenSize(it.getData().load_acquire());
+ // Sample lower range
+ auto it_forward = this->_lowerDictItr;
+ for (uint32_t count = 0; (it_forward != this->_upperDictItr) && (count < this->max_posting_lists_to_count); ++it_forward, ++count) {
+ exact_sum += this->_posting_store.frozenSize(it_forward.getData().load_acquire());
}
- if (it != this->_upperDictItr) {
- uint32_t remaining_posting_lists = this->_upperDictItr - it;
- float hits_per_posting_list = static_cast<float>(exact_sum) / static_cast<float>(this->max_posting_lists_to_count);
- estimated_sum = remaining_posting_lists * hits_per_posting_list;
+ if (it_forward != this->_upperDictItr) {
+ //Sample upper range
+ auto it_backward = this->_upperDictItr;
+ for (uint32_t count = 0; (it_backward != it_forward) && (count < this->max_posting_lists_to_count);++count) {
+ --it_backward;
+ exact_sum += this->_posting_store.frozenSize(it_backward.getData().load_acquire());
+ }
+ if (it_forward != it_backward) {
+ // Estimate the rest
+ uint32_t remaining_posting_lists = it_backward - it_forward;
+ double measured_hits_per_posting_list = static_cast<double>(exact_sum) / (this->max_posting_lists_to_count * 2);
+ // Let measure and global rate count equally, to reduce the effect of outlayers.
+ estimated_sum = remaining_posting_lists * (measured_hits_per_posting_list + this->avg_postinglist_size())/2;
+ }
}
return exact_sum + estimated_sum;
}