aboutsummaryrefslogtreecommitdiffstats
path: root/searchcore
diff options
context:
space:
mode:
author: Henning Baldersheim <balder@yahoo-inc.com> 2023-11-07 13:10:16 +0000
committer: Henning Baldersheim <balder@yahoo-inc.com> 2023-11-07 13:10:16 +0000
commit: 0207f8754280121f47e6b3b183e3bdc9ecc27f89 (patch)
tree: 2bee08645276f7c37b506c8fcd00232e7d4315ba /searchcore
parent: 8e983a117284a7e965e27e3a5c0a07cdbcb7d4cd (diff)
If match-phase limiting has concluded that a post filter is most efficient,
we must only generate posting lists if it is actually beneficial. If not, the fixed cost is too high.
Diffstat (limited to 'searchcore')
-rw-r--r-- searchcore/src/tests/proton/matching/match_phase_limiter/match_phase_limiter_test.cpp | 5
-rw-r--r-- searchcore/src/vespa/searchcore/proton/matching/attribute_limiter.cpp | 6
-rw-r--r-- searchcore/src/vespa/searchcore/proton/matching/attribute_limiter.h | 3
-rw-r--r-- searchcore/src/vespa/searchcore/proton/matching/match_phase_limit_calculator.h | 16
-rw-r--r-- searchcore/src/vespa/searchcore/proton/matching/match_phase_limiter.cpp | 8
5 files changed, 20 insertions, 18 deletions
diff --git a/searchcore/src/tests/proton/matching/match_phase_limiter/match_phase_limiter_test.cpp b/searchcore/src/tests/proton/matching/match_phase_limiter/match_phase_limiter_test.cpp
index 9f4edac9ba4..21c572995d3 100644
--- a/searchcore/src/tests/proton/matching/match_phase_limiter/match_phase_limiter_test.cpp
+++ b/searchcore/src/tests/proton/matching/match_phase_limiter/match_phase_limiter_test.cpp
@@ -181,6 +181,7 @@ TEST("require that max group size is calculated correctly") {
TEST("require that the attribute limiter works correctly") {
FakeRequestContext requestContext;
MockRangeLocator rangeLocator;
+ constexpr double HIT_RATE = 0.1;
for (int i = 0; i <= 7; ++i) {
bool descending = (i & 1) != 0;
bool strict = (i & 2) != 0;
@@ -190,10 +191,10 @@ TEST("require that the attribute limiter works correctly") {
"category", 10.0, AttributeLimiter::LOOSE);
EXPECT_EQUAL(0u, searchable.create_cnt);
EXPECT_FALSE(limiter.was_used());
- SearchIterator::UP s1 = limiter.create_search(42, diverse ? 3 : 42, strict);
+ SearchIterator::UP s1 = limiter.create_search(42, diverse ? 3 : 42, HIT_RATE, strict);
EXPECT_TRUE(limiter.was_used());
EXPECT_EQUAL(1u, searchable.create_cnt);
- SearchIterator::UP s2 = limiter.create_search(42, diverse ? 3 : 42, strict);
+ SearchIterator::UP s2 = limiter.create_search(42, diverse ? 3 : 42, HIT_RATE, strict);
EXPECT_EQUAL(1u, searchable.create_cnt);
auto *ms = dynamic_cast<MockSearch*>(s1.get());
ASSERT_TRUE(ms != nullptr);
diff --git a/searchcore/src/vespa/searchcore/proton/matching/attribute_limiter.cpp b/searchcore/src/vespa/searchcore/proton/matching/attribute_limiter.cpp
index 9911b04e087..988f79111a1 100644
--- a/searchcore/src/vespa/searchcore/proton/matching/attribute_limiter.cpp
+++ b/searchcore/src/vespa/searchcore/proton/matching/attribute_limiter.cpp
@@ -76,8 +76,8 @@ toString(AttributeLimiter::DiversityCutoffStrategy strategy)
return (strategy == AttributeLimiter::DiversityCutoffStrategy::STRICT) ? STRICT_STR : LOOSE_STR;
}
-SearchIterator::UP
-AttributeLimiter::create_search(size_t want_hits, size_t max_group_size, bool strictSearch)
+std::unique_ptr<SearchIterator>
+AttributeLimiter::create_search(size_t want_hits, size_t max_group_size, double hit_rate, bool strictSearch)
{
std::lock_guard<std::mutex> guard(_lock);
const uint32_t my_field_id = 0;
@@ -99,7 +99,7 @@ AttributeLimiter::create_search(size_t want_hits, size_t max_group_size, bool st
FieldSpecList field; // single field API is protected
field.add(FieldSpec(_attribute_name, my_field_id, my_handle));
_blueprint = _searchable_attributes.createBlueprint(_requestContext, field, node);
- _blueprint->fetchPostings(ExecuteInfo::create(strictSearch, &_requestContext.getDoom()));
+ _blueprint->fetchPostings(ExecuteInfo::create(strictSearch, hit_rate, &_requestContext.getDoom()));
_estimatedHits.store(_blueprint->getState().estimate().estHits, std::memory_order_relaxed);
_blueprint->freeze();
}
diff --git a/searchcore/src/vespa/searchcore/proton/matching/attribute_limiter.h b/searchcore/src/vespa/searchcore/proton/matching/attribute_limiter.h
index 676dbf26108..df0acccbd7a 100644
--- a/searchcore/src/vespa/searchcore/proton/matching/attribute_limiter.h
+++ b/searchcore/src/vespa/searchcore/proton/matching/attribute_limiter.h
@@ -31,6 +31,7 @@ class RangeQueryLocator;
class AttributeLimiter
{
public:
+ using SearchIterator = search::queryeval::SearchIterator;
enum DiversityCutoffStrategy { LOOSE, STRICT};
AttributeLimiter(const RangeQueryLocator & _rangeQueryLocator,
search::queryeval::Searchable &searchable_attributes,
@@ -40,7 +41,7 @@ public:
double diversityCutoffFactor,
DiversityCutoffStrategy diversityCutoffStrategy);
~AttributeLimiter();
- std::unique_ptr<search::queryeval::SearchIterator> create_search(size_t want_hits, size_t max_group_size, bool strictSearch);
+ std::unique_ptr<SearchIterator> create_search(size_t want_hits, size_t max_group_size, double hit_rate, bool strictSearch);
bool was_used() const;
ssize_t getEstimatedHits() const;
static DiversityCutoffStrategy toDiversityCutoffStrategy(vespalib::stringref strategy);
diff --git a/searchcore/src/vespa/searchcore/proton/matching/match_phase_limit_calculator.h b/searchcore/src/vespa/searchcore/proton/matching/match_phase_limit_calculator.h
index 6cbeeebbfa0..c422529ea30 100644
--- a/searchcore/src/vespa/searchcore/proton/matching/match_phase_limit_calculator.h
+++ b/searchcore/src/vespa/searchcore/proton/matching/match_phase_limit_calculator.h
@@ -25,21 +25,21 @@ public:
* @param min_groups the minimum number of diversity groups you want
* @param sample fraction of max_hits to be used as sample size before performing match phase limiting
*/
- MatchPhaseLimitCalculator(size_t max_hits, size_t min_groups, double sample) :
- _max_hits(max_hits),
- _min_groups(std::max(size_t(1), min_groups)),
- _sample_hits(max_hits * sample)
+ MatchPhaseLimitCalculator(size_t max_hits, size_t min_groups, double sample) noexcept
+ : _max_hits(max_hits),
+ _min_groups(std::max(size_t(1), min_groups)),
+ _sample_hits(max_hits * sample)
{}
- size_t sample_hits_per_thread(size_t num_threads) const {
+ size_t sample_hits_per_thread(size_t num_threads) const noexcept {
return std::max(size_t(1), std::max(128 / num_threads, _sample_hits / num_threads));
}
- size_t wanted_num_docs(double hit_rate) const {
+ size_t wanted_num_docs(double hit_rate) const noexcept {
return std::min((double)0x7fffFFFF, std::max(128.0, _max_hits / hit_rate));
}
- size_t estimated_hits(double hit_rate, size_t num_docs) const {
+ size_t estimated_hits(double hit_rate, size_t num_docs) const noexcept {
return (size_t) (hit_rate * num_docs);
}
- size_t max_group_size(size_t wanted_num_docs_in) const {
+ size_t max_group_size(size_t wanted_num_docs_in) const noexcept {
return (wanted_num_docs_in / _min_groups);
}
};
diff --git a/searchcore/src/vespa/searchcore/proton/matching/match_phase_limiter.cpp b/searchcore/src/vespa/searchcore/proton/matching/match_phase_limiter.cpp
index 3d319b84828..908843ca3ca 100644
--- a/searchcore/src/vespa/searchcore/proton/matching/match_phase_limiter.cpp
+++ b/searchcore/src/vespa/searchcore/proton/matching/match_phase_limiter.cpp
@@ -87,11 +87,11 @@ namespace {
template <bool PRE_FILTER>
SearchIterator::UP
-do_limit(AttributeLimiter &limiter_factory, SearchIterator::UP search,
+do_limit(AttributeLimiter &limiter_factory, SearchIterator::UP search, double match_freq,
size_t wanted_num_docs, size_t max_group_size,
uint32_t current_id, uint32_t end_id)
{
- SearchIterator::UP limiter = limiter_factory.create_search(wanted_num_docs, max_group_size, PRE_FILTER);
+ SearchIterator::UP limiter = limiter_factory.create_search(wanted_num_docs, max_group_size, match_freq, PRE_FILTER);
limiter = search->andWith(std::move(limiter), wanted_num_docs);
if (limiter) {
search = std::make_unique<LimitedSearchT<PRE_FILTER>>(std::move(limiter), std::move(search));
@@ -139,8 +139,8 @@ MatchPhaseLimiter::maybe_limit(SearchIterator::UP search, double match_freq, siz
use_pre_filter ? "pre" : "post", match_freq, num_docs, max_filter_docs, wanted_num_docs,
max_group_size, current_id, end_id, total_query_hits);
return (use_pre_filter)
- ? do_limit<true>(_limiter_factory, std::move(search), wanted_num_docs, max_group_size, current_id, end_id)
- : do_limit<false>(_limiter_factory, std::move(search), wanted_num_docs, max_group_size, current_id, end_id);
+ ? do_limit<true>(_limiter_factory, std::move(search), match_freq, wanted_num_docs, max_group_size, current_id, end_id)
+ : do_limit<false>(_limiter_factory, std::move(search), match_freq, wanted_num_docs, max_group_size, current_id, end_id);
}
void