diff options
author | Henning Baldersheim <balder@yahoo-inc.com> | 2023-11-17 13:55:23 +0100 |
---|---|---|
committer | GitHub <noreply@github.com> | 2023-11-17 13:55:23 +0100 |
commit | b0ff93e170cdd38f1bc1b38b756a428c4e0726ca (patch) | |
tree | 19d95bb31a133426f9b89e6ecc0191d8996acbbe | |
parent | 7bc9bc1707c81c24b9fa031609d537e39e49bae2 (diff) | |
parent | 38c682f3a630bb5274027bb8086e1bff27f7725a (diff) |
Merge pull request #29355 from vespa-engine/balder/avoid-limiting-not-necessary
Balder/avoid limiting not necessary
5 files changed, 26 insertions, 24 deletions
diff --git a/searchcore/src/tests/proton/matching/match_phase_limiter/match_phase_limiter_test.cpp b/searchcore/src/tests/proton/matching/match_phase_limiter/match_phase_limiter_test.cpp index 21c572995d3..b26ed1d4765 100644 --- a/searchcore/src/tests/proton/matching/match_phase_limiter/match_phase_limiter_test.cpp +++ b/searchcore/src/tests/proton/matching/match_phase_limiter/match_phase_limiter_test.cpp @@ -352,6 +352,7 @@ TEST("require that the match phase limiter is able to pre-limit the query") { " hit_rate: 0.1," " num_docs: 100000," " max_filter_docs: 100000," + " upper_limited_corpus_size: 100000," " wanted_docs: 5000," " action: 'Will limit with prefix filter'," " max_group_size: 5000," diff --git a/searchcore/src/vespa/searchcore/proton/matching/match_phase_limiter.cpp b/searchcore/src/vespa/searchcore/proton/matching/match_phase_limiter.cpp index b64d5ba4c05..784ce649c5f 100644 --- a/searchcore/src/vespa/searchcore/proton/matching/match_phase_limiter.cpp +++ b/searchcore/src/vespa/searchcore/proton/matching/match_phase_limiter.cpp @@ -102,6 +102,10 @@ do_limit(AttributeLimiter &limiter_factory, SearchIterator::UP search, double ma return search; } +// When hitrate is below 1% limiting the query is often far more expensive than not. +// TODO This limit should probably be a lot higher. +constexpr double MIN_HIT_RATE_LIMIT = 0.01; + } // namespace proton::matching::<unnamed> SearchIterator::UP @@ -114,11 +118,16 @@ MatchPhaseLimiter::maybe_limit(SearchIterator::UP search, double match_freq, siz trace->setDouble("hit_rate", match_freq); trace->setLong("num_docs", num_docs); trace->setLong("max_filter_docs", max_filter_docs); + trace->setLong("upper_limited_corpus_size", upper_limited_corpus_size); trace->setLong("wanted_docs", wanted_num_docs); } - if (upper_limited_corpus_size <= wanted_num_docs) { + if ((upper_limited_corpus_size <= wanted_num_docs) || (match_freq < MIN_HIT_RATE_LIMIT)) { if (trace) { - trace->setString("action", "Will not limit !"); + if (upper_limited_corpus_size <= wanted_num_docs) { + trace->setString("action", "Will not limit due to upper_limited_corpus_size <= wanted_num_docs"); + } else if (match_freq < MIN_HIT_RATE_LIMIT) { + trace->setString("action", "Will not limit due to match_freq < MIN_HIT_RATE_LIMIT(1%)"); + } } LOG(debug, "Will not limit ! maybe_limit(hit_rate=%g, num_docs=%ld, max_filter_docs=%ld) = wanted_num_docs=%ld", match_freq, num_docs, max_filter_docs, wanted_num_docs); diff --git a/searchlib/src/vespa/searchlib/attribute/postinglistsearchcontext.hpp b/searchlib/src/vespa/searchlib/attribute/postinglistsearchcontext.hpp index e4047b57341..cc12b1f7825 100644 --- a/searchlib/src/vespa/searchlib/attribute/postinglistsearchcontext.hpp +++ b/searchlib/src/vespa/searchlib/attribute/postinglistsearchcontext.hpp @@ -204,7 +204,7 @@ createPostingIterator(fef::TermFieldMatchData *matchData, bool strict) } } // returning nullptr will trigger fallback to filter iterator - return SearchIterator::UP(); + return {}; } diff --git a/vespalib/src/vespa/vespalib/btree/btreeiterator.h b/vespalib/src/vespa/vespalib/btree/btreeiterator.h index 153fb005f00..53e4d004981 100644 --- a/vespalib/src/vespa/vespalib/btree/btreeiterator.h +++ b/vespalib/src/vespa/vespalib/btree/btreeiterator.h @@ -166,7 +166,7 @@ private: /* * Find the previous leaf node, called by operator--() as needed. */ - VESPA_DLL_LOCAL void findPrevLeafNode(); + void findPrevLeafNode(); protected: /* @@ -219,7 +219,18 @@ protected: * Step iterator backwards. If at end then place it at last valid * position in tree (cf. rbegin()) */ - BTreeIteratorBase & operator--(); + BTreeIteratorBase & operator--() { + if (_leaf.getNode() == nullptr) { + rbegin(); + return *this; + } + if (_leaf.getIdx() > 0u) { + _leaf.decIdx(); + return *this; + } + findPrevLeafNode(); + return *this; + } ~BTreeIteratorBase(); BTreeIteratorBase(const BTreeIteratorBase &other); diff --git a/vespalib/src/vespa/vespalib/btree/btreeiterator.hpp b/vespalib/src/vespa/vespalib/btree/btreeiterator.hpp index 5f15ebc8b61..fdab3a94a5b 100644 --- a/vespalib/src/vespa/vespalib/btree/btreeiterator.hpp +++ b/vespalib/src/vespa/vespalib/btree/btreeiterator.hpp @@ -461,25 +461,6 @@ BTreeIteratorBase() noexcept template <typename KeyT, typename DataT, typename AggrT, uint32_t INTERNAL_SLOTS, uint32_t LEAF_SLOTS, uint32_t PATH_SIZE> -BTreeIteratorBase<KeyT, DataT, AggrT, INTERNAL_SLOTS, LEAF_SLOTS, PATH_SIZE> & -BTreeIteratorBase<KeyT, DataT, AggrT, INTERNAL_SLOTS, LEAF_SLOTS, PATH_SIZE>:: -operator--() -{ - if (_leaf.getNode() == nullptr) { - rbegin(); - return *this; - } - if (_leaf.getIdx() > 0u) { - _leaf.decIdx(); - return *this; - } - findPrevLeafNode(); - return *this; -} - - -template <typename KeyT, typename DataT, typename AggrT, - uint32_t INTERNAL_SLOTS, uint32_t LEAF_SLOTS, uint32_t PATH_SIZE> size_t BTreeIteratorBase<KeyT, DataT, AggrT, INTERNAL_SLOTS, LEAF_SLOTS, PATH_SIZE>:: size() const noexcept |