From 989b472666ae79b9d55adcec85833ab17b9dc942 Mon Sep 17 00:00:00 2001 From: Henning Baldersheim Date: Mon, 4 Dec 2023 15:33:25 +0000 Subject: - Control creation of temporary postinglists during fetchPostings for non-strict iterators. --- searchcore/src/tests/proton/matching/query_test.cpp | 6 +++--- .../searchcore/proton/matching/attribute_limiter.cpp | 2 +- .../vespa/searchcore/proton/matching/match_tools.cpp | 6 ++++-- .../src/vespa/searchcore/proton/matching/query.cpp | 5 +++-- .../src/vespa/searchcore/proton/matching/query.h | 3 ++- searchlib/src/vespa/searchlib/fef/indexproperties.cpp | 6 ++++++ searchlib/src/vespa/searchlib/fef/indexproperties.h | 10 ++++++++++ searchlib/src/vespa/searchlib/fef/ranksetup.cpp | 2 ++ searchlib/src/vespa/searchlib/fef/ranksetup.h | 2 ++ searchlib/src/vespa/searchlib/queryeval/blueprint.cpp | 2 +- .../src/vespa/searchlib/queryeval/executeinfo.cpp | 4 ++-- searchlib/src/vespa/searchlib/queryeval/executeinfo.h | 19 ++++++++++++++----- .../searchlib/queryeval/same_element_blueprint.cpp | 2 +- 13 files changed, 51 insertions(+), 18 deletions(-) diff --git a/searchcore/src/tests/proton/matching/query_test.cpp b/searchcore/src/tests/proton/matching/query_test.cpp index cb24d8bc407..bf574af725d 100644 --- a/searchcore/src/tests/proton/matching/query_test.cpp +++ b/searchcore/src/tests/proton/matching/query_test.cpp @@ -713,7 +713,7 @@ void Test::requireThatQueryGluesEverythingTogether() { EXPECT_EQUAL(1u, md->getNumTermFields()); query.optimize(); - query.fetchPostings(ExecuteInfo::create(true, 1.0F, &requestContext.getDoom())); + query.fetchPostings(ExecuteInfo::TRUE); SearchIterator::UP search = query.createSearch(*md); ASSERT_TRUE(search.get()); } @@ -746,7 +746,7 @@ void checkQueryAddsLocation(const string &loc_in, const string &loc_out) { MatchData::UP md = mdl.createMatchData(); EXPECT_EQUAL(2u, md->getNumTermFields()); - query.fetchPostings(ExecuteInfo::create(true, 1.0F, &requestContext.getDoom())); + query.fetchPostings(ExecuteInfo::TRUE); SearchIterator::UP search = query.createSearch(*md); ASSERT_TRUE(search.get()); if (!EXPECT_NOT_EQUAL(string::npos, search->asString().find(loc_out))) { @@ -968,7 +968,7 @@ Test::requireThatWhiteListBlueprintCanBeUsed() MatchData::UP md = mdl.createMatchData(); query.optimize(); - query.fetchPostings(ExecuteInfo::create(true, 1.0F, &requestContext.getDoom())); + query.fetchPostings(ExecuteInfo::TRUE); SearchIterator::UP search = query.createSearch(*md); SimpleResult exp = SimpleResult().addHit(1).addHit(5).addHit(7).addHit(11); SimpleResult act; diff --git a/searchcore/src/vespa/searchcore/proton/matching/attribute_limiter.cpp b/searchcore/src/vespa/searchcore/proton/matching/attribute_limiter.cpp index d07169a0d63..349d33e03e9 100644 --- a/searchcore/src/vespa/searchcore/proton/matching/attribute_limiter.cpp +++ b/searchcore/src/vespa/searchcore/proton/matching/attribute_limiter.cpp @@ -99,7 +99,7 @@ AttributeLimiter::create_match_data(size_t want_hits, size_t max_group_size, dou FieldSpecList field; // single field API is protected field.add(FieldSpec(_attribute_name, my_field_id, my_handle)); _blueprint = _searchable_attributes.createBlueprint(_requestContext, field, node); - auto execInfo = ExecuteInfo::create(strictSearch, strictSearch ? 1.0F : hit_rate, &_requestContext.getDoom()); + auto execInfo = ExecuteInfo::create(strictSearch, strictSearch ? 1.0F : hit_rate, &_requestContext.getDoom(), true); _blueprint->fetchPostings(execInfo); _estimatedHits.store(_blueprint->getState().estimate().estHits, std::memory_order_relaxed); _blueprint->freeze(); diff --git a/searchcore/src/vespa/searchcore/proton/matching/match_tools.cpp b/searchcore/src/vespa/searchcore/proton/matching/match_tools.cpp index 7beecaca613..521e4ffdbe2 100644 --- a/searchcore/src/vespa/searchcore/proton/matching/match_tools.cpp +++ b/searchcore/src/vespa/searchcore/proton/matching/match_tools.cpp @@ -204,12 +204,14 @@ MatchToolsFactory(QueryLimiter & queryLimiter, _query.optimize(); trace.addEvent(4, "Perform dictionary lookups and posting lists initialization"); float hitRate = std::min(1.0F, float(maxNumHits)/float(searchContext.getDocIdLimit())); - _query.fetchPostings(search::queryeval::ExecuteInfo::create(is_search, hitRate, &_requestContext.getDoom())); + bool create_postinglist_when_non_strict = CreatePostingListWithNonStrict::check(_queryEnv.getProperties(), rankSetup.create_postinglist_when_non_strict()); + _query.fetchPostings(search::queryeval::ExecuteInfo::create(is_search, hitRate, &_requestContext.getDoom(), + create_postinglist_when_non_strict)); if (is_search) { _query.handle_global_filter(_requestContext.getDoom(), searchContext.getDocIdLimit(), _attribute_blueprint_params.global_filter_lower_limit, _attribute_blueprint_params.global_filter_upper_limit, - thread_bundle, trace); + thread_bundle, trace, create_postinglist_when_non_strict); } _query.freeze(); trace.addEvent(5, "Prepare shared state for multi-threaded rank executors"); diff --git a/searchcore/src/vespa/searchcore/proton/matching/query.cpp b/searchcore/src/vespa/searchcore/proton/matching/query.cpp index f55ba77cec8..de0a4846615 100644 --- a/searchcore/src/vespa/searchcore/proton/matching/query.cpp +++ b/searchcore/src/vespa/searchcore/proton/matching/query.cpp @@ -248,7 +248,8 @@ Query::fetchPostings(const ExecuteInfo & executeInfo) void Query::handle_global_filter(const vespalib::Doom & doom, uint32_t docid_limit, double global_filter_lower_limit, double global_filter_upper_limit, - vespalib::ThreadBundle &thread_bundle, search::engine::Trace& trace) + vespalib::ThreadBundle &thread_bundle, search::engine::Trace& trace, + bool create_postinglist_when_non_strict) { if (!handle_global_filter(*_blueprint, docid_limit, global_filter_lower_limit, global_filter_upper_limit, thread_bundle, &trace)) { return; @@ -258,7 +259,7 @@ Query::handle_global_filter(const vespalib::Doom & doom, uint32_t docid_limit, _blueprint = Blueprint::optimize(std::move(_blueprint)); LOG(debug, "blueprint after handle_global_filter:\n%s\n", _blueprint->asString().c_str()); // strictness may change if optimized order changed: - fetchPostings(ExecuteInfo::create(true, 1.0F, &doom)); + fetchPostings(ExecuteInfo::create(true, 1.0F, &doom, create_postinglist_when_non_strict)); } bool diff --git a/searchcore/src/vespa/searchcore/proton/matching/query.h b/searchcore/src/vespa/searchcore/proton/matching/query.h index 6ea326834a5..3e2c7686066 100644 --- a/searchcore/src/vespa/searchcore/proton/matching/query.h +++ b/searchcore/src/vespa/searchcore/proton/matching/query.h @@ -106,7 +106,8 @@ public: void handle_global_filter(const vespalib::Doom & doom, uint32_t docid_limit, double global_filter_lower_limit, double global_filter_upper_limit, - vespalib::ThreadBundle &thread_bundle, search::engine::Trace& trace); + vespalib::ThreadBundle &thread_bundle, search::engine::Trace& trace, + bool create_postinglist_when_non_strict); /** * Calculates and handles the global filter if needed by the blueprint tree. diff --git a/searchlib/src/vespa/searchlib/fef/indexproperties.cpp b/searchlib/src/vespa/searchlib/fef/indexproperties.cpp index 9c986d0bc63..7085e09eaa5 100644 --- a/searchlib/src/vespa/searchlib/fef/indexproperties.cpp +++ b/searchlib/src/vespa/searchlib/fef/indexproperties.cpp @@ -461,6 +461,12 @@ bool AlwaysMarkPhraseExpensive::check(const Properties &props, bool fallback) { return lookupBool(props, NAME, fallback); } +const vespalib::string CreatePostingListWithNonStrict::NAME("vespa.matching.create_postinglist_when_non_strict"); +const bool CreatePostingListWithNonStrict::DEFAULT_VALUE(true); +bool CreatePostingListWithNonStrict::check(const Properties &props, bool fallback) { + return lookupBool(props, NAME, fallback); +} + } // namespace matching namespace softtimeout { diff --git a/searchlib/src/vespa/searchlib/fef/indexproperties.h b/searchlib/src/vespa/searchlib/fef/indexproperties.h index 1921f52276f..fed238aa8a6 100644 --- a/searchlib/src/vespa/searchlib/fef/indexproperties.h +++ b/searchlib/src/vespa/searchlib/fef/indexproperties.h @@ -347,6 +347,16 @@ namespace matching { static bool check(const Properties &props) { return check(props, DEFAULT_VALUE); } static bool check(const Properties &props, bool fallback); }; + + /** + * When enabled posting lists can be created on the fly even if iterator is not strict. + **/ + struct CreatePostingListWithNonStrict { + static const vespalib::string NAME; + static const bool DEFAULT_VALUE; + static bool check(const Properties &props) { return check(props, DEFAULT_VALUE); } + static bool check(const Properties &props, bool fallback); + }; } namespace softtimeout { diff --git a/searchlib/src/vespa/searchlib/fef/ranksetup.cpp b/searchlib/src/vespa/searchlib/fef/ranksetup.cpp index d6b0b900516..d5c1361adac 100644 --- a/searchlib/src/vespa/searchlib/fef/ranksetup.cpp +++ b/searchlib/src/vespa/searchlib/fef/ranksetup.cpp @@ -61,6 +61,7 @@ RankSetup::RankSetup(const BlueprintFactory &factory, const IIndexEnvironment &i _compileError(false), _degradationAscendingOrder(false), _always_mark_phrase_expensive(false), + _create_postinglist_when_non_strict(true), _diversityAttribute(), _diversityMinGroups(1), _diversityCutoffFactor(10.0), @@ -135,6 +136,7 @@ RankSetup::configure() _mutateOnSummary._operation = mutate::on_summary::Operation::lookup(_indexEnv.getProperties()); _mutateAllowQueryOverride = mutate::AllowQueryOverride::check(_indexEnv.getProperties()); _always_mark_phrase_expensive = matching::AlwaysMarkPhraseExpensive::check(_indexEnv.getProperties()); + _create_postinglist_when_non_strict = matching::CreatePostingListWithNonStrict::check(_indexEnv.getProperties()); } void diff --git a/searchlib/src/vespa/searchlib/fef/ranksetup.h b/searchlib/src/vespa/searchlib/fef/ranksetup.h index d744b38cc6e..d560614ce39 100644 --- a/searchlib/src/vespa/searchlib/fef/ranksetup.h +++ b/searchlib/src/vespa/searchlib/fef/ranksetup.h @@ -70,6 +70,7 @@ private: bool _compileError; bool _degradationAscendingOrder; bool _always_mark_phrase_expensive; + bool _create_postinglist_when_non_strict; vespalib::string _diversityAttribute; uint32_t _diversityMinGroups; double _diversityCutoffFactor; @@ -222,6 +223,7 @@ public: return _degradationAscendingOrder; } bool always_mark_phrase_expensive() const noexcept { return _always_mark_phrase_expensive; } + bool create_postinglist_when_non_strict() const noexcept { return _create_postinglist_when_non_strict; } /** get number of hits to collect during graceful degradation in match phase */ uint32_t getDegradationMaxHits() const { return _degradationMaxHits; diff --git a/searchlib/src/vespa/searchlib/queryeval/blueprint.cpp b/searchlib/src/vespa/searchlib/queryeval/blueprint.cpp index 639805e116e..6bc125226de 100644 --- a/searchlib/src/vespa/searchlib/queryeval/blueprint.cpp +++ b/searchlib/src/vespa/searchlib/queryeval/blueprint.cpp @@ -624,7 +624,7 @@ IntermediateBlueprint::fetchPostings(const ExecuteInfo &execInfo) double nextHitRate = execInfo.hitRate(); for (size_t i = 0; i < _children.size(); ++i) { Blueprint & child = *_children[i]; - child.fetchPostings(ExecuteInfo::create(execInfo.isStrict() && inheritStrict(i), nextHitRate, execInfo.getDoom())); + child.fetchPostings(ExecuteInfo::create(execInfo.isStrict() && inheritStrict(i), nextHitRate, execInfo)); nextHitRate = computeNextHitRate(child, nextHitRate); } } diff --git a/searchlib/src/vespa/searchlib/queryeval/executeinfo.cpp b/searchlib/src/vespa/searchlib/queryeval/executeinfo.cpp index 6a7ca84b72f..27366a9b924 100644 --- a/searchlib/src/vespa/searchlib/queryeval/executeinfo.cpp +++ b/searchlib/src/vespa/searchlib/queryeval/executeinfo.cpp @@ -4,7 +4,7 @@ namespace search::queryeval { -const ExecuteInfo ExecuteInfo::TRUE(true, 1.0, nullptr); -const ExecuteInfo ExecuteInfo::FALSE(false, 1.0, nullptr); +const ExecuteInfo ExecuteInfo::TRUE(true, 1.0, nullptr, true); +const ExecuteInfo ExecuteInfo::FALSE(false, 1.0, nullptr, true); } diff --git a/searchlib/src/vespa/searchlib/queryeval/executeinfo.h b/searchlib/src/vespa/searchlib/queryeval/executeinfo.h index 362b0826f67..0095429e609 100644 --- a/searchlib/src/vespa/searchlib/queryeval/executeinfo.h +++ b/searchlib/src/vespa/searchlib/queryeval/executeinfo.h @@ -12,18 +12,25 @@ namespace search::queryeval { */ class ExecuteInfo { public: - ExecuteInfo() noexcept : ExecuteInfo(false, 1.0F, nullptr) { } + ExecuteInfo() noexcept : ExecuteInfo(false, 1.0F, nullptr, true) { } bool isStrict() const noexcept { return _strict; } + bool create_postinglist_when_non_strict() const noexcept { return _create_postinglist_when_non_strict; } float hitRate() const noexcept { return _hitRate; } bool soft_doom() const noexcept { return _doom && _doom->soft_doom(); } const vespalib::Doom * getDoom() const { return _doom; } static const ExecuteInfo TRUE; static const ExecuteInfo FALSE; static ExecuteInfo create(bool strict, const ExecuteInfo & org) noexcept { - return {strict, org._hitRate, org.getDoom()}; + return {strict, org._hitRate, org.getDoom(), org.create_postinglist_when_non_strict()}; + } + static ExecuteInfo create(bool strict, float hitRate, const ExecuteInfo & org) noexcept { + return {strict, hitRate, org.getDoom(), org.create_postinglist_when_non_strict()}; } static ExecuteInfo create(bool strict, float hitRate, const vespalib::Doom * doom) noexcept { - return {strict, hitRate, doom}; + return create(strict, hitRate, doom, true); + } + static ExecuteInfo create(bool strict, float hitRate, const vespalib::Doom * doom, bool postinglist_when_non_strict) noexcept { + return {strict, hitRate, doom, postinglist_when_non_strict}; } static ExecuteInfo createForTest(bool strict) noexcept { return createForTest(strict, 1.0F); @@ -32,14 +39,16 @@ public: return create(strict, hitRate, nullptr); } private: - ExecuteInfo(bool strict, float hitRate_in, const vespalib::Doom * doom) noexcept + ExecuteInfo(bool strict, float hitRate_in, const vespalib::Doom * doom, bool postinglist_when_non_strict) noexcept : _doom(doom), _hitRate(hitRate_in), - _strict(strict) + _strict(strict), + _create_postinglist_when_non_strict(postinglist_when_non_strict) { } const vespalib::Doom * _doom; float _hitRate; bool _strict; + bool _create_postinglist_when_non_strict; }; } diff --git a/searchlib/src/vespa/searchlib/queryeval/same_element_blueprint.cpp b/searchlib/src/vespa/searchlib/queryeval/same_element_blueprint.cpp index 5c795679d48..c93cef47c27 100644 --- a/searchlib/src/vespa/searchlib/queryeval/same_element_blueprint.cpp +++ b/searchlib/src/vespa/searchlib/queryeval/same_element_blueprint.cpp @@ -63,7 +63,7 @@ SameElementBlueprint::fetchPostings(const ExecuteInfo &execInfo) double hit_rate = execInfo.hitRate() * _terms[0]->hit_ratio(); for (size_t i = 1; i < _terms.size(); ++i) { Blueprint & term = *_terms[i]; - term.fetchPostings(ExecuteInfo::create(false, hit_rate, execInfo.getDoom())); + term.fetchPostings(ExecuteInfo::create(false, hit_rate, execInfo)); hit_rate = hit_rate * term.hit_ratio(); } } -- cgit v1.2.3