diff options
author | Henning Baldersheim <balder@yahoo-inc.com> | 2023-12-06 13:19:03 +0100 |
---|---|---|
committer | GitHub <noreply@github.com> | 2023-12-06 13:19:03 +0100 |
commit | 60ff007d5e7fedeecb7cc242cd363e2575ee8578 (patch) | |
tree | 11da251037100bad147a1b557ceb0357c40ef302 | |
parent | cc5a184caf737bf1fa97573ae7e15c2361396ba4 (diff) | |
parent | 281724ecc6edf458ba14803a7ec254e592fec6ea (diff) |
Merge pull request #29565 from vespa-engine/balder/add-use-estimate-for-fetch-postings-flag
Balder/add use estimate for fetch postings flag
20 files changed, 94 insertions, 37 deletions
diff --git a/config-model-api/abi-spec.json b/config-model-api/abi-spec.json index 81834c5e0cb..8f5d0d37c21 100644 --- a/config-model-api/abi-spec.json +++ b/config-model-api/abi-spec.json @@ -1288,7 +1288,8 @@ "public long mergingMaxMemoryUsagePerNode()", "public boolean usePerDocumentThrottledDeleteBucket()", "public boolean alwaysMarkPhraseExpensive()", - "public boolean createPostinglistWhenNonStrict()" + "public boolean createPostinglistWhenNonStrict()", + "public boolean useEstimateForFetchPostings()" ], "fields" : [ ] }, diff --git a/config-model-api/src/main/java/com/yahoo/config/model/api/ModelContext.java b/config-model-api/src/main/java/com/yahoo/config/model/api/ModelContext.java index cb394054510..f34f63a0cfc 100644 --- a/config-model-api/src/main/java/com/yahoo/config/model/api/ModelContext.java +++ b/config-model-api/src/main/java/com/yahoo/config/model/api/ModelContext.java @@ -117,6 +117,7 @@ public interface ModelContext { @ModelFeatureFlag(owners = {"vekterli"}) default boolean usePerDocumentThrottledDeleteBucket() { return false; } @ModelFeatureFlag(owners = {"baldersheim"}) default boolean alwaysMarkPhraseExpensive() { return false; } @ModelFeatureFlag(owners = {"baldersheim"}) default boolean createPostinglistWhenNonStrict() { return true; } + @ModelFeatureFlag(owners = {"baldersheim"}) default boolean useEstimateForFetchPostings() { return false; } } /** Warning: As elsewhere in this package, do not make backwards incompatible changes that will break old config models! 
*/ diff --git a/config-model/src/main/java/com/yahoo/schema/derived/RawRankProfile.java b/config-model/src/main/java/com/yahoo/schema/derived/RawRankProfile.java index 411e754f3ef..8090344ea8c 100644 --- a/config-model/src/main/java/com/yahoo/schema/derived/RawRankProfile.java +++ b/config-model/src/main/java/com/yahoo/schema/derived/RawRankProfile.java @@ -172,6 +172,7 @@ public class RawRankProfile implements RankProfilesConfig.Producer { private final double rankScoreDropLimit; private final boolean alwaysMarkPhraseExpensive; private final boolean createPostinglistWhenNonStrict; + private final boolean useEstimateForFetchPostings; /** * The rank type definitions used to derive settings for the native rank features @@ -215,6 +216,7 @@ public class RawRankProfile implements RankProfilesConfig.Producer { termwiseLimit = compiled.getTermwiseLimit().orElse(deployProperties.featureFlags().defaultTermwiseLimit()); alwaysMarkPhraseExpensive = deployProperties.featureFlags().alwaysMarkPhraseExpensive(); createPostinglistWhenNonStrict = deployProperties.featureFlags().createPostinglistWhenNonStrict(); + useEstimateForFetchPostings = deployProperties.featureFlags().useEstimateForFetchPostings(); postFilterThreshold = compiled.getPostFilterThreshold(); approximateThreshold = compiled.getApproximateThreshold(); targetHitsMaxAdjustmentFactor = compiled.getTargetHitsMaxAdjustmentFactor(); @@ -471,6 +473,9 @@ public class RawRankProfile implements RankProfilesConfig.Producer { if ( ! 
createPostinglistWhenNonStrict) { properties.add(new Pair<>("vespa.matching.create_postinglist_when_non_strict", String.valueOf(createPostinglistWhenNonStrict))); } + if (useEstimateForFetchPostings) { + properties.add(new Pair<>("vespa.matching.use_estimate_for_fetch_postings", String.valueOf(useEstimateForFetchPostings))); + } if (postFilterThreshold.isPresent()) { properties.add(new Pair<>("vespa.matching.global_filter.upper_limit", String.valueOf(postFilterThreshold.getAsDouble()))); } diff --git a/configserver/src/main/java/com/yahoo/vespa/config/server/deploy/ModelContextImpl.java b/configserver/src/main/java/com/yahoo/vespa/config/server/deploy/ModelContextImpl.java index 27c24c5d6ed..2f126cd84d3 100644 --- a/configserver/src/main/java/com/yahoo/vespa/config/server/deploy/ModelContextImpl.java +++ b/configserver/src/main/java/com/yahoo/vespa/config/server/deploy/ModelContextImpl.java @@ -203,6 +203,7 @@ public class ModelContextImpl implements ModelContext { private final String summaryDecodePolicy; private final boolean alwaysMarkPhraseExpensive; private final boolean createPostinglistWhenNonStrict; + private final boolean useEstimateForFetchPostings; private final int contentLayerMetadataFeatureLevel; private final boolean dynamicHeapSize; private final String unknownConfigDefinition; @@ -254,6 +255,7 @@ public class ModelContextImpl implements ModelContext { this.usePerDocumentThrottledDeleteBucket = flagValue(source, appId, version, Flags.USE_PER_DOCUMENT_THROTTLED_DELETE_BUCKET); this.alwaysMarkPhraseExpensive = flagValue(source, appId, version, Flags.ALWAYS_MARK_PHRASE_EXPENSIVE); this.createPostinglistWhenNonStrict = flagValue(source, appId, version, Flags.CREATE_POSTINGLIST_WHEN_NON_STRICT); + this.useEstimateForFetchPostings = flagValue(source, appId, version, Flags.USE_ESTIMATE_FOR_FETCH_POSTINGS); } @Override public int heapSizePercentage() { return heapPercentage; } @@ -301,6 +303,7 @@ public class ModelContextImpl implements ModelContext { } 
@Override public boolean alwaysMarkPhraseExpensive() { return alwaysMarkPhraseExpensive; } @Override public boolean createPostinglistWhenNonStrict() { return createPostinglistWhenNonStrict; } + @Override public boolean useEstimateForFetchPostings() { return useEstimateForFetchPostings; } @Override public int contentLayerMetadataFeatureLevel() { return contentLayerMetadataFeatureLevel; } @Override public boolean dynamicHeapSize() { return dynamicHeapSize; } @Override public String unknownConfigDefinition() { return unknownConfigDefinition; } diff --git a/flags/src/main/java/com/yahoo/vespa/flags/Flags.java b/flags/src/main/java/com/yahoo/vespa/flags/Flags.java index 5e395f04956..5af7c82a8c4 100644 --- a/flags/src/main/java/com/yahoo/vespa/flags/Flags.java +++ b/flags/src/main/java/com/yahoo/vespa/flags/Flags.java @@ -334,6 +334,13 @@ public class Flags { "Takes effect at redeployment", INSTANCE_ID); + public static final UnboundBooleanFlag USE_ESTIMATE_FOR_FETCH_POSTINGS = defineFeatureFlag( + "use-estimate-for-fetch-postings", false, + List.of("baldersheim"), "2023-12-06", "2024-01-31", + "If true an estimate assuming uncorrelated query terms is used in Blueprint::fetchPostings", + "Takes effect at redeployment", + INSTANCE_ID); + public static final UnboundBooleanFlag WRITE_CONFIG_SERVER_SESSION_DATA_AS_ONE_BLOB = defineFeatureFlag( "write-config-server-session-data-as-blob", false, List.of("hmusum"), "2023-07-19", "2024-02-01", diff --git a/searchcore/src/vespa/searchcore/proton/matching/attribute_limiter.cpp b/searchcore/src/vespa/searchcore/proton/matching/attribute_limiter.cpp index 349d33e03e9..d96d202fd4b 100644 --- a/searchcore/src/vespa/searchcore/proton/matching/attribute_limiter.cpp +++ b/searchcore/src/vespa/searchcore/proton/matching/attribute_limiter.cpp @@ -99,7 +99,8 @@ AttributeLimiter::create_match_data(size_t want_hits, size_t max_group_size, dou FieldSpecList field; // single field API is protected field.add(FieldSpec(_attribute_name, 
my_field_id, my_handle)); _blueprint = _searchable_attributes.createBlueprint(_requestContext, field, node); - auto execInfo = ExecuteInfo::create(strictSearch, strictSearch ? 1.0F : hit_rate, &_requestContext.getDoom(), true); + //TODO use_estimate must be switched to true quite soon + auto execInfo = ExecuteInfo::create(strictSearch, strictSearch ? 1.0F : hit_rate, &_requestContext.getDoom(), true, false); _blueprint->fetchPostings(execInfo); _estimatedHits.store(_blueprint->getState().estimate().estHits, std::memory_order_relaxed); _blueprint->freeze(); diff --git a/searchcore/src/vespa/searchcore/proton/matching/match_tools.cpp b/searchcore/src/vespa/searchcore/proton/matching/match_tools.cpp index ad1388f0407..7c8608985c3 100644 --- a/searchcore/src/vespa/searchcore/proton/matching/match_tools.cpp +++ b/searchcore/src/vespa/searchcore/proton/matching/match_tools.cpp @@ -17,6 +17,7 @@ using search::queryeval::IDiversifier; using search::attribute::diversity::DiversityFilter; using search::attribute::BasicType; using search::attribute::AttributeBlueprintParams; +using search::queryeval::ExecuteInfo; using vespalib::Issue; using namespace search::fef::indexproperties::matchphase; @@ -205,13 +206,15 @@ MatchToolsFactory(QueryLimiter & queryLimiter, trace.addEvent(4, "Perform dictionary lookups and posting lists initialization"); float hitRate = std::min(1.0F, float(maxNumHits)/float(searchContext.getDocIdLimit())); bool create_postinglist_when_non_strict = CreatePostingListWhenNonStrict::check(_queryEnv.getProperties(), rankSetup.create_postinglist_when_non_strict()); - _query.fetchPostings(search::queryeval::ExecuteInfo::create(is_search, hitRate, &_requestContext.getDoom(), - create_postinglist_when_non_strict)); + bool use_estimate_for_fetch_postings = UseEstimateForFetchPostings::check(_queryEnv.getProperties(), rankSetup.use_estimate_for_fetch_postings()); + _query.fetchPostings(ExecuteInfo::create(is_search, hitRate, &_requestContext.getDoom(), + 
create_postinglist_when_non_strict, use_estimate_for_fetch_postings)); if (is_search) { _query.handle_global_filter(_requestContext.getDoom(), searchContext.getDocIdLimit(), _attribute_blueprint_params.global_filter_lower_limit, _attribute_blueprint_params.global_filter_upper_limit, - thread_bundle, trace, create_postinglist_when_non_strict); + thread_bundle, trace, create_postinglist_when_non_strict, + use_estimate_for_fetch_postings); } _query.freeze(); trace.addEvent(5, "Prepare shared state for multi-threaded rank executors"); diff --git a/searchcore/src/vespa/searchcore/proton/matching/query.cpp b/searchcore/src/vespa/searchcore/proton/matching/query.cpp index de0a4846615..d9051ac09e7 100644 --- a/searchcore/src/vespa/searchcore/proton/matching/query.cpp +++ b/searchcore/src/vespa/searchcore/proton/matching/query.cpp @@ -249,7 +249,7 @@ void Query::handle_global_filter(const vespalib::Doom & doom, uint32_t docid_limit, double global_filter_lower_limit, double global_filter_upper_limit, vespalib::ThreadBundle &thread_bundle, search::engine::Trace& trace, - bool create_postinglist_when_non_strict) + bool create_postinglist_when_non_strict, bool use_estimate_for_fetch_postings) { if (!handle_global_filter(*_blueprint, docid_limit, global_filter_lower_limit, global_filter_upper_limit, thread_bundle, &trace)) { return; @@ -259,7 +259,7 @@ Query::handle_global_filter(const vespalib::Doom & doom, uint32_t docid_limit, _blueprint = Blueprint::optimize(std::move(_blueprint)); LOG(debug, "blueprint after handle_global_filter:\n%s\n", _blueprint->asString().c_str()); // strictness may change if optimized order changed: - fetchPostings(ExecuteInfo::create(true, 1.0F, &doom, create_postinglist_when_non_strict)); + fetchPostings(ExecuteInfo::create(true, 1.0F, &doom, create_postinglist_when_non_strict, use_estimate_for_fetch_postings)); } bool diff --git a/searchcore/src/vespa/searchcore/proton/matching/query.h b/searchcore/src/vespa/searchcore/proton/matching/query.h index 
3e2c7686066..c3ea2c5e664 100644 --- a/searchcore/src/vespa/searchcore/proton/matching/query.h +++ b/searchcore/src/vespa/searchcore/proton/matching/query.h @@ -107,7 +107,7 @@ public: void handle_global_filter(const vespalib::Doom & doom, uint32_t docid_limit, double global_filter_lower_limit, double global_filter_upper_limit, vespalib::ThreadBundle &thread_bundle, search::engine::Trace& trace, - bool create_postinglist_when_non_strict); + bool create_postinglist_when_non_strict, bool use_estimate_for_fetch_postings); /** * Calculates and handles the global filter if needed by the blueprint tree. diff --git a/searchlib/src/vespa/searchlib/fef/indexproperties.cpp b/searchlib/src/vespa/searchlib/fef/indexproperties.cpp index d503a955afc..637c197b303 100644 --- a/searchlib/src/vespa/searchlib/fef/indexproperties.cpp +++ b/searchlib/src/vespa/searchlib/fef/indexproperties.cpp @@ -467,6 +467,12 @@ bool CreatePostingListWhenNonStrict::check(const Properties &props, bool fallbac return lookupBool(props, NAME, fallback); } +const vespalib::string UseEstimateForFetchPostings::NAME("vespa.matching.use_estimate_for_fetch_postings"); +const bool UseEstimateForFetchPostings::DEFAULT_VALUE(false); +bool UseEstimateForFetchPostings::check(const Properties &props, bool fallback) { + return lookupBool(props, NAME, fallback); +} + } // namespace matching namespace softtimeout { diff --git a/searchlib/src/vespa/searchlib/fef/indexproperties.h b/searchlib/src/vespa/searchlib/fef/indexproperties.h index 7262e599235..16046709d7e 100644 --- a/searchlib/src/vespa/searchlib/fef/indexproperties.h +++ b/searchlib/src/vespa/searchlib/fef/indexproperties.h @@ -357,6 +357,16 @@ namespace matching { static bool check(const Properties &props) { return check(props, DEFAULT_VALUE); } static bool check(const Properties &props, bool fallback); }; + + /** + * When enabled, an estimate assuming uncorrelated query terms is used in Blueprint::fetchPostings when computing the hit rate for subsequent children.
+ **/ + struct UseEstimateForFetchPostings { + static const vespalib::string NAME; + static const bool DEFAULT_VALUE; + static bool check(const Properties &props) { return check(props, DEFAULT_VALUE); } + static bool check(const Properties &props, bool fallback); + }; } namespace softtimeout { diff --git a/searchlib/src/vespa/searchlib/fef/ranksetup.cpp b/searchlib/src/vespa/searchlib/fef/ranksetup.cpp index ed74386b2fa..1f30a3b59e9 100644 --- a/searchlib/src/vespa/searchlib/fef/ranksetup.cpp +++ b/searchlib/src/vespa/searchlib/fef/ranksetup.cpp @@ -62,6 +62,7 @@ RankSetup::RankSetup(const BlueprintFactory &factory, const IIndexEnvironment &i _degradationAscendingOrder(false), _always_mark_phrase_expensive(false), _create_postinglist_when_non_strict(true), + _use_estimate_for_fetch_postings(false), _diversityAttribute(), _diversityMinGroups(1), _diversityCutoffFactor(10.0), @@ -137,6 +138,7 @@ RankSetup::configure() _mutateAllowQueryOverride = mutate::AllowQueryOverride::check(_indexEnv.getProperties()); _always_mark_phrase_expensive = matching::AlwaysMarkPhraseExpensive::check(_indexEnv.getProperties()); _create_postinglist_when_non_strict = matching::CreatePostingListWhenNonStrict::check(_indexEnv.getProperties()); + _use_estimate_for_fetch_postings = matching::UseEstimateForFetchPostings::check(_indexEnv.getProperties()); } void diff --git a/searchlib/src/vespa/searchlib/fef/ranksetup.h b/searchlib/src/vespa/searchlib/fef/ranksetup.h index d560614ce39..8e4a6c4246e 100644 --- a/searchlib/src/vespa/searchlib/fef/ranksetup.h +++ b/searchlib/src/vespa/searchlib/fef/ranksetup.h @@ -71,6 +71,7 @@ private: bool _degradationAscendingOrder; bool _always_mark_phrase_expensive; bool _create_postinglist_when_non_strict; + bool _use_estimate_for_fetch_postings; vespalib::string _diversityAttribute; uint32_t _diversityMinGroups; double _diversityCutoffFactor; @@ -224,6 +225,7 @@ public: } bool always_mark_phrase_expensive() const noexcept { return 
_always_mark_phrase_expensive; } bool create_postinglist_when_non_strict() const noexcept { return _create_postinglist_when_non_strict; } + bool use_estimate_for_fetch_postings() const noexcept { return _use_estimate_for_fetch_postings; } /** get number of hits to collect during graceful degradation in match phase */ uint32_t getDegradationMaxHits() const { return _degradationMaxHits; diff --git a/searchlib/src/vespa/searchlib/queryeval/blueprint.cpp b/searchlib/src/vespa/searchlib/queryeval/blueprint.cpp index 6bc125226de..1088decb8d6 100644 --- a/searchlib/src/vespa/searchlib/queryeval/blueprint.cpp +++ b/searchlib/src/vespa/searchlib/queryeval/blueprint.cpp @@ -519,10 +519,11 @@ IntermediateBlueprint::calculateState() const } double -IntermediateBlueprint::computeNextHitRate(const Blueprint & child, double hitRate) const +IntermediateBlueprint::computeNextHitRate(const Blueprint & child, double hit_rate, bool use_estimate) const { (void) child; - return hitRate; + (void) use_estimate; + return hit_rate; } bool @@ -625,7 +626,7 @@ IntermediateBlueprint::fetchPostings(const ExecuteInfo &execInfo) for (size_t i = 0; i < _children.size(); ++i) { Blueprint & child = *_children[i]; child.fetchPostings(ExecuteInfo::create(execInfo.isStrict() && inheritStrict(i), nextHitRate, execInfo)); - nextHitRate = computeNextHitRate(child, nextHitRate); + nextHitRate = computeNextHitRate(child, nextHitRate, execInfo.use_estimate_for_fetch_postings()); } } diff --git a/searchlib/src/vespa/searchlib/queryeval/blueprint.h b/searchlib/src/vespa/searchlib/queryeval/blueprint.h index a02cb7dd17f..a61d435ac25 100644 --- a/searchlib/src/vespa/searchlib/queryeval/blueprint.h +++ b/searchlib/src/vespa/searchlib/queryeval/blueprint.h @@ -113,7 +113,9 @@ public: _estimateHits = est.estHits; _estimateEmpty = est.empty; } + //TODO replace use of estimate by using empty/estHits directly and then have a real estimate here HitEstimate estimate() const noexcept { return {_estimateHits, 
_estimateEmpty}; } + double hit_ratio(uint32_t docid_limit) const noexcept { uint32_t total_hits = _estimateHits; uint32_t total_docs = std::max(total_hits, docid_limit); @@ -237,6 +239,8 @@ public: const Blueprint &root() const; double hit_ratio() const noexcept { return getState().hit_ratio(_docid_limit); } + // TODO Call getState().estimate() once it returns a normalized estimate; until then this is identical to hit_ratio() + double estimate() const noexcept { return getState().hit_ratio(_docid_limit); } virtual void fetchPostings(const ExecuteInfo &execInfo) = 0; virtual void freeze() = 0; @@ -318,7 +322,7 @@ private: bool infer_want_global_filter() const; size_t count_termwise_nodes(const UnpackInfo &unpack) const; - virtual double computeNextHitRate(const Blueprint & child, double hitRate) const; + virtual double computeNextHitRate(const Blueprint & child, double hit_rate, bool use_estimate) const; protected: // returns an empty collection if children have empty or diff --git a/searchlib/src/vespa/searchlib/queryeval/executeinfo.cpp b/searchlib/src/vespa/searchlib/queryeval/executeinfo.cpp index 27366a9b924..ac3dfcc3b20 100644 --- a/searchlib/src/vespa/searchlib/queryeval/executeinfo.cpp +++ b/searchlib/src/vespa/searchlib/queryeval/executeinfo.cpp @@ -4,7 +4,7 @@ namespace search::queryeval { -const ExecuteInfo ExecuteInfo::TRUE(true, 1.0, nullptr, true); -const ExecuteInfo ExecuteInfo::FALSE(false, 1.0, nullptr, true); +const ExecuteInfo ExecuteInfo::TRUE(true, 1.0, nullptr, true, true); +const ExecuteInfo ExecuteInfo::FALSE(false, 1.0, nullptr, true, true); } diff --git a/searchlib/src/vespa/searchlib/queryeval/executeinfo.h b/searchlib/src/vespa/searchlib/queryeval/executeinfo.h index 0095429e609..1d6cf7281ea 100644 --- a/searchlib/src/vespa/searchlib/queryeval/executeinfo.h +++ b/searchlib/src/vespa/searchlib/queryeval/executeinfo.h @@ -12,43 +12,50 @@ namespace search::queryeval { */ class ExecuteInfo { public: - ExecuteInfo() noexcept : ExecuteInfo(false, 1.0F, nullptr, true) { } + ExecuteInfo() 
noexcept : ExecuteInfo(false, 1.0F, nullptr, true, true) { } bool isStrict() const noexcept { return _strict; } bool create_postinglist_when_non_strict() const noexcept { return _create_postinglist_when_non_strict; } + bool use_estimate_for_fetch_postings() const noexcept { return _use_estimate_for_fetch_postings; } float hitRate() const noexcept { return _hitRate; } bool soft_doom() const noexcept { return _doom && _doom->soft_doom(); } const vespalib::Doom * getDoom() const { return _doom; } static const ExecuteInfo TRUE; static const ExecuteInfo FALSE; static ExecuteInfo create(bool strict, const ExecuteInfo & org) noexcept { - return {strict, org._hitRate, org.getDoom(), org.create_postinglist_when_non_strict()}; + return create(strict, org._hitRate, org); } static ExecuteInfo create(bool strict, float hitRate, const ExecuteInfo & org) noexcept { - return {strict, hitRate, org.getDoom(), org.create_postinglist_when_non_strict()}; + return {strict, hitRate, org.getDoom(), org.create_postinglist_when_non_strict(), org.use_estimate_for_fetch_postings()}; } - static ExecuteInfo create(bool strict, float hitRate, const vespalib::Doom * doom) noexcept { - return create(strict, hitRate, doom, true); - } - static ExecuteInfo create(bool strict, float hitRate, const vespalib::Doom * doom, bool postinglist_when_non_strict) noexcept { - return {strict, hitRate, doom, postinglist_when_non_strict}; + + static ExecuteInfo create(bool strict, float hitRate, const vespalib::Doom * doom, bool postinglist_when_non_strict, + bool use_estimate_for_fetch_postings) noexcept + { + return {strict, hitRate, doom, postinglist_when_non_strict, use_estimate_for_fetch_postings}; } static ExecuteInfo createForTest(bool strict) noexcept { return createForTest(strict, 1.0F); } static ExecuteInfo createForTest(bool strict, float hitRate) noexcept { - return create(strict, hitRate, nullptr); + return createForTest(strict, hitRate, nullptr); + } + static ExecuteInfo createForTest(bool strict, 
float hitRate, const vespalib::Doom * doom) noexcept { + return create(strict, hitRate, doom, true, true); } private: - ExecuteInfo(bool strict, float hitRate_in, const vespalib::Doom * doom, bool postinglist_when_non_strict) noexcept + ExecuteInfo(bool strict, float hitRate_in, const vespalib::Doom * doom, bool postinglist_when_non_strict, + bool use_estimate_for_fetch_postings) noexcept : _doom(doom), _hitRate(hitRate_in), _strict(strict), - _create_postinglist_when_non_strict(postinglist_when_non_strict) + _create_postinglist_when_non_strict(postinglist_when_non_strict), + _use_estimate_for_fetch_postings(use_estimate_for_fetch_postings) { } const vespalib::Doom * _doom; float _hitRate; bool _strict; bool _create_postinglist_when_non_strict; + bool _use_estimate_for_fetch_postings; }; } diff --git a/searchlib/src/vespa/searchlib/queryeval/intermediate_blueprints.cpp b/searchlib/src/vespa/searchlib/queryeval/intermediate_blueprints.cpp index 4d0656b421c..c4044ba3d00 100644 --- a/searchlib/src/vespa/searchlib/queryeval/intermediate_blueprints.cpp +++ b/searchlib/src/vespa/searchlib/queryeval/intermediate_blueprints.cpp @@ -270,19 +270,21 @@ AndBlueprint::createFilterSearch(bool strict, FilterConstraint constraint) const } double -AndBlueprint::computeNextHitRate(const Blueprint & child, double hitRate) const { - return hitRate * child.hit_ratio(); +AndBlueprint::computeNextHitRate(const Blueprint & child, double hit_rate, bool use_estimate) const { + double estimate = use_estimate ? child.estimate() : child.hit_ratio(); + return hit_rate * estimate; } double -OrBlueprint::computeNextHitRate(const Blueprint & child, double hitRate) const { +OrBlueprint::computeNextHitRate(const Blueprint & child, double hit_rate, bool use_estimate) const { // Avoid dropping hitRate to zero when meeting a conservatively high hitrate in a child. // Happens at least when using non fast-search attributes, and with AND nodes. 
constexpr double MIN_INVERSE_HIT_RATIO = 0.10; - double inverse_child_hit_ratio = 1.0 - child.hit_ratio(); - return (inverse_child_hit_ratio > MIN_INVERSE_HIT_RATIO) - ? hitRate * inverse_child_hit_ratio - : hitRate; + double estimate = use_estimate ? child.estimate() : child.hit_ratio(); + double inverse_child_estimate = 1.0 - estimate; + return (inverse_child_estimate > MIN_INVERSE_HIT_RATIO) + ? hit_rate * inverse_child_estimate + : hit_rate; } //----------------------------------------------------------------------------- diff --git a/searchlib/src/vespa/searchlib/queryeval/intermediate_blueprints.h b/searchlib/src/vespa/searchlib/queryeval/intermediate_blueprints.h index 6d8082b60f6..1d88b3b21eb 100644 --- a/searchlib/src/vespa/searchlib/queryeval/intermediate_blueprints.h +++ b/searchlib/src/vespa/searchlib/queryeval/intermediate_blueprints.h @@ -54,7 +54,7 @@ public: SearchIterator::UP createFilterSearch(bool strict, FilterConstraint constraint) const override; private: - double computeNextHitRate(const Blueprint & child, double hitRate) const override; + double computeNextHitRate(const Blueprint & child, double hit_rate, bool use_estimate) const override; }; //----------------------------------------------------------------------------- @@ -78,7 +78,7 @@ public: SearchIterator::UP createFilterSearch(bool strict, FilterConstraint constraint) const override; private: - double computeNextHitRate(const Blueprint & child, double hitRate) const override; + double computeNextHitRate(const Blueprint & child, double hit_rate, bool use_estimate) const override; uint8_t calculate_cost_tier() const override; }; diff --git a/searchlib/src/vespa/searchlib/queryeval/same_element_blueprint.cpp b/searchlib/src/vespa/searchlib/queryeval/same_element_blueprint.cpp index c93cef47c27..eb1d217ad6a 100644 --- a/searchlib/src/vespa/searchlib/queryeval/same_element_blueprint.cpp +++ b/searchlib/src/vespa/searchlib/queryeval/same_element_blueprint.cpp @@ -28,7 +28,7 @@ 
SameElementBlueprint::~SameElementBlueprint() = default;
 FieldSpec
 SameElementBlueprint::getNextChildField(const vespalib::string &field_name, uint32_t field_id)
 {
-    return FieldSpec(field_name, field_id, _layout.allocTermField(field_id), false);
+    return {field_name, field_id, _layout.allocTermField(field_id), false};
 }
 
 void
@@ -60,11 +60,17 @@ SameElementBlueprint::fetchPostings(const ExecuteInfo &execInfo)
 {
     if (_terms.empty()) return;
     _terms[0]->fetchPostings(execInfo);
-    double hit_rate = execInfo.hitRate() * _terms[0]->hit_ratio();
+    // The first term is fetched with the caller's ExecuteInfo; every later term is fetched
+    // non-strict with the hit rate that remains after the terms before it.
+    // Flag on => estimate(), flag off => hit_ratio(), matching AndBlueprint::computeNextHitRate.
+    double estimate = execInfo.use_estimate_for_fetch_postings() ? _terms[0]->estimate() : _terms[0]->hit_ratio();
+    double hit_rate = execInfo.hitRate() * estimate;
     for (size_t i = 1; i < _terms.size(); ++i) {
         Blueprint & term = *_terms[i];
         term.fetchPostings(ExecuteInfo::create(false, hit_rate, execInfo));
-        hit_rate = hit_rate * term.hit_ratio();
+        // Scale by the term that was just fetched, not by _terms[0] again.
+        estimate = execInfo.use_estimate_for_fetch_postings() ? term.estimate() : term.hit_ratio();
+        hit_rate = hit_rate * estimate;
     }
 }
 
|