diff options
9 files changed, 115 insertions, 208 deletions
diff --git a/searchcore/src/vespa/searchcore/proton/documentmetastore/lid_allocator.cpp b/searchcore/src/vespa/searchcore/proton/documentmetastore/lid_allocator.cpp index 9e9199bc8ba..5a58aa869e0 100644 --- a/searchcore/src/vespa/searchcore/proton/documentmetastore/lid_allocator.cpp +++ b/searchcore/src/vespa/searchcore/proton/documentmetastore/lid_allocator.cpp @@ -13,6 +13,7 @@ LOG_SETUP(".proton.documentmetastore.lid_allocator"); using search::fef::TermFieldMatchDataArray; using search::queryeval::Blueprint; +using search::queryeval::FlowStats; using search::queryeval::FullSearch; using search::queryeval::SearchIterator; using search::queryeval::SimpleLeafBlueprint; @@ -222,8 +223,9 @@ public: setEstimate(HitEstimate(_activeLids.size(), false)); } - double calculate_relative_estimate() const final { - return abs_to_rel_est(getState().estimate().estHits, get_docid_limit()); + FlowStats calculate_flow_stats(uint32_t docid_limit) const override { + auto est = abs_to_rel_est(getState().estimate().estHits, docid_limit); + return {est, 1.0, est}; } bool isWhiteList() const noexcept final { return true; } diff --git a/searchlib/src/tests/queryeval/blueprint/blueprint_test.cpp b/searchlib/src/tests/queryeval/blueprint/blueprint_test.cpp index 90452f1d12b..51164427690 100644 --- a/searchlib/src/tests/queryeval/blueprint/blueprint_test.cpp +++ b/searchlib/src/tests/queryeval/blueprint/blueprint_test.cpp @@ -23,14 +23,10 @@ class MyOr : public IntermediateBlueprint { private: public: - double calculate_relative_estimate() const final { - return OrFlow::estimate_of(get_children()); - } - double calculate_cost() const final { - return OrFlow::cost_of(get_children(), false); - } - double calculate_strict_cost() const final { - return OrFlow::cost_of(get_children(), true); + FlowStats calculate_flow_stats(uint32_t) const final { + return {OrFlow::estimate_of(get_children()), + OrFlow::cost_of(get_children(), false), + OrFlow::cost_of(get_children(), true)}; } HitEstimate combine(const std::vector<HitEstimate> &data) const override { return max(data); @@ -798,7 +794,7 @@ TEST("requireThatDocIdLimitInjectionWorks") TEST("Control object sizes") { EXPECT_EQUAL(32u, sizeof(Blueprint::State)); EXPECT_EQUAL(56u, sizeof(Blueprint)); - EXPECT_EQUAL(96u, sizeof(LeafBlueprint)); + EXPECT_EQUAL(88u, sizeof(LeafBlueprint)); } TEST_MAIN() { diff --git a/searchlib/src/tests/queryeval/blueprint/mysearch.h b/searchlib/src/tests/queryeval/blueprint/mysearch.h index 6eb27364c2b..79c9885bb7d 100644 --- a/searchlib/src/tests/queryeval/blueprint/mysearch.h +++ b/searchlib/src/tests/queryeval/blueprint/mysearch.h @@ -117,8 +117,15 @@ public: MyLeaf() : SimpleLeafBlueprint() {} MyLeaf(FieldSpecBaseList fields) : SimpleLeafBlueprint(std::move(fields)) {} void set_cost(double value) noexcept { _cost = value; } - double calculate_cost() const override { return _cost; } - + FlowStats calculate_flow_stats(uint32_t docid_limit) const override { + double rel_est = abs_to_rel_est(getState().estimate().estHits, docid_limit); + if (rel_est > 0.9) { + return {0.5, _cost, _cost}; + } else { + return {rel_est, _cost, _cost * rel_est}; + } + } + MyLeaf &estimate(uint32_t hits, bool empty = false) { setEstimate(HitEstimate(hits, empty)); return *this; diff --git a/searchlib/src/tests/queryeval/filter_search/filter_search_test.cpp b/searchlib/src/tests/queryeval/filter_search/filter_search_test.cpp index 4fc8922b9a3..ca450c6d712 100644 --- a/searchlib/src/tests/queryeval/filter_search/filter_search_test.cpp +++ b/searchlib/src/tests/queryeval/filter_search/filter_search_test.cpp @@ -47,9 +47,7 @@ concept ChildCollector = requires(T a, std::unique_ptr<Blueprint> bp) { // inherit Blueprint to capture the default filter factory struct DefaultBlueprint : Blueprint { - double calculate_relative_estimate() const override { abort(); } - double calculate_cost() const override { abort(); } - double calculate_strict_cost() const override { abort(); } + FlowStats calculate_flow_stats(uint32_t) const override { abort(); } void optimize(Blueprint* &, OptimizePass) override { abort(); } void sort(bool, bool) override { abort(); } const State &getState() const override { abort(); } diff --git a/searchlib/src/vespa/searchlib/queryeval/blueprint.cpp b/searchlib/src/vespa/searchlib/queryeval/blueprint.cpp index 2b9b45c990e..7f34bc82c03 100644 --- a/searchlib/src/vespa/searchlib/queryeval/blueprint.cpp +++ b/searchlib/src/vespa/searchlib/queryeval/blueprint.cpp @@ -118,9 +118,7 @@ Blueprint::State::~State() = default; Blueprint::Blueprint() noexcept : _parent(nullptr), - _relative_estimate(0.0), - _cost(0.0), - _strict_cost(0.0), + _flow_stats(0.0, 0.0, 0.0), _sourceId(0xffffffff), _docid_limit(0), _frozen(false) @@ -357,9 +355,9 @@ Blueprint::visitMembers(vespalib::ObjectVisitor &visitor) const visitor.visitInt("tree_size", state.tree_size()); visitor.visitBool("allow_termwise_eval", state.allow_termwise_eval()); visitor.closeStruct(); - visitor.visitFloat("relative_estimate", _relative_estimate); - visitor.visitFloat("cost", _cost); - visitor.visitFloat("strict_cost", _strict_cost); + visitor.visitFloat("relative_estimate", estimate()); + visitor.visitFloat("cost", cost()); + visitor.visitFloat("strict_cost", strict_cost()); visitor.visitInt("sourceId", _sourceId); visitor.visitInt("docid_limit", _docid_limit); } @@ -558,9 +556,7 @@ IntermediateBlueprint::optimize(Blueprint* &self, OptimizePass pass) } optimize_self(pass); if (pass == OptimizePass::LAST) { - set_relative_estimate(calculate_relative_estimate()); - set_cost(calculate_cost()); - set_strict_cost(calculate_strict_cost()); + update_flow_stats(get_docid_limit()); } maybe_eliminate_self(self, get_replacement()); } @@ -718,34 +714,20 @@ IntermediateBlueprint::calculateUnpackInfo(const fef::MatchData & md) const //----------------------------------------------------------------------------- -double -LeafBlueprint::calculate_relative_estimate() const +FlowStats +LeafBlueprint::calculate_flow_stats(uint32_t docid_limit) const { - double rel_est = abs_to_rel_est(_state.estimate().estHits, get_docid_limit()); + double rel_est = abs_to_rel_est(_state.estimate().estHits, docid_limit); if (rel_est > 0.9) { // Assume we do not really know how much we are matching when // we claim to match 'everything'. Also assume we are not able // to skip documents efficiently when strict. - _can_skip = false; - return 0.5; + return {0.5, 1.0, 1.0}; } else { - _can_skip = true; - return rel_est; + return {rel_est, 1.0, rel_est}; } } -double -LeafBlueprint::calculate_cost() const -{ - return 1.0; -} - -double -LeafBlueprint::calculate_strict_cost() const -{ - return _can_skip ? estimate() * cost() : cost(); -} - void LeafBlueprint::fetchPostings(const ExecuteInfo &) { @@ -780,9 +762,7 @@ LeafBlueprint::optimize(Blueprint* &self, OptimizePass pass) assert(self == this); optimize_self(pass); if (pass == OptimizePass::LAST) { - set_relative_estimate(calculate_relative_estimate()); - set_cost(calculate_cost()); - set_strict_cost(calculate_strict_cost()); + update_flow_stats(get_docid_limit()); } maybe_eliminate_self(self, get_replacement()); } diff --git a/searchlib/src/vespa/searchlib/queryeval/blueprint.h b/searchlib/src/vespa/searchlib/queryeval/blueprint.h index e080d667dfa..20606c713a5 100644 --- a/searchlib/src/vespa/searchlib/queryeval/blueprint.h +++ b/searchlib/src/vespa/searchlib/queryeval/blueprint.h @@ -2,6 +2,7 @@ #pragma once +#include "flow.h" #include "field_spec.h" #include "unpackinfo.h" #include "executeinfo.h" @@ -178,9 +179,7 @@ public: private: Blueprint *_parent; - double _relative_estimate; - double _cost; - double _strict_cost; + FlowStats _flow_stats; uint32_t _sourceId; uint32_t _docid_limit; bool _frozen; @@ -196,10 +195,6 @@ protected: _frozen = true; } - void set_relative_estimate(double value) noexcept { _relative_estimate = value; } - void set_cost(double value) noexcept { _cost = value; } - void set_strict_cost(double value) noexcept { _strict_cost = value; } - public: class IPredicate { public: @@ -259,25 +254,31 @@ public: double hit_ratio() const { return getState().hit_ratio(_docid_limit); } // The flow statistics for a blueprint is calculated during the - // LAST optimize pass (just prior to sorting). The relative - // estimate may be used to calculate the costs and the non-strict - // cost may be used to calculate the strict cost. After being + // LAST optimize pass (just prior to sorting). After being // calculated, each value is available through a simple accessor - // function. Note that these values may not be available for - // blueprints used inside complex leafs (this case will probably - // be solved using custom flow adapters that has knowledge of - // docid limit). + // function. Since the optimize process is performed bottom-up, a + // blueprint can expect all children to already have these values + // calculated when the calculate_flow_stats function is called. + // + // Note that values are not automatically available for blueprints + // used inside complex leafs since they are not part of the tree + // seen by optimize. When the calculate_flow_stats function is + // called on a complex leaf, it can call the update_flow_stats + // function directly (the function that is normally called by + // optimize) on interal blueprints to make these values available + // before using them to calculate its own flow stats. // // 'estimate': relative estimate in the range [0,1] // 'cost': per-document cost of non-strict evaluation // 'strict_cost': per-document cost of strict evaluation - double estimate() const noexcept { return _relative_estimate; } - double cost() const noexcept { return _cost; } - double strict_cost() const noexcept { return _strict_cost; } - virtual double calculate_relative_estimate() const = 0; - virtual double calculate_cost() const = 0; - virtual double calculate_strict_cost() const = 0; - + double estimate() const noexcept { return _flow_stats.estimate; } + double cost() const noexcept { return _flow_stats.cost; } + double strict_cost() const noexcept { return _flow_stats.strict_cost; } + virtual FlowStats calculate_flow_stats(uint32_t docid_limit) const = 0; + void update_flow_stats(uint32_t docid_limit) { + _flow_stats = calculate_flow_stats(docid_limit); + } + virtual void fetchPostings(const ExecuteInfo &execInfo) = 0; virtual void freeze() = 0; bool frozen() const { return _frozen; } @@ -417,7 +418,6 @@ class LeafBlueprint : public Blueprint { private: State _state; - mutable bool _can_skip = true; protected: void optimize(Blueprint* &self, OptimizePass pass) final; void sort(bool strict, bool sort_by_cost) override; @@ -453,9 +453,7 @@ protected: public: ~LeafBlueprint() override = default; const State &getState() const final { return _state; } - double calculate_relative_estimate() const override; - double calculate_cost() const override; - double calculate_strict_cost() const override; + FlowStats calculate_flow_stats(uint32_t docid_limit) const override; void fetchPostings(const ExecuteInfo &execInfo) override; void freeze() final; SearchIteratorUP createSearch(fef::MatchData &md, bool strict) const override; diff --git a/searchlib/src/vespa/searchlib/queryeval/flow.h b/searchlib/src/vespa/searchlib/queryeval/flow.h index b90321581b5..01a5922d864 100644 --- a/searchlib/src/vespa/searchlib/queryeval/flow.h +++ b/searchlib/src/vespa/searchlib/queryeval/flow.h @@ -10,6 +10,14 @@ namespace search::queryeval { +struct FlowStats { + double estimate; + double cost; + double strict_cost; + constexpr FlowStats(double estimate_in, double cost_in, double strict_cost_in) noexcept + : estimate(estimate_in), cost(cost_in), strict_cost(strict_cost_in) {} +}; + namespace flow { // the default adapter expects the shape of std::unique_ptr<Blueprint> diff --git a/searchlib/src/vespa/searchlib/queryeval/intermediate_blueprints.cpp b/searchlib/src/vespa/searchlib/queryeval/intermediate_blueprints.cpp index 8cabe189b0e..089351b4ae5 100644 --- a/searchlib/src/vespa/searchlib/queryeval/intermediate_blueprints.cpp +++ b/searchlib/src/vespa/searchlib/queryeval/intermediate_blueprints.cpp @@ -1,7 +1,6 @@ // Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. #include "intermediate_blueprints.h" -#include "flow.h" #include "andnotsearch.h" #include "andsearch.h" #include "orsearch.h" @@ -86,22 +85,12 @@ need_normal_features_for_children(const IntermediateBlueprint &blueprint, fef::M //----------------------------------------------------------------------------- -double -AndNotBlueprint::calculate_relative_estimate() const -{ - return AndNotFlow::estimate_of(get_children()); -} - -double -AndNotBlueprint::calculate_cost() const -{ - return AndNotFlow::cost_of(get_children(), false); -} - -double -AndNotBlueprint::calculate_strict_cost() const +FlowStats +AndNotBlueprint::calculate_flow_stats(uint32_t) const { - return AndNotFlow::cost_of(get_children(), true); + return {AndNotFlow::estimate_of(get_children()), + AndNotFlow::cost_of(get_children(), false), + AndNotFlow::cost_of(get_children(), true)}; } Blueprint::HitEstimate @@ -218,19 +207,11 @@ AndNotBlueprint::createFilterSearch(bool strict, FilterConstraint constraint) co //----------------------------------------------------------------------------- -double -AndBlueprint::calculate_relative_estimate() const { - return AndFlow::estimate_of(get_children()); -} - -double -AndBlueprint::calculate_cost() const { - return AndFlow::cost_of(get_children(), false); -} - -double -AndBlueprint::calculate_strict_cost() const { - return AndFlow::cost_of(get_children(), true); +FlowStats +AndBlueprint::calculate_flow_stats(uint32_t) const { + return {AndFlow::estimate_of(get_children()), + AndFlow::cost_of(get_children(), false), + AndFlow::cost_of(get_children(), true)}; } Blueprint::HitEstimate @@ -332,19 +313,11 @@ OrBlueprint::computeNextHitRate(const Blueprint & child, double hit_rate) const OrBlueprint::~OrBlueprint() = default; -double -OrBlueprint::calculate_relative_estimate() const { - return OrFlow::estimate_of(get_children()); -} - -double -OrBlueprint::calculate_cost() const { - return OrFlow::cost_of(get_children(), false); -} - -double -OrBlueprint::calculate_strict_cost() const { - return OrFlow::cost_of(get_children(), true); +FlowStats +OrBlueprint::calculate_flow_stats(uint32_t) const { + return {OrFlow::estimate_of(get_children()), + OrFlow::cost_of(get_children(), false), + OrFlow::cost_of(get_children(), true)}; } Blueprint::HitEstimate @@ -442,21 +415,13 @@ OrBlueprint::calculate_cost_tier() const //----------------------------------------------------------------------------- WeakAndBlueprint::~WeakAndBlueprint() = default; -double -WeakAndBlueprint::calculate_relative_estimate() const { +FlowStats +WeakAndBlueprint::calculate_flow_stats(uint32_t docid_limit) const { double child_est = OrFlow::estimate_of(get_children()); - double my_est = abs_to_rel_est(_n, get_docid_limit()); - return std::min(my_est, child_est); -} - -double -WeakAndBlueprint::calculate_cost() const { - return OrFlow::cost_of(get_children(), false); -} - -double -WeakAndBlueprint::calculate_strict_cost() const { - return OrFlow::cost_of(get_children(), true); + double my_est = abs_to_rel_est(_n, docid_limit); + return {std::min(my_est, child_est), + OrFlow::cost_of(get_children(), false), + OrFlow::cost_of(get_children(), true)}; } Blueprint::HitEstimate @@ -518,19 +483,12 @@ WeakAndBlueprint::createFilterSearch(bool strict, FilterConstraint constraint) c //----------------------------------------------------------------------------- -double -NearBlueprint::calculate_relative_estimate() const { - return AndFlow::estimate_of(get_children()); -} - -double -NearBlueprint::calculate_cost() const { - return AndFlow::cost_of(get_children(), false) + childCnt() * estimate(); -} - -double -NearBlueprint::calculate_strict_cost() const { - return AndFlow::cost_of(get_children(), true) + childCnt() * estimate(); +FlowStats +NearBlueprint::calculate_flow_stats(uint32_t) const { + double est = AndFlow::estimate_of(get_children()); + return {est, + AndFlow::cost_of(get_children(), false) + childCnt() * est, + AndFlow::cost_of(get_children(), true) + childCnt() * est}; } Blueprint::HitEstimate @@ -590,19 +548,12 @@ NearBlueprint::createFilterSearch(bool strict, FilterConstraint constraint) cons //----------------------------------------------------------------------------- -double -ONearBlueprint::calculate_relative_estimate() const { - return AndFlow::estimate_of(get_children()); -} - -double -ONearBlueprint::calculate_cost() const { - return AndFlow::cost_of(get_children(), false) + childCnt() * estimate(); -} - -double -ONearBlueprint::calculate_strict_cost() const { - return AndFlow::cost_of(get_children(), true) + childCnt() * estimate(); +FlowStats +ONearBlueprint::calculate_flow_stats(uint32_t) const { + double est = AndFlow::estimate_of(get_children()); + return {est, + AndFlow::cost_of(get_children(), false) + childCnt() * est, + AndFlow::cost_of(get_children(), true) + childCnt() * est}; } Blueprint::HitEstimate @@ -660,19 +611,14 @@ ONearBlueprint::createFilterSearch(bool strict, FilterConstraint constraint) con //----------------------------------------------------------------------------- -double -RankBlueprint::calculate_relative_estimate() const { - return (childCnt() == 0) ? 0.0 : getChild(0).estimate(); -} - -double -RankBlueprint::calculate_cost() const { - return (childCnt() == 0) ? 0.0 : getChild(0).cost(); -} - -double -RankBlueprint::calculate_strict_cost() const { - return (childCnt() == 0) ? 0.0 : getChild(0).strict_cost(); +FlowStats +RankBlueprint::calculate_flow_stats(uint32_t) const { + if (childCnt() == 0) { + return {0.0, 0.0, 0.0}; + } + return {getChild(0).estimate(), + getChild(0).cost(), + getChild(0).strict_cost()}; } Blueprint::HitEstimate @@ -766,27 +712,15 @@ SourceBlenderBlueprint::SourceBlenderBlueprint(const ISourceSelector &selector) SourceBlenderBlueprint::~SourceBlenderBlueprint() = default; -double -SourceBlenderBlueprint::calculate_relative_estimate() const { - return OrFlow::estimate_of(get_children()); -} - -double -SourceBlenderBlueprint::calculate_cost() const { +FlowStats +SourceBlenderBlueprint::calculate_flow_stats(uint32_t) const { double my_cost = 0.0; + double my_strict_cost = 0.0; for (const auto &child: get_children()) { my_cost = std::max(my_cost, child->cost()); + my_strict_cost = std::max(my_strict_cost, child->strict_cost()); } - return my_cost; -} - -double -SourceBlenderBlueprint::calculate_strict_cost() const { - double my_cost = 0.0; - for (const auto &child: get_children()) { - my_cost = std::max(my_cost, child->strict_cost()); - } - return my_cost; + return {OrFlow::estimate_of(get_children()), my_cost, my_strict_cost}; } Blueprint::HitEstimate diff --git a/searchlib/src/vespa/searchlib/queryeval/intermediate_blueprints.h b/searchlib/src/vespa/searchlib/queryeval/intermediate_blueprints.h index 368cbd35c69..1da70b4fa70 100644 --- a/searchlib/src/vespa/searchlib/queryeval/intermediate_blueprints.h +++ b/searchlib/src/vespa/searchlib/queryeval/intermediate_blueprints.h @@ -15,9 +15,7 @@ class AndNotBlueprint : public IntermediateBlueprint { public: bool supports_termwise_children() const override { return true; } - double calculate_relative_estimate() const final; - double calculate_cost() const final; - double calculate_strict_cost() const final; + FlowStats calculate_flow_stats(uint32_t docid_limit) const final; HitEstimate combine(const std::vector<HitEstimate> &data) const override; FieldSpecBaseList exposeFields() const override; void optimize_self(OptimizePass pass) override; @@ -44,9 +42,7 @@ class AndBlueprint : public IntermediateBlueprint { public: bool supports_termwise_children() const override { return true; } - double calculate_relative_estimate() const final; - double calculate_cost() const final; - double calculate_strict_cost() const final; + FlowStats calculate_flow_stats(uint32_t docid_limit) const final; HitEstimate combine(const std::vector<HitEstimate> &data) const override; FieldSpecBaseList exposeFields() const override; void optimize_self(OptimizePass pass) override; @@ -71,9 +67,7 @@ class OrBlueprint : public IntermediateBlueprint public: ~OrBlueprint() override; bool supports_termwise_children() const override { return true; } - double calculate_relative_estimate() const final; - double calculate_cost() const final; - double calculate_strict_cost() const final; + FlowStats calculate_flow_stats(uint32_t docid_limit) const final; HitEstimate combine(const std::vector<HitEstimate> &data) const override; FieldSpecBaseList exposeFields() const override; void optimize_self(OptimizePass pass) override; @@ -100,9 +94,7 @@ private: std::vector<uint32_t> _weights; public: - double calculate_relative_estimate() const final; - double calculate_cost() const final; - double calculate_strict_cost() const final; + FlowStats calculate_flow_stats(uint32_t docid_limit) const final; HitEstimate combine(const std::vector<HitEstimate> &data) const override; FieldSpecBaseList exposeFields() const override; void sort(Children &children, bool strict, bool sort_on_cost) const override; @@ -132,9 +124,7 @@ private: uint32_t _window; public: - double calculate_relative_estimate() const final; - double calculate_cost() const final; - double calculate_strict_cost() const final; + FlowStats calculate_flow_stats(uint32_t docid_limit) const final; HitEstimate combine(const std::vector<HitEstimate> &data) const override; FieldSpecBaseList exposeFields() const override; void sort(Children &children, bool strict, bool sort_by_cost) const override; @@ -156,9 +146,7 @@ private: uint32_t _window; public: - double calculate_relative_estimate() const final; - double calculate_cost() const final; - double calculate_strict_cost() const final; + FlowStats calculate_flow_stats(uint32_t docid_limit) const final; HitEstimate combine(const std::vector<HitEstimate> &data) const override; FieldSpecBaseList exposeFields() const override; void sort(Children &children, bool strict, bool sort_by_cost) const override; @@ -177,9 +165,7 @@ public: class RankBlueprint final : public IntermediateBlueprint { public: - double calculate_relative_estimate() const final; - double calculate_cost() const final; - double calculate_strict_cost() const final; + FlowStats calculate_flow_stats(uint32_t docid_limit) const final; HitEstimate combine(const std::vector<HitEstimate> &data) const override; FieldSpecBaseList exposeFields() const override; void optimize_self(OptimizePass pass) override; @@ -207,9 +193,7 @@ private: public: explicit SourceBlenderBlueprint(const ISourceSelector &selector) noexcept; ~SourceBlenderBlueprint() override; - double calculate_relative_estimate() const final; - double calculate_cost() const final; - double calculate_strict_cost() const final; + FlowStats calculate_flow_stats(uint32_t docid_limit) const final; HitEstimate combine(const std::vector<HitEstimate> &data) const override; FieldSpecBaseList exposeFields() const override; void sort(Children &children, bool strict, bool sort_by_cost) const override; |