diff options
author | Geir Storli <geirst@vespa.ai> | 2024-05-14 15:46:57 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2024-05-14 15:46:57 +0200 |
commit | c31ecc4676ac4723a9ec79d5daaac28338d6e2ac (patch) | |
tree | db5ab3e67628b0723af1b0a19de6c0b377bbd902 | |
parent | 7b08c66caac7d0fe336130d50bff5f14d0c686b2 (diff) | |
parent | abeb06cbdc28970d2c71b12487462abfcd611ccb (diff) |
Merge pull request #31201 from vespa-engine/havardpe/remove-actual-cost-estimates
remove code to estimate actual cost
6 files changed, 14 insertions, 107 deletions
diff --git a/searchlib/src/tests/queryeval/iterator_benchmark/iterator_benchmark_test.cpp b/searchlib/src/tests/queryeval/iterator_benchmark/iterator_benchmark_test.cpp index 96472200952..db0fe76b7af 100644 --- a/searchlib/src/tests/queryeval/iterator_benchmark/iterator_benchmark_test.cpp +++ b/searchlib/src/tests/queryeval/iterator_benchmark/iterator_benchmark_test.cpp @@ -26,6 +26,18 @@ using vespalib::make_string_short::fmt; const vespalib::string field_name = "myfield"; double budget_sec = 1.0; +double estimate_actual_cost(Blueprint &bp, InFlow in_flow) { + if (in_flow.strict()) { + assert(bp.strict()); + return bp.strict_cost(); + } else if (bp.strict()) { + auto stats = FlowStats::from(flow::DefaultAdapter(), &bp); + return flow::forced_strict_cost(stats, in_flow.rate()); + } else { + return bp.cost() * in_flow.rate(); + } +} + enum class PlanningAlgo { Order, Estimate, @@ -236,7 +248,7 @@ strict_search(BenchmarkBlueprintFactory& factory, uint32_t docid_limit, Planning timer.after(); } FlowStats flow(ctx.blueprint->estimate(), ctx.blueprint->cost(), ctx.blueprint->strict_cost()); - double actual_cost = ctx.blueprint->estimate_actual_cost(InFlow(true)); + double actual_cost = estimate_actual_cost(*ctx.blueprint, InFlow(true)); return {timer.min_time() * 1000.0, hits + 1, hits, flow, actual_cost, get_class_name(*ctx.iterator), factory.get_name(*ctx.blueprint)}; } @@ -270,7 +282,7 @@ non_strict_search(BenchmarkBlueprintFactory& factory, uint32_t docid_limit, doub timer.after(); } FlowStats flow(ctx.blueprint->estimate(), ctx.blueprint->cost(), ctx.blueprint->strict_cost()); - double actual_cost = ctx.blueprint->estimate_actual_cost(InFlow(filter_hit_ratio)); + double actual_cost = estimate_actual_cost(*ctx.blueprint, InFlow(filter_hit_ratio)); return {timer.min_time() * 1000.0, seeks, hits, flow, actual_cost, get_class_name(*ctx.iterator), factory.get_name(*ctx.blueprint)}; } diff --git a/searchlib/src/vespa/searchlib/queryeval/blueprint.cpp b/searchlib/src/vespa/searchlib/queryeval/blueprint.cpp index cfa165be067..412a5973ad8 100644 --- a/searchlib/src/vespa/searchlib/queryeval/blueprint.cpp +++ b/searchlib/src/vespa/searchlib/queryeval/blueprint.cpp @@ -169,31 +169,6 @@ Blueprint::null_plan(InFlow in_flow, uint32_t docid_limit) sort(in_flow); } -double -Blueprint::estimate_actual_cost(InFlow in_flow) const noexcept -{ - double res = estimate_strict_cost_diff(in_flow); - if (in_flow.strict()) { - res += strict_cost(); - } else { - res += in_flow.rate() * cost(); - } - return res; -} - -double -Blueprint::estimate_strict_cost_diff(InFlow &in_flow) const noexcept -{ - if (in_flow.strict()) { - REQUIRE(strict()); - } else if (strict()) { - double rate = in_flow.rate(); - in_flow.force_strict(); - return flow::strict_cost_diff(estimate(), rate); - } - return 0.0; -} - Blueprint::UP Blueprint::optimize(Blueprint::UP bp) { Blueprint *root = bp.release(); @@ -624,24 +599,6 @@ IntermediateBlueprint::should_do_termwise_eval(const UnpackInfo &unpack, double return (count_termwise_nodes(unpack) > 1); } -double -IntermediateBlueprint::estimate_self_cost(InFlow) const noexcept -{ - return 0.0; -} - -double -IntermediateBlueprint::estimate_actual_cost(InFlow in_flow) const noexcept -{ - double res = estimate_strict_cost_diff(in_flow); - auto cost_of = [](const auto &child, InFlow child_flow)noexcept{ - return child->estimate_actual_cost(child_flow); - }; - res += flow::actual_cost_of(flow::DefaultAdapter(), _children, my_flow(in_flow), cost_of); - res += estimate_self_cost(in_flow); - return res; -} - void IntermediateBlueprint::optimize(Blueprint* &self, OptimizePass pass) { diff --git a/searchlib/src/vespa/searchlib/queryeval/blueprint.h b/searchlib/src/vespa/searchlib/queryeval/blueprint.h index a493c725407..a443f34f856 100644 --- a/searchlib/src/vespa/searchlib/queryeval/blueprint.h +++ b/searchlib/src/vespa/searchlib/queryeval/blueprint.h @@ -313,20 +313,6 @@ public: // optimal ordering. Used for testing. void null_plan(InFlow in_flow, uint32_t docid_limit); - // Estimate the actual cost of evaluating the (sub-)query - // represented by this blueprint with the given in-flow. This - // function should be called after query planning has been - // performed. This function could be useful to predict very - // expensive queries, but the initial use-case is to understand - // query cost better in micro-benchmarks to improve low-level cost - // tuning. - virtual double estimate_actual_cost(InFlow in_flow) const noexcept; - // Estimate the change in cost caused by having a strict iterator - // with a non-strict in-flow. Note that this function might force - // the in_flow to be strict in order to align it with the - // strictness of this blueprint. - double estimate_strict_cost_diff(InFlow &in_flow) const noexcept; - static Blueprint::UP optimize(Blueprint::UP bp); virtual void sort(InFlow in_flow) = 0; static Blueprint::UP optimize_and_sort(Blueprint::UP bp, InFlow in_flow, const Options &opts) { @@ -496,9 +482,6 @@ public: void setDocIdLimit(uint32_t limit) noexcept final; void each_node_post_order(const std::function<void(Blueprint&)> &f) override; - // additional cost not attributed to the children flow (heap merge/unpack/etc) - virtual double estimate_self_cost(InFlow in_flow) const noexcept; - double estimate_actual_cost(InFlow in_flow) const noexcept override; void optimize(Blueprint* &self, OptimizePass pass) final; void sort(InFlow in_flow) override; void set_global_filter(const GlobalFilter &global_filter, double estimated_hit_ratio) override; diff --git a/searchlib/src/vespa/searchlib/queryeval/flow.h b/searchlib/src/vespa/searchlib/queryeval/flow.h index be7b9031c00..b7841dc2017 100644 --- a/searchlib/src/vespa/searchlib/queryeval/flow.h +++ b/searchlib/src/vespa/searchlib/queryeval/flow.h @@ -204,16 +204,6 @@ double ordered_cost_of(ADAPTER adapter, const T &children, F flow, bool allow_fo return total_cost; } -static double actual_cost_of(auto adapter, const auto &children, auto flow, auto cost_of) noexcept { - double total_cost = 0.0; - for (const auto &child: children) { - double child_cost = cost_of(child, InFlow(flow.strict(), flow.flow())); - flow.update_cost(total_cost, child_cost); - flow.add(adapter.estimate(child)); - } - return total_cost; -} - auto select_strict_and_child(auto adapter, const auto &children, size_t first, double est, bool native_strict) { double cost = 0.0; size_t best_idx = first; diff --git a/searchlib/src/vespa/searchlib/queryeval/intermediate_blueprints.cpp b/searchlib/src/vespa/searchlib/queryeval/intermediate_blueprints.cpp index b0b3b302e82..5b8fa79b8af 100644 --- a/searchlib/src/vespa/searchlib/queryeval/intermediate_blueprints.cpp +++ b/searchlib/src/vespa/searchlib/queryeval/intermediate_blueprints.cpp @@ -318,11 +318,6 @@ OrBlueprint::calculate_flow_stats(uint32_t) const { OrFlow::cost_of(get_children(), true) + flow::heap_cost(est, get_children().size())}; } -double -OrBlueprint::estimate_self_cost(InFlow in_flow) const noexcept { - return in_flow.strict() ? flow::heap_cost(estimate(), get_children().size()) : 0.0; -} - Blueprint::HitEstimate OrBlueprint::combine(const std::vector<HitEstimate> &data) const { @@ -436,11 +431,6 @@ WeakAndBlueprint::calculate_flow_stats(uint32_t docid_limit) const { OrFlow::cost_of(get_children(), true) + flow::heap_cost(est, get_children().size())}; } -double -WeakAndBlueprint::estimate_self_cost(InFlow in_flow) const noexcept { - return in_flow.strict() ? flow::heap_cost(estimate(), get_children().size()) : 0.0; -} - Blueprint::HitEstimate WeakAndBlueprint::combine(const std::vector<HitEstimate> &data) const { @@ -519,11 +509,6 @@ NearBlueprint::calculate_flow_stats(uint32_t) const { AndFlow::cost_of(get_children(), true) + childCnt() * est}; } -double -NearBlueprint::estimate_self_cost(InFlow) const noexcept { - return childCnt() * estimate(); -} - Blueprint::HitEstimate NearBlueprint::combine(const std::vector<HitEstimate> &data) const { @@ -589,11 +574,6 @@ ONearBlueprint::calculate_flow_stats(uint32_t) const { AndFlow::cost_of(get_children(), true) + childCnt() * est}; } -double -ONearBlueprint::estimate_self_cost(InFlow) const noexcept { - return childCnt() * estimate(); -} - Blueprint::HitEstimate ONearBlueprint::combine(const std::vector<HitEstimate> &data) const { @@ -762,16 +742,6 @@ SourceBlenderBlueprint::calculate_flow_stats(uint32_t) const { return {my_est, my_cost + 1.0, my_strict_cost + my_est}; } -double -SourceBlenderBlueprint::estimate_self_cost(InFlow in_flow) const noexcept -{ - if (in_flow.strict()) { - return estimate(); - } else { - return in_flow.rate(); - } -} - Blueprint::HitEstimate SourceBlenderBlueprint::combine(const std::vector<HitEstimate> &data) const { diff --git a/searchlib/src/vespa/searchlib/queryeval/intermediate_blueprints.h b/searchlib/src/vespa/searchlib/queryeval/intermediate_blueprints.h index f7eeace3e8b..913370caae1 100644 --- a/searchlib/src/vespa/searchlib/queryeval/intermediate_blueprints.h +++ b/searchlib/src/vespa/searchlib/queryeval/intermediate_blueprints.h @@ -67,7 +67,6 @@ public: ~OrBlueprint() override; bool supports_termwise_children() const override { return true; } FlowStats calculate_flow_stats(uint32_t docid_limit) const final; - double estimate_self_cost(InFlow in_flow) const noexcept override; HitEstimate combine(const std::vector<HitEstimate> &data) const override; FieldSpecBaseList exposeFields() const override; void optimize_self(OptimizePass pass) override; @@ -96,7 +95,6 @@ private: AnyFlow my_flow(InFlow in_flow) const override; public: FlowStats calculate_flow_stats(uint32_t docid_limit) const final; - double estimate_self_cost(InFlow in_flow) const noexcept override; HitEstimate combine(const std::vector<HitEstimate> &data) const override; FieldSpecBaseList exposeFields() const override; Blueprint::UP get_replacement() override; @@ -129,7 +127,6 @@ private: AnyFlow my_flow(InFlow in_flow) const override; public: FlowStats calculate_flow_stats(uint32_t docid_limit) const final; - double estimate_self_cost(InFlow in_flow) const noexcept override; HitEstimate combine(const std::vector<HitEstimate> &data) const override; FieldSpecBaseList exposeFields() const override; void sort(Children &children, InFlow in_flow) const override; @@ -152,7 +149,6 @@ private: AnyFlow my_flow(InFlow in_flow) const override; public: FlowStats calculate_flow_stats(uint32_t docid_limit) const final; - double estimate_self_cost(InFlow in_flow) const noexcept override; HitEstimate combine(const std::vector<HitEstimate> &data) const override; FieldSpecBaseList exposeFields() const override; void sort(Children &children, InFlow in_flow) const override; @@ -201,7 +197,6 @@ public: explicit SourceBlenderBlueprint(const ISourceSelector &selector) noexcept; ~SourceBlenderBlueprint() override; FlowStats calculate_flow_stats(uint32_t docid_limit) const final; - double estimate_self_cost(InFlow in_flow) const noexcept override; HitEstimate combine(const std::vector<HitEstimate> &data) const override; FieldSpecBaseList exposeFields() const override; void sort(Children &children, InFlow in_flow) const override; |