From fc17214017585ec294e1abccaa79eb0909eddaec Mon Sep 17 00:00:00 2001
From: Håvard Pettersen <havardpe@yahooinc.com>
Date: Fri, 19 Apr 2024 10:26:38 +0000
Subject: estimate actual cost

---
 .../src/vespa/searchlib/queryeval/blueprint.cpp    | 43 ++++++++++++++++++++++
 .../src/vespa/searchlib/queryeval/blueprint.h      | 17 +++++++++
 searchlib/src/vespa/searchlib/queryeval/flow.h     | 25 +++++++++++--
 .../src/vespa/searchlib/queryeval/flow_tuning.h    |  7 ++--
 .../queryeval/intermediate_blueprints.cpp          | 20 ++++++++++
 .../searchlib/queryeval/intermediate_blueprints.h  |  4 ++
 6 files changed, 109 insertions(+), 7 deletions(-)

(limited to 'searchlib')

diff --git a/searchlib/src/vespa/searchlib/queryeval/blueprint.cpp b/searchlib/src/vespa/searchlib/queryeval/blueprint.cpp
index d11ee25a7e5..7334db4b716 100644
--- a/searchlib/src/vespa/searchlib/queryeval/blueprint.cpp
+++ b/searchlib/src/vespa/searchlib/queryeval/blueprint.cpp
@@ -168,6 +168,31 @@ Blueprint::null_plan(InFlow in_flow, uint32_t docid_limit)
     sort(in_flow);
 }
 
+double
+Blueprint::estimate_actual_cost(InFlow in_flow) const noexcept
+{
+    double res = estimate_strict_cost_diff(in_flow);
+    if (in_flow.strict()) {
+        res += strict_cost();
+    } else {
+        res += in_flow.rate() * cost();
+    }
+    return res;
+}
+
+double
+Blueprint::estimate_strict_cost_diff(InFlow &in_flow) const noexcept
+{
+    if (in_flow.strict()) {
+        REQUIRE(strict());
+    } else if (strict()) {
+        double rate = in_flow.rate();
+        in_flow.force_strict();
+        return flow::strict_cost_diff(estimate(), rate);
+    }
+    return 0.0;
+}
+
 Blueprint::UP
 Blueprint::optimize(Blueprint::UP bp) {
     Blueprint *root = bp.release();
@@ -598,6 +623,24 @@ IntermediateBlueprint::should_do_termwise_eval(const UnpackInfo &unpack, double
     return (count_termwise_nodes(unpack) > 1);
 }
 
+double
+IntermediateBlueprint::estimate_self_cost(InFlow) const noexcept
+{
+    return 0.0;
+}
+
+double
+IntermediateBlueprint::estimate_actual_cost(InFlow in_flow) const noexcept
+{
+    double res = estimate_strict_cost_diff(in_flow);
+    auto cost_of = [](const auto &child, InFlow child_flow)noexcept{
+                       return child->estimate_actual_cost(child_flow);
+                   };
+    res += flow::actual_cost_of(flow::DefaultAdapter(), _children, my_flow(in_flow), cost_of);
+    res += estimate_self_cost(in_flow);
+    return res;
+}
+
 void
 IntermediateBlueprint::optimize(Blueprint* &self, OptimizePass pass)
 {
diff --git a/searchlib/src/vespa/searchlib/queryeval/blueprint.h b/searchlib/src/vespa/searchlib/queryeval/blueprint.h
index a443f34f856..a493c725407 100644
--- a/searchlib/src/vespa/searchlib/queryeval/blueprint.h
+++ b/searchlib/src/vespa/searchlib/queryeval/blueprint.h
@@ -313,6 +313,20 @@ public:
     // optimal ordering. Used for testing.
     void null_plan(InFlow in_flow, uint32_t docid_limit);
 
+    // Estimate the actual cost of evaluating the (sub-)query
+    // represented by this blueprint with the given in-flow. This
+    // function should be called after query planning has been
+    // performed. This function could be useful to predict very
+    // expensive queries, but the initial use-case is to understand
+    // query cost better in micro-benchmarks to improve low-level cost
+    // tuning.
+    virtual double estimate_actual_cost(InFlow in_flow) const noexcept;
+    // Estimate the change in cost caused by having a strict iterator
+    // with a non-strict in-flow. Note that this function might force
+    // the in_flow to be strict in order to align it with the
+    // strictness of this blueprint.
+    double estimate_strict_cost_diff(InFlow &in_flow) const noexcept;
+
     static Blueprint::UP optimize(Blueprint::UP bp);
     virtual void sort(InFlow in_flow) = 0;
     static Blueprint::UP optimize_and_sort(Blueprint::UP bp, InFlow in_flow, const Options &opts) {
@@ -482,6 +496,9 @@ public:
     void setDocIdLimit(uint32_t limit) noexcept final;
     void each_node_post_order(const std::function<void(Blueprint&)> &f) override;
 
+    // additional cost not attributed to the children flow (heap merge/unpack/etc)
+    virtual double estimate_self_cost(InFlow in_flow) const noexcept;
+    double estimate_actual_cost(InFlow in_flow) const noexcept override;
     void optimize(Blueprint* &self, OptimizePass pass) final;
     void sort(InFlow in_flow) override;
     void set_global_filter(const GlobalFilter &global_filter, double estimated_hit_ratio) override;
diff --git a/searchlib/src/vespa/searchlib/queryeval/flow.h b/searchlib/src/vespa/searchlib/queryeval/flow.h
index 0dc92a6cf88..be7b9031c00 100644
--- a/searchlib/src/vespa/searchlib/queryeval/flow.h
+++ b/searchlib/src/vespa/searchlib/queryeval/flow.h
@@ -122,9 +122,18 @@ struct MinOrCost {
     }
 };
 
+// The difference in cost of doing 'after' seeks instead of 'before'
+// seeks against a collection of strict iterators. This formula is
+// used to estimate the cost of forcing an iterator to be strict in a
+// non-strict context as well as calculating the change in cost when
+// changing the order of strict iterators.
+inline double strict_cost_diff(double before, double after) {
+    return 0.2 * (after - before);
+}
+
 // estimate the cost of evaluating a strict child in a non-strict context
 inline double forced_strict_cost(const FlowStats &stats, double rate) {
-    return 0.2 * (rate - stats.estimate) + stats.strict_cost;
+    return stats.strict_cost + strict_cost_diff(stats.estimate, rate);
 }
 
 // would it be faster to force a non-strict child to be strict
@@ -195,6 +204,16 @@ double ordered_cost_of(ADAPTER adapter, const T &children, F flow, bool allow_fo
     return total_cost;
 }
 
+static double actual_cost_of(auto adapter, const auto &children, auto flow, auto cost_of) noexcept {
+    double total_cost = 0.0;
+    for (const auto &child: children) {
+        double child_cost = cost_of(child, InFlow(flow.strict(), flow.flow()));
+        flow.update_cost(total_cost, child_cost);
+        flow.add(adapter.estimate(child));
+    }
+    return total_cost;
+}
+
 auto select_strict_and_child(auto adapter, const auto &children, size_t first, double est, bool native_strict) {
     double cost = 0.0;
     size_t best_idx = first;
@@ -218,10 +237,10 @@ auto select_strict_and_child(auto adapter, const auto &children, size_t first, d
                 break;
             }
             target = candidate;
-            my_diff += (0.2 * child.estimate - 0.2 * other.estimate);
+            my_diff += strict_cost_diff(other.estimate, child.estimate);
             if (candidate == 0 && native_strict) {
                 // the first iterator produces its own in-flow
-                my_diff += (0.2 * child.estimate - 0.2 * other.estimate);
+                my_diff += strict_cost_diff(other.estimate, child.estimate);
             }
             // note that 'my_diff' might overestimate the cost
             // (underestimate the benefit) of inserting 'child' before
diff --git a/searchlib/src/vespa/searchlib/queryeval/flow_tuning.h b/searchlib/src/vespa/searchlib/queryeval/flow_tuning.h
index e8a58bba9dc..ba066358a26 100644
--- a/searchlib/src/vespa/searchlib/queryeval/flow_tuning.h
+++ b/searchlib/src/vespa/searchlib/queryeval/flow_tuning.h
@@ -3,6 +3,7 @@
 #pragma once
 #include <cmath>
 #include <cstddef>
+#include "flow.h"
 
 namespace search::queryeval::flow {
 
@@ -63,17 +64,15 @@ inline double lookup_strict_cost(size_t num_indirections) {
  * Estimates the cost of evaluating an always strict iterator (e.g. btree) in a non-strict context.
  *
  * When the estimate and strict cost is low, this models the cost of checking whether
- * the seek docid matches the docid the iterator is already positioned at (the 0.2 factor).
+ * the seek docid matches the docid the iterator is already positioned at.
  *
  * The resulting non-strict cost is most accurate when the inflow is 1.0.
  * The resulting non-strict cost is highly underestimated when the inflow goes to 0.0.
  * It is important to have a better estimate at higher inflows,
  * as the latency (time) penalty is higher if choosing wrong.
- *
- * Note: This formula is equal to forced_strict_cost() in flow.h.
  */
 inline double non_strict_cost_of_strict_iterator(double estimate, double strict_cost) {
-    return 0.2 * (1.0 - estimate) + strict_cost;
+    return strict_cost + strict_cost_diff(estimate, 1.0);
 }
 
 // Strict cost of matching in a btree posting list (e.g. fast-search attribute or memory index field).
diff --git a/searchlib/src/vespa/searchlib/queryeval/intermediate_blueprints.cpp b/searchlib/src/vespa/searchlib/queryeval/intermediate_blueprints.cpp
index 2a0f1de0e44..33b249572f0 100644
--- a/searchlib/src/vespa/searchlib/queryeval/intermediate_blueprints.cpp
+++ b/searchlib/src/vespa/searchlib/queryeval/intermediate_blueprints.cpp
@@ -318,6 +318,11 @@ OrBlueprint::calculate_flow_stats(uint32_t) const {
             OrFlow::cost_of(get_children(), true) + flow::heap_cost(est, get_children().size())};
 }
 
+double
+OrBlueprint::estimate_self_cost(InFlow in_flow) const noexcept {
+    return in_flow.strict() ? flow::heap_cost(estimate(), get_children().size()) : 0.0;
+}
+
 Blueprint::HitEstimate
 OrBlueprint::combine(const std::vector<HitEstimate> &data) const
 {
@@ -431,6 +436,11 @@ WeakAndBlueprint::calculate_flow_stats(uint32_t docid_limit) const {
             OrFlow::cost_of(get_children(), true) + flow::heap_cost(est, get_children().size())};
 }
 
+double
+WeakAndBlueprint::estimate_self_cost(InFlow in_flow) const noexcept {
+    return in_flow.strict() ? flow::heap_cost(estimate(), get_children().size()) : 0.0;
+}
+
 Blueprint::HitEstimate
 WeakAndBlueprint::combine(const std::vector<HitEstimate> &data) const
 {
@@ -507,6 +517,11 @@ NearBlueprint::calculate_flow_stats(uint32_t) const {
             AndFlow::cost_of(get_children(), true) + childCnt() * est};
 }
 
+double
+NearBlueprint::estimate_self_cost(InFlow) const noexcept {
+    return childCnt() * estimate();
+}
+
 Blueprint::HitEstimate
 NearBlueprint::combine(const std::vector<HitEstimate> &data) const
 {
@@ -572,6 +587,11 @@ ONearBlueprint::calculate_flow_stats(uint32_t) const {
             AndFlow::cost_of(get_children(), true) + childCnt() * est};
 }
 
+double
+ONearBlueprint::estimate_self_cost(InFlow) const noexcept {
+    return childCnt() * estimate();
+}
+
 Blueprint::HitEstimate
 ONearBlueprint::combine(const std::vector<HitEstimate> &data) const
 {
diff --git a/searchlib/src/vespa/searchlib/queryeval/intermediate_blueprints.h b/searchlib/src/vespa/searchlib/queryeval/intermediate_blueprints.h
index dfed40f4d1b..ade4c9318e4 100644
--- a/searchlib/src/vespa/searchlib/queryeval/intermediate_blueprints.h
+++ b/searchlib/src/vespa/searchlib/queryeval/intermediate_blueprints.h
@@ -67,6 +67,7 @@ public:
     ~OrBlueprint() override;
     bool supports_termwise_children() const override { return true; }
     FlowStats calculate_flow_stats(uint32_t docid_limit) const final;
+    double estimate_self_cost(InFlow in_flow) const noexcept override;
     HitEstimate combine(const std::vector<HitEstimate> &data) const override;
     FieldSpecBaseList exposeFields() const override;
     void optimize_self(OptimizePass pass) override;
@@ -94,6 +95,7 @@ private:
     AnyFlow my_flow(InFlow in_flow) const override;
 public:
     FlowStats calculate_flow_stats(uint32_t docid_limit) const final;
+    double estimate_self_cost(InFlow in_flow) const noexcept override;
     HitEstimate combine(const std::vector<HitEstimate> &data) const override;
     FieldSpecBaseList exposeFields() const override;
     Blueprint::UP get_replacement() override;
@@ -125,6 +127,7 @@ private:
     AnyFlow my_flow(InFlow in_flow) const override;
 public:
     FlowStats calculate_flow_stats(uint32_t docid_limit) const final;
+    double estimate_self_cost(InFlow in_flow) const noexcept override;
     HitEstimate combine(const std::vector<HitEstimate> &data) const override;
     FieldSpecBaseList exposeFields() const override;
     void sort(Children &children, InFlow in_flow) const override;
@@ -147,6 +150,7 @@ private:
     AnyFlow my_flow(InFlow in_flow) const override;
 public:
     FlowStats calculate_flow_stats(uint32_t docid_limit) const final;
+    double estimate_self_cost(InFlow in_flow) const noexcept override;
     HitEstimate combine(const std::vector<HitEstimate> &data) const override;
     FieldSpecBaseList exposeFields() const override;
     void sort(Children &children, InFlow in_flow) const override;
-- 
cgit v1.2.3