aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorHåvard Pettersen <havardpe@yahooinc.com>2024-04-19 10:26:38 +0000
committerHåvard Pettersen <havardpe@yahooinc.com>2024-04-25 15:14:14 +0000
commitfc17214017585ec294e1abccaa79eb0909eddaec (patch)
tree4932c2c6059dccb023dbac9e76ec9d2df0b39daf
parent3809a09b0e33ea2203f5165430631d4f1aa74adc (diff)
estimate actual cost
-rw-r--r--searchlib/src/vespa/searchlib/queryeval/blueprint.cpp43
-rw-r--r--searchlib/src/vespa/searchlib/queryeval/blueprint.h17
-rw-r--r--searchlib/src/vespa/searchlib/queryeval/flow.h25
-rw-r--r--searchlib/src/vespa/searchlib/queryeval/flow_tuning.h7
-rw-r--r--searchlib/src/vespa/searchlib/queryeval/intermediate_blueprints.cpp20
-rw-r--r--searchlib/src/vespa/searchlib/queryeval/intermediate_blueprints.h4
6 files changed, 109 insertions, 7 deletions
diff --git a/searchlib/src/vespa/searchlib/queryeval/blueprint.cpp b/searchlib/src/vespa/searchlib/queryeval/blueprint.cpp
index d11ee25a7e5..7334db4b716 100644
--- a/searchlib/src/vespa/searchlib/queryeval/blueprint.cpp
+++ b/searchlib/src/vespa/searchlib/queryeval/blueprint.cpp
@@ -168,6 +168,31 @@ Blueprint::null_plan(InFlow in_flow, uint32_t docid_limit)
sort(in_flow);
}
+double
+Blueprint::estimate_actual_cost(InFlow in_flow) const noexcept
+{
+ double res = estimate_strict_cost_diff(in_flow);
+ if (in_flow.strict()) {
+ res += strict_cost();
+ } else {
+ res += in_flow.rate() * cost();
+ }
+ return res;
+}
+
+double
+Blueprint::estimate_strict_cost_diff(InFlow &in_flow) const noexcept
+{
+ if (in_flow.strict()) {
+ REQUIRE(strict());
+ } else if (strict()) {
+ double rate = in_flow.rate();
+ in_flow.force_strict();
+ return flow::strict_cost_diff(estimate(), rate);
+ }
+ return 0.0;
+}
+
Blueprint::UP
Blueprint::optimize(Blueprint::UP bp) {
Blueprint *root = bp.release();
@@ -598,6 +623,24 @@ IntermediateBlueprint::should_do_termwise_eval(const UnpackInfo &unpack, double
return (count_termwise_nodes(unpack) > 1);
}
+double
+IntermediateBlueprint::estimate_self_cost(InFlow) const noexcept
+{
+ return 0.0;
+}
+
+double
+IntermediateBlueprint::estimate_actual_cost(InFlow in_flow) const noexcept
+{
+ double res = estimate_strict_cost_diff(in_flow);
+ auto cost_of = [](const auto &child, InFlow child_flow)noexcept{
+ return child->estimate_actual_cost(child_flow);
+ };
+ res += flow::actual_cost_of(flow::DefaultAdapter(), _children, my_flow(in_flow), cost_of);
+ res += estimate_self_cost(in_flow);
+ return res;
+}
+
void
IntermediateBlueprint::optimize(Blueprint* &self, OptimizePass pass)
{
diff --git a/searchlib/src/vespa/searchlib/queryeval/blueprint.h b/searchlib/src/vespa/searchlib/queryeval/blueprint.h
index a443f34f856..a493c725407 100644
--- a/searchlib/src/vespa/searchlib/queryeval/blueprint.h
+++ b/searchlib/src/vespa/searchlib/queryeval/blueprint.h
@@ -313,6 +313,20 @@ public:
// optimal ordering. Used for testing.
void null_plan(InFlow in_flow, uint32_t docid_limit);
+ // Estimate the actual cost of evaluating the (sub-)query
+ // represented by this blueprint with the given in-flow. This
+ // function should be called after query planning has been
+ // performed. This function could be useful to predict very
+ // expensive queries, but the initial use-case is to understand
+ // query cost better in micro-benchmarks to improve low-level cost
+ // tuning.
+ virtual double estimate_actual_cost(InFlow in_flow) const noexcept;
+ // Estimate the change in cost caused by having a strict iterator
+ // with a non-strict in-flow. Note that this function might force
+ // the in_flow to be strict in order to align it with the
+ // strictness of this blueprint.
+ double estimate_strict_cost_diff(InFlow &in_flow) const noexcept;
+
static Blueprint::UP optimize(Blueprint::UP bp);
virtual void sort(InFlow in_flow) = 0;
static Blueprint::UP optimize_and_sort(Blueprint::UP bp, InFlow in_flow, const Options &opts) {
@@ -482,6 +496,9 @@ public:
void setDocIdLimit(uint32_t limit) noexcept final;
void each_node_post_order(const std::function<void(Blueprint&)> &f) override;
+ // additional cost not attributed to the children flow (heap merge/unpack/etc)
+ virtual double estimate_self_cost(InFlow in_flow) const noexcept;
+ double estimate_actual_cost(InFlow in_flow) const noexcept override;
void optimize(Blueprint* &self, OptimizePass pass) final;
void sort(InFlow in_flow) override;
void set_global_filter(const GlobalFilter &global_filter, double estimated_hit_ratio) override;
diff --git a/searchlib/src/vespa/searchlib/queryeval/flow.h b/searchlib/src/vespa/searchlib/queryeval/flow.h
index 0dc92a6cf88..be7b9031c00 100644
--- a/searchlib/src/vespa/searchlib/queryeval/flow.h
+++ b/searchlib/src/vespa/searchlib/queryeval/flow.h
@@ -122,9 +122,18 @@ struct MinOrCost {
}
};
+// The difference in cost of doing 'after' seeks instead of 'before'
+// seeks against a collection of strict iterators. This formula is
+// used to estimate the cost of forcing an iterator to be strict in a
+// non-strict context as well as calculating the change in cost when
+// changing the order of strict iterators.
+inline double strict_cost_diff(double before, double after) {
+ return 0.2 * (after - before);
+}
+
// estimate the cost of evaluating a strict child in a non-strict context
inline double forced_strict_cost(const FlowStats &stats, double rate) {
- return 0.2 * (rate - stats.estimate) + stats.strict_cost;
+ return stats.strict_cost + strict_cost_diff(stats.estimate, rate);
}
// would it be faster to force a non-strict child to be strict
@@ -195,6 +204,16 @@ double ordered_cost_of(ADAPTER adapter, const T &children, F flow, bool allow_fo
return total_cost;
}
+static double actual_cost_of(auto adapter, const auto &children, auto flow, auto cost_of) noexcept {
+ double total_cost = 0.0;
+ for (const auto &child: children) {
+ double child_cost = cost_of(child, InFlow(flow.strict(), flow.flow()));
+ flow.update_cost(total_cost, child_cost);
+ flow.add(adapter.estimate(child));
+ }
+ return total_cost;
+}
+
auto select_strict_and_child(auto adapter, const auto &children, size_t first, double est, bool native_strict) {
double cost = 0.0;
size_t best_idx = first;
@@ -218,10 +237,10 @@ auto select_strict_and_child(auto adapter, const auto &children, size_t first, d
break;
}
target = candidate;
- my_diff += (0.2 * child.estimate - 0.2 * other.estimate);
+ my_diff += strict_cost_diff(other.estimate, child.estimate);
if (candidate == 0 && native_strict) {
// the first iterator produces its own in-flow
- my_diff += (0.2 * child.estimate - 0.2 * other.estimate);
+ my_diff += strict_cost_diff(other.estimate, child.estimate);
}
// note that 'my_diff' might overestimate the cost
// (underestimate the benefit) of inserting 'child' before
diff --git a/searchlib/src/vespa/searchlib/queryeval/flow_tuning.h b/searchlib/src/vespa/searchlib/queryeval/flow_tuning.h
index e8a58bba9dc..ba066358a26 100644
--- a/searchlib/src/vespa/searchlib/queryeval/flow_tuning.h
+++ b/searchlib/src/vespa/searchlib/queryeval/flow_tuning.h
@@ -3,6 +3,7 @@
#pragma once
#include <cmath>
#include <cstddef>
+#include "flow.h"
namespace search::queryeval::flow {
@@ -63,17 +64,15 @@ inline double lookup_strict_cost(size_t num_indirections) {
* Estimates the cost of evaluating an always strict iterator (e.g. btree) in a non-strict context.
*
* When the estimate and strict cost is low, this models the cost of checking whether
- * the seek docid matches the docid the iterator is already positioned at (the 0.2 factor).
+ * the seek docid matches the docid the iterator is already positioned at.
*
* The resulting non-strict cost is most accurate when the inflow is 1.0.
* The resulting non-strict cost is highly underestimated when the inflow goes to 0.0.
* It is important to have a better estimate at higher inflows,
* as the latency (time) penalty is higher if choosing wrong.
- *
- * Note: This formula is equal to forced_strict_cost() in flow.h.
*/
inline double non_strict_cost_of_strict_iterator(double estimate, double strict_cost) {
- return 0.2 * (1.0 - estimate) + strict_cost;
+ return strict_cost + strict_cost_diff(estimate, 1.0);
}
// Strict cost of matching in a btree posting list (e.g. fast-search attribute or memory index field).
diff --git a/searchlib/src/vespa/searchlib/queryeval/intermediate_blueprints.cpp b/searchlib/src/vespa/searchlib/queryeval/intermediate_blueprints.cpp
index 2a0f1de0e44..33b249572f0 100644
--- a/searchlib/src/vespa/searchlib/queryeval/intermediate_blueprints.cpp
+++ b/searchlib/src/vespa/searchlib/queryeval/intermediate_blueprints.cpp
@@ -318,6 +318,11 @@ OrBlueprint::calculate_flow_stats(uint32_t) const {
OrFlow::cost_of(get_children(), true) + flow::heap_cost(est, get_children().size())};
}
+double
+OrBlueprint::estimate_self_cost(InFlow in_flow) const noexcept {
+ return in_flow.strict() ? flow::heap_cost(estimate(), get_children().size()) : 0.0;
+}
+
Blueprint::HitEstimate
OrBlueprint::combine(const std::vector<HitEstimate> &data) const
{
@@ -431,6 +436,11 @@ WeakAndBlueprint::calculate_flow_stats(uint32_t docid_limit) const {
OrFlow::cost_of(get_children(), true) + flow::heap_cost(est, get_children().size())};
}
+double
+WeakAndBlueprint::estimate_self_cost(InFlow in_flow) const noexcept {
+ return in_flow.strict() ? flow::heap_cost(estimate(), get_children().size()) : 0.0;
+}
+
Blueprint::HitEstimate
WeakAndBlueprint::combine(const std::vector<HitEstimate> &data) const
{
@@ -507,6 +517,11 @@ NearBlueprint::calculate_flow_stats(uint32_t) const {
AndFlow::cost_of(get_children(), true) + childCnt() * est};
}
+double
+NearBlueprint::estimate_self_cost(InFlow) const noexcept {
+ return childCnt() * estimate();
+}
+
Blueprint::HitEstimate
NearBlueprint::combine(const std::vector<HitEstimate> &data) const
{
@@ -572,6 +587,11 @@ ONearBlueprint::calculate_flow_stats(uint32_t) const {
AndFlow::cost_of(get_children(), true) + childCnt() * est};
}
+double
+ONearBlueprint::estimate_self_cost(InFlow) const noexcept {
+ return childCnt() * estimate();
+}
+
Blueprint::HitEstimate
ONearBlueprint::combine(const std::vector<HitEstimate> &data) const
{
diff --git a/searchlib/src/vespa/searchlib/queryeval/intermediate_blueprints.h b/searchlib/src/vespa/searchlib/queryeval/intermediate_blueprints.h
index dfed40f4d1b..ade4c9318e4 100644
--- a/searchlib/src/vespa/searchlib/queryeval/intermediate_blueprints.h
+++ b/searchlib/src/vespa/searchlib/queryeval/intermediate_blueprints.h
@@ -67,6 +67,7 @@ public:
~OrBlueprint() override;
bool supports_termwise_children() const override { return true; }
FlowStats calculate_flow_stats(uint32_t docid_limit) const final;
+ double estimate_self_cost(InFlow in_flow) const noexcept override;
HitEstimate combine(const std::vector<HitEstimate> &data) const override;
FieldSpecBaseList exposeFields() const override;
void optimize_self(OptimizePass pass) override;
@@ -94,6 +95,7 @@ private:
AnyFlow my_flow(InFlow in_flow) const override;
public:
FlowStats calculate_flow_stats(uint32_t docid_limit) const final;
+ double estimate_self_cost(InFlow in_flow) const noexcept override;
HitEstimate combine(const std::vector<HitEstimate> &data) const override;
FieldSpecBaseList exposeFields() const override;
Blueprint::UP get_replacement() override;
@@ -125,6 +127,7 @@ private:
AnyFlow my_flow(InFlow in_flow) const override;
public:
FlowStats calculate_flow_stats(uint32_t docid_limit) const final;
+ double estimate_self_cost(InFlow in_flow) const noexcept override;
HitEstimate combine(const std::vector<HitEstimate> &data) const override;
FieldSpecBaseList exposeFields() const override;
void sort(Children &children, InFlow in_flow) const override;
@@ -147,6 +150,7 @@ private:
AnyFlow my_flow(InFlow in_flow) const override;
public:
FlowStats calculate_flow_stats(uint32_t docid_limit) const final;
+ double estimate_self_cost(InFlow in_flow) const noexcept override;
HitEstimate combine(const std::vector<HitEstimate> &data) const override;
FieldSpecBaseList exposeFields() const override;
void sort(Children &children, InFlow in_flow) const override;