summaryrefslogtreecommitdiffstats
path: root/searchlib/src/vespa/searchlib/queryeval/flow.h
diff options
context:
space:
mode:
Diffstat (limited to 'searchlib/src/vespa/searchlib/queryeval/flow.h')
-rw-r--r--searchlib/src/vespa/searchlib/queryeval/flow.h231
1 files changed, 216 insertions, 15 deletions
diff --git a/searchlib/src/vespa/searchlib/queryeval/flow.h b/searchlib/src/vespa/searchlib/queryeval/flow.h
index 36c0a259feb..86ce6f8b93b 100644
--- a/searchlib/src/vespa/searchlib/queryeval/flow.h
+++ b/searchlib/src/vespa/searchlib/queryeval/flow.h
@@ -2,60 +2,261 @@
#pragma once
#include <cstddef>
-
-namespace search::queryeval {
+#include <algorithm>
+#include <vespa/vespalib/util/small_vector.h>
// Model how boolean result decisions flow through intermediate nodes
// of different types based on relative estimates for sub-expressions
-class AndFlow {
+namespace search::queryeval {
+
+namespace flow {
+
+// the default adapter expects the shape of std::unique_ptr<Blueprint>
+// with respect to estimate, cost and (coming soon) strict_cost.
+struct DefaultAdapter {
+ double estimate(const auto &child) const noexcept { return child->estimate(); }
+ double cost(const auto &child) const noexcept { return child->cost(); }
+ // Estimate the per-document cost of strict evaluation of this
+ // child. This will typically be something like (estimate() *
+ // cost()) for leafs with posting lists. OR will aggregate strict
+ // cost by calculating the minimal OR flow of strict child
+ // costs. AND will aggregate strict cost by calculating the
+ // minimal AND flow where the cost of the first child is
+ // substituted by its strict cost. This value is currently not
+ // available in Blueprints.
+ double strict_cost(const auto &child) const noexcept { return child->cost(); }
+};
+
+template <typename ADAPTER, typename T>
+struct IndirectAdapter {
+ const T &data;
+ [[no_unique_address]] ADAPTER adapter;
+ IndirectAdapter(ADAPTER adapter_in, const T &data_in) noexcept
+ : data(data_in), adapter(adapter_in) {}
+ double estimate(size_t child) const noexcept { return adapter.estimate(data[child]); }
+ double cost(size_t child) const noexcept { return adapter.cost(data[child]); }
+ double strict_cost(size_t child) const noexcept { return adapter.strict_cost(data[child]); }
+};
+
+auto make_index(const auto &children) {
+ vespalib::SmallVector<uint32_t> index(children.size());
+ for (size_t i = 0; i < index.size(); ++i) {
+ index[i] = i;
+ }
+ return index;
+}
+
+template <typename ADAPTER>
+struct MinAndCost {
+ // sort children to minimize total cost of AND flow
+ [[no_unique_address]] ADAPTER adapter;
+ MinAndCost(ADAPTER adapter_in) noexcept : adapter(adapter_in) {}
+ bool operator () (const auto &a, const auto &b) const noexcept {
+ return (1.0 - adapter.estimate(a)) * adapter.cost(b) > (1.0 - adapter.estimate(b)) * adapter.cost(a);
+ }
+};
+
+template <typename ADAPTER>
+struct MinOrCost {
+ // sort children to minimize total cost of OR flow
+ [[no_unique_address]] ADAPTER adapter;
+ MinOrCost(ADAPTER adapter_in) noexcept : adapter(adapter_in) {}
+ bool operator () (const auto &a, const auto &b) const noexcept {
+ return adapter.estimate(a) * adapter.cost(b) > adapter.estimate(b) * adapter.cost(a);
+ }
+};
+
+template <typename ADAPTER>
+struct MinOrStrictCost {
+ // sort children to minimize total cost of strict OR flow
+ [[no_unique_address]] ADAPTER adapter;
+ MinOrStrictCost(ADAPTER adapter_in) noexcept : adapter(adapter_in) {}
+ bool operator () (const auto &a, const auto &b) const noexcept {
+ return adapter.estimate(a) * adapter.strict_cost(b) > adapter.estimate(b) * adapter.strict_cost(a);
+ }
+};
+
+template <typename ADAPTER, typename T, typename F>
+double estimate_of(ADAPTER adapter, const T &children, F flow) {
+ for (const auto &child: children) {
+ flow.add(adapter.estimate(child));
+ }
+ return flow.estimate();
+}
+
+template <template <typename> typename ORDER, typename ADAPTER, typename T>
+void sort(ADAPTER adapter, T &children) {
+ std::sort(children.begin(), children.end(), ORDER(adapter));
+}
+
+template <template <typename> typename ORDER, typename ADAPTER, typename T>
+void sort_partial(ADAPTER adapter, T &children, size_t offset) {
+ if (children.size() > offset) {
+ std::sort(children.begin() + offset, children.end(), ORDER(adapter));
+ }
+}
+
+template <typename ADAPTER, typename T, typename F>
+double ordered_cost_of(ADAPTER adapter, const T &children, F flow) {
+ double cost = 0.0;
+ for (const auto &child: children) {
+ double child_cost = flow.strict() ? adapter.strict_cost(child) : adapter.cost(child);
+ cost += flow.flow() * child_cost;
+ flow.add(adapter.estimate(child));
+ }
+ return cost;
+}
+
+template <typename ADAPTER, typename T>
+size_t select_strict_and_child(ADAPTER adapter, const T &children) {
+ size_t idx = 0;
+ double cost = 0.0;
+ size_t best_idx = 0;
+ double best_diff = 0.0;
+ double est = 1.0;
+ for (const auto &child: children) {
+ double child_cost = est * adapter.cost(child);
+ double child_strict_cost = adapter.strict_cost(child);
+ double child_est = adapter.estimate(child);
+ if (idx == 0) {
+ best_diff = child_strict_cost - child_cost;
+ } else {
+ double my_diff = (child_strict_cost + child_est * cost) - (cost + child_cost);
+ if (my_diff < best_diff) {
+ best_diff = my_diff;
+ best_idx = idx;
+ }
+ }
+ cost += child_cost;
+ est *= child_est;
+ ++idx;
+ }
+ return best_idx;
+}
+
+} // flow
+
+template <typename FLOW>
+struct FlowMixin {
+ static double estimate_of(auto adapter, const auto &children) {
+ return flow::estimate_of(adapter, children, FLOW(1.0, false));
+ }
+ static double estimate_of(const auto &children) {
+ return estimate_of(flow::DefaultAdapter(), children);
+ }
+ static double cost_of(auto adapter, const auto &children, bool strict) {
+ auto my_adapter = flow::IndirectAdapter(adapter, children);
+ auto order = flow::make_index(children);
+ FLOW::sort(my_adapter, order, strict);
+ return flow::ordered_cost_of(my_adapter, order, FLOW(1.0, strict));
+ }
+ static double cost_of(const auto &children, bool strict) {
+ return cost_of(flow::DefaultAdapter(), children, strict);
+ }
+ // TODO: remove
+ static double cost_of(const auto &children) { return cost_of(children, false); }
+};
+
+class AndFlow : public FlowMixin<AndFlow> {
private:
double _flow;
- size_t _cnt;
+ bool _strict;
+ bool _first;
public:
- AndFlow(double in = 1.0) noexcept : _flow(in), _cnt(0) {}
+ AndFlow(double in, bool strict) noexcept
+ : _flow(in), _strict(strict), _first(true) {}
void add(double est) noexcept {
_flow *= est;
- ++_cnt;
+ _first = false;
}
double flow() const noexcept {
return _flow;
}
+ bool strict() const noexcept {
+ return _strict && _first;
+ }
double estimate() const noexcept {
- return (_cnt > 0) ? _flow : 0.0;
+ return _first ? 0.0 : _flow;
+ }
+ static void sort(auto adapter, auto &children, bool strict) {
+ flow::sort<flow::MinAndCost>(adapter, children);
+ if (strict && children.size() > 1) {
+ size_t idx = flow::select_strict_and_child(adapter, children);
+ auto the_one = std::move(children[idx]);
+ for (; idx > 0; --idx) {
+ children[idx] = std::move(children[idx-1]);
+ }
+ children[0] = std::move(the_one);
+ }
+ }
+ // TODO: add strict
+ static void sort(auto &children) {
+ sort(flow::DefaultAdapter(), children, false);
}
};
-class OrFlow {
+class OrFlow : public FlowMixin<OrFlow>{
private:
double _flow;
+ bool _strict;
+ bool _first;
public:
- OrFlow(double in = 1.0) noexcept : _flow(in) {}
+ OrFlow(double in, bool strict) noexcept
+ : _flow(in), _strict(strict), _first(true) {}
void add(double est) noexcept {
_flow *= (1.0 - est);
+ _first = false;
}
double flow() const noexcept {
return _flow;
}
+ bool strict() const noexcept {
+ return _strict;
+ }
double estimate() const noexcept {
- return (1.0 - _flow);
+ return _first ? 0.0 : (1.0 - _flow);
+ }
+ static void sort(auto adapter, auto &children, bool strict) {
+ if (strict) {
+ flow::sort<flow::MinOrStrictCost>(adapter, children);
+ } else {
+ flow::sort<flow::MinOrCost>(adapter, children);
+ }
+ }
+ // TODO: add strict
+ static void sort(auto &children) {
+ sort(flow::DefaultAdapter(), children, false);
}
};
-class AndNotFlow {
+class AndNotFlow : public FlowMixin<AndNotFlow> {
private:
double _flow;
- size_t _cnt;
+ bool _strict;
+ bool _first;
public:
- AndNotFlow(double in = 1.0) noexcept : _flow(in), _cnt(0) {}
+ AndNotFlow(double in, bool strict) noexcept
+ : _flow(in), _strict(strict), _first(true) {}
void add(double est) noexcept {
- _flow *= (_cnt++ == 0) ? est : (1.0 - est);
+ _flow *= _first ? est : (1.0 - est);
+ _first = false;
}
double flow() const noexcept {
return _flow;
}
+ bool strict() const noexcept {
+ return _strict && _first;
+ }
double estimate() const noexcept {
- return (_cnt > 0) ? _flow : 0.0;
+ return _first ? 0.0 : _flow;
+ }
+ static void sort(auto adapter, auto &children, bool) {
+ flow::sort_partial<flow::MinOrCost>(adapter, children, 1);
+ }
+ // TODO: add strict
+ static void sort(auto &children) {
+ sort(flow::DefaultAdapter(), children, false);
}
};