aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorHenning Baldersheim <balder@yahoo-inc.com>2024-01-16 00:49:18 +0100
committerGitHub <noreply@github.com>2024-01-16 00:49:18 +0100
commit7ff3cdf2d652cc5b4a5ee04f4c1218647157b988 (patch)
treeff03601d3c1a2eba8fb0820ac1a6a98a3338c78e
parentccd310972b94deb49c1d7e3ffc8dc06e4a282014 (diff)
parent7a8871c3b9259d05e5cf606e943d69e0e7a160fe (diff)
Merge pull request #29915 from vespa-engine/havardpe/strict-flowv8.287.20
take strictness into account for flow/cost/sorting
-rw-r--r--searchlib/src/tests/queryeval/blueprint/blueprint_test.cpp4
-rw-r--r--searchlib/src/tests/queryeval/blueprint/intermediate_blueprints_test.cpp2
-rw-r--r--searchlib/src/tests/queryeval/flow/queryeval_flow_test.cpp256
-rw-r--r--searchlib/src/vespa/searchlib/queryeval/blueprint.h30
-rw-r--r--searchlib/src/vespa/searchlib/queryeval/flow.h231
-rw-r--r--searchlib/src/vespa/searchlib/queryeval/intermediate_blueprints.cpp40
6 files changed, 445 insertions, 118 deletions
diff --git a/searchlib/src/tests/queryeval/blueprint/blueprint_test.cpp b/searchlib/src/tests/queryeval/blueprint/blueprint_test.cpp
index f800e124bdc..bbd2744119a 100644
--- a/searchlib/src/tests/queryeval/blueprint/blueprint_test.cpp
+++ b/searchlib/src/tests/queryeval/blueprint/blueprint_test.cpp
@@ -24,10 +24,10 @@ class MyOr : public IntermediateBlueprint
private:
public:
double calculate_cost() const final {
- return cost_of(get_children(), OrFlow());
+ return OrFlow::cost_of(get_children());
}
double calculate_relative_estimate() const final {
- return estimate_of(get_children(), OrFlow());
+ return OrFlow::estimate_of(get_children());
}
HitEstimate combine(const std::vector<HitEstimate> &data) const override {
return max(data);
diff --git a/searchlib/src/tests/queryeval/blueprint/intermediate_blueprints_test.cpp b/searchlib/src/tests/queryeval/blueprint/intermediate_blueprints_test.cpp
index ab1c004c721..856ac2391f8 100644
--- a/searchlib/src/tests/queryeval/blueprint/intermediate_blueprints_test.cpp
+++ b/searchlib/src/tests/queryeval/blueprint/intermediate_blueprints_test.cpp
@@ -1380,7 +1380,7 @@ TEST("cost for ONEAR") {
}
TEST("cost for WEAKAND") {
- verify_cost(make::WEAKAND(1000), calc_cost({{1.1, 0.8},{1.2, 0.7},{1.3, 0.5}}));
+ verify_cost(make::WEAKAND(1000), calc_cost({{1.3, 0.5},{1.2, 0.7},{1.1, 0.8}}));
}
TEST_MAIN() { TEST_DEBUG("lhs.out", "rhs.out"); TEST_RUN_ALL(); }
diff --git a/searchlib/src/tests/queryeval/flow/queryeval_flow_test.cpp b/searchlib/src/tests/queryeval/flow/queryeval_flow_test.cpp
index ceda30f169a..9a9adeac2bc 100644
--- a/searchlib/src/tests/queryeval/flow/queryeval_flow_test.cpp
+++ b/searchlib/src/tests/queryeval/flow/queryeval_flow_test.cpp
@@ -5,44 +5,46 @@
#include <vector>
#include <random>
-using search::queryeval::AndFlow;
-using search::queryeval::OrFlow;
+constexpr size_t loop_cnt = 64;
+
+using namespace search::queryeval;
+
+struct ItemAdapter {
+ double estimate(const auto &child) const noexcept { return child.rel_est; }
+ double cost(const auto &child) const noexcept { return child.cost; }
+ double strict_cost(const auto &child) const noexcept { return child.strict_cost; }
+};
struct Item {
double rel_est;
double cost;
- Item(double rel_est_in, double cost_in) noexcept
- : rel_est(rel_est_in), cost(cost_in) {}
- static void sort_for_and(std::vector<Item> &data) {
- std::sort(data.begin(), data.end(), [](const Item &a, const Item &b) noexcept {
- return (1.0 - a.rel_est) / a.cost > (1.0 - b.rel_est) / b.cost;
- });
+ double strict_cost;
+ Item(double rel_est_in, double cost_in, double strict_cost_in) noexcept
+ : rel_est(rel_est_in), cost(cost_in), strict_cost(strict_cost_in) {}
+ template <typename FLOW> static double estimate_of(std::vector<Item> &data) {
+ return FLOW::estimate_of(ItemAdapter(), data);
}
- static void sort_for_or(std::vector<Item> &data) {
- std::sort(data.begin(), data.end(), [](const Item &a, const Item &b) noexcept {
- return a.rel_est / a.cost > b.rel_est / b.cost;
- });
+ template <typename FLOW> static void sort(std::vector<Item> &data, bool strict) {
+ FLOW::sort(ItemAdapter(), data, strict);
}
- static double cost_of(const std::vector<Item> &data, auto flow) {
- double cost = 0.0;
- for (const Item &item: data) {
- cost += flow.flow() * item.cost;
- flow.add(item.rel_est);
- }
- return cost;
+ template <typename FLOW> static double cost_of(const std::vector<Item> &data, bool strict) {
+ return FLOW::cost_of(ItemAdapter(), data, strict);
+ }
+ template <typename FLOW> static double ordered_cost_of(const std::vector<Item> &data, bool strict) {
+ return flow::ordered_cost_of(ItemAdapter(), data, FLOW(1.0, strict));
}
- static double cost_of_and(const std::vector<Item> &data) { return cost_of(data, AndFlow()); }
- static double cost_of_or(const std::vector<Item> &data) { return cost_of(data, OrFlow()); }
+ auto operator <=>(const Item &rhs) const noexcept = default;
};
std::vector<Item> gen_data(size_t size) {
static std::mt19937 gen;
- static std::uniform_real_distribution<double> rel_est(0.1, 0.9);
- static std::uniform_real_distribution<double> cost(1.0, 10.0);
+ static std::uniform_real_distribution<double> rel_est(0.1, 0.9);
+ static std::uniform_real_distribution<double> cost(1.0, 10.0);
+ static std::uniform_real_distribution<double> strict_cost(0.1, 5.0);
std::vector<Item> result;
result.reserve(size);
for (size_t i = 0; i < size; ++i) {
- result.emplace_back(rel_est(gen), cost(gen));
+ result.emplace_back(rel_est(gen), cost(gen), strict_cost(gen));
}
return result;
}
@@ -80,37 +82,191 @@ TEST(FlowTest, perm_test) {
EXPECT_EQ(seen.size(), 120);
}
+template <template <typename> typename ORDER>
+void verify_ordering_is_strict_weak() {
+ auto cmp = ORDER(ItemAdapter());
+ auto input = gen_data(7);
+ input.emplace_back(0.5, 1.5, 0.5);
+ input.emplace_back(0.5, 1.5, 0.5);
+ input.emplace_back(0.5, 1.5, 0.5);
+ input.emplace_back(0.0, 1.5, 0.5);
+ input.emplace_back(0.0, 1.5, 0.5);
+ input.emplace_back(0.5, 0.0, 0.5);
+ input.emplace_back(0.5, 0.0, 0.5);
+ input.emplace_back(0.5, 1.5, 0.0);
+ input.emplace_back(0.5, 1.5, 0.0);
+ input.emplace_back(0.0, 0.0, 0.0);
+ input.emplace_back(0.0, 0.0, 0.0);
+ std::vector<Item> output;
+ for (const Item &in: input) {
+ EXPECT_FALSE(cmp(in, in)); // Irreflexivity
+ size_t out_idx = 0;
+ bool lower = false;
+ bool upper = false;
+ for (const Item &out: output) {
+ if (cmp(out, in)) {
+ EXPECT_FALSE(cmp(in, out)); // Antisymmetry
+ EXPECT_FALSE(lower); // Transitivity
+ EXPECT_FALSE(upper); // Transitivity
+ ++out_idx;
+ } else {
+ lower = true;
+ if (cmp(in, out)) {
+ upper = true;
+ } else {
+ EXPECT_FALSE(upper); // Transitivity
+ }
+ }
+ }
+ output.insert(output.begin() + out_idx, in);
+ }
+}
+
+TEST(FlowTest, and_ordering_is_strict_weak) {
+ verify_ordering_is_strict_weak<flow::MinAndCost>();
+}
+
+TEST(FlowTest, or_ordering_is_strict_weak) {
+ verify_ordering_is_strict_weak<flow::MinOrCost>();
+}
+
+TEST(FlowTest, strict_or_ordering_is_strict_weak) {
+ verify_ordering_is_strict_weak<flow::MinOrStrictCost>();
+}
+
+struct ExpectFlow {
+ double flow;
+ double est;
+ bool strict;
+};
+
+void verify_flow(auto flow, const std::vector<double> &est_list, const std::vector<ExpectFlow> &expect) {
+ ASSERT_EQ(est_list.size() + 1, expect.size());
+ for (size_t i = 0; i < expect.size(); ++i) {
+ EXPECT_DOUBLE_EQ(flow.flow(), expect[i].flow);
+ EXPECT_DOUBLE_EQ(flow.estimate(), expect[i].est);
+ EXPECT_EQ(flow.strict(), expect[i].strict);
+ if (i < est_list.size()) {
+ flow.add(est_list[i]);
+ }
+ }
+}
+
+TEST(FlowTest, basic_and_flow) {
+ for (double in: {1.0, 0.5, 0.25}) {
+ for (bool strict: {false, true}) {
+ verify_flow(AndFlow(in, strict), {0.4, 0.7, 0.2},
+ {{in, 0.0, strict},
+ {in*0.4, in*0.4, false},
+ {in*0.4*0.7, in*0.4*0.7, false},
+ {in*0.4*0.7*0.2, in*0.4*0.7*0.2, false}});
+ }
+ }
+}
+
+TEST(FlowTest, basic_or_flow) {
+ for (double in: {1.0, 0.5, 0.25}) {
+ for (bool strict: {false, true}) {
+ verify_flow(OrFlow(in, strict), {0.4, 0.7, 0.2},
+ {{in, 0.0, strict},
+ {in*0.6, 1.0-in*0.6, strict},
+ {in*0.6*0.3, 1.0-in*0.6*0.3, strict},
+ {in*0.6*0.3*0.8, 1.0-in*0.6*0.3*0.8, strict}});
+ }
+ }
+}
+
+TEST(FlowTest, basic_and_not_flow) {
+ for (double in: {1.0, 0.5, 0.25}) {
+ for (bool strict: {false, true}) {
+ verify_flow(AndNotFlow(in, strict), {0.4, 0.7, 0.2},
+ {{in, 0.0, strict},
+ {in*0.4, in*0.4, false},
+ {in*0.4*0.3, in*0.4*0.3, false},
+ {in*0.4*0.3*0.8, in*0.4*0.3*0.8, false}});
+ }
+ }
+}
+
+TEST(FlowTest, flow_cost) {
+ std::vector<Item> data = {{0.4, 1.1, 0.6}, {0.7, 1.2, 0.5}, {0.2, 1.3, 0.4}};
+ EXPECT_DOUBLE_EQ(Item::ordered_cost_of<AndFlow>(data, false), 1.1 + 0.4*1.2 + 0.4*0.7*1.3);
+ EXPECT_DOUBLE_EQ(Item::ordered_cost_of<AndFlow>(data, true), 0.6 + 0.4*1.2 + 0.4*0.7*1.3);
+ EXPECT_DOUBLE_EQ(Item::ordered_cost_of<OrFlow>(data, false), 1.1 + 0.6*1.2 + 0.6*0.3*1.3);
+ EXPECT_DOUBLE_EQ(Item::ordered_cost_of<OrFlow>(data, true), 0.6 + 0.6*0.5 + 0.6*0.3*0.4);
+ EXPECT_DOUBLE_EQ(Item::ordered_cost_of<AndNotFlow>(data, false), 1.1 + 0.4*1.2 + 0.4*0.3*1.3);
+ EXPECT_DOUBLE_EQ(Item::ordered_cost_of<AndNotFlow>(data, true), 0.6 + 0.4*1.2 + 0.4*0.3*1.3);
+}
+
TEST(FlowTest, optimal_and_flow) {
- for (size_t i = 0; i < 256; ++i) {
- auto data = gen_data(7);
- Item::sort_for_and(data);
- double min_cost = Item::cost_of_and(data);
- double max_cost = 0.0;
- auto check = [min_cost,&max_cost](const std::vector<Item> &my_data) noexcept {
- double my_cost = Item::cost_of_and(my_data);
- EXPECT_LE(min_cost, my_cost);
- max_cost = std::max(max_cost, my_cost);
- };
- each_perm(data, check);
- fprintf(stderr, " and cost(%zu): min: %g, max: %g, factor: %g\n",
- i, min_cost, max_cost, max_cost / min_cost);
+ for (size_t i = 0; i < loop_cnt; ++i) {
+ for (bool strict: {false, true}) {
+ auto data = gen_data(7);
+ double ref_est = Item::estimate_of<AndFlow>(data);
+ double min_cost = Item::cost_of<AndFlow>(data, strict);
+ double max_cost = 0.0;
+ Item::sort<AndFlow>(data, strict);
+ EXPECT_EQ(Item::ordered_cost_of<AndFlow>(data, strict), min_cost);
+ auto check = [&](const std::vector<Item> &my_data) noexcept {
+ double my_cost = Item::ordered_cost_of<AndFlow>(my_data, strict);
+ EXPECT_LE(min_cost, my_cost);
+ max_cost = std::max(max_cost, my_cost);
+ };
+ each_perm(data, check);
+ if (loop_cnt < 1024 || i % 1024 == 0) {
+ fprintf(stderr, " AND cost(%zu,%s): min: %g, max: %g, factor: %g\n",
+ i, strict ? "strict" : "non-strict", min_cost, max_cost, max_cost / min_cost);
+ }
+ EXPECT_NEAR(ref_est, Item::estimate_of<AndFlow>(data), 1e-9);
+ }
}
}
TEST(FlowTest, optimal_or_flow) {
- for (size_t i = 0; i < 256; ++i) {
- auto data = gen_data(7);
- Item::sort_for_or(data);
- double min_cost = Item::cost_of_or(data);
- double max_cost = 0.0;
- auto check = [min_cost,&max_cost](const std::vector<Item> &my_data) noexcept {
- double my_cost = Item::cost_of_or(my_data);
- EXPECT_LE(min_cost, my_cost);
- max_cost = std::max(max_cost, my_cost);
- };
- each_perm(data, check);
- fprintf(stderr, " or cost(%zu): min: %g, max: %g, factor: %g\n",
- i, min_cost, max_cost, max_cost / min_cost);
+ for (size_t i = 0; i < loop_cnt; ++i) {
+ for (bool strict: {false, true}) {
+ auto data = gen_data(7);
+ double min_cost = Item::cost_of<OrFlow>(data, strict);
+ double max_cost = 0.0;
+ Item::sort<OrFlow>(data, strict);
+ EXPECT_EQ(Item::ordered_cost_of<OrFlow>(data, strict), min_cost);
+ auto check = [&](const std::vector<Item> &my_data) noexcept {
+ double my_cost = Item::ordered_cost_of<OrFlow>(my_data, strict);
+ EXPECT_LE(min_cost, my_cost);
+ max_cost = std::max(max_cost, my_cost);
+ };
+ each_perm(data, check);
+ if (loop_cnt < 1024 || i % 1024 == 0) {
+ fprintf(stderr, " OR cost(%zu,%s): min: %g, max: %g, factor: %g\n",
+ i, strict ? "strict" : "non-strict", min_cost, max_cost, max_cost / min_cost);
+ }
+ }
+ }
+}
+
+TEST(FlowTest, optimal_and_not_flow) {
+ for (size_t i = 0; i < loop_cnt; ++i) {
+ for (bool strict: {false, true}) {
+ auto data = gen_data(7);
+ Item first = data[0];
+ double min_cost = Item::cost_of<AndNotFlow>(data, strict);
+ double max_cost = 0.0;
+ Item::sort<AndNotFlow>(data, strict);
+ EXPECT_EQ(data[0], first);
+ EXPECT_EQ(Item::ordered_cost_of<AndNotFlow>(data, strict), min_cost);
+ auto check = [&](const std::vector<Item> &my_data) noexcept {
+ if (my_data[0] == first) {
+ double my_cost = Item::ordered_cost_of<AndNotFlow>(my_data, strict);
+ EXPECT_LE(min_cost, my_cost);
+ max_cost = std::max(max_cost, my_cost);
+ }
+ };
+ each_perm(data, check);
+ if (loop_cnt < 1024 || i % 1024 == 0) {
+ fprintf(stderr, " ANDNOT cost(%zu,%s): min: %g, max: %g, factor: %g\n",
+ i, strict ? "strict" : "non-strict", min_cost, max_cost, max_cost / min_cost);
+ }
+ }
}
}
diff --git a/searchlib/src/vespa/searchlib/queryeval/blueprint.h b/searchlib/src/vespa/searchlib/queryeval/blueprint.h
index a78dd092f5a..d998c2e343e 100644
--- a/searchlib/src/vespa/searchlib/queryeval/blueprint.h
+++ b/searchlib/src/vespa/searchlib/queryeval/blueprint.h
@@ -144,22 +144,6 @@ public:
return (total_docs == 0) ? 0.0 : double(est) / double(total_docs);
}
- static double cost_of(const Children &children, auto flow) {
- double cost = 0.0;
- for (const auto &child: children) {
- cost += flow.flow() * child->cost();
- flow.add(child->estimate());
- }
- return cost;
- }
-
- static double estimate_of(const Children &children, auto flow) {
- for (const auto &child: children) {
- flow.add(child->estimate());
- }
- return flow.estimate();
- }
-
// utility that just takes maximum estimate
static HitEstimate max(const std::vector<HitEstimate> &data);
@@ -172,20 +156,6 @@ public:
// lower limit for docid_limit: max child estimate
static HitEstimate sat_sum(const std::vector<HitEstimate> &data, uint32_t docid_limit);
- // sort children to minimize total cost of OR flow
- struct MinimalOrCost {
- bool operator () (const auto &a, const auto &b) const noexcept {
- return a->estimate() / a->cost() > b->estimate() / b->cost();
- }
- };
-
- // sort children to minimize total cost of AND flow
- struct MinimalAndCost {
- bool operator () (const auto &a, const auto &b) const noexcept {
- return (1.0 - a->estimate()) / a->cost() > (1.0 - b->estimate()) / b->cost();
- }
- };
-
// utility to get the greater estimate to sort first, higher tiers last
struct TieredGreaterEstimate {
bool operator () (const auto &a, const auto &b) const noexcept {
diff --git a/searchlib/src/vespa/searchlib/queryeval/flow.h b/searchlib/src/vespa/searchlib/queryeval/flow.h
index 36c0a259feb..86ce6f8b93b 100644
--- a/searchlib/src/vespa/searchlib/queryeval/flow.h
+++ b/searchlib/src/vespa/searchlib/queryeval/flow.h
@@ -2,60 +2,261 @@
#pragma once
#include <cstddef>
-
-namespace search::queryeval {
+#include <algorithm>
+#include <vespa/vespalib/util/small_vector.h>
// Model how boolean result decisions flow through intermediate nodes
// of different types based on relative estimates for sub-expressions
-class AndFlow {
+namespace search::queryeval {
+
+namespace flow {
+
+// the default adapter expects the shape of std::unique_ptr<Blueprint>
+// with respect to estimate, cost and (coming soon) strict_cost.
+struct DefaultAdapter {
+ double estimate(const auto &child) const noexcept { return child->estimate(); }
+ double cost(const auto &child) const noexcept { return child->cost(); }
+ // Estimate the per-document cost of strict evaluation of this
+ // child. This will typically be something like (estimate() *
+ // cost()) for leafs with posting lists. OR will aggregate strict
+ // cost by calculating the minimal OR flow of strict child
+ // costs. AND will aggregate strict cost by calculating the
+ // minimal AND flow where the cost of the first child is
+ // substituted by its strict cost. This value is currently not
+ // available in Blueprints.
+ double strict_cost(const auto &child) const noexcept { return child->cost(); }
+};
+
+template <typename ADAPTER, typename T>
+struct IndirectAdapter {
+ const T &data;
+ [[no_unique_address]] ADAPTER adapter;
+ IndirectAdapter(ADAPTER adapter_in, const T &data_in) noexcept
+ : data(data_in), adapter(adapter_in) {}
+ double estimate(size_t child) const noexcept { return adapter.estimate(data[child]); }
+ double cost(size_t child) const noexcept { return adapter.cost(data[child]); }
+ double strict_cost(size_t child) const noexcept { return adapter.strict_cost(data[child]); }
+};
+
+auto make_index(const auto &children) {
+ vespalib::SmallVector<uint32_t> index(children.size());
+ for (size_t i = 0; i < index.size(); ++i) {
+ index[i] = i;
+ }
+ return index;
+}
+
+template <typename ADAPTER>
+struct MinAndCost {
+ // sort children to minimize total cost of AND flow
+ [[no_unique_address]] ADAPTER adapter;
+ MinAndCost(ADAPTER adapter_in) noexcept : adapter(adapter_in) {}
+ bool operator () (const auto &a, const auto &b) const noexcept {
+ return (1.0 - adapter.estimate(a)) * adapter.cost(b) > (1.0 - adapter.estimate(b)) * adapter.cost(a);
+ }
+};
+
+template <typename ADAPTER>
+struct MinOrCost {
+ // sort children to minimize total cost of OR flow
+ [[no_unique_address]] ADAPTER adapter;
+ MinOrCost(ADAPTER adapter_in) noexcept : adapter(adapter_in) {}
+ bool operator () (const auto &a, const auto &b) const noexcept {
+ return adapter.estimate(a) * adapter.cost(b) > adapter.estimate(b) * adapter.cost(a);
+ }
+};
+
+template <typename ADAPTER>
+struct MinOrStrictCost {
+ // sort children to minimize total cost of strict OR flow
+ [[no_unique_address]] ADAPTER adapter;
+ MinOrStrictCost(ADAPTER adapter_in) noexcept : adapter(adapter_in) {}
+ bool operator () (const auto &a, const auto &b) const noexcept {
+ return adapter.estimate(a) * adapter.strict_cost(b) > adapter.estimate(b) * adapter.strict_cost(a);
+ }
+};
+
+template <typename ADAPTER, typename T, typename F>
+double estimate_of(ADAPTER adapter, const T &children, F flow) {
+ for (const auto &child: children) {
+ flow.add(adapter.estimate(child));
+ }
+ return flow.estimate();
+}
+
+template <template <typename> typename ORDER, typename ADAPTER, typename T>
+void sort(ADAPTER adapter, T &children) {
+ std::sort(children.begin(), children.end(), ORDER(adapter));
+}
+
+template <template <typename> typename ORDER, typename ADAPTER, typename T>
+void sort_partial(ADAPTER adapter, T &children, size_t offset) {
+ if (children.size() > offset) {
+ std::sort(children.begin() + offset, children.end(), ORDER(adapter));
+ }
+}
+
+template <typename ADAPTER, typename T, typename F>
+double ordered_cost_of(ADAPTER adapter, const T &children, F flow) {
+ double cost = 0.0;
+ for (const auto &child: children) {
+ double child_cost = flow.strict() ? adapter.strict_cost(child) : adapter.cost(child);
+ cost += flow.flow() * child_cost;
+ flow.add(adapter.estimate(child));
+ }
+ return cost;
+}
+
+template <typename ADAPTER, typename T>
+size_t select_strict_and_child(ADAPTER adapter, const T &children) {
+ size_t idx = 0;
+ double cost = 0.0;
+ size_t best_idx = 0;
+ double best_diff = 0.0;
+ double est = 1.0;
+ for (const auto &child: children) {
+ double child_cost = est * adapter.cost(child);
+ double child_strict_cost = adapter.strict_cost(child);
+ double child_est = adapter.estimate(child);
+ if (idx == 0) {
+ best_diff = child_strict_cost - child_cost;
+ } else {
+ double my_diff = (child_strict_cost + child_est * cost) - (cost + child_cost);
+ if (my_diff < best_diff) {
+ best_diff = my_diff;
+ best_idx = idx;
+ }
+ }
+ cost += child_cost;
+ est *= child_est;
+ ++idx;
+ }
+ return best_idx;
+}
+
+} // flow
+
+template <typename FLOW>
+struct FlowMixin {
+ static double estimate_of(auto adapter, const auto &children) {
+ return flow::estimate_of(adapter, children, FLOW(1.0, false));
+ }
+ static double estimate_of(const auto &children) {
+ return estimate_of(flow::DefaultAdapter(), children);
+ }
+ static double cost_of(auto adapter, const auto &children, bool strict) {
+ auto my_adapter = flow::IndirectAdapter(adapter, children);
+ auto order = flow::make_index(children);
+ FLOW::sort(my_adapter, order, strict);
+ return flow::ordered_cost_of(my_adapter, order, FLOW(1.0, strict));
+ }
+ static double cost_of(const auto &children, bool strict) {
+ return cost_of(flow::DefaultAdapter(), children, strict);
+ }
+ // TODO: remove
+ static double cost_of(const auto &children) { return cost_of(children, false); }
+};
+
+class AndFlow : public FlowMixin<AndFlow> {
private:
double _flow;
- size_t _cnt;
+ bool _strict;
+ bool _first;
public:
- AndFlow(double in = 1.0) noexcept : _flow(in), _cnt(0) {}
+ AndFlow(double in, bool strict) noexcept
+ : _flow(in), _strict(strict), _first(true) {}
void add(double est) noexcept {
_flow *= est;
- ++_cnt;
+ _first = false;
}
double flow() const noexcept {
return _flow;
}
+ bool strict() const noexcept {
+ return _strict && _first;
+ }
double estimate() const noexcept {
- return (_cnt > 0) ? _flow : 0.0;
+ return _first ? 0.0 : _flow;
+ }
+ static void sort(auto adapter, auto &children, bool strict) {
+ flow::sort<flow::MinAndCost>(adapter, children);
+ if (strict && children.size() > 1) {
+ size_t idx = flow::select_strict_and_child(adapter, children);
+ auto the_one = std::move(children[idx]);
+ for (; idx > 0; --idx) {
+ children[idx] = std::move(children[idx-1]);
+ }
+ children[0] = std::move(the_one);
+ }
+ }
+ // TODO: add strict
+ static void sort(auto &children) {
+ sort(flow::DefaultAdapter(), children, false);
}
};
-class OrFlow {
+class OrFlow : public FlowMixin<OrFlow>{
private:
double _flow;
+ bool _strict;
+ bool _first;
public:
- OrFlow(double in = 1.0) noexcept : _flow(in) {}
+ OrFlow(double in, bool strict) noexcept
+ : _flow(in), _strict(strict), _first(true) {}
void add(double est) noexcept {
_flow *= (1.0 - est);
+ _first = false;
}
double flow() const noexcept {
return _flow;
}
+ bool strict() const noexcept {
+ return _strict;
+ }
double estimate() const noexcept {
- return (1.0 - _flow);
+ return _first ? 0.0 : (1.0 - _flow);
+ }
+ static void sort(auto adapter, auto &children, bool strict) {
+ if (strict) {
+ flow::sort<flow::MinOrStrictCost>(adapter, children);
+ } else {
+ flow::sort<flow::MinOrCost>(adapter, children);
+ }
+ }
+ // TODO: add strict
+ static void sort(auto &children) {
+ sort(flow::DefaultAdapter(), children, false);
}
};
-class AndNotFlow {
+class AndNotFlow : public FlowMixin<AndNotFlow> {
private:
double _flow;
- size_t _cnt;
+ bool _strict;
+ bool _first;
public:
- AndNotFlow(double in = 1.0) noexcept : _flow(in), _cnt(0) {}
+ AndNotFlow(double in, bool strict) noexcept
+ : _flow(in), _strict(strict), _first(true) {}
void add(double est) noexcept {
- _flow *= (_cnt++ == 0) ? est : (1.0 - est);
+ _flow *= _first ? est : (1.0 - est);
+ _first = false;
}
double flow() const noexcept {
return _flow;
}
+ bool strict() const noexcept {
+ return _strict && _first;
+ }
double estimate() const noexcept {
- return (_cnt > 0) ? _flow : 0.0;
+ return _first ? 0.0 : _flow;
+ }
+ static void sort(auto adapter, auto &children, bool) {
+ flow::sort_partial<flow::MinOrCost>(adapter, children, 1);
+ }
+ // TODO: add strict
+ static void sort(auto &children) {
+ sort(flow::DefaultAdapter(), children, false);
}
};
diff --git a/searchlib/src/vespa/searchlib/queryeval/intermediate_blueprints.cpp b/searchlib/src/vespa/searchlib/queryeval/intermediate_blueprints.cpp
index bebc1f433f7..e60fe3d3f85 100644
--- a/searchlib/src/vespa/searchlib/queryeval/intermediate_blueprints.cpp
+++ b/searchlib/src/vespa/searchlib/queryeval/intermediate_blueprints.cpp
@@ -89,13 +89,13 @@ need_normal_features_for_children(const IntermediateBlueprint &blueprint, fef::M
double
AndNotBlueprint::calculate_cost() const
{
- return cost_of(get_children(), AndNotFlow());
+ return AndNotFlow::cost_of(get_children());
}
double
AndNotBlueprint::calculate_relative_estimate() const
{
- return estimate_of(get_children(), AndNotFlow());
+ return AndNotFlow::estimate_of(get_children());
}
Blueprint::HitEstimate
@@ -168,10 +168,10 @@ AndNotBlueprint::get_replacement()
void
AndNotBlueprint::sort(Children &children, bool sort_by_cost) const
{
- if (children.size() > 2) {
- if (sort_by_cost) {
- std::sort(children.begin() + 1, children.end(), MinimalOrCost());
- } else {
+ if (sort_by_cost) {
+ AndNotFlow::sort(children);
+ } else {
+ if (children.size() > 2) {
std::sort(children.begin() + 1, children.end(), TieredGreaterEstimate());
}
}
@@ -214,12 +214,12 @@ AndNotBlueprint::createFilterSearch(bool strict, FilterConstraint constraint) co
double
AndBlueprint::calculate_cost() const {
- return cost_of(get_children(), AndFlow());
+ return AndFlow::cost_of(get_children());
}
double
AndBlueprint::calculate_relative_estimate() const {
- return estimate_of(get_children(), AndFlow());
+ return AndFlow::estimate_of(get_children());
}
Blueprint::HitEstimate
@@ -265,7 +265,7 @@ void
AndBlueprint::sort(Children &children, bool sort_by_cost) const
{
if (sort_by_cost) {
- std::sort(children.begin(), children.end(), MinimalAndCost());
+ AndFlow::sort(children);
} else {
std::sort(children.begin(), children.end(), TieredLessEstimate());
}
@@ -323,12 +323,12 @@ OrBlueprint::~OrBlueprint() = default;
double
OrBlueprint::calculate_cost() const {
- return cost_of(get_children(), OrFlow());
+ return OrFlow::cost_of(get_children());
}
double
OrBlueprint::calculate_relative_estimate() const {
- return estimate_of(get_children(), OrFlow());
+ return OrFlow::estimate_of(get_children());
}
Blueprint::HitEstimate
@@ -376,7 +376,7 @@ void
OrBlueprint::sort(Children &children, bool sort_by_cost) const
{
if (sort_by_cost) {
- std::sort(children.begin(), children.end(), MinimalOrCost());
+ OrFlow::sort(children);
} else {
std::sort(children.begin(), children.end(), TieredGreaterEstimate());
}
@@ -428,12 +428,12 @@ WeakAndBlueprint::~WeakAndBlueprint() = default;
double
WeakAndBlueprint::calculate_cost() const {
- return cost_of(get_children(), OrFlow());
+ return OrFlow::cost_of(get_children());
}
double
WeakAndBlueprint::calculate_relative_estimate() const {
- double child_est = estimate_of(get_children(), OrFlow());
+ double child_est = OrFlow::estimate_of(get_children());
double my_est = abs_to_rel_est(_n, get_docid_limit());
return std::min(my_est, child_est);
}
@@ -499,12 +499,12 @@ WeakAndBlueprint::createFilterSearch(bool strict, FilterConstraint constraint) c
double
NearBlueprint::calculate_cost() const {
- return cost_of(get_children(), AndFlow()) + childCnt() * 1.0;
+ return AndFlow::cost_of(get_children()) + childCnt() * 1.0;
}
double
NearBlueprint::calculate_relative_estimate() const {
- return estimate_of(get_children(), AndFlow());
+ return AndFlow::estimate_of(get_children());
}
Blueprint::HitEstimate
@@ -523,7 +523,7 @@ void
NearBlueprint::sort(Children &children, bool sort_by_cost) const
{
if (sort_by_cost) {
- std::sort(children.begin(), children.end(), MinimalAndCost());
+ AndFlow::sort(children);
} else {
std::sort(children.begin(), children.end(), TieredLessEstimate());
}
@@ -566,12 +566,12 @@ NearBlueprint::createFilterSearch(bool strict, FilterConstraint constraint) cons
double
ONearBlueprint::calculate_cost() const {
- return cost_of(get_children(), AndFlow()) + (childCnt() * 1.0);
+ return AndFlow::cost_of(get_children()) + (childCnt() * 1.0);
}
double
ONearBlueprint::calculate_relative_estimate() const {
- return estimate_of(get_children(), AndFlow());
+ return AndFlow::estimate_of(get_children());
}
Blueprint::HitEstimate
@@ -741,7 +741,7 @@ SourceBlenderBlueprint::calculate_cost() const {
double
SourceBlenderBlueprint::calculate_relative_estimate() const {
- return estimate_of(get_children(), OrFlow());
+ return OrFlow::estimate_of(get_children());
}
Blueprint::HitEstimate