summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorHåvard Pettersen <havardpe@yahooinc.com>2024-04-09 13:55:10 +0000
committerHåvard Pettersen <havardpe@yahooinc.com>2024-04-10 12:39:47 +0000
commitceccf4c1a0cc1767480dc6d9303651c37c2222a1 (patch)
tree2e0ca2ffbec02e2392b5880a38c23e4336da6d47
parent6ef55fa1ccf7e11655aba4d2f25e311620c3e2e2 (diff)
consolidate solutions into a single heuristic algorithm
use error relative to minimal cost rather than error potential
-rw-r--r--searchlib/src/tests/queryeval/flow/queryeval_flow_test.cpp64
-rw-r--r--searchlib/src/vespa/searchlib/queryeval/flow.h40
2 files changed, 18 insertions, 86 deletions
diff --git a/searchlib/src/tests/queryeval/flow/queryeval_flow_test.cpp b/searchlib/src/tests/queryeval/flow/queryeval_flow_test.cpp
index 9d03ad2657c..5eb3a2ebf47 100644
--- a/searchlib/src/tests/queryeval/flow/queryeval_flow_test.cpp
+++ b/searchlib/src/tests/queryeval/flow/queryeval_flow_test.cpp
@@ -7,6 +7,7 @@
constexpr size_t loop_cnt = 64;
constexpr size_t max_work = 1; // 500'000'000;
+constexpr bool dump_unexpected = false;
constexpr bool verbose = false;
using namespace search::queryeval;
@@ -515,10 +516,7 @@ void test_strict_AND_sort_strategy(auto my_sort) {
};
each_perm(data, check);
double rel_err = 0.0;
- double cost_range = (max_cost - min_cost);
- if (cost_range > 1e-9) {
- rel_err = (est_cost - min_cost) / cost_range;
- }
+ rel_err = (est_cost - min_cost) / min_cost;
if (rel_err > max_rel_err) {
max_rel_err = rel_err;
my_worst_order = my_order;
@@ -526,7 +524,7 @@ void test_strict_AND_sort_strategy(auto my_sort) {
}
sum_rel_err += rel_err;
errs.push_back(rel_err);
- if (verbose && !verify_order(best_order)) {
+ if (dump_unexpected && !verify_order(best_order)) {
fprintf(stderr, " BEST ORDER IS UNEXPECTED:\n");
dump_flow(best_order, best_order);
fprintf(stderr, " UNEXPECTED case, my_order:\n");
@@ -551,60 +549,12 @@ TEST(FlowTest, strict_and_with_allow_force_strict_basic_order) {
test_strict_AND_sort_strategy(my_sort);
}
-TEST(FlowTest, strict_and_with_allow_force_strict_incremental_strict_selection) {
- auto my_sort = [](auto &data) {
- AndFlow::sort(data, true);
- for (size_t next = 1; (next + 1) < data.size(); ++next) {
- auto [idx, score] = flow::select_forced_strict_and_child(flow::DirectAdapter(), data, next);
- if (score >= 0.0) {
- break;
- }
- auto pos = data.begin() + idx;
- std::rotate(data.begin() + next, pos, pos + 1);
- }
- };
- test_strict_AND_sort_strategy(my_sort);
-}
-
-TEST(FlowTest, strict_and_with_allow_force_strict_incremental_strict_selection_with_strict_re_sorting) {
- auto my_sort = [](auto &data) {
- AndFlow::sort(data, true);
- size_t strict_cnt = 1;
- for (; strict_cnt < data.size(); ++strict_cnt) {
- auto [idx, score] = flow::select_forced_strict_and_child(flow::DirectAdapter(), data, strict_cnt);
- if (score >= 0.0) {
- break;
- }
- auto pos = data.begin() + idx;
- std::rotate(data.begin() + strict_cnt, pos, pos + 1);
- }
- std::sort(data.begin(), data.begin() + strict_cnt,
- [](const auto &a, const auto &b){ return (a.estimate < b.estimate); });
- };
- test_strict_AND_sort_strategy(my_sort);
-}
-
-TEST(FlowTest, strict_and_with_allow_force_strict_incremental_strict_selection_with_order) {
- auto my_sort = [](auto &data) {
- AndFlow::sort(data, true);
- for (size_t next = 1; next < data.size(); ++next) {
- auto [idx, target, score] = flow::select_forced_strict_and_child_with_order(flow::DirectAdapter(), data, next);
- if (score >= 0.0) {
- break;
- }
- auto pos = data.begin() + idx;
- std::rotate(data.begin() + target, pos, pos + 1);
- }
- };
- test_strict_AND_sort_strategy(my_sort);
-}
-
-TEST(FlowTest, strict_and_with_allow_force_strict_incremental_strict_selection_with_destructive_order) {
+TEST(FlowTest, strict_and_with_allow_force_strict_incremental_strict_selection_destructive_order_max_3_extra_strict) {
auto my_sort = [](auto &data) {
AndFlow::sort(data, true);
- for (size_t next = 1; next < data.size(); ++next) {
- auto [idx, target, score] = flow::select_forced_strict_and_child_with_destructive_order(flow::DirectAdapter(), data, next);
- if (score >= 0.0) {
+ for (size_t next = 1; next <= 3 && next < data.size(); ++next) {
+ auto [idx, target, diff] = flow::select_forced_strict_and_child(flow::DirectAdapter(), data, next);
+ if (diff >= 0.0) {
break;
}
auto pos = data.begin() + idx;
diff --git a/searchlib/src/vespa/searchlib/queryeval/flow.h b/searchlib/src/vespa/searchlib/queryeval/flow.h
index b57e823f4d7..9ac79244755 100644
--- a/searchlib/src/vespa/searchlib/queryeval/flow.h
+++ b/searchlib/src/vespa/searchlib/queryeval/flow.h
@@ -214,44 +214,39 @@ size_t select_strict_and_child(auto adapter, const auto &children) {
return best_idx;
}
-auto select_forced_strict_and_child_impl(auto adapter, const auto &children, size_t first, auto ordered, auto destructive) {
- auto est_until = [&](size_t limit)noexcept{
- double my_est = 1.0;
- for (size_t i = 0; i < limit; ++i) {
- my_est *= adapter.estimate(children[i]);
- }
- return my_est;
- };
- double est = est_until(first);
+auto select_forced_strict_and_child(auto adapter, const auto &children, size_t first) {
+ double est = 1.0;
double cost = 0.0;
size_t best_idx = first;
size_t best_target = first;
double best_diff = std::numeric_limits<double>::max();
+ for (size_t i = 0; i < first; ++i) {
+ est *= adapter.estimate(children[i]);
+ }
for (size_t idx = first; idx < children.size(); ++idx) {
auto child = FlowStats::from(adapter, children[idx]);
double child_abs_cost = est * child.cost;
double forced_cost = forced_strict_cost(child, est);
double my_diff = (forced_cost + child.estimate * cost) - (cost + child_abs_cost);
size_t target = first;
- while (ordered && target > 0) {
+ while (target > 0) {
size_t candidate = target - 1;
auto other = FlowStats::from(adapter, children[candidate]);
if (other.estimate < child.estimate) {
// do not move past someone with lower estimate
break;
}
- if (!destructive && !should_force_strict(other, (est_until(candidate) * child.estimate))) {
- // no not move past someone who will lose strictness,
- // unless it is explicitly allowed by enabling
- // destructive (re-)ordering
- break;
- }
target = candidate;
my_diff += (0.2 * child.estimate - 0.2 * other.estimate);
if (candidate == 0) {
// the first iterator produces its own in-flow
my_diff += (0.2 * child.estimate - 0.2 * other.estimate);
}
+ // note that 'my_diff' might overestimate the cost
+ // (underestimate the benefit) of inserting 'child' before
+ // 'other' if it leads to 'other' becoming
+ // non-strict. This will also leave 'other' in a
+ // potentially unoptimal location.
}
if (my_diff < best_diff) {
best_diff = my_diff;
@@ -264,19 +259,6 @@ auto select_forced_strict_and_child_impl(auto adapter, const auto &children, siz
return std::make_tuple(best_idx, best_target, best_diff);
}
-auto select_forced_strict_and_child(auto adapter, const auto &children, size_t first) {
- auto [idx, target, diff] = select_forced_strict_and_child_impl(adapter, children, first, std::false_type{}, std::false_type{});
- return std::make_pair(idx, diff);
-}
-
-auto select_forced_strict_and_child_with_order(auto adapter, const auto &children, size_t first) {
- return select_forced_strict_and_child_impl(adapter, children, first, std::true_type{}, std::false_type{});
-}
-
-auto select_forced_strict_and_child_with_destructive_order(auto adapter, const auto &children, size_t first) {
- return select_forced_strict_and_child_impl(adapter, children, first, std::true_type{}, std::true_type{});
-}
-
} // flow
template <typename FLOW>