diff options
author | Geir Storli <geirst@vespa.ai> | 2024-04-10 15:27:35 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2024-04-10 15:27:35 +0200 |
commit | a37dba62c9eb879a29f0128d4966cbf9310dbdbe (patch) | |
tree | 2e0ca2ffbec02e2392b5880a38c23e4336da6d47 | |
parent | 6ef55fa1ccf7e11655aba4d2f25e311620c3e2e2 (diff) | |
parent | ceccf4c1a0cc1767480dc6d9303651c37c2222a1 (diff) |
Merge pull request #30870 from vespa-engine/havardpe/consolidate-sorting-strategies
consolidate solutions into a single heuristic algorithm
-rw-r--r-- | searchlib/src/tests/queryeval/flow/queryeval_flow_test.cpp | 64 | ||||
-rw-r--r-- | searchlib/src/vespa/searchlib/queryeval/flow.h | 40 |
2 files changed, 18 insertions, 86 deletions
diff --git a/searchlib/src/tests/queryeval/flow/queryeval_flow_test.cpp b/searchlib/src/tests/queryeval/flow/queryeval_flow_test.cpp index 9d03ad2657c..5eb3a2ebf47 100644 --- a/searchlib/src/tests/queryeval/flow/queryeval_flow_test.cpp +++ b/searchlib/src/tests/queryeval/flow/queryeval_flow_test.cpp @@ -7,6 +7,7 @@ constexpr size_t loop_cnt = 64; constexpr size_t max_work = 1; // 500'000'000; +constexpr bool dump_unexpected = false; constexpr bool verbose = false; using namespace search::queryeval; @@ -515,10 +516,7 @@ void test_strict_AND_sort_strategy(auto my_sort) { }; each_perm(data, check); double rel_err = 0.0; - double cost_range = (max_cost - min_cost); - if (cost_range > 1e-9) { - rel_err = (est_cost - min_cost) / cost_range; - } + rel_err = (est_cost - min_cost) / min_cost; if (rel_err > max_rel_err) { max_rel_err = rel_err; my_worst_order = my_order; @@ -526,7 +524,7 @@ void test_strict_AND_sort_strategy(auto my_sort) { } sum_rel_err += rel_err; errs.push_back(rel_err); - if (verbose && !verify_order(best_order)) { + if (dump_unexpected && !verify_order(best_order)) { fprintf(stderr, " BEST ORDER IS UNEXPECTED:\n"); dump_flow(best_order, best_order); fprintf(stderr, " UNEXPECTED case, my_order:\n"); @@ -551,60 +549,12 @@ TEST(FlowTest, strict_and_with_allow_force_strict_basic_order) { test_strict_AND_sort_strategy(my_sort); } -TEST(FlowTest, strict_and_with_allow_force_strict_incremental_strict_selection) { - auto my_sort = [](auto &data) { - AndFlow::sort(data, true); - for (size_t next = 1; (next + 1) < data.size(); ++next) { - auto [idx, score] = flow::select_forced_strict_and_child(flow::DirectAdapter(), data, next); - if (score >= 0.0) { - break; - } - auto pos = data.begin() + idx; - std::rotate(data.begin() + next, pos, pos + 1); - } - }; - test_strict_AND_sort_strategy(my_sort); -} - -TEST(FlowTest, strict_and_with_allow_force_strict_incremental_strict_selection_with_strict_re_sorting) { - auto my_sort = [](auto &data) { - AndFlow::sort(data, true); - size_t strict_cnt = 1; - for (; strict_cnt < data.size(); ++strict_cnt) { - auto [idx, score] = flow::select_forced_strict_and_child(flow::DirectAdapter(), data, strict_cnt); - if (score >= 0.0) { - break; - } - auto pos = data.begin() + idx; - std::rotate(data.begin() + strict_cnt, pos, pos + 1); - } - std::sort(data.begin(), data.begin() + strict_cnt, - [](const auto &a, const auto &b){ return (a.estimate < b.estimate); }); - }; - test_strict_AND_sort_strategy(my_sort); -} - -TEST(FlowTest, strict_and_with_allow_force_strict_incremental_strict_selection_with_order) { - auto my_sort = [](auto &data) { - AndFlow::sort(data, true); - for (size_t next = 1; next < data.size(); ++next) { - auto [idx, target, score] = flow::select_forced_strict_and_child_with_order(flow::DirectAdapter(), data, next); - if (score >= 0.0) { - break; - } - auto pos = data.begin() + idx; - std::rotate(data.begin() + target, pos, pos + 1); - } - }; - test_strict_AND_sort_strategy(my_sort); -} - -TEST(FlowTest, strict_and_with_allow_force_strict_incremental_strict_selection_with_destructive_order) { +TEST(FlowTest, strict_and_with_allow_force_strict_incremental_strict_selection_destructive_order_max_3_extra_strict) { auto my_sort = [](auto &data) { AndFlow::sort(data, true); - for (size_t next = 1; next < data.size(); ++next) { - auto [idx, target, score] = flow::select_forced_strict_and_child_with_destructive_order(flow::DirectAdapter(), data, next); - if (score >= 0.0) { + for (size_t next = 1; next <= 3 && next < data.size(); ++next) { + auto [idx, target, diff] = flow::select_forced_strict_and_child(flow::DirectAdapter(), data, next); + if (diff >= 0.0) { break; } auto pos = data.begin() + idx; diff --git a/searchlib/src/vespa/searchlib/queryeval/flow.h b/searchlib/src/vespa/searchlib/queryeval/flow.h index b57e823f4d7..9ac79244755 100644 --- a/searchlib/src/vespa/searchlib/queryeval/flow.h +++ b/searchlib/src/vespa/searchlib/queryeval/flow.h @@ -214,44 +214,39 @@ size_t select_strict_and_child(auto adapter, const auto &children) { return best_idx; } -auto select_forced_strict_and_child_impl(auto adapter, const auto &children, size_t first, auto ordered, auto destructive) { - auto est_until = [&](size_t limit)noexcept{ - double my_est = 1.0; - for (size_t i = 0; i < limit; ++i) { - my_est *= adapter.estimate(children[i]); - } - return my_est; - }; - double est = est_until(first); +auto select_forced_strict_and_child(auto adapter, const auto &children, size_t first) { + double est = 1.0; double cost = 0.0; size_t best_idx = first; size_t best_target = first; double best_diff = std::numeric_limits<double>::max(); + for (size_t i = 0; i < first; ++i) { + est *= adapter.estimate(children[i]); + } for (size_t idx = first; idx < children.size(); ++idx) { auto child = FlowStats::from(adapter, children[idx]); double child_abs_cost = est * child.cost; double forced_cost = forced_strict_cost(child, est); double my_diff = (forced_cost + child.estimate * cost) - (cost + child_abs_cost); size_t target = first; - while (ordered && target > 0) { + while (target > 0) { size_t candidate = target - 1; auto other = FlowStats::from(adapter, children[candidate]); if (other.estimate < child.estimate) { // do not move past someone with lower estimate break; } - if (!destructive && !should_force_strict(other, (est_until(candidate) * child.estimate))) { - // no not move past someone who will lose strictness, - // unless it is explicitly allowed by enabling - // destructive (re-)ordering - break; - } target = candidate; my_diff += (0.2 * child.estimate - 0.2 * other.estimate); if (candidate == 0) { // the first iterator produces its own in-flow my_diff += (0.2 * child.estimate - 0.2 * other.estimate); } + // note that 'my_diff' might overestimate the cost + // (underestimate the benefit) of inserting 'child' before + // 'other' if it leads to 'other' becoming + // non-strict. This will also leave 'other' in a + // potentially unoptimal location. } if (my_diff < best_diff) { best_diff = my_diff; @@ -264,19 +259,6 @@ auto select_forced_strict_and_child_impl(auto adapter, const auto &children, siz return std::make_tuple(best_idx, best_target, best_diff); } -auto select_forced_strict_and_child(auto adapter, const auto &children, size_t first) { - auto [idx, target, diff] = select_forced_strict_and_child_impl(adapter, children, first, std::false_type{}, std::false_type{}); - return std::make_pair(idx, diff); -} - -auto select_forced_strict_and_child_with_order(auto adapter, const auto &children, size_t first) { - return select_forced_strict_and_child_impl(adapter, children, first, std::true_type{}, std::false_type{}); -} - -auto select_forced_strict_and_child_with_destructive_order(auto adapter, const auto &children, size_t first) { - return select_forced_strict_and_child_impl(adapter, children, first, std::true_type{}, std::true_type{}); -} - } // flow template <typename FLOW> |