consolidate solutions into a single heuristic algorithm

use error relative to minimal cost rather than error potential
author: Håvard Pettersen <havardpe@yahooinc.com> 2024-04-09 13:55:10 +0000
committer: Håvard Pettersen <havardpe@yahooinc.com> 2024-04-10 12:39:47 +0000
commit: ceccf4c1a0cc1767480dc6d9303651c37c2222a1 (patch)
tree: 2e0ca2ffbec02e2392b5880a38c23e4336da6d47
parent: 6ef55fa1ccf7e11655aba4d2f25e311620c3e2e2 (diff)
2 files changed, 18 insertions, 86 deletions
diff --git a/searchlib/src/tests/queryeval/flow/queryeval_flow_test.cpp b/searchlib/src/tests/queryeval/flow/queryeval_flow_test.cpp
index 9d03ad2657c..5eb3a2ebf47 100644
--- a/searchlib/src/tests/queryeval/flow/queryeval_flow_test.cpp
+++ b/searchlib/src/tests/queryeval/flow/queryeval_flow_test.cpp
@@ -7,6 +7,7 @@
 
 constexpr size_t loop_cnt = 64;
 constexpr size_t max_work = 1; // 500'000'000;
+constexpr bool dump_unexpected = false;
 constexpr bool verbose = false;
 
 using namespace search::queryeval;
@@ -515,10 +516,7 @@ void test_strict_AND_sort_strategy(auto my_sort) {
                          };
             each_perm(data, check);
             double rel_err = 0.0;
-            double cost_range = (max_cost - min_cost);
-            if (cost_range > 1e-9) {
-                rel_err = (est_cost - min_cost) / cost_range;
-            }
+            rel_err = (est_cost - min_cost) / min_cost;
             if (rel_err > max_rel_err) {
                 max_rel_err = rel_err;
                 my_worst_order = my_order;
@@ -526,7 +524,7 @@ void test_strict_AND_sort_strategy(auto my_sort) {
             }
             sum_rel_err += rel_err;
             errs.push_back(rel_err);
-            if (verbose && !verify_order(best_order)) {
+            if (dump_unexpected && !verify_order(best_order)) {
                 fprintf(stderr, "  BEST ORDER IS UNEXPECTED:\n");
                 dump_flow(best_order, best_order);
                 fprintf(stderr, "  UNEXPECTED case, my_order:\n");
@@ -551,60 +549,12 @@ TEST(FlowTest, strict_and_with_allow_force_strict_basic_order) {
     test_strict_AND_sort_strategy(my_sort);
 }
 
-TEST(FlowTest, strict_and_with_allow_force_strict_incremental_strict_selection) {
-    auto my_sort = [](auto &data) {
-                       AndFlow::sort(data, true);
-                       for (size_t next = 1; (next + 1) < data.size(); ++next) {
-                           auto [idx, score] = flow::select_forced_strict_and_child(flow::DirectAdapter(), data, next);
-                           if (score >= 0.0) {
-                               break;
-                           }
-                           auto pos = data.begin() + idx;
-                           std::rotate(data.begin() + next, pos, pos + 1);
-                       }
-                   };
-    test_strict_AND_sort_strategy(my_sort);
-}
-
-TEST(FlowTest, strict_and_with_allow_force_strict_incremental_strict_selection_with_strict_re_sorting) {
-    auto my_sort = [](auto &data) {
-                       AndFlow::sort(data, true);
-                       size_t strict_cnt = 1;
-                       for (; strict_cnt < data.size(); ++strict_cnt) {
-                           auto [idx, score] = flow::select_forced_strict_and_child(flow::DirectAdapter(), data, strict_cnt);
-                           if (score >= 0.0) {
-                               break;
-                           }
-                           auto pos = data.begin() + idx;
-                           std::rotate(data.begin() + strict_cnt, pos, pos + 1);
-                       }
-                       std::sort(data.begin(), data.begin() + strict_cnt,
-                                 [](const auto &a, const auto &b){ return (a.estimate < b.estimate); });
-                   };
-    test_strict_AND_sort_strategy(my_sort);
-}
-
-TEST(FlowTest, strict_and_with_allow_force_strict_incremental_strict_selection_with_order) {
-    auto my_sort = [](auto &data) {
-                       AndFlow::sort(data, true);
-                       for (size_t next = 1; next < data.size(); ++next) {
-                           auto [idx, target, score] = flow::select_forced_strict_and_child_with_order(flow::DirectAdapter(), data, next);
-                           if (score >= 0.0) {
-                               break;
-                           }
-                           auto pos = data.begin() + idx;
-                           std::rotate(data.begin() + target, pos, pos + 1);
-                       }
-                   };
-    test_strict_AND_sort_strategy(my_sort);
-}
-
-TEST(FlowTest, strict_and_with_allow_force_strict_incremental_strict_selection_with_destructive_order) {
+TEST(FlowTest, strict_and_with_allow_force_strict_incremental_strict_selection_destructive_order_max_3_extra_strict) {
     auto my_sort = [](auto &data) {
                        AndFlow::sort(data, true);
-                       for (size_t next = 1; next < data.size(); ++next) {
-                           auto [idx, target, score] = flow::select_forced_strict_and_child_with_destructive_order(flow::DirectAdapter(), data, next);
-                           if (score >= 0.0) {
+                       for (size_t next = 1; next <= 3 && next < data.size(); ++next) {
+                           auto [idx, target, diff] = flow::select_forced_strict_and_child(flow::DirectAdapter(), data, next);
+                           if (diff >= 0.0) {
                                break;
                            }
                            auto pos = data.begin() + idx;
diff --git a/searchlib/src/vespa/searchlib/queryeval/flow.h b/searchlib/src/vespa/searchlib/queryeval/flow.h
index b57e823f4d7..9ac79244755 100644
--- a/searchlib/src/vespa/searchlib/queryeval/flow.h
+++ b/searchlib/src/vespa/searchlib/queryeval/flow.h
@@ -214,44 +214,39 @@ size_t select_strict_and_child(auto adapter, const auto &children) {
     return best_idx;
 }
 
-auto select_forced_strict_and_child_impl(auto adapter, const auto &children, size_t first, auto ordered, auto destructive) {
-    auto est_until = [&](size_t limit)noexcept{
-                         double my_est = 1.0;
-                         for (size_t i = 0; i < limit; ++i) {
-                             my_est *= adapter.estimate(children[i]);
-                         }
-                         return my_est;
-                     };
-    double est = est_until(first);
+auto select_forced_strict_and_child(auto adapter, const auto &children, size_t first) {
+    double est = 1.0;
     double cost = 0.0;
     size_t best_idx = first;
     size_t best_target = first;
     double best_diff = std::numeric_limits<double>::max();
+    for (size_t i = 0; i < first; ++i) {
+        est *= adapter.estimate(children[i]);
+    }
     for (size_t idx = first; idx < children.size(); ++idx) {
         auto child = FlowStats::from(adapter, children[idx]);
         double child_abs_cost = est * child.cost;
         double forced_cost = forced_strict_cost(child, est);
         double my_diff = (forced_cost + child.estimate * cost) - (cost + child_abs_cost);
         size_t target = first;
-        while (ordered && target > 0) {
+        while (target > 0) {
             size_t candidate = target - 1;
             auto other = FlowStats::from(adapter, children[candidate]);
             if (other.estimate < child.estimate) {
                 // do not move past someone with lower estimate
                 break;
             }
-            if (!destructive && !should_force_strict(other, (est_until(candidate) * child.estimate))) {
-                // no not move past someone who will lose strictness,
-                // unless it is explicitly allowed by enabling
-                // destructive (re-)ordering
-                break;
-            }
             target = candidate;
             my_diff += (0.2 * child.estimate - 0.2 * other.estimate);
             if (candidate == 0) {
                 // the first iterator produces its own in-flow
                 my_diff += (0.2 * child.estimate - 0.2 * other.estimate);
             }
+            // note that 'my_diff' might overestimate the cost
+            // (underestimate the benefit) of inserting 'child' before
+            // 'other' if it leads to 'other' becoming
+            // non-strict. This will also leave 'other' in a
+            // potentially unoptimal location.
         }
         if (my_diff < best_diff) {
             best_diff = my_diff;
@@ -264,19 +259,6 @@ auto select_forced_strict_and_child_impl(auto adapter, const auto &children, siz
     return std::make_tuple(best_idx, best_target, best_diff);
 }
 
-auto select_forced_strict_and_child(auto adapter, const auto &children, size_t first) {
-    auto [idx, target, diff] = select_forced_strict_and_child_impl(adapter, children, first, std::false_type{}, std::false_type{});
-    return std::make_pair(idx, diff);
-}
-
-auto select_forced_strict_and_child_with_order(auto adapter, const auto &children, size_t first) {
-    return select_forced_strict_and_child_impl(adapter, children, first, std::true_type{}, std::false_type{});
-}
-
-auto select_forced_strict_and_child_with_destructive_order(auto adapter, const auto &children, size_t first) {
-    return select_forced_strict_and_child_impl(adapter, children, first, std::true_type{}, std::true_type{});
-}
-
 } // flow
 
 template <typename FLOW>
author	Håvard Pettersen <havardpe@yahooinc.com>	2024-04-09 13:55:10 +0000
committer	Håvard Pettersen <havardpe@yahooinc.com>	2024-04-10 12:39:47 +0000
commit	ceccf4c1a0cc1767480dc6d9303651c37c2222a1 (patch)
tree	2e0ca2ffbec02e2392b5880a38c23e4336da6d47
parent	6ef55fa1ccf7e11655aba4d2f25e311620c3e2e2 (diff)