From 4b4e101654caaf137787c090b3c9036b18b604d8 Mon Sep 17 00:00:00 2001
From: Håvard Pettersen <havardpe@yahooinc.com>
Date: Thu, 4 Apr 2024 09:27:35 +0000
Subject: more experiments

also pruned some of the less promising alternatives
---
 .../tests/queryeval/flow/queryeval_flow_test.cpp   | 210 +++++++++++++--------
 searchlib/src/vespa/searchlib/queryeval/flow.h     |  44 +++++
 2 files changed, 171 insertions(+), 83 deletions(-)

(limited to 'searchlib')
diff --git a/searchlib/src/tests/queryeval/flow/queryeval_flow_test.cpp b/searchlib/src/tests/queryeval/flow/queryeval_flow_test.cpp
index 70c52f1d6c2..181c591ab20 100644
--- a/searchlib/src/tests/queryeval/flow/queryeval_flow_test.cpp
+++ b/searchlib/src/tests/queryeval/flow/queryeval_flow_test.cpp
@@ -6,9 +6,25 @@
 #include <random>
 
 constexpr size_t loop_cnt = 64;
+constexpr size_t max_work = 1; // 500'000'000;
+constexpr bool verbose = false;
 
 using namespace search::queryeval;
 
+// at what in-flow (non-strict) rate is it equally cheap to be (forced) strict and non-strict
+double strict_crossover(const FlowStats &stats) {
+    return (stats.strict_cost - 0.2 * stats.estimate) / (stats.cost - 0.2);
+}
+
+// how much cost do we save by having an iterator strict vs non-strict with the given in-flow
+double strict_gain(const FlowStats &stats, InFlow in_flow) {
+    if (in_flow.strict()) {
+        return stats.cost - stats.strict_cost;
+    } else {
+        return (in_flow.rate() * stats.cost) - flow::forced_strict_cost(stats, in_flow.rate());
+    }
+}
+
 template <typename FLOW>
 double ordered_cost_of(const std::vector<FlowStats> &data, InFlow in_flow, bool allow_force_strict) {
     return flow::ordered_cost_of(flow::DirectAdapter(), data, FLOW(in_flow), allow_force_strict);
@@ -30,13 +46,14 @@ double dual_ordered_cost_of(const std::vector<FlowStats> &data, InFlow in_flow,
 
 std::vector<FlowStats> gen_data(size_t size) {
     static std::mt19937 gen;
-    static std::uniform_real_distribution<double>    estimate(0.1,  0.9);
+    static std::uniform_real_distribution<double>    estimate(0.0,  1.0);
     static std::uniform_real_distribution<double>        cost(1.0, 10.0);
-    static std::uniform_real_distribution<double> strict_cost(0.1,  5.0);
     std::vector<FlowStats> result;
     result.reserve(size);
     for (size_t i = 0; i < size; ++i) {
-        result.emplace_back(estimate(gen), cost(gen), strict_cost(gen));
+        double est = estimate(gen);
+        std::uniform_real_distribution<double> strict_cost(est,  5.0);
+        result.emplace_back(est, cost(gen), strict_cost(gen));
     }
     if (size == 0) {
         gen.seed(gen.default_seed);
@@ -71,6 +88,15 @@ void each_perm(std::vector<T> &data, F fun) {
     each_perm(data, data.size(), fun);
 }
 
+TEST(FlowTest, strict_crossover_and_gain) {
+    auto list = gen_data(64);
+    for (const auto &item: list) {
+        double limit = strict_crossover(item);
+        double gain = strict_gain(item, limit);
+        EXPECT_NEAR(gain, 0.0, 1e-9);
+    }
+}
+
 TEST(FlowTest, perm_test) {
     std::set<std::vector<int>> seen;
     std::vector<int> data = {1,2,3,4,5};
@@ -337,7 +363,7 @@ TEST(FlowTest, optimal_and_flow) {
                              max_cost = std::max(max_cost, my_cost);
                          };
             each_perm(data, check);
-            if (loop_cnt < 1024 || i % 1024 == 0) {
+            if (verbose && (loop_cnt < 1024 || i % 1024 == 0)) {
                 fprintf(stderr, "  AND cost(%zu,%s): min: %g, max: %g, factor: %g\n",
                         i, strict ? "strict" : "non-strict", min_cost, max_cost, max_cost / min_cost);
             }
@@ -360,7 +386,7 @@ TEST(FlowTest, optimal_or_flow) {
                              max_cost = std::max(max_cost, my_cost);
                          };
             each_perm(data, check);
-            if (loop_cnt < 1024 || i % 1024 == 0) {
+            if (verbose && (loop_cnt < 1024 || i % 1024 == 0)) {
                 fprintf(stderr, "  OR cost(%zu,%s): min: %g, max: %g, factor: %g\n",
                         i, strict ? "strict" : "non-strict", min_cost, max_cost, max_cost / min_cost);
             }
@@ -386,7 +412,7 @@ TEST(FlowTest, optimal_and_not_flow) {
                              }
                          };
             each_perm(data, check);
-            if (loop_cnt < 1024 || i % 1024 == 0) {
+            if (verbose && (loop_cnt < 1024 || i % 1024 == 0)) {
                 fprintf(stderr, "  ANDNOT cost(%zu,%s): min: %g, max: %g, factor: %g\n",
                         i, strict ? "strict" : "non-strict", min_cost, max_cost, max_cost / min_cost);
             }
@@ -396,10 +422,69 @@ TEST(FlowTest, optimal_and_not_flow) {
 
 void test_strict_AND_sort_strategy(auto my_sort) {
     re_seed();
-    // size_t max_work = 500'000'000;
-    size_t max_work = 1;
+    const char *tags = "ABCDEFGHI";
     for (size_t child_cnt: {2, 3, 5, 7, 9}) {
         size_t cnt = std::max(size_t(10), std::min(size_t(128'000), (max_work / count_perms(child_cnt))));
+        if (verbose) {
+            fprintf(stderr, "AND/%zu: checking all permutations for %zu random cases\n", child_cnt, cnt);
+        }
+        std::vector<FlowStats> my_worst_order;
+        std::vector<FlowStats> best_worst_order;
+        auto get_tag = [&](const FlowStats &stats, const std::vector<FlowStats> &ref)noexcept->char{
+                           for (size_t i = 0; i < ref.size(); ++i) {
+                               if (stats == ref[i]) {
+                                   return tags[i];
+                               }
+                           }
+                           return 'X';
+                       };
+        auto dump_flow = [&](const std::vector<FlowStats> &list, const std::vector<FlowStats> &ref){
+                             double total_cost = 0.0;
+                             auto flow = AndFlow(true);
+                             for (const auto &item: list) {
+                                 auto in_flow = InFlow(flow.strict(), flow.flow());
+                                 bool strict = flow.strict() || flow::should_force_strict(item, flow.flow());
+                                 double child_cost = flow::min_child_cost(in_flow, item, true);
+                                 fprintf(stderr, "    %10f -> %c (estimate: %10f, cost: %10f, strict_cost: %10f, cross: %10f, gain: %10f, gain@est: %10f) cost: %10f%s\n",
+                                         flow.flow(), get_tag(item, ref), item.estimate, item.cost, item.strict_cost, strict_crossover(item), strict_gain(item, in_flow), strict_gain(item, item.estimate),
+                                         child_cost, strict ? " STRICT" : "");
+                                 flow.add(item.estimate);
+                                 total_cost += child_cost;
+                             }
+                             EXPECT_EQ(total_cost, ordered_cost_of<AndFlow>(list, true, true));
+                             fprintf(stderr, "    total cost: %10f\n", total_cost);
+                         };
+        auto verify_order = [&](const std::vector<FlowStats> &list){
+                                // check the following constraints for the given order:
+                                //
+                                // (1) never strict after non-strict
+                                // (2) strict items are sorted by estimate
+                                // (3) non-strict items are sorted by max(reduction/cost)
+                                auto flow = AndFlow(true);
+                                size_t strict_limit = list.size();
+                                auto my_cmp = flow::MinAndCost(flow::DirectAdapter());
+                                for (size_t i = 0; i < list.size(); ++i) {
+                                    const auto &item = list[i];
+                                    if (i > 0) {
+                                        const auto &prev = list[i-1];
+                                        bool strict = flow::should_force_strict(item, flow.flow());
+                                        if (strict) {
+                                            if (i > strict_limit) {
+                                                return false; // (1)
+                                            } else if (item.estimate < prev.estimate) {
+                                                return false; // (2)
+                                            }
+                                        } else {
+                                            strict_limit = std::min(i, strict_limit);
+                                            if ((strict_limit < i) && my_cmp(item, prev)) {
+                                                return false; // (3)
+                                            }
+                                        }
+                                    }
+                                    flow.add(item.estimate);
+                                }
+                                return true;
+                            };
         double max_rel_err = 0.0;
         double sum_rel_err = 0.0;
         std::vector<double> errs;
@@ -414,13 +499,18 @@ void test_strict_AND_sort_strategy(auto my_sort) {
         for (size_t i = 0; i < cnt; ++i) {
             auto data = gen_data(child_cnt);
             double ref_est = AndFlow::estimate_of(data);
-            double min_cost = 1'000'000.0;
-            double max_cost = 0.0;
             my_sort(data);
+            auto my_order = data;
+            auto best_order = my_order;
             double est_cost = ordered_cost_of<AndFlow>(data, true, true);
+            double min_cost = est_cost;
+            double max_cost = est_cost;
             auto check = [&](const std::vector<FlowStats> &my_data) noexcept {
                              double my_cost = ordered_cost_of<AndFlow>(my_data, true, true);
-                             min_cost = std::min(min_cost, my_cost);
+                             if (my_cost < min_cost) {
+                                 min_cost = my_cost;
+                                 best_order = my_data;
+                             }
                              max_cost = std::max(max_cost, my_cost);
                          };
             each_perm(data, check);
@@ -429,13 +519,29 @@ void test_strict_AND_sort_strategy(auto my_sort) {
             if (cost_range > 1e-9) {
                 rel_err = (est_cost - min_cost) / cost_range;
             }
-            max_rel_err = std::max(max_rel_err, rel_err);
+            if (rel_err > max_rel_err) {
+                max_rel_err = rel_err;
+                my_worst_order = my_order;
+                best_worst_order = best_order;
+            }
             sum_rel_err += rel_err;
             errs.push_back(rel_err);
+            if (verbose && !verify_order(best_order)) {
+                fprintf(stderr, "  BEST ORDER IS UNEXPECTED:\n");
+                dump_flow(best_order, best_order);
+                fprintf(stderr, "  UNEXPECTED case, my_order:\n");
+                dump_flow(my_order, best_order);
+            }
             EXPECT_NEAR(ref_est, AndFlow::estimate_of(data), 1e-9);
         }
         std::sort(errs.begin(), errs.end());
-        fprintf(stderr, "  AND/%zu: avg: %10f, p90: %10f, p99: %10f, p99.9: %10f, max: %10f\n",
+        if (verbose && !my_worst_order.empty()) {
+            fprintf(stderr, "  worst case, best order:\n");
+            dump_flow(best_worst_order, best_worst_order);
+            fprintf(stderr, "  worst case, my order:\n");
+            dump_flow(my_worst_order, best_worst_order);
+        }
+        fprintf(stderr, "AND/%zu: avg: %10f, p90: %10f, p99: %10f, p99.9: %10f, max: %10f\n",
                 child_cnt, (sum_rel_err / cnt), p(0.9), p(0.99), p(0.999), max_rel_err);
     }
 }
@@ -445,54 +551,6 @@ TEST(FlowTest, strict_and_with_allow_force_strict_basic_order) {
     test_strict_AND_sort_strategy(my_sort);
 }
 
-void greedy_sort(std::vector<FlowStats> &data, auto flow, auto score_of) {
-    InFlow in_flow = InFlow(flow.strict(), flow.flow());
-    for (size_t next = 0; (next + 1) < data.size(); ++next) {
-        size_t best_idx = next;
-        double best_score = score_of(in_flow, data[next]);
-        for (size_t i = next + 1; i < data.size(); ++i) {
-            double score = score_of(in_flow, data[i]);
-            if (score > best_score) {
-                best_score = score;
-                best_idx = i;
-            }
-        }
-        std::swap(data[next], data[best_idx]);
-        flow.add(data[next].estimate);
-    }
-}
-
-TEST(FlowTest, strict_and_with_allow_force_strict_greedy_reduction_efficiency) {
-    auto score_of = [](InFlow in_flow, const FlowStats &stats) {
-                        double child_cost = flow::min_child_cost(in_flow, stats, true);
-                        double reduction = (1.0 - stats.estimate);
-                        return reduction / child_cost;
-                    };
-    auto my_sort = [&](auto &data){ greedy_sort(data, AndFlow(true), score_of); };
-    test_strict_AND_sort_strategy(my_sort);
-}
-
-TEST(FlowTest, strict_and_with_allow_force_strict_partitioning) {
-    auto my_sort = [](auto &data) {
-                       AndFlow::sort(data, false);
-                       double rate = 1.0;
-                       size_t a = 0;
-                       for (size_t b = 0; b < data.size(); ++b) {
-                           double est = data[b].estimate;
-                           if (flow::should_force_strict(data[b], rate)) {
-                               auto pos = data.begin() + b;
-                               std::rotate(data.begin() + a, pos, pos + 1);
-                               ++a;
-                           }
-                           rate *= est;
-                       }
-                       if (a == 0) { // need at least 1 strict child
-                           AndFlow::sort(data, true);
-                       }
-                   };
-    test_strict_AND_sort_strategy(my_sort);
-}
-
 TEST(FlowTest, strict_and_with_allow_force_strict_incremental_strict_selection) {
     auto my_sort = [](auto &data) {
                        AndFlow::sort(data, true);
@@ -541,30 +599,16 @@ TEST(FlowTest, strict_and_with_allow_force_strict_incremental_strict_selection_w
     test_strict_AND_sort_strategy(my_sort);
 }
 
-TEST(FlowTest, strict_and_with_allow_force_strict_assume_strict_then_partition_and_re_sort_non_strict) {
+TEST(FlowTest, strict_and_with_allow_force_strict_incremental_strict_selection_with_destructive_order) {
     auto my_sort = [](auto &data) {
-                       std::sort(data.begin(), data.end(),
-                                 [](const auto &a, const auto &b){ return (a.estimate < b.estimate); });
-                       double est = 1.0;
-                       size_t last = data.size();
-                       auto best_strict = [&](size_t i)noexcept{
-                                              if (i == 0) {
-                                                  return data[i].strict_cost < data[i].cost;
-                                              } else {
-                                                  return flow::should_force_strict(data[i], est);
-                                              }
-                                          };
-                       for (size_t i = 0; i < last; ++i) {
-                           while (i < last && !best_strict(i)) {
-                               std::rotate(data.begin()+i, data.begin()+i+1, data.begin()+last);
-                               --last;
+                       AndFlow::sort(data, true);
+                       for (size_t next = 1; next < data.size(); ++next) {
+                           auto [idx, target, score] = flow::select_forced_strict_and_child_with_destructive_order(flow::DirectAdapter(), data, next);
+                           if (score >= 0.0) {
+                               break;
                            }
-                           est *= data[i].estimate;
-                       }
-                       if (last > 0) {
-                           std::sort(data.begin() + last, data.end(), flow::MinAndCost(flow::DirectAdapter()));
-                       } else {
-                           AndFlow::sort(data, true);
+                           auto pos = data.begin() + idx;
+                           std::rotate(data.begin() + target, pos, pos + 1);
                        }
                    };
     test_strict_AND_sort_strategy(my_sort);
diff --git a/searchlib/src/vespa/searchlib/queryeval/flow.h b/searchlib/src/vespa/searchlib/queryeval/flow.h
index bc76856dac1..2868fbab49d 100644
--- a/searchlib/src/vespa/searchlib/queryeval/flow.h
+++ b/searchlib/src/vespa/searchlib/queryeval/flow.h
@@ -285,6 +285,50 @@ auto select_forced_strict_and_child_with_order(auto adapter, const auto &childre
     return std::make_tuple(best_idx, best_target, best_diff);
 }
 
+auto select_forced_strict_and_child_with_destructive_order(auto adapter, const auto &children, size_t first) {
+    auto est_until = [&](size_t limit)noexcept{
+                         double my_est = 1.0;
+                         for (size_t i = 0; i < limit; ++i) {
+                             my_est *= adapter.estimate(children[i]);
+                         }
+                         return my_est;
+                     };
+    double est = est_until(first);
+    double cost = 0.0;
+    size_t best_idx = 0;
+    size_t best_target = 0;
+    double best_diff = std::numeric_limits<double>::max();
+    for (size_t idx = first; idx < children.size(); ++idx) {
+        auto child = FlowStats::from(adapter, children[idx]);
+        double child_abs_cost = est * child.cost;
+        double forced_cost = forced_strict_cost(child, est);
+        double my_diff = (forced_cost + child.estimate * cost) - (cost + child_abs_cost);
+        size_t target = first;
+        while (target > 0) {
+            size_t candidate = target - 1;
+            auto other = FlowStats::from(adapter, children[candidate]);
+            if (other.estimate < child.estimate) {
+                // do not move past someone with lower estimate
+                break;
+            }
+            target = candidate;
+            my_diff += (0.2 * child.estimate - 0.2 * other.estimate);
+            if (candidate == 0) {
+                // the first iterator produces its own in-flow
+                my_diff += (0.2 * child.estimate - 0.2 * other.estimate);
+            }
+        }
+        if (my_diff < best_diff) {
+            best_diff = my_diff;
+            best_idx = idx;
+            best_target = target;
+        }
+        cost += child_abs_cost;
+        est *= child.estimate;
+    }
+    return std::make_tuple(best_idx, best_target, best_diff);
+}
+
 } // flow
 
 template <typename FLOW>
-- 
cgit v1.2.3


From 2d5c6001950b472def902553528a1313f6418cac Mon Sep 17 00:00:00 2001
From: Håvard Pettersen <havardpe@yahooinc.com>
Date: Mon, 8 Apr 2024 09:26:02 +0000
Subject: common code for force strict child selection

---
 searchlib/src/vespa/searchlib/queryeval/flow.h | 88 ++++++--------------------
 1 file changed, 18 insertions(+), 70 deletions(-)

(limited to 'searchlib')

diff --git a/searchlib/src/vespa/searchlib/queryeval/flow.h b/searchlib/src/vespa/searchlib/queryeval/flow.h
index 2868fbab49d..b57e823f4d7 100644
--- a/searchlib/src/vespa/searchlib/queryeval/flow.h
+++ b/searchlib/src/vespa/searchlib/queryeval/flow.h
@@ -214,30 +214,7 @@ size_t select_strict_and_child(auto adapter, const auto &children) {
     return best_idx;
 }
 
-auto select_forced_strict_and_child(auto adapter, const auto &children, size_t first) {
-    double est = 1.0;
-    double cost = 0.0;
-    size_t best_idx = 0;
-    double best_diff = std::numeric_limits<double>::max();
-    for (size_t idx = 0; idx < first && idx < children.size(); ++idx) {
-        est *= adapter.estimate(children[idx]);
-    }
-    for (size_t idx = first; idx < children.size(); ++idx) {
-        auto child = FlowStats::from(adapter, children[idx]);
-        double child_abs_cost = est * child.cost;
-        double forced_cost = forced_strict_cost(child, est);
-        double my_diff = (forced_cost + child.estimate * cost) - (cost + child_abs_cost);
-        if (my_diff < best_diff) {
-            best_diff = my_diff;
-            best_idx = idx;
-        }
-        cost += child_abs_cost;
-        est *= child.estimate;
-    }
-    return std::make_pair(best_idx, best_diff);
-}
-
-auto select_forced_strict_and_child_with_order(auto adapter, const auto &children, size_t first) {
+auto select_forced_strict_and_child_impl(auto adapter, const auto &children, size_t first, auto ordered, auto destructive) {
     auto est_until = [&](size_t limit)noexcept{
                          double my_est = 1.0;
                          for (size_t i = 0; i < limit; ++i) {
@@ -247,8 +224,8 @@ auto select_forced_strict_and_child_with_order(auto adapter, const auto &childre
                      };
     double est = est_until(first);
     double cost = 0.0;
-    size_t best_idx = 0;
-    size_t best_target = 0;
+    size_t best_idx = first;
+    size_t best_target = first;
     double best_diff = std::numeric_limits<double>::max();
     for (size_t idx = first; idx < children.size(); ++idx) {
         auto child = FlowStats::from(adapter, children[idx]);
@@ -256,15 +233,17 @@ auto select_forced_strict_and_child_with_order(auto adapter, const auto &childre
         double forced_cost = forced_strict_cost(child, est);
         double my_diff = (forced_cost + child.estimate * cost) - (cost + child_abs_cost);
         size_t target = first;
-        while (target > 0) {
+        while (ordered && target > 0) {
             size_t candidate = target - 1;
             auto other = FlowStats::from(adapter, children[candidate]);
             if (other.estimate < child.estimate) {
                 // do not move past someone with lower estimate
                 break;
             }
-            if (!should_force_strict(other, (est_until(candidate) * child.estimate))) {
-                // no not move past someone who will lose strictness
+            if (!destructive && !should_force_strict(other, (est_until(candidate) * child.estimate))) {
+                // no not move past someone who will lose strictness,
+                // unless it is explicitly allowed by enabling
+                // destructive (re-)ordering
                 break;
             }
             target = candidate;
@@ -285,48 +264,17 @@ auto select_forced_strict_and_child_with_order(auto adapter, const auto &childre
     return std::make_tuple(best_idx, best_target, best_diff);
 }
 
+auto select_forced_strict_and_child(auto adapter, const auto &children, size_t first) {
+    auto [idx, target, diff] = select_forced_strict_and_child_impl(adapter, children, first, std::false_type{}, std::false_type{});
+    return std::make_pair(idx, diff);
+}
+
+auto select_forced_strict_and_child_with_order(auto adapter, const auto &children, size_t first) {
+    return select_forced_strict_and_child_impl(adapter, children, first, std::true_type{}, std::false_type{});
+}
+
 auto select_forced_strict_and_child_with_destructive_order(auto adapter, const auto &children, size_t first) {
-    auto est_until = [&](size_t limit)noexcept{
-                         double my_est = 1.0;
-                         for (size_t i = 0; i < limit; ++i) {
-                             my_est *= adapter.estimate(children[i]);
-                         }
-                         return my_est;
-                     };
-    double est = est_until(first);
-    double cost = 0.0;
-    size_t best_idx = 0;
-    size_t best_target = 0;
-    double best_diff = std::numeric_limits<double>::max();
-    for (size_t idx = first; idx < children.size(); ++idx) {
-        auto child = FlowStats::from(adapter, children[idx]);
-        double child_abs_cost = est * child.cost;
-        double forced_cost = forced_strict_cost(child, est);
-        double my_diff = (forced_cost + child.estimate * cost) - (cost + child_abs_cost);
-        size_t target = first;
-        while (target > 0) {
-            size_t candidate = target - 1;
-            auto other = FlowStats::from(adapter, children[candidate]);
-            if (other.estimate < child.estimate) {
-                // do not move past someone with lower estimate
-                break;
-            }
-            target = candidate;
-            my_diff += (0.2 * child.estimate - 0.2 * other.estimate);
-            if (candidate == 0) {
-                // the first iterator produces its own in-flow
-                my_diff += (0.2 * child.estimate - 0.2 * other.estimate);
-            }
-        }
-        if (my_diff < best_diff) {
-            best_diff = my_diff;
-            best_idx = idx;
-            best_target = target;
-        }
-        cost += child_abs_cost;
-        est *= child.estimate;
-    }
-    return std::make_tuple(best_idx, best_target, best_diff);
+    return select_forced_strict_and_child_impl(adapter, children, first, std::true_type{}, std::true_type{});
 }
 
 } // flow
-- 
cgit v1.2.3