Merge pull request #30903 from vespa-engine/havardpe/andflow-strictness-reorder

add code to AndFlow that can perform additional incremental reordering
author: Geir Storli <geirst@vespa.ai> 2024-04-12 18:21:56 +0200
committer: GitHub <noreply@github.com> 2024-04-12 18:21:56 +0200
commit: ff368acc961eca78e681fc33400c9ad93e95895a (patch)
tree: 5c043b3bc83a2f60c1918939ce82fc8a0cddacf4 /searchlib
parent: 2996ec56a6e4b1bb74a0555ff33bedf0912bd892 (diff)
parent: 64c62c25b9f2de5d131852f042b6cd8999d5e3f7 (diff)
2 files changed, 146 insertions, 148 deletions
diff --git a/searchlib/src/tests/queryeval/flow/queryeval_flow_test.cpp b/searchlib/src/tests/queryeval/flow/queryeval_flow_test.cpp
index 5eb3a2ebf47..3992fa007f9 100644
--- a/searchlib/src/tests/queryeval/flow/queryeval_flow_test.cpp
+++ b/searchlib/src/tests/queryeval/flow/queryeval_flow_test.cpp
@@ -421,147 +421,146 @@ TEST(FlowTest, optimal_and_not_flow) {
     }
 }
 
-void test_strict_AND_sort_strategy(auto my_sort) {
-    re_seed();
+void test_AND_sort_strategy(auto my_sort) {
     const char *tags = "ABCDEFGHI";
-    for (size_t child_cnt: {2, 3, 5, 7, 9}) {
-        size_t cnt = std::max(size_t(10), std::min(size_t(128'000), (max_work / count_perms(child_cnt))));
-        if (verbose) {
-            fprintf(stderr, "AND/%zu: checking all permutations for %zu random cases\n", child_cnt, cnt);
-        }
-        std::vector<FlowStats> my_worst_order;
-        std::vector<FlowStats> best_worst_order;
-        auto get_tag = [&](const FlowStats &stats, const std::vector<FlowStats> &ref)noexcept->char{
-                           for (size_t i = 0; i < ref.size(); ++i) {
-                               if (stats == ref[i]) {
-                                   return tags[i];
+    for (InFlow in_flow: {InFlow(true), InFlow(0.5)}) {
+        re_seed();
+        for (size_t child_cnt: {2, 3, 5, 7, 9}) {
+            size_t cnt = std::max(size_t(10), std::min(size_t(128'000), (max_work / count_perms(child_cnt))));
+            if (verbose) {
+                fprintf(stderr, "%6f %s -> AND/%zu: checking all permutations for %zu random cases\n",
+                        in_flow.rate(), in_flow.strict() ? "S" : " ", child_cnt, cnt);
+            }
+            std::vector<FlowStats> my_worst_order;
+            std::vector<FlowStats> best_worst_order;
+            auto get_tag = [&](const FlowStats &stats, const std::vector<FlowStats> &ref)noexcept->char{
+                               for (size_t i = 0; i < ref.size(); ++i) {
+                                   if (stats == ref[i]) {
+                                       return tags[i];
+                                   }
                                }
-                           }
-                           return 'X';
-                       };
-        auto dump_flow = [&](const std::vector<FlowStats> &list, const std::vector<FlowStats> &ref){
-                             double total_cost = 0.0;
-                             auto flow = AndFlow(true);
-                             for (const auto &item: list) {
-                                 auto in_flow = InFlow(flow.strict(), flow.flow());
-                                 bool strict = flow.strict() || flow::should_force_strict(item, flow.flow());
-                                 double child_cost = flow::min_child_cost(in_flow, item, true);
-                                 fprintf(stderr, "    %10f -> %c (estimate: %10f, cost: %10f, strict_cost: %10f, cross: %10f, gain: %10f, gain@est: %10f) cost: %10f%s\n",
-                                         flow.flow(), get_tag(item, ref), item.estimate, item.cost, item.strict_cost, strict_crossover(item), strict_gain(item, in_flow), strict_gain(item, item.estimate),
-                                         child_cost, strict ? " STRICT" : "");
-                                 flow.add(item.estimate);
-                                 total_cost += child_cost;
-                             }
-                             EXPECT_DOUBLE_EQ(total_cost, ordered_cost_of<AndFlow>(list, true, true));
-                             fprintf(stderr, "    total cost: %10f\n", total_cost);
-                         };
-        auto verify_order = [&](const std::vector<FlowStats> &list){
-                                // check the following constraints for the given order:
-                                //
-                                // (1) never strict after non-strict
-                                // (2) strict items are sorted by estimate
-                                // (3) non-strict items are sorted by max(reduction/cost)
-                                auto flow = AndFlow(true);
-                                size_t strict_limit = list.size();
-                                auto my_cmp = flow::MinAndCost(flow::DirectAdapter());
-                                for (size_t i = 0; i < list.size(); ++i) {
-                                    const auto &item = list[i];
-                                    if (i > 0) {
-                                        const auto &prev = list[i-1];
-                                        bool strict = flow::should_force_strict(item, flow.flow());
+                               return 'X';
+                           };
+            auto dump_flow = [&](const std::vector<FlowStats> &list, const std::vector<FlowStats> &ref){
+                                 double total_cost = 0.0;
+                                 auto flow = AndFlow(in_flow);
+                                 for (const auto &item: list) {
+                                     auto child_flow = InFlow(flow.strict(), flow.flow());
+                                     bool strict = flow.strict() || flow::should_force_strict(item, flow.flow());
+                                     double child_cost = flow::min_child_cost(child_flow, item, true);
+                                     fprintf(stderr, "    %6f %s -> %c (estimate: %10f, cost: %10f, strict_cost: %10f, cross: %10f, gain: %10f, gain@est: %10f) cost: %10f%s\n",
+                                             flow.flow(), flow.strict() ? "S" : " ", get_tag(item, ref), item.estimate, item.cost, item.strict_cost, strict_crossover(item),
+                                             strict_gain(item, child_flow), strict_gain(item, item.estimate),
+                                             child_cost, strict ? " STRICT" : "");
+                                     flow.add(item.estimate);
+                                     total_cost += child_cost;
+                                 }
+                                 EXPECT_DOUBLE_EQ(total_cost, ordered_cost_of<AndFlow>(list, in_flow, true));
+                                 fprintf(stderr, "    total cost: %10f\n", total_cost);
+                             };
+            auto verify_order = [&](const std::vector<FlowStats> &list){
+                                    // check the following constraints for the given order:
+                                    //
+                                    // (1) never strict after non-strict
+                                    // (2) strict items are sorted by estimate
+                                    // (3) non-strict items are sorted by max(reduction/cost)
+                                    auto flow = AndFlow(in_flow);
+                                    auto my_cmp = flow::MinAndCost(flow::DirectAdapter());
+                                    size_t num_strict = 0;
+                                    size_t num_non_strict = 0;
+                                    FlowStats prev_strict(0.0, 0.0, 0.0);
+                                    FlowStats prev_non_strict(0.0, 0.0, 0.0);
+                                    for (const auto &item: list) {
+                                        bool strict = flow.strict() || flow::should_force_strict(item, flow.flow());
                                         if (strict) {
-                                            if (i > strict_limit) {
+                                            if (num_non_strict > 0) {
                                                 return false; // (1)
-                                            } else if (item.estimate < prev.estimate) {
+                                            } else if (item.estimate < prev_strict.estimate) {
                                                 return false; // (2)
                                             }
+                                            ++num_strict;
+                                            prev_strict = item;
                                         } else {
-                                            strict_limit = std::min(i, strict_limit);
-                                            if ((strict_limit < i) && my_cmp(item, prev)) {
+                                            if (my_cmp(item, prev_non_strict)) {
                                                 return false; // (3)
                                             }
+                                            ++num_non_strict;
+                                            prev_non_strict = item;
                                         }
+                                        flow.add(item.estimate);
                                     }
-                                    flow.add(item.estimate);
-                                }
-                                return true;
-                            };
-        double max_rel_err = 0.0;
-        double sum_rel_err = 0.0;
-        std::vector<double> errs;
-        errs.reserve(cnt);
-        auto p = [&](double arg){
-                     size_t idx = std::lround(arg * (errs.size() - 1));
-                     if (idx < errs.size()) {
-                         return errs[idx];
-                     }
-                     return errs.back();
-                 };
-        for (size_t i = 0; i < cnt; ++i) {
-            auto data = gen_data(child_cnt);
-            double ref_est = AndFlow::estimate_of(data);
-            my_sort(data);
-            auto my_order = data;
-            auto best_order = my_order;
-            double est_cost = ordered_cost_of<AndFlow>(data, true, true);
-            double min_cost = est_cost;
-            double max_cost = est_cost;
-            auto check = [&](const std::vector<FlowStats> &my_data) noexcept {
-                             double my_cost = ordered_cost_of<AndFlow>(my_data, true, true);
-                             if (my_cost < min_cost) {
-                                 min_cost = my_cost;
-                                 best_order = my_data;
-                             }
-                             max_cost = std::max(max_cost, my_cost);
-                         };
-            each_perm(data, check);
-            double rel_err = 0.0;
-            rel_err = (est_cost - min_cost) / min_cost;
-            if (rel_err > max_rel_err) {
-                max_rel_err = rel_err;
-                my_worst_order = my_order;
-                best_worst_order = best_order;
+                                    return true;
+                                };
+            double max_rel_err = 0.0;
+            double sum_rel_err = 0.0;
+            std::vector<double> errs;
+            errs.reserve(cnt);
+            auto p = [&](double arg){
+                         size_t idx = std::lround(arg * (errs.size() - 1));
+                         if (idx < errs.size()) {
+                             return errs[idx];
+                         }
+                         return errs.back();
+                     };
+            for (size_t i = 0; i < cnt; ++i) {
+                auto data = gen_data(child_cnt);
+                double ref_est = AndFlow::estimate_of(data);
+                my_sort(data, in_flow);
+                auto my_order = data;
+                auto best_order = my_order;
+                double est_cost = ordered_cost_of<AndFlow>(data, in_flow, true);
+                double min_cost = est_cost;
+                double max_cost = est_cost;
+                auto check = [&](const std::vector<FlowStats> &my_data) noexcept {
+                                 double my_cost = ordered_cost_of<AndFlow>(my_data, in_flow, true);
+                                 if (my_cost < min_cost) {
+                                     min_cost = my_cost;
+                                     best_order = my_data;
+                                 }
+                                 max_cost = std::max(max_cost, my_cost);
+                             };
+                each_perm(data, check);
+                double rel_err = 0.0;
+                rel_err = (est_cost - min_cost) / min_cost;
+                if (rel_err > max_rel_err) {
+                    max_rel_err = rel_err;
+                    my_worst_order = my_order;
+                    best_worst_order = best_order;
+                }
+                sum_rel_err += rel_err;
+                errs.push_back(rel_err);
+                if (dump_unexpected && !verify_order(best_order)) {
+                    fprintf(stderr, "  BEST ORDER IS UNEXPECTED:\n");
+                    dump_flow(best_order, best_order);
+                    fprintf(stderr, "  UNEXPECTED case, my_order:\n");
+                    dump_flow(my_order, best_order);
+                }
+                EXPECT_NEAR(ref_est, AndFlow::estimate_of(data), 1e-9);
             }
-            sum_rel_err += rel_err;
-            errs.push_back(rel_err);
-            if (dump_unexpected && !verify_order(best_order)) {
-                fprintf(stderr, "  BEST ORDER IS UNEXPECTED:\n");
-                dump_flow(best_order, best_order);
-                fprintf(stderr, "  UNEXPECTED case, my_order:\n");
-                dump_flow(my_order, best_order);
+            std::sort(errs.begin(), errs.end());
+            if (verbose && !my_worst_order.empty()) {
+                fprintf(stderr, "  worst case, best order:\n");
+                dump_flow(best_worst_order, best_worst_order);
+                fprintf(stderr, "  worst case, my order:\n");
+                dump_flow(my_worst_order, best_worst_order);
             }
-            EXPECT_NEAR(ref_est, AndFlow::estimate_of(data), 1e-9);
-        }
-        std::sort(errs.begin(), errs.end());
-        if (verbose && !my_worst_order.empty()) {
-            fprintf(stderr, "  worst case, best order:\n");
-            dump_flow(best_worst_order, best_worst_order);
-            fprintf(stderr, "  worst case, my order:\n");
-            dump_flow(my_worst_order, best_worst_order);
+            fprintf(stderr, "%6f %s -> AND/%zu: avg: %10f, p90: %10f, p99: %10f, p99.9: %10f, max: %10f\n",
+                    in_flow.rate(), in_flow.strict() ? "S" : " ", child_cnt, (sum_rel_err / cnt), p(0.9), p(0.99), p(0.999), max_rel_err);
         }
-        fprintf(stderr, "AND/%zu: avg: %10f, p90: %10f, p99: %10f, p99.9: %10f, max: %10f\n",
-                child_cnt, (sum_rel_err / cnt), p(0.9), p(0.99), p(0.999), max_rel_err);
     }
 }
 
-TEST(FlowTest, strict_and_with_allow_force_strict_basic_order) {
-    auto my_sort = [](auto &data){ AndFlow::sort(data, true); };
-    test_strict_AND_sort_strategy(my_sort);
+TEST(FlowTest, and_with_allow_force_strict_basic_order) {
+    auto my_sort = [](auto &data, InFlow in_flow){ AndFlow::sort(data, in_flow.strict()); };
+    test_AND_sort_strategy(my_sort);
 }
 
-TEST(FlowTest, strict_and_with_allow_force_strict_incremental_strict_selection_destructive_order_max_3_extra_strict) {
-    auto my_sort = [](auto &data) {
-                       AndFlow::sort(data, true);
-                       for (size_t next = 1; next <= 3 && next < data.size(); ++next) {
-                           auto [idx, target, diff] = flow::select_forced_strict_and_child(flow::DirectAdapter(), data, next);
-                           if (diff >= 0.0) {
-                               break;
-                           }
-                           auto pos = data.begin() + idx;
-                           std::rotate(data.begin() + target, pos, pos + 1);
-                       }
+TEST(FlowTest, and_with_allow_force_strict_incremental_strict_selection_destructive_order_max_3_extra_strict) {
+    auto my_sort = [](auto &data, InFlow in_flow) {
+                       AndFlow::sort(data, in_flow.strict());
+                       AndFlow::reorder_for_extra_strictness(data, in_flow, 3);
                    };
-    test_strict_AND_sort_strategy(my_sort);
+    test_AND_sort_strategy(my_sort);
 }
 
 GTEST_MAIN_RUN_ALL_TESTS()
diff --git a/searchlib/src/vespa/searchlib/queryeval/flow.h b/searchlib/src/vespa/searchlib/queryeval/flow.h
index 9ac79244755..0dc92a6cf88 100644
--- a/searchlib/src/vespa/searchlib/queryeval/flow.h
+++ b/searchlib/src/vespa/searchlib/queryeval/flow.h
@@ -195,27 +195,7 @@ double ordered_cost_of(ADAPTER adapter, const T &children, F flow, bool allow_fo
     return total_cost;
 }
 
-size_t select_strict_and_child(auto adapter, const auto &children) {
-    double est = 1.0;
-    double cost = 0.0;
-    size_t best_idx = 0;
-    double best_diff = std::numeric_limits<double>::max();
-    for (size_t idx = 0; idx < children.size(); ++idx) {
-        auto child = FlowStats::from(adapter, children[idx]);
-        double child_abs_cost = est * child.cost;
-        double my_diff = (child.strict_cost + child.estimate * cost) - (cost + child_abs_cost);
-        if (my_diff < best_diff) {
-            best_diff = my_diff;
-            best_idx = idx;
-        }
-        cost += child_abs_cost;
-        est *= child.estimate;
-    }
-    return best_idx;
-}
-
-auto select_forced_strict_and_child(auto adapter, const auto &children, size_t first) {
-    double est = 1.0;
+auto select_strict_and_child(auto adapter, const auto &children, size_t first, double est, bool native_strict) {
     double cost = 0.0;
     size_t best_idx = first;
     size_t best_target = first;
@@ -223,11 +203,12 @@ auto select_forced_strict_and_child(auto adapter, const auto &children, size_t f
     for (size_t i = 0; i < first; ++i) {
         est *= adapter.estimate(children[i]);
     }
+    double first_est = est;
     for (size_t idx = first; idx < children.size(); ++idx) {
         auto child = FlowStats::from(adapter, children[idx]);
         double child_abs_cost = est * child.cost;
-        double forced_cost = forced_strict_cost(child, est);
-        double my_diff = (forced_cost + child.estimate * cost) - (cost + child_abs_cost);
+        double child_strict_cost = (first == 0 && native_strict) ? child.strict_cost : forced_strict_cost(child, first_est);
+        double my_diff = (child_strict_cost + child.estimate * cost) - (cost + child_abs_cost);
         size_t target = first;
         while (target > 0) {
             size_t candidate = target - 1;
@@ -238,7 +219,7 @@ auto select_forced_strict_and_child(auto adapter, const auto &children, size_t f
             }
             target = candidate;
             my_diff += (0.2 * child.estimate - 0.2 * other.estimate);
-            if (candidate == 0) {
+            if (candidate == 0 && native_strict) {
                 // the first iterator produces its own in-flow
                 my_diff += (0.2 * child.estimate - 0.2 * other.estimate);
             }
@@ -246,7 +227,8 @@ auto select_forced_strict_and_child(auto adapter, const auto &children, size_t f
             // (underestimate the benefit) of inserting 'child' before
             // 'other' if it leads to 'other' becoming
             // non-strict. This will also leave 'other' in a
-            // potentially unoptimal location.
+            // potentially unoptimal location. Unit tests indicate
+            // that the effects of this are minor.
         }
         if (my_diff < best_diff) {
             best_diff = my_diff;
@@ -295,11 +277,28 @@ public:
     static double estimate_of(const auto &children) {
         return estimate_of(flow::make_adapter(children), children);
     }
+    // assume children are already ordered by calling sort (with same strictness as in_flow)
+    static void reorder_for_extra_strictness(auto adapter, auto &children, InFlow in_flow, size_t max_extra) {
+        size_t num_strict = in_flow.strict() ? 1 : 0;
+        size_t max_strict = num_strict + max_extra;
+        for (size_t next = num_strict; (next < max_strict) && (next < children.size()); ++next) {
+            auto [idx, target, diff] = flow::select_strict_and_child(adapter, children, next, in_flow.rate(), in_flow.strict());
+            if (diff >= 0.0) {
+                break;
+            }
+            auto pos = children.begin() + idx;
+            std::rotate(children.begin() + target, pos, pos + 1);
+        }
+    }
+    static void reorder_for_extra_strictness(auto &children, InFlow in_flow, size_t max_extra) {
+        reorder_for_extra_strictness(flow::make_adapter(children), children, in_flow, max_extra);
+    }
     static void sort(auto adapter, auto &children, bool strict) {
         flow::sort<flow::MinAndCost>(adapter, children);
         if (strict && children.size() > 1) {
-            auto pos = children.begin() + flow::select_strict_and_child(adapter, children);
-            std::rotate(children.begin(), pos, pos + 1);
+            auto [idx, target, ignore_diff] = flow::select_strict_and_child(adapter, children, 0, 1.0, true);
+            auto pos = children.begin() + idx;
+            std::rotate(children.begin() + target, pos, pos + 1);
         }
     }
     static void sort(auto &children, bool strict) {
author	Geir Storli <geirst@vespa.ai>	2024-04-12 18:21:56 +0200
committer	GitHub <noreply@github.com>	2024-04-12 18:21:56 +0200
commit	ff368acc961eca78e681fc33400c9ad93e95895a (patch)
tree	5c043b3bc83a2f60c1918939ce82fc8a0cddacf4 /searchlib
parent	2996ec56a6e4b1bb74a0555ff33bedf0912bd892 (diff)
parent	64c62c25b9f2de5d131852f042b6cd8999d5e3f7 (diff)