From 4b4e101654caaf137787c090b3c9036b18b604d8 Mon Sep 17 00:00:00 2001 From: Håvard Pettersen Date: Thu, 4 Apr 2024 09:27:35 +0000 Subject: more experiments also pruned some of the less promising alternatives --- .../tests/queryeval/flow/queryeval_flow_test.cpp | 210 +++++++++++++-------- searchlib/src/vespa/searchlib/queryeval/flow.h | 44 +++++ 2 files changed, 171 insertions(+), 83 deletions(-) (limited to 'searchlib') diff --git a/searchlib/src/tests/queryeval/flow/queryeval_flow_test.cpp b/searchlib/src/tests/queryeval/flow/queryeval_flow_test.cpp index 70c52f1d6c2..181c591ab20 100644 --- a/searchlib/src/tests/queryeval/flow/queryeval_flow_test.cpp +++ b/searchlib/src/tests/queryeval/flow/queryeval_flow_test.cpp @@ -6,9 +6,25 @@ #include constexpr size_t loop_cnt = 64; +constexpr size_t max_work = 1; // 500'000'000; +constexpr bool verbose = false; using namespace search::queryeval; +// at what in-flow (non-strict) rate is it equally cheap to be (forced) strict and non-strict +double strict_crossover(const FlowStats &stats) { + return (stats.strict_cost - 0.2 * stats.estimate) / (stats.cost - 0.2); +} + +// how much cost do we save by having an iterator strict vs non-strict with the given in-flow +double strict_gain(const FlowStats &stats, InFlow in_flow) { + if (in_flow.strict()) { + return stats.cost - stats.strict_cost; + } else { + return (in_flow.rate() * stats.cost) - flow::forced_strict_cost(stats, in_flow.rate()); + } +} + template double ordered_cost_of(const std::vector &data, InFlow in_flow, bool allow_force_strict) { return flow::ordered_cost_of(flow::DirectAdapter(), data, FLOW(in_flow), allow_force_strict); @@ -30,13 +46,14 @@ double dual_ordered_cost_of(const std::vector &data, InFlow in_flow, std::vector gen_data(size_t size) { static std::mt19937 gen; - static std::uniform_real_distribution estimate(0.1, 0.9); + static std::uniform_real_distribution estimate(0.0, 1.0); static std::uniform_real_distribution cost(1.0, 10.0); - static std::uniform_real_distribution strict_cost(0.1, 5.0); std::vector result; result.reserve(size); for (size_t i = 0; i < size; ++i) { - result.emplace_back(estimate(gen), cost(gen), strict_cost(gen)); + double est = estimate(gen); + std::uniform_real_distribution strict_cost(est, 5.0); + result.emplace_back(est, cost(gen), strict_cost(gen)); } if (size == 0) { gen.seed(gen.default_seed); @@ -71,6 +88,15 @@ void each_perm(std::vector &data, F fun) { each_perm(data, data.size(), fun); } +TEST(FlowTest, strict_crossover_and_gain) { + auto list = gen_data(64); + for (const auto &item: list) { + double limit = strict_crossover(item); + double gain = strict_gain(item, limit); + EXPECT_NEAR(gain, 0.0, 1e-9); + } +} + TEST(FlowTest, perm_test) { std::set> seen; std::vector data = {1,2,3,4,5}; @@ -337,7 +363,7 @@ TEST(FlowTest, optimal_and_flow) { max_cost = std::max(max_cost, my_cost); }; each_perm(data, check); - if (loop_cnt < 1024 || i % 1024 == 0) { + if (verbose && (loop_cnt < 1024 || i % 1024 == 0)) { fprintf(stderr, " AND cost(%zu,%s): min: %g, max: %g, factor: %g\n", i, strict ? "strict" : "non-strict", min_cost, max_cost, max_cost / min_cost); } @@ -360,7 +386,7 @@ TEST(FlowTest, optimal_or_flow) { max_cost = std::max(max_cost, my_cost); }; each_perm(data, check); - if (loop_cnt < 1024 || i % 1024 == 0) { + if (verbose && (loop_cnt < 1024 || i % 1024 == 0)) { fprintf(stderr, " OR cost(%zu,%s): min: %g, max: %g, factor: %g\n", i, strict ? "strict" : "non-strict", min_cost, max_cost, max_cost / min_cost); } @@ -386,7 +412,7 @@ TEST(FlowTest, optimal_and_not_flow) { } }; each_perm(data, check); - if (loop_cnt < 1024 || i % 1024 == 0) { + if (verbose && (loop_cnt < 1024 || i % 1024 == 0)) { fprintf(stderr, " ANDNOT cost(%zu,%s): min: %g, max: %g, factor: %g\n", i, strict ? "strict" : "non-strict", min_cost, max_cost, max_cost / min_cost); } @@ -396,10 +422,69 @@ TEST(FlowTest, optimal_and_not_flow) { void test_strict_AND_sort_strategy(auto my_sort) { re_seed(); - // size_t max_work = 500'000'000; - size_t max_work = 1; + const char *tags = "ABCDEFGHI"; for (size_t child_cnt: {2, 3, 5, 7, 9}) { size_t cnt = std::max(size_t(10), std::min(size_t(128'000), (max_work / count_perms(child_cnt)))); + if (verbose) { + fprintf(stderr, "AND/%zu: checking all permutations for %zu random cases\n", child_cnt, cnt); + } + std::vector my_worst_order; + std::vector best_worst_order; + auto get_tag = [&](const FlowStats &stats, const std::vector &ref)noexcept->char{ + for (size_t i = 0; i < ref.size(); ++i) { + if (stats == ref[i]) { + return tags[i]; + } + } + return 'X'; + }; + auto dump_flow = [&](const std::vector &list, const std::vector &ref){ + double total_cost = 0.0; + auto flow = AndFlow(true); + for (const auto &item: list) { + auto in_flow = InFlow(flow.strict(), flow.flow()); + bool strict = flow.strict() || flow::should_force_strict(item, flow.flow()); + double child_cost = flow::min_child_cost(in_flow, item, true); + fprintf(stderr, " %10f -> %c (estimate: %10f, cost: %10f, strict_cost: %10f, cross: %10f, gain: %10f, gain@est: %10f) cost: %10f%s\n", + flow.flow(), get_tag(item, ref), item.estimate, item.cost, item.strict_cost, strict_crossover(item), strict_gain(item, in_flow), strict_gain(item, item.estimate), + child_cost, strict ? " STRICT" : ""); + flow.add(item.estimate); + total_cost += child_cost; + } + EXPECT_EQ(total_cost, ordered_cost_of(list, true, true)); + fprintf(stderr, " total cost: %10f\n", total_cost); + }; + auto verify_order = [&](const std::vector &list){ + // check the following constraints for the given order: + // + // (1) never strict after non-strict + // (2) strict items are sorted by estimate + // (3) non-strict items are sorted by max(reduction/cost) + auto flow = AndFlow(true); + size_t strict_limit = list.size(); + auto my_cmp = flow::MinAndCost(flow::DirectAdapter()); + for (size_t i = 0; i < list.size(); ++i) { + const auto &item = list[i]; + if (i > 0) { + const auto &prev = list[i-1]; + bool strict = flow::should_force_strict(item, flow.flow()); + if (strict) { + if (i > strict_limit) { + return false; // (1) + } else if (item.estimate < prev.estimate) { + return false; // (2) + } + } else { + strict_limit = std::min(i, strict_limit); + if ((strict_limit < i) && my_cmp(item, prev)) { + return false; // (3) + } + } + } + flow.add(item.estimate); + } + return true; + }; double max_rel_err = 0.0; double sum_rel_err = 0.0; std::vector errs; @@ -414,13 +499,18 @@ void test_strict_AND_sort_strategy(auto my_sort) { for (size_t i = 0; i < cnt; ++i) { auto data = gen_data(child_cnt); double ref_est = AndFlow::estimate_of(data); - double min_cost = 1'000'000.0; - double max_cost = 0.0; my_sort(data); + auto my_order = data; + auto best_order = my_order; double est_cost = ordered_cost_of(data, true, true); + double min_cost = est_cost; + double max_cost = est_cost; auto check = [&](const std::vector &my_data) noexcept { double my_cost = ordered_cost_of(my_data, true, true); - min_cost = std::min(min_cost, my_cost); + if (my_cost < min_cost) { + min_cost = my_cost; + best_order = my_data; + } max_cost = std::max(max_cost, my_cost); }; each_perm(data, check); @@ -429,13 +519,29 @@ void test_strict_AND_sort_strategy(auto my_sort) { if (cost_range > 1e-9) { rel_err = (est_cost - min_cost) / cost_range; } - max_rel_err = std::max(max_rel_err, rel_err); + if (rel_err > max_rel_err) { + max_rel_err = rel_err; + my_worst_order = my_order; + best_worst_order = best_order; + } sum_rel_err += rel_err; errs.push_back(rel_err); + if (verbose && !verify_order(best_order)) { + fprintf(stderr, " BEST ORDER IS UNEXPECTED:\n"); + dump_flow(best_order, best_order); + fprintf(stderr, " UNEXPECTED case, my_order:\n"); + dump_flow(my_order, best_order); + } EXPECT_NEAR(ref_est, AndFlow::estimate_of(data), 1e-9); } std::sort(errs.begin(), errs.end()); - fprintf(stderr, " AND/%zu: avg: %10f, p90: %10f, p99: %10f, p99.9: %10f, max: %10f\n", + if (verbose && !my_worst_order.empty()) { + fprintf(stderr, " worst case, best order:\n"); + dump_flow(best_worst_order, best_worst_order); + fprintf(stderr, " worst case, my order:\n"); + dump_flow(my_worst_order, best_worst_order); + } + fprintf(stderr, "AND/%zu: avg: %10f, p90: %10f, p99: %10f, p99.9: %10f, max: %10f\n", child_cnt, (sum_rel_err / cnt), p(0.9), p(0.99), p(0.999), max_rel_err); } } @@ -445,54 +551,6 @@ TEST(FlowTest, strict_and_with_allow_force_strict_basic_order) { test_strict_AND_sort_strategy(my_sort); } -void greedy_sort(std::vector &data, auto flow, auto score_of) { - InFlow in_flow = InFlow(flow.strict(), flow.flow()); - for (size_t next = 0; (next + 1) < data.size(); ++next) { - size_t best_idx = next; - double best_score = score_of(in_flow, data[next]); - for (size_t i = next + 1; i < data.size(); ++i) { - double score = score_of(in_flow, data[i]); - if (score > best_score) { - best_score = score; - best_idx = i; - } - } - std::swap(data[next], data[best_idx]); - flow.add(data[next].estimate); - } -} - -TEST(FlowTest, strict_and_with_allow_force_strict_greedy_reduction_efficiency) { - auto score_of = [](InFlow in_flow, const FlowStats &stats) { - double child_cost = flow::min_child_cost(in_flow, stats, true); - double reduction = (1.0 - stats.estimate); - return reduction / child_cost; - }; - auto my_sort = [&](auto &data){ greedy_sort(data, AndFlow(true), score_of); }; - test_strict_AND_sort_strategy(my_sort); -} - -TEST(FlowTest, strict_and_with_allow_force_strict_partitioning) { - auto my_sort = [](auto &data) { - AndFlow::sort(data, false); - double rate = 1.0; - size_t a = 0; - for (size_t b = 0; b < data.size(); ++b) { - double est = data[b].estimate; - if (flow::should_force_strict(data[b], rate)) { - auto pos = data.begin() + b; - std::rotate(data.begin() + a, pos, pos + 1); - ++a; - } - rate *= est; - } - if (a == 0) { // need at least 1 strict child - AndFlow::sort(data, true); - } - }; - test_strict_AND_sort_strategy(my_sort); -} - TEST(FlowTest, strict_and_with_allow_force_strict_incremental_strict_selection) { auto my_sort = [](auto &data) { AndFlow::sort(data, true); @@ -541,30 +599,16 @@ TEST(FlowTest, strict_and_with_allow_force_strict_incremental_strict_selection_w test_strict_AND_sort_strategy(my_sort); } -TEST(FlowTest, strict_and_with_allow_force_strict_assume_strict_then_partition_and_re_sort_non_strict) { +TEST(FlowTest, strict_and_with_allow_force_strict_incremental_strict_selection_with_destructive_order) { auto my_sort = [](auto &data) { - std::sort(data.begin(), data.end(), - [](const auto &a, const auto &b){ return (a.estimate < b.estimate); }); - double est = 1.0; - size_t last = data.size(); - auto best_strict = [&](size_t i)noexcept{ - if (i == 0) { - return data[i].strict_cost < data[i].cost; - } else { - return flow::should_force_strict(data[i], est); - } - }; - for (size_t i = 0; i < last; ++i) { - while (i < last && !best_strict(i)) { - std::rotate(data.begin()+i, data.begin()+i+1, data.begin()+last); - --last; + AndFlow::sort(data, true); + for (size_t next = 1; next < data.size(); ++next) { + auto [idx, target, score] = flow::select_forced_strict_and_child_with_destructive_order(flow::DirectAdapter(), data, next); + if (score >= 0.0) { + break; } - est *= data[i].estimate; - } - if (last > 0) { - std::sort(data.begin() + last, data.end(), flow::MinAndCost(flow::DirectAdapter())); - } else { - AndFlow::sort(data, true); + auto pos = data.begin() + idx; + std::rotate(data.begin() + target, pos, pos + 1); } }; test_strict_AND_sort_strategy(my_sort); diff --git a/searchlib/src/vespa/searchlib/queryeval/flow.h b/searchlib/src/vespa/searchlib/queryeval/flow.h index bc76856dac1..2868fbab49d 100644 --- a/searchlib/src/vespa/searchlib/queryeval/flow.h +++ b/searchlib/src/vespa/searchlib/queryeval/flow.h @@ -285,6 +285,50 @@ auto select_forced_strict_and_child_with_order(auto adapter, const auto &childre return std::make_tuple(best_idx, best_target, best_diff); } +auto select_forced_strict_and_child_with_destructive_order(auto adapter, const auto &children, size_t first) { + auto est_until = [&](size_t limit)noexcept{ + double my_est = 1.0; + for (size_t i = 0; i < limit; ++i) { + my_est *= adapter.estimate(children[i]); + } + return my_est; + }; + double est = est_until(first); + double cost = 0.0; + size_t best_idx = 0; + size_t best_target = 0; + double best_diff = std::numeric_limits::max(); + for (size_t idx = first; idx < children.size(); ++idx) { + auto child = FlowStats::from(adapter, children[idx]); + double child_abs_cost = est * child.cost; + double forced_cost = forced_strict_cost(child, est); + double my_diff = (forced_cost + child.estimate * cost) - (cost + child_abs_cost); + size_t target = first; + while (target > 0) { + size_t candidate = target - 1; + auto other = FlowStats::from(adapter, children[candidate]); + if (other.estimate < child.estimate) { + // do not move past someone with lower estimate + break; + } + target = candidate; + my_diff += (0.2 * child.estimate - 0.2 * other.estimate); + if (candidate == 0) { + // the first iterator produces its own in-flow + my_diff += (0.2 * child.estimate - 0.2 * other.estimate); + } + } + if (my_diff < best_diff) { + best_diff = my_diff; + best_idx = idx; + best_target = target; + } + cost += child_abs_cost; + est *= child.estimate; + } + return std::make_tuple(best_idx, best_target, best_diff); +} + } // flow template -- cgit v1.2.3 From 2d5c6001950b472def902553528a1313f6418cac Mon Sep 17 00:00:00 2001 From: Håvard Pettersen Date: Mon, 8 Apr 2024 09:26:02 +0000 Subject: common code for force strict child selection --- searchlib/src/vespa/searchlib/queryeval/flow.h | 88 ++++++-------------------- 1 file changed, 18 insertions(+), 70 deletions(-) (limited to 'searchlib') diff --git a/searchlib/src/vespa/searchlib/queryeval/flow.h b/searchlib/src/vespa/searchlib/queryeval/flow.h index 2868fbab49d..b57e823f4d7 100644 --- a/searchlib/src/vespa/searchlib/queryeval/flow.h +++ b/searchlib/src/vespa/searchlib/queryeval/flow.h @@ -214,30 +214,7 @@ size_t select_strict_and_child(auto adapter, const auto &children) { return best_idx; } -auto select_forced_strict_and_child(auto adapter, const auto &children, size_t first) { - double est = 1.0; - double cost = 0.0; - size_t best_idx = 0; - double best_diff = std::numeric_limits::max(); - for (size_t idx = 0; idx < first && idx < children.size(); ++idx) { - est *= adapter.estimate(children[idx]); - } - for (size_t idx = first; idx < children.size(); ++idx) { - auto child = FlowStats::from(adapter, children[idx]); - double child_abs_cost = est * child.cost; - double forced_cost = forced_strict_cost(child, est); - double my_diff = (forced_cost + child.estimate * cost) - (cost + child_abs_cost); - if (my_diff < best_diff) { - best_diff = my_diff; - best_idx = idx; - } - cost += child_abs_cost; - est *= child.estimate; - } - return std::make_pair(best_idx, best_diff); -} - -auto select_forced_strict_and_child_with_order(auto adapter, const auto &children, size_t first) { +auto select_forced_strict_and_child_impl(auto adapter, const auto &children, size_t first, auto ordered, auto destructive) { auto est_until = [&](size_t limit)noexcept{ double my_est = 1.0; for (size_t i = 0; i < limit; ++i) { @@ -247,8 +224,8 @@ auto select_forced_strict_and_child_with_order(auto adapter, const auto &childre }; double est = est_until(first); double cost = 0.0; - size_t best_idx = 0; - size_t best_target = 0; + size_t best_idx = first; + size_t best_target = first; double best_diff = std::numeric_limits::max(); for (size_t idx = first; idx < children.size(); ++idx) { auto child = FlowStats::from(adapter, children[idx]); @@ -256,15 +233,17 @@ auto select_forced_strict_and_child_with_order(auto adapter, const auto &childre double forced_cost = forced_strict_cost(child, est); double my_diff = (forced_cost + child.estimate * cost) - (cost + child_abs_cost); size_t target = first; - while (target > 0) { + while (ordered && target > 0) { size_t candidate = target - 1; auto other = FlowStats::from(adapter, children[candidate]); if (other.estimate < child.estimate) { // do not move past someone with lower estimate break; } - if (!should_force_strict(other, (est_until(candidate) * child.estimate))) { - // no not move past someone who will lose strictness + if (!destructive && !should_force_strict(other, (est_until(candidate) * child.estimate))) { + // no not move past someone who will lose strictness, + // unless it is explicitly allowed by enabling + // destructive (re-)ordering break; } target = candidate; @@ -285,48 +264,17 @@ auto select_forced_strict_and_child_with_order(auto adapter, const auto &childre return std::make_tuple(best_idx, best_target, best_diff); } +auto select_forced_strict_and_child(auto adapter, const auto &children, size_t first) { + auto [idx, target, diff] = select_forced_strict_and_child_impl(adapter, children, first, std::false_type{}, std::false_type{}); + return std::make_pair(idx, diff); +} + +auto select_forced_strict_and_child_with_order(auto adapter, const auto &children, size_t first) { + return select_forced_strict_and_child_impl(adapter, children, first, std::true_type{}, std::false_type{}); +} + auto select_forced_strict_and_child_with_destructive_order(auto adapter, const auto &children, size_t first) { - auto est_until = [&](size_t limit)noexcept{ - double my_est = 1.0; - for (size_t i = 0; i < limit; ++i) { - my_est *= adapter.estimate(children[i]); - } - return my_est; - }; - double est = est_until(first); - double cost = 0.0; - size_t best_idx = 0; - size_t best_target = 0; - double best_diff = std::numeric_limits::max(); - for (size_t idx = first; idx < children.size(); ++idx) { - auto child = FlowStats::from(adapter, children[idx]); - double child_abs_cost = est * child.cost; - double forced_cost = forced_strict_cost(child, est); - double my_diff = (forced_cost + child.estimate * cost) - (cost + child_abs_cost); - size_t target = first; - while (target > 0) { - size_t candidate = target - 1; - auto other = FlowStats::from(adapter, children[candidate]); - if (other.estimate < child.estimate) { - // do not move past someone with lower estimate - break; - } - target = candidate; - my_diff += (0.2 * child.estimate - 0.2 * other.estimate); - if (candidate == 0) { - // the first iterator produces its own in-flow - my_diff += (0.2 * child.estimate - 0.2 * other.estimate); - } - } - if (my_diff < best_diff) { - best_diff = my_diff; - best_idx = idx; - best_target = target; - } - cost += child_abs_cost; - est *= child.estimate; - } - return std::make_tuple(best_idx, best_target, best_diff); + return select_forced_strict_and_child_impl(adapter, children, first, std::true_type{}, std::true_type{}); } } // flow -- cgit v1.2.3