diff options
author | Henning Baldersheim <balder@yahoo-inc.com> | 2024-01-16 00:49:18 +0100 |
---|---|---|
committer | GitHub <noreply@github.com> | 2024-01-16 00:49:18 +0100 |
commit | 7ff3cdf2d652cc5b4a5ee04f4c1218647157b988 (patch) | |
tree | ff03601d3c1a2eba8fb0820ac1a6a98a3338c78e /searchlib/src/tests | |
parent | ccd310972b94deb49c1d7e3ffc8dc06e4a282014 (diff) | |
parent | 7a8871c3b9259d05e5cf606e943d69e0e7a160fe (diff) |
Merge pull request #29915 from vespa-engine/havardpe/strict-flowv8.287.20
take strictness into account for flow/cost/sorting
Diffstat (limited to 'searchlib/src/tests')
3 files changed, 209 insertions, 53 deletions
diff --git a/searchlib/src/tests/queryeval/blueprint/blueprint_test.cpp b/searchlib/src/tests/queryeval/blueprint/blueprint_test.cpp index f800e124bdc..bbd2744119a 100644 --- a/searchlib/src/tests/queryeval/blueprint/blueprint_test.cpp +++ b/searchlib/src/tests/queryeval/blueprint/blueprint_test.cpp @@ -24,10 +24,10 @@ class MyOr : public IntermediateBlueprint private: public: double calculate_cost() const final { - return cost_of(get_children(), OrFlow()); + return OrFlow::cost_of(get_children()); } double calculate_relative_estimate() const final { - return estimate_of(get_children(), OrFlow()); + return OrFlow::estimate_of(get_children()); } HitEstimate combine(const std::vector<HitEstimate> &data) const override { return max(data); diff --git a/searchlib/src/tests/queryeval/blueprint/intermediate_blueprints_test.cpp b/searchlib/src/tests/queryeval/blueprint/intermediate_blueprints_test.cpp index ab1c004c721..856ac2391f8 100644 --- a/searchlib/src/tests/queryeval/blueprint/intermediate_blueprints_test.cpp +++ b/searchlib/src/tests/queryeval/blueprint/intermediate_blueprints_test.cpp @@ -1380,7 +1380,7 @@ TEST("cost for ONEAR") { } TEST("cost for WEAKAND") { - verify_cost(make::WEAKAND(1000), calc_cost({{1.1, 0.8},{1.2, 0.7},{1.3, 0.5}})); + verify_cost(make::WEAKAND(1000), calc_cost({{1.3, 0.5},{1.2, 0.7},{1.1, 0.8}})); } TEST_MAIN() { TEST_DEBUG("lhs.out", "rhs.out"); TEST_RUN_ALL(); } diff --git a/searchlib/src/tests/queryeval/flow/queryeval_flow_test.cpp b/searchlib/src/tests/queryeval/flow/queryeval_flow_test.cpp index ceda30f169a..9a9adeac2bc 100644 --- a/searchlib/src/tests/queryeval/flow/queryeval_flow_test.cpp +++ b/searchlib/src/tests/queryeval/flow/queryeval_flow_test.cpp @@ -5,44 +5,46 @@ #include <vector> #include <random> -using search::queryeval::AndFlow; -using search::queryeval::OrFlow; +constexpr size_t loop_cnt = 64; + +using namespace search::queryeval; + +struct ItemAdapter { + double estimate(const auto &child) const noexcept { return child.rel_est; } + double cost(const auto &child) const noexcept { return child.cost; } + double strict_cost(const auto &child) const noexcept { return child.strict_cost; } +}; struct Item { double rel_est; double cost; - Item(double rel_est_in, double cost_in) noexcept - : rel_est(rel_est_in), cost(cost_in) {} - static void sort_for_and(std::vector<Item> &data) { - std::sort(data.begin(), data.end(), [](const Item &a, const Item &b) noexcept { - return (1.0 - a.rel_est) / a.cost > (1.0 - b.rel_est) / b.cost; - }); + double strict_cost; + Item(double rel_est_in, double cost_in, double strict_cost_in) noexcept + : rel_est(rel_est_in), cost(cost_in), strict_cost(strict_cost_in) {} + template <typename FLOW> static double estimate_of(std::vector<Item> &data) { + return FLOW::estimate_of(ItemAdapter(), data); } - static void sort_for_or(std::vector<Item> &data) { - std::sort(data.begin(), data.end(), [](const Item &a, const Item &b) noexcept { - return a.rel_est / a.cost > b.rel_est / b.cost; - }); + template <typename FLOW> static void sort(std::vector<Item> &data, bool strict) { + FLOW::sort(ItemAdapter(), data, strict); } - static double cost_of(const std::vector<Item> &data, auto flow) { - double cost = 0.0; - for (const Item &item: data) { - cost += flow.flow() * item.cost; - flow.add(item.rel_est); - } - return cost; + template <typename FLOW> static double cost_of(const std::vector<Item> &data, bool strict) { + return FLOW::cost_of(ItemAdapter(), data, strict); + } + template <typename FLOW> static double ordered_cost_of(const std::vector<Item> &data, bool strict) { + return flow::ordered_cost_of(ItemAdapter(), data, FLOW(1.0, strict)); } - static double cost_of_and(const std::vector<Item> &data) { return cost_of(data, AndFlow()); } - static double cost_of_or(const std::vector<Item> &data) { return cost_of(data, OrFlow()); } + auto operator <=>(const Item &rhs) const noexcept = default; }; std::vector<Item> gen_data(size_t size) { static std::mt19937 gen; - static std::uniform_real_distribution<double> rel_est(0.1, 0.9); - static std::uniform_real_distribution<double> cost(1.0, 10.0); + static std::uniform_real_distribution<double> rel_est(0.1, 0.9); + static std::uniform_real_distribution<double> cost(1.0, 10.0); + static std::uniform_real_distribution<double> strict_cost(0.1, 5.0); std::vector<Item> result; result.reserve(size); for (size_t i = 0; i < size; ++i) { - result.emplace_back(rel_est(gen), cost(gen)); + result.emplace_back(rel_est(gen), cost(gen), strict_cost(gen)); } return result; } @@ -80,37 +82,191 @@ TEST(FlowTest, perm_test) { EXPECT_EQ(seen.size(), 120); } +template <template <typename> typename ORDER> +void verify_ordering_is_strict_weak() { + auto cmp = ORDER(ItemAdapter()); + auto input = gen_data(7); + input.emplace_back(0.5, 1.5, 0.5); + input.emplace_back(0.5, 1.5, 0.5); + input.emplace_back(0.5, 1.5, 0.5); + input.emplace_back(0.0, 1.5, 0.5); + input.emplace_back(0.0, 1.5, 0.5); + input.emplace_back(0.5, 0.0, 0.5); + input.emplace_back(0.5, 0.0, 0.5); + input.emplace_back(0.5, 1.5, 0.0); + input.emplace_back(0.5, 1.5, 0.0); + input.emplace_back(0.0, 0.0, 0.0); + input.emplace_back(0.0, 0.0, 0.0); + std::vector<Item> output; + for (const Item &in: input) { + EXPECT_FALSE(cmp(in, in)); // Irreflexivity + size_t out_idx = 0; + bool lower = false; + bool upper = false; + for (const Item &out: output) { + if (cmp(out, in)) { + EXPECT_FALSE(cmp(in, out)); // Antisymmetry + EXPECT_FALSE(lower); // Transitivity + EXPECT_FALSE(upper); // Transitivity + ++out_idx; + } else { + lower = true; + if (cmp(in, out)) { + upper = true; + } else { + EXPECT_FALSE(upper); // Transitivity + } + } + } + output.insert(output.begin() + out_idx, in); + } +} + +TEST(FlowTest, and_ordering_is_strict_weak) { + verify_ordering_is_strict_weak<flow::MinAndCost>(); +} + +TEST(FlowTest, or_ordering_is_strict_weak) { + verify_ordering_is_strict_weak<flow::MinOrCost>(); +} + +TEST(FlowTest, strict_or_ordering_is_strict_weak) { + verify_ordering_is_strict_weak<flow::MinOrStrictCost>(); +} + +struct ExpectFlow { + double flow; + double est; + bool strict; +}; + +void verify_flow(auto flow, const std::vector<double> &est_list, const std::vector<ExpectFlow> &expect) { + ASSERT_EQ(est_list.size() + 1, expect.size()); + for (size_t i = 0; i < expect.size(); ++i) { + EXPECT_DOUBLE_EQ(flow.flow(), expect[i].flow); + EXPECT_DOUBLE_EQ(flow.estimate(), expect[i].est); + EXPECT_EQ(flow.strict(), expect[i].strict); + if (i < est_list.size()) { + flow.add(est_list[i]); + } + } +} + +TEST(FlowTest, basic_and_flow) { + for (double in: {1.0, 0.5, 0.25}) { + for (bool strict: {false, true}) { + verify_flow(AndFlow(in, strict), {0.4, 0.7, 0.2}, + {{in, 0.0, strict}, + {in*0.4, in*0.4, false}, + {in*0.4*0.7, in*0.4*0.7, false}, + {in*0.4*0.7*0.2, in*0.4*0.7*0.2, false}}); + } + } +} + +TEST(FlowTest, basic_or_flow) { + for (double in: {1.0, 0.5, 0.25}) { + for (bool strict: {false, true}) { + verify_flow(OrFlow(in, strict), {0.4, 0.7, 0.2}, + {{in, 0.0, strict}, + {in*0.6, 1.0-in*0.6, strict}, + {in*0.6*0.3, 1.0-in*0.6*0.3, strict}, + {in*0.6*0.3*0.8, 1.0-in*0.6*0.3*0.8, strict}}); + } + } +} + +TEST(FlowTest, basic_and_not_flow) { + for (double in: {1.0, 0.5, 0.25}) { + for (bool strict: {false, true}) { + verify_flow(AndNotFlow(in, strict), {0.4, 0.7, 0.2}, + {{in, 0.0, strict}, + {in*0.4, in*0.4, false}, + {in*0.4*0.3, in*0.4*0.3, false}, + {in*0.4*0.3*0.8, in*0.4*0.3*0.8, false}}); + } + } +} + +TEST(FlowTest, flow_cost) { + std::vector<Item> data = {{0.4, 1.1, 0.6}, {0.7, 1.2, 0.5}, {0.2, 1.3, 0.4}}; + EXPECT_DOUBLE_EQ(Item::ordered_cost_of<AndFlow>(data, false), 1.1 + 0.4*1.2 + 0.4*0.7*1.3); + EXPECT_DOUBLE_EQ(Item::ordered_cost_of<AndFlow>(data, true), 0.6 + 0.4*1.2 + 0.4*0.7*1.3); + EXPECT_DOUBLE_EQ(Item::ordered_cost_of<OrFlow>(data, false), 1.1 + 0.6*1.2 + 0.6*0.3*1.3); + EXPECT_DOUBLE_EQ(Item::ordered_cost_of<OrFlow>(data, true), 0.6 + 0.6*0.5 + 0.6*0.3*0.4); + EXPECT_DOUBLE_EQ(Item::ordered_cost_of<AndNotFlow>(data, false), 1.1 + 0.4*1.2 + 0.4*0.3*1.3); + EXPECT_DOUBLE_EQ(Item::ordered_cost_of<AndNotFlow>(data, true), 0.6 + 0.4*1.2 + 0.4*0.3*1.3); +} + TEST(FlowTest, optimal_and_flow) { - for (size_t i = 0; i < 256; ++i) { - auto data = gen_data(7); - Item::sort_for_and(data); - double min_cost = Item::cost_of_and(data); - double max_cost = 0.0; - auto check = [min_cost,&max_cost](const std::vector<Item> &my_data) noexcept { - double my_cost = Item::cost_of_and(my_data); - EXPECT_LE(min_cost, my_cost); - max_cost = std::max(max_cost, my_cost); - }; - each_perm(data, check); - fprintf(stderr, " and cost(%zu): min: %g, max: %g, factor: %g\n", - i, min_cost, max_cost, max_cost / min_cost); + for (size_t i = 0; i < loop_cnt; ++i) { + for (bool strict: {false, true}) { + auto data = gen_data(7); + double ref_est = Item::estimate_of<AndFlow>(data); + double min_cost = Item::cost_of<AndFlow>(data, strict); + double max_cost = 0.0; + Item::sort<AndFlow>(data, strict); + EXPECT_EQ(Item::ordered_cost_of<AndFlow>(data, strict), min_cost); + auto check = [&](const std::vector<Item> &my_data) noexcept { + double my_cost = Item::ordered_cost_of<AndFlow>(my_data, strict); + EXPECT_LE(min_cost, my_cost); + max_cost = std::max(max_cost, my_cost); + }; + each_perm(data, check); + if (loop_cnt < 1024 || i % 1024 == 0) { + fprintf(stderr, " AND cost(%zu,%s): min: %g, max: %g, factor: %g\n", + i, strict ? "strict" : "non-strict", min_cost, max_cost, max_cost / min_cost); + } + EXPECT_NEAR(ref_est, Item::estimate_of<AndFlow>(data), 1e-9); + } } } TEST(FlowTest, optimal_or_flow) { - for (size_t i = 0; i < 256; ++i) { - auto data = gen_data(7); - Item::sort_for_or(data); - double min_cost = Item::cost_of_or(data); - double max_cost = 0.0; - auto check = [min_cost,&max_cost](const std::vector<Item> &my_data) noexcept { - double my_cost = Item::cost_of_or(my_data); - EXPECT_LE(min_cost, my_cost); - max_cost = std::max(max_cost, my_cost); - }; - each_perm(data, check); - fprintf(stderr, " or cost(%zu): min: %g, max: %g, factor: %g\n", - i, min_cost, max_cost, max_cost / min_cost); + for (size_t i = 0; i < loop_cnt; ++i) { + for (bool strict: {false, true}) { + auto data = gen_data(7); + double min_cost = Item::cost_of<OrFlow>(data, strict); + double max_cost = 0.0; + Item::sort<OrFlow>(data, strict); + EXPECT_EQ(Item::ordered_cost_of<OrFlow>(data, strict), min_cost); + auto check = [&](const std::vector<Item> &my_data) noexcept { + double my_cost = Item::ordered_cost_of<OrFlow>(my_data, strict); + EXPECT_LE(min_cost, my_cost); + max_cost = std::max(max_cost, my_cost); + }; + each_perm(data, check); + if (loop_cnt < 1024 || i % 1024 == 0) { + fprintf(stderr, " OR cost(%zu,%s): min: %g, max: %g, factor: %g\n", + i, strict ? "strict" : "non-strict", min_cost, max_cost, max_cost / min_cost); + } + } + } +} + +TEST(FlowTest, optimal_and_not_flow) { + for (size_t i = 0; i < loop_cnt; ++i) { + for (bool strict: {false, true}) { + auto data = gen_data(7); + Item first = data[0]; + double min_cost = Item::cost_of<AndNotFlow>(data, strict); + double max_cost = 0.0; + Item::sort<AndNotFlow>(data, strict); + EXPECT_EQ(data[0], first); + EXPECT_EQ(Item::ordered_cost_of<AndNotFlow>(data, strict), min_cost); + auto check = [&](const std::vector<Item> &my_data) noexcept { + if (my_data[0] == first) { + double my_cost = Item::ordered_cost_of<AndNotFlow>(my_data, strict); + EXPECT_LE(min_cost, my_cost); + max_cost = std::max(max_cost, my_cost); + } + }; + each_perm(data, check); + if (loop_cnt < 1024 || i % 1024 == 0) { + fprintf(stderr, " ANDNOT cost(%zu,%s): min: %g, max: %g, factor: %g\n", + i, strict ? "strict" : "non-strict", min_cost, max_cost, max_cost / min_cost); + } + } } } |