diff options
author | Håvard Pettersen <havardpe@yahooinc.com> | 2024-02-12 12:18:05 +0000 |
---|---|---|
committer | Håvard Pettersen <havardpe@yahooinc.com> | 2024-02-12 14:39:38 +0000 |
commit | 64990b4bb6fd3cba16275f431a24212fe4c1abf7 (patch) | |
tree | 1e10acb43747155a937355e4af179c9a7ed96af2 /searchlib/src/tests/queryeval/flow/queryeval_flow_test.cpp | |
parent | 05a7aa2760f8f14dfd2128eef6e8c6ab2ee5b8e9 (diff) |
account for heap cost in strict OR
- make it easier to do flow analysis directly on FlowStats
- use FlowStats in tests (less custom test code)
- added flow_tuning.h for special sauce
- strict flow must now have in-flow of 1.0
- split low-level flow tests into full/partial
Diffstat (limited to 'searchlib/src/tests/queryeval/flow/queryeval_flow_test.cpp')
-rw-r--r-- | searchlib/src/tests/queryeval/flow/queryeval_flow_test.cpp | 166 |
1 files changed, 84 insertions, 82 deletions
diff --git a/searchlib/src/tests/queryeval/flow/queryeval_flow_test.cpp b/searchlib/src/tests/queryeval/flow/queryeval_flow_test.cpp index ec3e0066134..7a3950dbf1c 100644 --- a/searchlib/src/tests/queryeval/flow/queryeval_flow_test.cpp +++ b/searchlib/src/tests/queryeval/flow/queryeval_flow_test.cpp @@ -9,42 +9,20 @@ constexpr size_t loop_cnt = 64; using namespace search::queryeval; -struct ItemAdapter { - double estimate(const auto &child) const noexcept { return child.rel_est; } - double cost(const auto &child) const noexcept { return child.cost; } - double strict_cost(const auto &child) const noexcept { return child.strict_cost; } -}; - -struct Item { - double rel_est; - double cost; - double strict_cost; - Item(double rel_est_in, double cost_in, double strict_cost_in) noexcept - : rel_est(rel_est_in), cost(cost_in), strict_cost(strict_cost_in) {} - template <typename FLOW> static double estimate_of(std::vector<Item> &data) { - return FLOW::estimate_of(ItemAdapter(), data); - } - template <typename FLOW> static void sort(std::vector<Item> &data, bool strict) { - FLOW::sort(ItemAdapter(), data, strict); - } - template <typename FLOW> static double cost_of(const std::vector<Item> &data, bool strict) { - return FLOW::cost_of(ItemAdapter(), data, strict); - } - template <typename FLOW> static double ordered_cost_of(const std::vector<Item> &data, bool strict) { - return flow::ordered_cost_of(ItemAdapter(), data, FLOW(1.0, strict)); - } - auto operator <=>(const Item &rhs) const noexcept = default; -}; +template <typename FLOW> +double ordered_cost_of(const std::vector<FlowStats> &data, bool strict) { + return flow::ordered_cost_of(flow::DirectAdapter(), data, FLOW(strict)); +} -std::vector<Item> gen_data(size_t size) { +std::vector<FlowStats> gen_data(size_t size) { static std::mt19937 gen; - static std::uniform_real_distribution<double> rel_est(0.1, 0.9); + static std::uniform_real_distribution<double> estimate(0.1, 0.9); static std::uniform_real_distribution<double> cost(1.0, 10.0); static std::uniform_real_distribution<double> strict_cost(0.1, 5.0); - std::vector<Item> result; + std::vector<FlowStats> result; result.reserve(size); for (size_t i = 0; i < size; ++i) { - result.emplace_back(rel_est(gen), cost(gen), strict_cost(gen)); + result.emplace_back(estimate(gen), cost(gen), strict_cost(gen)); } return result; } @@ -84,7 +62,7 @@ TEST(FlowTest, perm_test) { template <template <typename> typename ORDER> void verify_ordering_is_strict_weak() { - auto cmp = ORDER(ItemAdapter()); + auto cmp = ORDER(flow::DirectAdapter()); auto input = gen_data(7); input.emplace_back(0.5, 1.5, 0.5); input.emplace_back(0.5, 1.5, 0.5); @@ -97,13 +75,13 @@ void verify_ordering_is_strict_weak() { input.emplace_back(0.5, 1.5, 0.0); input.emplace_back(0.0, 0.0, 0.0); input.emplace_back(0.0, 0.0, 0.0); - std::vector<Item> output; - for (const Item &in: input) { + std::vector<FlowStats> output; + for (const FlowStats &in: input) { EXPECT_FALSE(cmp(in, in)); // Irreflexivity size_t out_idx = 0; bool lower = false; bool upper = false; - for (const Item &out: output) { + for (const FlowStats &out: output) { if (cmp(out, in)) { EXPECT_FALSE(cmp(in, out)); // Antisymmetry EXPECT_FALSE(lower); // Transitivity @@ -148,66 +126,90 @@ void verify_flow(auto flow, const std::vector<double> &est_list, const std::vect } } -TEST(FlowTest, basic_and_flow) { +TEST(FlowTest, full_and_flow) { + for (bool strict: {false, true}) { + verify_flow(AndFlow(strict), {0.4, 0.7, 0.2}, + {{1.0, 0.0, strict}, + {0.4, 0.4, false}, + {0.4*0.7, 0.4*0.7, false}, + {0.4*0.7*0.2, 0.4*0.7*0.2, false}}); + } +} + +TEST(FlowTest, partial_and_flow) { for (double in: {1.0, 0.5, 0.25}) { - for (bool strict: {false, true}) { - verify_flow(AndFlow(in, strict), {0.4, 0.7, 0.2}, - {{in, 0.0, strict}, - {in*0.4, in*0.4, false}, - {in*0.4*0.7, in*0.4*0.7, false}, - {in*0.4*0.7*0.2, in*0.4*0.7*0.2, false}}); - } + verify_flow(AndFlow(in), {0.4, 0.7, 0.2}, + {{in, 0.0, false}, + {in*0.4, in*0.4, false}, + {in*0.4*0.7, in*0.4*0.7, false}, + {in*0.4*0.7*0.2, in*0.4*0.7*0.2, false}}); } } -TEST(FlowTest, basic_or_flow) { +TEST(FlowTest, full_or_flow) { + verify_flow(OrFlow(false), {0.4, 0.7, 0.2}, + {{1.0, 0.0, false}, + {0.6, 1.0-0.6, false}, + {0.6*0.3, 1.0-0.6*0.3, false}, + {0.6*0.3*0.8, 1.0-0.6*0.3*0.8, false}}); + verify_flow(OrFlow(true), {0.4, 0.7, 0.2}, + {{1.0, 0.0, true}, + {1.0, 1.0-0.6, true}, + {1.0, 1.0-0.6*0.3, true}, + {1.0, 1.0-0.6*0.3*0.8, true}}); +} + +TEST(FlowTest, partial_or_flow) { for (double in: {1.0, 0.5, 0.25}) { - verify_flow(OrFlow(in, false), {0.4, 0.7, 0.2}, + verify_flow(OrFlow(in), {0.4, 0.7, 0.2}, {{in, 0.0, false}, {in*0.6, 1.0-in*0.6, false}, {in*0.6*0.3, 1.0-in*0.6*0.3, false}, {in*0.6*0.3*0.8, 1.0-in*0.6*0.3*0.8, false}}); - verify_flow(OrFlow(in, true), {0.4, 0.7, 0.2}, - {{in, 0.0, true}, - {in, 1.0-in*0.6, true}, - {in, 1.0-in*0.6*0.3, true}, - {in, 1.0-in*0.6*0.3*0.8, true}}); } } -TEST(FlowTest, basic_and_not_flow) { +TEST(FlowTest, full_and_not_flow) { + for (bool strict: {false, true}) { + verify_flow(AndNotFlow(strict), {0.4, 0.7, 0.2}, + {{1.0, 0.0, strict}, + {0.4, 0.4, false}, + {0.4*0.3, 0.4*0.3, false}, + {0.4*0.3*0.8, 0.4*0.3*0.8, false}}); + } +} + +TEST(FlowTest, partial_and_not_flow) { for (double in: {1.0, 0.5, 0.25}) { - for (bool strict: {false, true}) { - verify_flow(AndNotFlow(in, strict), {0.4, 0.7, 0.2}, - {{in, 0.0, strict}, - {in*0.4, in*0.4, false}, - {in*0.4*0.3, in*0.4*0.3, false}, - {in*0.4*0.3*0.8, in*0.4*0.3*0.8, false}}); - } + verify_flow(AndNotFlow(in), {0.4, 0.7, 0.2}, + {{in, 0.0, false}, + {in*0.4, in*0.4, false}, + {in*0.4*0.3, in*0.4*0.3, false}, + {in*0.4*0.3*0.8, in*0.4*0.3*0.8, false}}); } } TEST(FlowTest, flow_cost) { - std::vector<Item> data = {{0.4, 1.1, 0.6}, {0.7, 1.2, 0.5}, {0.2, 1.3, 0.4}}; - EXPECT_DOUBLE_EQ(Item::ordered_cost_of<AndFlow>(data, false), 1.1 + 0.4*1.2 + 0.4*0.7*1.3); - EXPECT_DOUBLE_EQ(Item::ordered_cost_of<AndFlow>(data, true), 0.6 + 0.4*1.2 + 0.4*0.7*1.3); - EXPECT_DOUBLE_EQ(Item::ordered_cost_of<OrFlow>(data, false), 1.1 + 0.6*1.2 + 0.6*0.3*1.3); - EXPECT_DOUBLE_EQ(Item::ordered_cost_of<OrFlow>(data, true), 0.6 + 0.5 + 0.4); - EXPECT_DOUBLE_EQ(Item::ordered_cost_of<AndNotFlow>(data, false), 1.1 + 0.4*1.2 + 0.4*0.3*1.3); - EXPECT_DOUBLE_EQ(Item::ordered_cost_of<AndNotFlow>(data, true), 0.6 + 0.4*1.2 + 0.4*0.3*1.3); + std::vector<FlowStats> data = {{0.4, 1.1, 0.6}, {0.7, 1.2, 0.5}, {0.2, 1.3, 0.4}}; + EXPECT_DOUBLE_EQ(ordered_cost_of<AndFlow>(data, false), 1.1 + 0.4*1.2 + 0.4*0.7*1.3); + EXPECT_DOUBLE_EQ(ordered_cost_of<AndFlow>(data, true), 0.6 + 0.4*1.2 + 0.4*0.7*1.3); + EXPECT_DOUBLE_EQ(ordered_cost_of<OrFlow>(data, false), 1.1 + 0.6*1.2 + 0.6*0.3*1.3); + EXPECT_DOUBLE_EQ(ordered_cost_of<OrFlow>(data, true), 0.6 + 0.5 + 0.4); + EXPECT_DOUBLE_EQ(ordered_cost_of<AndNotFlow>(data, false), 1.1 + 0.4*1.2 + 0.4*0.3*1.3); + EXPECT_DOUBLE_EQ(ordered_cost_of<AndNotFlow>(data, true), 0.6 + 0.4*1.2 + 0.4*0.3*1.3); } TEST(FlowTest, optimal_and_flow) { for (size_t i = 0; i < loop_cnt; ++i) { for (bool strict: {false, true}) { auto data = gen_data(7); - double ref_est = Item::estimate_of<AndFlow>(data); - double min_cost = Item::cost_of<AndFlow>(data, strict); + double ref_est = AndFlow::estimate_of(data); + double min_cost = AndFlow::cost_of(data, strict); double max_cost = 0.0; - Item::sort<AndFlow>(data, strict); - EXPECT_EQ(Item::ordered_cost_of<AndFlow>(data, strict), min_cost); - auto check = [&](const std::vector<Item> &my_data) noexcept { - double my_cost = Item::ordered_cost_of<AndFlow>(my_data, strict); + AndFlow::sort(data, strict); + EXPECT_EQ(ordered_cost_of<AndFlow>(data, strict), min_cost); + auto check = [&](const std::vector<FlowStats> &my_data) noexcept { + double my_cost = ordered_cost_of<AndFlow>(my_data, strict); EXPECT_LE(min_cost, my_cost); max_cost = std::max(max_cost, my_cost); }; @@ -216,7 +218,7 @@ TEST(FlowTest, optimal_and_flow) { fprintf(stderr, " AND cost(%zu,%s): min: %g, max: %g, factor: %g\n", i, strict ? "strict" : "non-strict", min_cost, max_cost, max_cost / min_cost); } - EXPECT_NEAR(ref_est, Item::estimate_of<AndFlow>(data), 1e-9); + EXPECT_NEAR(ref_est, AndFlow::estimate_of(data), 1e-9); } } } @@ -225,12 +227,12 @@ TEST(FlowTest, optimal_or_flow) { for (size_t i = 0; i < loop_cnt; ++i) { for (bool strict: {false, true}) { auto data = gen_data(7); - double min_cost = Item::cost_of<OrFlow>(data, strict); + double min_cost = OrFlow::cost_of(data, strict); double max_cost = 0.0; - Item::sort<OrFlow>(data, strict); - EXPECT_EQ(Item::ordered_cost_of<OrFlow>(data, strict), min_cost); - auto check = [&](const std::vector<Item> &my_data) noexcept { - double my_cost = Item::ordered_cost_of<OrFlow>(my_data, strict); + OrFlow::sort(data, strict); + EXPECT_EQ(ordered_cost_of<OrFlow>(data, strict), min_cost); + auto check = [&](const std::vector<FlowStats> &my_data) noexcept { + double my_cost = ordered_cost_of<OrFlow>(my_data, strict); EXPECT_LE(min_cost, my_cost + 1e-9); max_cost = std::max(max_cost, my_cost); }; @@ -247,15 +249,15 @@ TEST(FlowTest, optimal_and_not_flow) { for (size_t i = 0; i < loop_cnt; ++i) { for (bool strict: {false, true}) { auto data = gen_data(7); - Item first = data[0]; - double min_cost = Item::cost_of<AndNotFlow>(data, strict); + FlowStats first = data[0]; + double min_cost = AndNotFlow::cost_of(data, strict); double max_cost = 0.0; - Item::sort<AndNotFlow>(data, strict); + AndNotFlow::sort(data, strict); EXPECT_EQ(data[0], first); - EXPECT_EQ(Item::ordered_cost_of<AndNotFlow>(data, strict), min_cost); - auto check = [&](const std::vector<Item> &my_data) noexcept { + EXPECT_EQ(ordered_cost_of<AndNotFlow>(data, strict), min_cost); + auto check = [&](const std::vector<FlowStats> &my_data) noexcept { if (my_data[0] == first) { - double my_cost = Item::ordered_cost_of<AndNotFlow>(my_data, strict); + double my_cost = ordered_cost_of<AndNotFlow>(my_data, strict); EXPECT_LE(min_cost, my_cost); max_cost = std::max(max_cost, my_cost); } |