summaryrefslogtreecommitdiffstats
path: root/searchlib/src/tests/queryeval/flow/queryeval_flow_test.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'searchlib/src/tests/queryeval/flow/queryeval_flow_test.cpp')
-rw-r--r--searchlib/src/tests/queryeval/flow/queryeval_flow_test.cpp256
1 files changed, 206 insertions, 50 deletions
diff --git a/searchlib/src/tests/queryeval/flow/queryeval_flow_test.cpp b/searchlib/src/tests/queryeval/flow/queryeval_flow_test.cpp
index ceda30f169a..9a9adeac2bc 100644
--- a/searchlib/src/tests/queryeval/flow/queryeval_flow_test.cpp
+++ b/searchlib/src/tests/queryeval/flow/queryeval_flow_test.cpp
@@ -5,44 +5,46 @@
#include <vector>
#include <random>
-using search::queryeval::AndFlow;
-using search::queryeval::OrFlow;
+constexpr size_t loop_cnt = 64;
+
+using namespace search::queryeval;
+
+struct ItemAdapter {
+ double estimate(const auto &child) const noexcept { return child.rel_est; }
+ double cost(const auto &child) const noexcept { return child.cost; }
+ double strict_cost(const auto &child) const noexcept { return child.strict_cost; }
+};
struct Item {
double rel_est;
double cost;
- Item(double rel_est_in, double cost_in) noexcept
- : rel_est(rel_est_in), cost(cost_in) {}
- static void sort_for_and(std::vector<Item> &data) {
- std::sort(data.begin(), data.end(), [](const Item &a, const Item &b) noexcept {
- return (1.0 - a.rel_est) / a.cost > (1.0 - b.rel_est) / b.cost;
- });
+ double strict_cost;
+ Item(double rel_est_in, double cost_in, double strict_cost_in) noexcept
+ : rel_est(rel_est_in), cost(cost_in), strict_cost(strict_cost_in) {}
+ template <typename FLOW> static double estimate_of(std::vector<Item> &data) {
+ return FLOW::estimate_of(ItemAdapter(), data);
}
- static void sort_for_or(std::vector<Item> &data) {
- std::sort(data.begin(), data.end(), [](const Item &a, const Item &b) noexcept {
- return a.rel_est / a.cost > b.rel_est / b.cost;
- });
+ template <typename FLOW> static void sort(std::vector<Item> &data, bool strict) {
+ FLOW::sort(ItemAdapter(), data, strict);
}
- static double cost_of(const std::vector<Item> &data, auto flow) {
- double cost = 0.0;
- for (const Item &item: data) {
- cost += flow.flow() * item.cost;
- flow.add(item.rel_est);
- }
- return cost;
+ template <typename FLOW> static double cost_of(const std::vector<Item> &data, bool strict) {
+ return FLOW::cost_of(ItemAdapter(), data, strict);
+ }
+ template <typename FLOW> static double ordered_cost_of(const std::vector<Item> &data, bool strict) {
+ return flow::ordered_cost_of(ItemAdapter(), data, FLOW(1.0, strict));
}
- static double cost_of_and(const std::vector<Item> &data) { return cost_of(data, AndFlow()); }
- static double cost_of_or(const std::vector<Item> &data) { return cost_of(data, OrFlow()); }
+ auto operator <=>(const Item &rhs) const noexcept = default;
};
std::vector<Item> gen_data(size_t size) {
static std::mt19937 gen;
- static std::uniform_real_distribution<double> rel_est(0.1, 0.9);
- static std::uniform_real_distribution<double> cost(1.0, 10.0);
+ static std::uniform_real_distribution<double> rel_est(0.1, 0.9);
+ static std::uniform_real_distribution<double> cost(1.0, 10.0);
+ static std::uniform_real_distribution<double> strict_cost(0.1, 5.0);
std::vector<Item> result;
result.reserve(size);
for (size_t i = 0; i < size; ++i) {
- result.emplace_back(rel_est(gen), cost(gen));
+ result.emplace_back(rel_est(gen), cost(gen), strict_cost(gen));
}
return result;
}
@@ -80,37 +82,191 @@ TEST(FlowTest, perm_test) {
EXPECT_EQ(seen.size(), 120);
}
+template <template <typename> typename ORDER>
+void verify_ordering_is_strict_weak() {
+ auto cmp = ORDER(ItemAdapter());
+ auto input = gen_data(7);
+ input.emplace_back(0.5, 1.5, 0.5);
+ input.emplace_back(0.5, 1.5, 0.5);
+ input.emplace_back(0.5, 1.5, 0.5);
+ input.emplace_back(0.0, 1.5, 0.5);
+ input.emplace_back(0.0, 1.5, 0.5);
+ input.emplace_back(0.5, 0.0, 0.5);
+ input.emplace_back(0.5, 0.0, 0.5);
+ input.emplace_back(0.5, 1.5, 0.0);
+ input.emplace_back(0.5, 1.5, 0.0);
+ input.emplace_back(0.0, 0.0, 0.0);
+ input.emplace_back(0.0, 0.0, 0.0);
+ std::vector<Item> output;
+ for (const Item &in: input) {
+ EXPECT_FALSE(cmp(in, in)); // Irreflexivity
+ size_t out_idx = 0;
+ bool lower = false;
+ bool upper = false;
+ for (const Item &out: output) {
+ if (cmp(out, in)) {
+ EXPECT_FALSE(cmp(in, out)); // Antisymmetry
+ EXPECT_FALSE(lower); // Transitivity
+ EXPECT_FALSE(upper); // Transitivity
+ ++out_idx;
+ } else {
+ lower = true;
+ if (cmp(in, out)) {
+ upper = true;
+ } else {
+ EXPECT_FALSE(upper); // Transitivity
+ }
+ }
+ }
+ output.insert(output.begin() + out_idx, in);
+ }
+}
+
+TEST(FlowTest, and_ordering_is_strict_weak) {
+ verify_ordering_is_strict_weak<flow::MinAndCost>();
+}
+
+TEST(FlowTest, or_ordering_is_strict_weak) {
+ verify_ordering_is_strict_weak<flow::MinOrCost>();
+}
+
+TEST(FlowTest, strict_or_ordering_is_strict_weak) {
+ verify_ordering_is_strict_weak<flow::MinOrStrictCost>();
+}
+
+struct ExpectFlow {
+ double flow;
+ double est;
+ bool strict;
+};
+
+void verify_flow(auto flow, const std::vector<double> &est_list, const std::vector<ExpectFlow> &expect) {
+ ASSERT_EQ(est_list.size() + 1, expect.size());
+ for (size_t i = 0; i < expect.size(); ++i) {
+ EXPECT_DOUBLE_EQ(flow.flow(), expect[i].flow);
+ EXPECT_DOUBLE_EQ(flow.estimate(), expect[i].est);
+ EXPECT_EQ(flow.strict(), expect[i].strict);
+ if (i < est_list.size()) {
+ flow.add(est_list[i]);
+ }
+ }
+}
+
+TEST(FlowTest, basic_and_flow) {
+ for (double in: {1.0, 0.5, 0.25}) {
+ for (bool strict: {false, true}) {
+ verify_flow(AndFlow(in, strict), {0.4, 0.7, 0.2},
+ {{in, 0.0, strict},
+ {in*0.4, in*0.4, false},
+ {in*0.4*0.7, in*0.4*0.7, false},
+ {in*0.4*0.7*0.2, in*0.4*0.7*0.2, false}});
+ }
+ }
+}
+
+TEST(FlowTest, basic_or_flow) {
+ for (double in: {1.0, 0.5, 0.25}) {
+ for (bool strict: {false, true}) {
+ verify_flow(OrFlow(in, strict), {0.4, 0.7, 0.2},
+ {{in, 0.0, strict},
+ {in*0.6, 1.0-in*0.6, strict},
+ {in*0.6*0.3, 1.0-in*0.6*0.3, strict},
+ {in*0.6*0.3*0.8, 1.0-in*0.6*0.3*0.8, strict}});
+ }
+ }
+}
+
+TEST(FlowTest, basic_and_not_flow) {
+ for (double in: {1.0, 0.5, 0.25}) {
+ for (bool strict: {false, true}) {
+ verify_flow(AndNotFlow(in, strict), {0.4, 0.7, 0.2},
+ {{in, 0.0, strict},
+ {in*0.4, in*0.4, false},
+ {in*0.4*0.3, in*0.4*0.3, false},
+ {in*0.4*0.3*0.8, in*0.4*0.3*0.8, false}});
+ }
+ }
+}
+
+TEST(FlowTest, flow_cost) {
+ std::vector<Item> data = {{0.4, 1.1, 0.6}, {0.7, 1.2, 0.5}, {0.2, 1.3, 0.4}};
+ EXPECT_DOUBLE_EQ(Item::ordered_cost_of<AndFlow>(data, false), 1.1 + 0.4*1.2 + 0.4*0.7*1.3);
+ EXPECT_DOUBLE_EQ(Item::ordered_cost_of<AndFlow>(data, true), 0.6 + 0.4*1.2 + 0.4*0.7*1.3);
+ EXPECT_DOUBLE_EQ(Item::ordered_cost_of<OrFlow>(data, false), 1.1 + 0.6*1.2 + 0.6*0.3*1.3);
+ EXPECT_DOUBLE_EQ(Item::ordered_cost_of<OrFlow>(data, true), 0.6 + 0.6*0.5 + 0.6*0.3*0.4);
+ EXPECT_DOUBLE_EQ(Item::ordered_cost_of<AndNotFlow>(data, false), 1.1 + 0.4*1.2 + 0.4*0.3*1.3);
+ EXPECT_DOUBLE_EQ(Item::ordered_cost_of<AndNotFlow>(data, true), 0.6 + 0.4*1.2 + 0.4*0.3*1.3);
+}
+
TEST(FlowTest, optimal_and_flow) {
- for (size_t i = 0; i < 256; ++i) {
- auto data = gen_data(7);
- Item::sort_for_and(data);
- double min_cost = Item::cost_of_and(data);
- double max_cost = 0.0;
- auto check = [min_cost,&max_cost](const std::vector<Item> &my_data) noexcept {
- double my_cost = Item::cost_of_and(my_data);
- EXPECT_LE(min_cost, my_cost);
- max_cost = std::max(max_cost, my_cost);
- };
- each_perm(data, check);
- fprintf(stderr, " and cost(%zu): min: %g, max: %g, factor: %g\n",
- i, min_cost, max_cost, max_cost / min_cost);
+ for (size_t i = 0; i < loop_cnt; ++i) {
+ for (bool strict: {false, true}) {
+ auto data = gen_data(7);
+ double ref_est = Item::estimate_of<AndFlow>(data);
+ double min_cost = Item::cost_of<AndFlow>(data, strict);
+ double max_cost = 0.0;
+ Item::sort<AndFlow>(data, strict);
+ EXPECT_EQ(Item::ordered_cost_of<AndFlow>(data, strict), min_cost);
+ auto check = [&](const std::vector<Item> &my_data) noexcept {
+ double my_cost = Item::ordered_cost_of<AndFlow>(my_data, strict);
+ EXPECT_LE(min_cost, my_cost);
+ max_cost = std::max(max_cost, my_cost);
+ };
+ each_perm(data, check);
+ if (loop_cnt < 1024 || i % 1024 == 0) {
+ fprintf(stderr, " AND cost(%zu,%s): min: %g, max: %g, factor: %g\n",
+ i, strict ? "strict" : "non-strict", min_cost, max_cost, max_cost / min_cost);
+ }
+ EXPECT_NEAR(ref_est, Item::estimate_of<AndFlow>(data), 1e-9);
+ }
}
}
TEST(FlowTest, optimal_or_flow) {
- for (size_t i = 0; i < 256; ++i) {
- auto data = gen_data(7);
- Item::sort_for_or(data);
- double min_cost = Item::cost_of_or(data);
- double max_cost = 0.0;
- auto check = [min_cost,&max_cost](const std::vector<Item> &my_data) noexcept {
- double my_cost = Item::cost_of_or(my_data);
- EXPECT_LE(min_cost, my_cost);
- max_cost = std::max(max_cost, my_cost);
- };
- each_perm(data, check);
- fprintf(stderr, " or cost(%zu): min: %g, max: %g, factor: %g\n",
- i, min_cost, max_cost, max_cost / min_cost);
+ for (size_t i = 0; i < loop_cnt; ++i) {
+ for (bool strict: {false, true}) {
+ auto data = gen_data(7);
+ double min_cost = Item::cost_of<OrFlow>(data, strict);
+ double max_cost = 0.0;
+ Item::sort<OrFlow>(data, strict);
+ EXPECT_EQ(Item::ordered_cost_of<OrFlow>(data, strict), min_cost);
+ auto check = [&](const std::vector<Item> &my_data) noexcept {
+ double my_cost = Item::ordered_cost_of<OrFlow>(my_data, strict);
+ EXPECT_LE(min_cost, my_cost);
+ max_cost = std::max(max_cost, my_cost);
+ };
+ each_perm(data, check);
+ if (loop_cnt < 1024 || i % 1024 == 0) {
+ fprintf(stderr, " OR cost(%zu,%s): min: %g, max: %g, factor: %g\n",
+ i, strict ? "strict" : "non-strict", min_cost, max_cost, max_cost / min_cost);
+ }
+ }
+ }
+}
+
+TEST(FlowTest, optimal_and_not_flow) {
+ for (size_t i = 0; i < loop_cnt; ++i) {
+ for (bool strict: {false, true}) {
+ auto data = gen_data(7);
+ Item first = data[0];
+ double min_cost = Item::cost_of<AndNotFlow>(data, strict);
+ double max_cost = 0.0;
+ Item::sort<AndNotFlow>(data, strict);
+ EXPECT_EQ(data[0], first);
+ EXPECT_EQ(Item::ordered_cost_of<AndNotFlow>(data, strict), min_cost);
+ auto check = [&](const std::vector<Item> &my_data) noexcept {
+ if (my_data[0] == first) {
+ double my_cost = Item::ordered_cost_of<AndNotFlow>(my_data, strict);
+ EXPECT_LE(min_cost, my_cost);
+ max_cost = std::max(max_cost, my_cost);
+ }
+ };
+ each_perm(data, check);
+ if (loop_cnt < 1024 || i % 1024 == 0) {
+ fprintf(stderr, " ANDNOT cost(%zu,%s): min: %g, max: %g, factor: %g\n",
+ i, strict ? "strict" : "non-strict", min_cost, max_cost, max_cost / min_cost);
+ }
+ }
}
}