aboutsummaryrefslogtreecommitdiffstats
path: root/searchlib/src/tests/queryeval/flow/queryeval_flow_test.cpp
diff options
context:
space:
mode:
authorHåvard Pettersen <havardpe@yahooinc.com>2024-02-12 12:18:05 +0000
committerHåvard Pettersen <havardpe@yahooinc.com>2024-02-12 14:39:38 +0000
commit64990b4bb6fd3cba16275f431a24212fe4c1abf7 (patch)
tree1e10acb43747155a937355e4af179c9a7ed96af2 /searchlib/src/tests/queryeval/flow/queryeval_flow_test.cpp
parent05a7aa2760f8f14dfd2128eef6e8c6ab2ee5b8e9 (diff)
account for heap cost in strict OR
- make it easier to do flow analysis directly on FlowStats - use FlowStats in tests (less custom test code) - added flow_tuning.h for special sauce - strict flow must now have in-flow of 1.0 - split low-level flow tests into full/partial
Diffstat (limited to 'searchlib/src/tests/queryeval/flow/queryeval_flow_test.cpp')
-rw-r--r--searchlib/src/tests/queryeval/flow/queryeval_flow_test.cpp166
1 files changed, 84 insertions, 82 deletions
diff --git a/searchlib/src/tests/queryeval/flow/queryeval_flow_test.cpp b/searchlib/src/tests/queryeval/flow/queryeval_flow_test.cpp
index ec3e0066134..7a3950dbf1c 100644
--- a/searchlib/src/tests/queryeval/flow/queryeval_flow_test.cpp
+++ b/searchlib/src/tests/queryeval/flow/queryeval_flow_test.cpp
@@ -9,42 +9,20 @@ constexpr size_t loop_cnt = 64;
using namespace search::queryeval;
-struct ItemAdapter {
- double estimate(const auto &child) const noexcept { return child.rel_est; }
- double cost(const auto &child) const noexcept { return child.cost; }
- double strict_cost(const auto &child) const noexcept { return child.strict_cost; }
-};
-
-struct Item {
- double rel_est;
- double cost;
- double strict_cost;
- Item(double rel_est_in, double cost_in, double strict_cost_in) noexcept
- : rel_est(rel_est_in), cost(cost_in), strict_cost(strict_cost_in) {}
- template <typename FLOW> static double estimate_of(std::vector<Item> &data) {
- return FLOW::estimate_of(ItemAdapter(), data);
- }
- template <typename FLOW> static void sort(std::vector<Item> &data, bool strict) {
- FLOW::sort(ItemAdapter(), data, strict);
- }
- template <typename FLOW> static double cost_of(const std::vector<Item> &data, bool strict) {
- return FLOW::cost_of(ItemAdapter(), data, strict);
- }
- template <typename FLOW> static double ordered_cost_of(const std::vector<Item> &data, bool strict) {
- return flow::ordered_cost_of(ItemAdapter(), data, FLOW(1.0, strict));
- }
- auto operator <=>(const Item &rhs) const noexcept = default;
-};
+template <typename FLOW>
+double ordered_cost_of(const std::vector<FlowStats> &data, bool strict) {
+ return flow::ordered_cost_of(flow::DirectAdapter(), data, FLOW(strict));
+}
-std::vector<Item> gen_data(size_t size) {
+std::vector<FlowStats> gen_data(size_t size) {
static std::mt19937 gen;
- static std::uniform_real_distribution<double> rel_est(0.1, 0.9);
+ static std::uniform_real_distribution<double> estimate(0.1, 0.9);
static std::uniform_real_distribution<double> cost(1.0, 10.0);
static std::uniform_real_distribution<double> strict_cost(0.1, 5.0);
- std::vector<Item> result;
+ std::vector<FlowStats> result;
result.reserve(size);
for (size_t i = 0; i < size; ++i) {
- result.emplace_back(rel_est(gen), cost(gen), strict_cost(gen));
+ result.emplace_back(estimate(gen), cost(gen), strict_cost(gen));
}
return result;
}
@@ -84,7 +62,7 @@ TEST(FlowTest, perm_test) {
template <template <typename> typename ORDER>
void verify_ordering_is_strict_weak() {
- auto cmp = ORDER(ItemAdapter());
+ auto cmp = ORDER(flow::DirectAdapter());
auto input = gen_data(7);
input.emplace_back(0.5, 1.5, 0.5);
input.emplace_back(0.5, 1.5, 0.5);
@@ -97,13 +75,13 @@ void verify_ordering_is_strict_weak() {
input.emplace_back(0.5, 1.5, 0.0);
input.emplace_back(0.0, 0.0, 0.0);
input.emplace_back(0.0, 0.0, 0.0);
- std::vector<Item> output;
- for (const Item &in: input) {
+ std::vector<FlowStats> output;
+ for (const FlowStats &in: input) {
EXPECT_FALSE(cmp(in, in)); // Irreflexivity
size_t out_idx = 0;
bool lower = false;
bool upper = false;
- for (const Item &out: output) {
+ for (const FlowStats &out: output) {
if (cmp(out, in)) {
EXPECT_FALSE(cmp(in, out)); // Antisymmetry
EXPECT_FALSE(lower); // Transitivity
@@ -148,66 +126,90 @@ void verify_flow(auto flow, const std::vector<double> &est_list, const std::vect
}
}
-TEST(FlowTest, basic_and_flow) {
+TEST(FlowTest, full_and_flow) {
+ for (bool strict: {false, true}) {
+ verify_flow(AndFlow(strict), {0.4, 0.7, 0.2},
+ {{1.0, 0.0, strict},
+ {0.4, 0.4, false},
+ {0.4*0.7, 0.4*0.7, false},
+ {0.4*0.7*0.2, 0.4*0.7*0.2, false}});
+ }
+}
+
+TEST(FlowTest, partial_and_flow) {
for (double in: {1.0, 0.5, 0.25}) {
- for (bool strict: {false, true}) {
- verify_flow(AndFlow(in, strict), {0.4, 0.7, 0.2},
- {{in, 0.0, strict},
- {in*0.4, in*0.4, false},
- {in*0.4*0.7, in*0.4*0.7, false},
- {in*0.4*0.7*0.2, in*0.4*0.7*0.2, false}});
- }
+ verify_flow(AndFlow(in), {0.4, 0.7, 0.2},
+ {{in, 0.0, false},
+ {in*0.4, in*0.4, false},
+ {in*0.4*0.7, in*0.4*0.7, false},
+ {in*0.4*0.7*0.2, in*0.4*0.7*0.2, false}});
}
}
-TEST(FlowTest, basic_or_flow) {
+TEST(FlowTest, full_or_flow) {
+ verify_flow(OrFlow(false), {0.4, 0.7, 0.2},
+ {{1.0, 0.0, false},
+ {0.6, 1.0-0.6, false},
+ {0.6*0.3, 1.0-0.6*0.3, false},
+ {0.6*0.3*0.8, 1.0-0.6*0.3*0.8, false}});
+ verify_flow(OrFlow(true), {0.4, 0.7, 0.2},
+ {{1.0, 0.0, true},
+ {1.0, 1.0-0.6, true},
+ {1.0, 1.0-0.6*0.3, true},
+ {1.0, 1.0-0.6*0.3*0.8, true}});
+}
+
+TEST(FlowTest, partial_or_flow) {
for (double in: {1.0, 0.5, 0.25}) {
- verify_flow(OrFlow(in, false), {0.4, 0.7, 0.2},
+ verify_flow(OrFlow(in), {0.4, 0.7, 0.2},
{{in, 0.0, false},
{in*0.6, 1.0-in*0.6, false},
{in*0.6*0.3, 1.0-in*0.6*0.3, false},
{in*0.6*0.3*0.8, 1.0-in*0.6*0.3*0.8, false}});
- verify_flow(OrFlow(in, true), {0.4, 0.7, 0.2},
- {{in, 0.0, true},
- {in, 1.0-in*0.6, true},
- {in, 1.0-in*0.6*0.3, true},
- {in, 1.0-in*0.6*0.3*0.8, true}});
}
}
-TEST(FlowTest, basic_and_not_flow) {
+TEST(FlowTest, full_and_not_flow) {
+ for (bool strict: {false, true}) {
+ verify_flow(AndNotFlow(strict), {0.4, 0.7, 0.2},
+ {{1.0, 0.0, strict},
+ {0.4, 0.4, false},
+ {0.4*0.3, 0.4*0.3, false},
+ {0.4*0.3*0.8, 0.4*0.3*0.8, false}});
+ }
+}
+
+TEST(FlowTest, partial_and_not_flow) {
for (double in: {1.0, 0.5, 0.25}) {
- for (bool strict: {false, true}) {
- verify_flow(AndNotFlow(in, strict), {0.4, 0.7, 0.2},
- {{in, 0.0, strict},
- {in*0.4, in*0.4, false},
- {in*0.4*0.3, in*0.4*0.3, false},
- {in*0.4*0.3*0.8, in*0.4*0.3*0.8, false}});
- }
+ verify_flow(AndNotFlow(in), {0.4, 0.7, 0.2},
+ {{in, 0.0, false},
+ {in*0.4, in*0.4, false},
+ {in*0.4*0.3, in*0.4*0.3, false},
+ {in*0.4*0.3*0.8, in*0.4*0.3*0.8, false}});
}
}
TEST(FlowTest, flow_cost) {
- std::vector<Item> data = {{0.4, 1.1, 0.6}, {0.7, 1.2, 0.5}, {0.2, 1.3, 0.4}};
- EXPECT_DOUBLE_EQ(Item::ordered_cost_of<AndFlow>(data, false), 1.1 + 0.4*1.2 + 0.4*0.7*1.3);
- EXPECT_DOUBLE_EQ(Item::ordered_cost_of<AndFlow>(data, true), 0.6 + 0.4*1.2 + 0.4*0.7*1.3);
- EXPECT_DOUBLE_EQ(Item::ordered_cost_of<OrFlow>(data, false), 1.1 + 0.6*1.2 + 0.6*0.3*1.3);
- EXPECT_DOUBLE_EQ(Item::ordered_cost_of<OrFlow>(data, true), 0.6 + 0.5 + 0.4);
- EXPECT_DOUBLE_EQ(Item::ordered_cost_of<AndNotFlow>(data, false), 1.1 + 0.4*1.2 + 0.4*0.3*1.3);
- EXPECT_DOUBLE_EQ(Item::ordered_cost_of<AndNotFlow>(data, true), 0.6 + 0.4*1.2 + 0.4*0.3*1.3);
+ std::vector<FlowStats> data = {{0.4, 1.1, 0.6}, {0.7, 1.2, 0.5}, {0.2, 1.3, 0.4}};
+ EXPECT_DOUBLE_EQ(ordered_cost_of<AndFlow>(data, false), 1.1 + 0.4*1.2 + 0.4*0.7*1.3);
+ EXPECT_DOUBLE_EQ(ordered_cost_of<AndFlow>(data, true), 0.6 + 0.4*1.2 + 0.4*0.7*1.3);
+ EXPECT_DOUBLE_EQ(ordered_cost_of<OrFlow>(data, false), 1.1 + 0.6*1.2 + 0.6*0.3*1.3);
+ EXPECT_DOUBLE_EQ(ordered_cost_of<OrFlow>(data, true), 0.6 + 0.5 + 0.4);
+ EXPECT_DOUBLE_EQ(ordered_cost_of<AndNotFlow>(data, false), 1.1 + 0.4*1.2 + 0.4*0.3*1.3);
+ EXPECT_DOUBLE_EQ(ordered_cost_of<AndNotFlow>(data, true), 0.6 + 0.4*1.2 + 0.4*0.3*1.3);
}
TEST(FlowTest, optimal_and_flow) {
for (size_t i = 0; i < loop_cnt; ++i) {
for (bool strict: {false, true}) {
auto data = gen_data(7);
- double ref_est = Item::estimate_of<AndFlow>(data);
- double min_cost = Item::cost_of<AndFlow>(data, strict);
+ double ref_est = AndFlow::estimate_of(data);
+ double min_cost = AndFlow::cost_of(data, strict);
double max_cost = 0.0;
- Item::sort<AndFlow>(data, strict);
- EXPECT_EQ(Item::ordered_cost_of<AndFlow>(data, strict), min_cost);
- auto check = [&](const std::vector<Item> &my_data) noexcept {
- double my_cost = Item::ordered_cost_of<AndFlow>(my_data, strict);
+ AndFlow::sort(data, strict);
+ EXPECT_EQ(ordered_cost_of<AndFlow>(data, strict), min_cost);
+ auto check = [&](const std::vector<FlowStats> &my_data) noexcept {
+ double my_cost = ordered_cost_of<AndFlow>(my_data, strict);
EXPECT_LE(min_cost, my_cost);
max_cost = std::max(max_cost, my_cost);
};
@@ -216,7 +218,7 @@ TEST(FlowTest, optimal_and_flow) {
fprintf(stderr, " AND cost(%zu,%s): min: %g, max: %g, factor: %g\n",
i, strict ? "strict" : "non-strict", min_cost, max_cost, max_cost / min_cost);
}
- EXPECT_NEAR(ref_est, Item::estimate_of<AndFlow>(data), 1e-9);
+ EXPECT_NEAR(ref_est, AndFlow::estimate_of(data), 1e-9);
}
}
}
@@ -225,12 +227,12 @@ TEST(FlowTest, optimal_or_flow) {
for (size_t i = 0; i < loop_cnt; ++i) {
for (bool strict: {false, true}) {
auto data = gen_data(7);
- double min_cost = Item::cost_of<OrFlow>(data, strict);
+ double min_cost = OrFlow::cost_of(data, strict);
double max_cost = 0.0;
- Item::sort<OrFlow>(data, strict);
- EXPECT_EQ(Item::ordered_cost_of<OrFlow>(data, strict), min_cost);
- auto check = [&](const std::vector<Item> &my_data) noexcept {
- double my_cost = Item::ordered_cost_of<OrFlow>(my_data, strict);
+ OrFlow::sort(data, strict);
+ EXPECT_EQ(ordered_cost_of<OrFlow>(data, strict), min_cost);
+ auto check = [&](const std::vector<FlowStats> &my_data) noexcept {
+ double my_cost = ordered_cost_of<OrFlow>(my_data, strict);
EXPECT_LE(min_cost, my_cost + 1e-9);
max_cost = std::max(max_cost, my_cost);
};
@@ -247,15 +249,15 @@ TEST(FlowTest, optimal_and_not_flow) {
for (size_t i = 0; i < loop_cnt; ++i) {
for (bool strict: {false, true}) {
auto data = gen_data(7);
- Item first = data[0];
- double min_cost = Item::cost_of<AndNotFlow>(data, strict);
+ FlowStats first = data[0];
+ double min_cost = AndNotFlow::cost_of(data, strict);
double max_cost = 0.0;
- Item::sort<AndNotFlow>(data, strict);
+ AndNotFlow::sort(data, strict);
EXPECT_EQ(data[0], first);
- EXPECT_EQ(Item::ordered_cost_of<AndNotFlow>(data, strict), min_cost);
- auto check = [&](const std::vector<Item> &my_data) noexcept {
+ EXPECT_EQ(ordered_cost_of<AndNotFlow>(data, strict), min_cost);
+ auto check = [&](const std::vector<FlowStats> &my_data) noexcept {
if (my_data[0] == first) {
- double my_cost = Item::ordered_cost_of<AndNotFlow>(my_data, strict);
+ double my_cost = ordered_cost_of<AndNotFlow>(my_data, strict);
EXPECT_LE(min_cost, my_cost);
max_cost = std::max(max_cost, my_cost);
}