From 64990b4bb6fd3cba16275f431a24212fe4c1abf7 Mon Sep 17 00:00:00 2001 From: HÃ¥vard Pettersen Date: Mon, 12 Feb 2024 12:18:05 +0000 Subject: account for heap cost in strict OR - make it easier to do flow analysis directly on FlowStats - use FlowStats in tests (less custom test code) - added flow_tuning.h for special sauce - strict flow must now have in-flow of 1.0 - split low-level flow tests into full/partial --- .../blueprint/intermediate_blueprints_test.cpp | 37 ++--- .../tests/queryeval/flow/queryeval_flow_test.cpp | 166 +++++++++++---------- searchlib/src/vespa/searchlib/queryeval/flow.h | 65 ++++++-- .../src/vespa/searchlib/queryeval/flow_tuning.h | 13 ++ .../queryeval/intermediate_blueprints.cpp | 6 +- 5 files changed, 166 insertions(+), 121 deletions(-) create mode 100644 searchlib/src/vespa/searchlib/queryeval/flow_tuning.h diff --git a/searchlib/src/tests/queryeval/blueprint/intermediate_blueprints_test.cpp b/searchlib/src/tests/queryeval/blueprint/intermediate_blueprints_test.cpp index aed6f6e60f2..510c6be4bf3 100644 --- a/searchlib/src/tests/queryeval/blueprint/intermediate_blueprints_test.cpp +++ b/searchlib/src/tests/queryeval/blueprint/intermediate_blueprints_test.cpp @@ -5,6 +5,7 @@ #include #include #include +#include #include #include #include @@ -1332,26 +1333,20 @@ void verify_cost(make &&mk, double expect, double expect_strict) { EXPECT_EQUAL(bp->strict_cost(), expect_strict); } -double calc_cost(std::vector> list) { - double flow = 1.0; - double cost = 0.0; - for (auto [sub_cost,pass_through]: list) { - cost += flow * sub_cost; - flow *= pass_through; - } - return cost; -} +std::vector child_stats({{0.2, 1.1, 0.2*1.1}, + {0.3, 1.2, 0.3*1.2}, + {0.5, 1.3, 1.3}}); TEST("cost for OR") { verify_cost(make::OR(), - calc_cost({{1.3, 0.5},{1.2, 0.7},{1.1, 0.8}}), - 0.2*1.1 + 0.3*1.2 + 1.3); + OrFlow::cost_of(child_stats, false), + OrFlow::cost_of(child_stats, true) + flow::heap_cost(OrFlow::estimate_of(child_stats), 3)); } TEST("cost for AND") { verify_cost(make::AND(), - calc_cost({{1.1, 0.2},{1.2, 0.3},{1.3, 0.5}}), - calc_cost({{0.2*1.1, 0.2},{1.2, 0.3},{1.3, 0.5}})); + AndFlow::cost_of(child_stats, false), + AndFlow::cost_of(child_stats, true)); } TEST("cost for RANK") { @@ -1360,8 +1355,8 @@ TEST("cost for RANK") { TEST("cost for ANDNOT") { verify_cost(make::ANDNOT(), - calc_cost({{1.1, 0.2},{1.3, 0.5},{1.2, 0.7}}), - calc_cost({{0.2*1.1, 0.2},{1.3, 0.5},{1.2, 0.7}})); + AndNotFlow::cost_of(child_stats, false), + AndNotFlow::cost_of(child_stats, true)); } TEST("cost for SB") { @@ -1371,20 +1366,20 @@ TEST("cost for SB") { TEST("cost for NEAR") { verify_cost(make::NEAR(1), - 0.2*0.3*0.5 * 3 + calc_cost({{1.1, 0.2},{1.2, 0.3},{1.3, 0.5}}), - 0.2*0.3*0.5 * 3 + calc_cost({{0.2*1.1, 0.2},{1.2, 0.3},{1.3, 0.5}})); + AndFlow::cost_of(child_stats, false) + AndFlow::estimate_of(child_stats) * 3, + AndFlow::cost_of(child_stats, true) + AndFlow::estimate_of(child_stats) * 3); } TEST("cost for ONEAR") { verify_cost(make::ONEAR(1), - 0.2*0.3*0.5 * 3 + calc_cost({{1.1, 0.2},{1.2, 0.3},{1.3, 0.5}}), - 0.2*0.3*0.5 * 3 + calc_cost({{0.2*1.1, 0.2},{1.2, 0.3},{1.3, 0.5}})); + AndFlow::cost_of(child_stats, false) + AndFlow::estimate_of(child_stats) * 3, + AndFlow::cost_of(child_stats, true) + AndFlow::estimate_of(child_stats) * 3); } TEST("cost for WEAKAND") { verify_cost(make::WEAKAND(1000), - calc_cost({{1.3, 0.5},{1.2, 0.7},{1.1, 0.8}}), - 0.2*1.1 + 0.3*1.2 + 1.3); + OrFlow::cost_of(child_stats, false), + OrFlow::cost_of(child_stats, true)); } TEST_MAIN() { TEST_DEBUG("lhs.out", "rhs.out"); TEST_RUN_ALL(); } diff --git a/searchlib/src/tests/queryeval/flow/queryeval_flow_test.cpp b/searchlib/src/tests/queryeval/flow/queryeval_flow_test.cpp index ec3e0066134..7a3950dbf1c 100644 --- a/searchlib/src/tests/queryeval/flow/queryeval_flow_test.cpp +++ b/searchlib/src/tests/queryeval/flow/queryeval_flow_test.cpp @@ -9,42 +9,20 @@ constexpr size_t loop_cnt = 64; using namespace search::queryeval; -struct ItemAdapter { - double estimate(const auto &child) const noexcept { return child.rel_est; } - double cost(const auto &child) const noexcept { return child.cost; } - double strict_cost(const auto &child) const noexcept { return child.strict_cost; } -}; - -struct Item { - double rel_est; - double cost; - double strict_cost; - Item(double rel_est_in, double cost_in, double strict_cost_in) noexcept - : rel_est(rel_est_in), cost(cost_in), strict_cost(strict_cost_in) {} - template static double estimate_of(std::vector &data) { - return FLOW::estimate_of(ItemAdapter(), data); - } - template static void sort(std::vector &data, bool strict) { - FLOW::sort(ItemAdapter(), data, strict); - } - template static double cost_of(const std::vector &data, bool strict) { - return FLOW::cost_of(ItemAdapter(), data, strict); - } - template static double ordered_cost_of(const std::vector &data, bool strict) { - return flow::ordered_cost_of(ItemAdapter(), data, FLOW(1.0, strict)); - } - auto operator <=>(const Item &rhs) const noexcept = default; -}; +template +double ordered_cost_of(const std::vector &data, bool strict) { + return flow::ordered_cost_of(flow::DirectAdapter(), data, FLOW(strict)); +} -std::vector gen_data(size_t size) { +std::vector gen_data(size_t size) { static std::mt19937 gen; - static std::uniform_real_distribution rel_est(0.1, 0.9); + static std::uniform_real_distribution estimate(0.1, 0.9); static std::uniform_real_distribution cost(1.0, 10.0); static std::uniform_real_distribution strict_cost(0.1, 5.0); - std::vector result; + std::vector result; result.reserve(size); for (size_t i = 0; i < size; ++i) { - result.emplace_back(rel_est(gen), cost(gen), strict_cost(gen)); + result.emplace_back(estimate(gen), cost(gen), strict_cost(gen)); } return result; } @@ -84,7 +62,7 @@ TEST(FlowTest, perm_test) { template