diff options
author | Henning Baldersheim <balder@yahoo-inc.com> | 2023-12-18 21:19:10 +0100 |
---|---|---|
committer | GitHub <noreply@github.com> | 2023-12-18 21:19:10 +0100 |
commit | 16f5afc634cb71dd42b14b2c02487d15609490ed (patch) | |
tree | 749105ffcf7285c072c640ef413d1125b7ecaab6 | |
parent | 9b8ca63f91db64ee4c45e49f6656b81aa7595c76 (diff) | |
parent | f67592983cb5c112040afe7eeb4e375d88d1c13f (diff) |
Merge pull request #29698 from vespa-engine/havardpe/best-flow-costv8.276.19
verify that suggested sort order gives minimal flow cost
-rw-r--r-- | searchlib/CMakeLists.txt | 1 | ||||
-rw-r--r-- | searchlib/src/tests/queryeval/flow/CMakeLists.txt | 9 | ||||
-rw-r--r-- | searchlib/src/tests/queryeval/flow/queryeval_flow_test.cpp | 117 |
3 files changed, 127 insertions, 0 deletions
diff --git a/searchlib/CMakeLists.txt b/searchlib/CMakeLists.txt index d46d9b57789..c9e19b76cef 100644 --- a/searchlib/CMakeLists.txt +++ b/searchlib/CMakeLists.txt @@ -196,6 +196,7 @@ vespa_define_module( src/tests/queryeval/equiv src/tests/queryeval/fake_searchable src/tests/queryeval/filter_search + src/tests/queryeval/flow src/tests/queryeval/getnodeweight src/tests/queryeval/global_filter src/tests/queryeval/matching_elements_search diff --git a/searchlib/src/tests/queryeval/flow/CMakeLists.txt b/searchlib/src/tests/queryeval/flow/CMakeLists.txt new file mode 100644 index 00000000000..70658d36f21 --- /dev/null +++ b/searchlib/src/tests/queryeval/flow/CMakeLists.txt @@ -0,0 +1,9 @@ +# Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +vespa_add_executable(searchlib_queryeval_flow_test_app TEST + SOURCES + queryeval_flow_test.cpp + DEPENDS + searchlib + GTest::GTest +) +vespa_add_test(NAME searchlib_queryeval_flow_test_app COMMAND searchlib_queryeval_flow_test_app) diff --git a/searchlib/src/tests/queryeval/flow/queryeval_flow_test.cpp b/searchlib/src/tests/queryeval/flow/queryeval_flow_test.cpp new file mode 100644 index 00000000000..ceda30f169a --- /dev/null +++ b/searchlib/src/tests/queryeval/flow/queryeval_flow_test.cpp @@ -0,0 +1,117 @@ +// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include <vespa/searchlib/queryeval/flow.h> +#include <vespa/vespalib/gtest/gtest.h> +#include <vector> +#include <random> + +using search::queryeval::AndFlow; +using search::queryeval::OrFlow; + +struct Item { + double rel_est; + double cost; + Item(double rel_est_in, double cost_in) noexcept + : rel_est(rel_est_in), cost(cost_in) {} + static void sort_for_and(std::vector<Item> &data) { + std::sort(data.begin(), data.end(), [](const Item &a, const Item &b) noexcept { + return (1.0 - a.rel_est) / a.cost > (1.0 - b.rel_est) / b.cost; + }); + } + static void sort_for_or(std::vector<Item> &data) { + std::sort(data.begin(), data.end(), [](const Item &a, const Item &b) noexcept { + return a.rel_est / a.cost > b.rel_est / b.cost; + }); + } + static double cost_of(const std::vector<Item> &data, auto flow) { + double cost = 0.0; + for (const Item &item: data) { + cost += flow.flow() * item.cost; + flow.add(item.rel_est); + } + return cost; + } + static double cost_of_and(const std::vector<Item> &data) { return cost_of(data, AndFlow()); } + static double cost_of_or(const std::vector<Item> &data) { return cost_of(data, OrFlow()); } +}; + +std::vector<Item> gen_data(size_t size) { + static std::mt19937 gen; + static std::uniform_real_distribution<double> rel_est(0.1, 0.9); + static std::uniform_real_distribution<double> cost(1.0, 10.0); + std::vector<Item> result; + result.reserve(size); + for (size_t i = 0; i < size; ++i) { + result.emplace_back(rel_est(gen), cost(gen)); + } + return result; +} + +template <typename T, typename F> +void each_perm(std::vector<T> &data, size_t k, F fun) { + if (k <= 1) { + fun(const_cast<const std::vector<T> &>(data)); + } else { + each_perm(data, k-1, fun); + for (size_t i = 0; i < k-1; ++i) { + if (k & 1) { + std::swap(data[0], data[k-1]); + } else { + std::swap(data[i], data[k-1]); + } + each_perm(data, k-1, fun); + } + } +} + +template <typename T, typename F> +void each_perm(std::vector<T> &data, F fun) { + each_perm(data, data.size(), fun); +} + +TEST(FlowTest, perm_test) { + std::set<std::vector<int>> seen; + std::vector<int> data = {1,2,3,4,5}; + auto hook = [&](const std::vector<int> &perm) { + EXPECT_EQ(perm.size(), 5); + seen.insert(perm); + }; + each_perm(data, hook); + EXPECT_EQ(seen.size(), 120); +} + +TEST(FlowTest, optimal_and_flow) { + for (size_t i = 0; i < 256; ++i) { + auto data = gen_data(7); + Item::sort_for_and(data); + double min_cost = Item::cost_of_and(data); + double max_cost = 0.0; + auto check = [min_cost,&max_cost](const std::vector<Item> &my_data) noexcept { + double my_cost = Item::cost_of_and(my_data); + EXPECT_LE(min_cost, my_cost); + max_cost = std::max(max_cost, my_cost); + }; + each_perm(data, check); + fprintf(stderr, " and cost(%zu): min: %g, max: %g, factor: %g\n", + i, min_cost, max_cost, max_cost / min_cost); + } +} + +TEST(FlowTest, optimal_or_flow) { + for (size_t i = 0; i < 256; ++i) { + auto data = gen_data(7); + Item::sort_for_or(data); + double min_cost = Item::cost_of_or(data); + double max_cost = 0.0; + auto check = [min_cost,&max_cost](const std::vector<Item> &my_data) noexcept { + double my_cost = Item::cost_of_or(my_data); + EXPECT_LE(min_cost, my_cost); + max_cost = std::max(max_cost, my_cost); + }; + each_perm(data, check); + fprintf(stderr, " or cost(%zu): min: %g, max: %g, factor: %g\n", + i, min_cost, max_cost, max_cost / min_cost); + } +} + +GTEST_MAIN_RUN_ALL_TESTS() |