diff options
author | Håvard Pettersen <havardpe@yahooinc.com> | 2024-02-02 14:00:53 +0000 |
---|---|---|
committer | Håvard Pettersen <havardpe@yahooinc.com> | 2024-02-05 11:03:13 +0000 |
commit | 021d823d55362441e11c81224583bb9d46618a35 (patch) | |
tree | cbbbe136bb7a2d790e2fc2a02a1d9026bbd84e9b /searchlib/src | |
parent | 3da723264949a92244565b962c2a4f773203d1e8 (diff) |
adjust strict OR flow
A simple heap-based strict OR is not able to benefit from short
circuiting of the per-document evaluation. An 'optimal' strict OR
would need more book-keeping and would probably be slower than the
simple implementation in most normal cases.
Diffstat (limited to 'searchlib/src')
3 files changed, 18 insertions, 30 deletions
diff --git a/searchlib/src/tests/queryeval/blueprint/intermediate_blueprints_test.cpp b/searchlib/src/tests/queryeval/blueprint/intermediate_blueprints_test.cpp index 241d6c67e0a..aed6f6e60f2 100644 --- a/searchlib/src/tests/queryeval/blueprint/intermediate_blueprints_test.cpp +++ b/searchlib/src/tests/queryeval/blueprint/intermediate_blueprints_test.cpp @@ -1345,7 +1345,7 @@ double calc_cost(std::vector<std::pair<double,double>> list) { TEST("cost for OR") { verify_cost(make::OR(), calc_cost({{1.3, 0.5},{1.2, 0.7},{1.1, 0.8}}), - calc_cost({{0.2*1.1, 0.8},{0.3*1.2, 0.7},{1.3, 0.5}})); + 0.2*1.1 + 0.3*1.2 + 1.3); } TEST("cost for AND") { @@ -1384,7 +1384,7 @@ TEST("cost for ONEAR") { TEST("cost for WEAKAND") { verify_cost(make::WEAKAND(1000), calc_cost({{1.3, 0.5},{1.2, 0.7},{1.1, 0.8}}), - calc_cost({{0.2*1.1, 0.8},{0.3*1.2, 0.7},{1.3, 0.5}})); + 0.2*1.1 + 0.3*1.2 + 1.3); } TEST_MAIN() { TEST_DEBUG("lhs.out", "rhs.out"); TEST_RUN_ALL(); } diff --git a/searchlib/src/tests/queryeval/flow/queryeval_flow_test.cpp b/searchlib/src/tests/queryeval/flow/queryeval_flow_test.cpp index 9a9adeac2bc..ec3e0066134 100644 --- a/searchlib/src/tests/queryeval/flow/queryeval_flow_test.cpp +++ b/searchlib/src/tests/queryeval/flow/queryeval_flow_test.cpp @@ -130,10 +130,6 @@ TEST(FlowTest, or_ordering_is_strict_weak) { verify_ordering_is_strict_weak<flow::MinOrCost>(); } -TEST(FlowTest, strict_or_ordering_is_strict_weak) { - verify_ordering_is_strict_weak<flow::MinOrStrictCost>(); -} - struct ExpectFlow { double flow; double est; @@ -166,13 +162,16 @@ TEST(FlowTest, basic_and_flow) { TEST(FlowTest, basic_or_flow) { for (double in: {1.0, 0.5, 0.25}) { - for (bool strict: {false, true}) { - verify_flow(OrFlow(in, strict), {0.4, 0.7, 0.2}, - {{in, 0.0, strict}, - {in*0.6, 1.0-in*0.6, strict}, - {in*0.6*0.3, 1.0-in*0.6*0.3, strict}, - {in*0.6*0.3*0.8, 1.0-in*0.6*0.3*0.8, strict}}); - } + verify_flow(OrFlow(in, false), {0.4, 0.7, 0.2}, + {{in, 0.0, false}, + {in*0.6, 1.0-in*0.6, false}, + {in*0.6*0.3, 1.0-in*0.6*0.3, false}, + {in*0.6*0.3*0.8, 1.0-in*0.6*0.3*0.8, false}}); + verify_flow(OrFlow(in, true), {0.4, 0.7, 0.2}, + {{in, 0.0, true}, + {in, 1.0-in*0.6, true}, + {in, 1.0-in*0.6*0.3, true}, + {in, 1.0-in*0.6*0.3*0.8, true}}); } } @@ -193,7 +192,7 @@ TEST(FlowTest, flow_cost) { EXPECT_DOUBLE_EQ(Item::ordered_cost_of<AndFlow>(data, false), 1.1 + 0.4*1.2 + 0.4*0.7*1.3); EXPECT_DOUBLE_EQ(Item::ordered_cost_of<AndFlow>(data, true), 0.6 + 0.4*1.2 + 0.4*0.7*1.3); EXPECT_DOUBLE_EQ(Item::ordered_cost_of<OrFlow>(data, false), 1.1 + 0.6*1.2 + 0.6*0.3*1.3); - EXPECT_DOUBLE_EQ(Item::ordered_cost_of<OrFlow>(data, true), 0.6 + 0.6*0.5 + 0.6*0.3*0.4); + EXPECT_DOUBLE_EQ(Item::ordered_cost_of<OrFlow>(data, true), 0.6 + 0.5 + 0.4); EXPECT_DOUBLE_EQ(Item::ordered_cost_of<AndNotFlow>(data, false), 1.1 + 0.4*1.2 + 0.4*0.3*1.3); EXPECT_DOUBLE_EQ(Item::ordered_cost_of<AndNotFlow>(data, true), 0.6 + 0.4*1.2 + 0.4*0.3*1.3); } @@ -232,7 +231,7 @@ TEST(FlowTest, optimal_or_flow) { EXPECT_EQ(Item::ordered_cost_of<OrFlow>(data, strict), min_cost); auto check = [&](const std::vector<Item> &my_data) noexcept { double my_cost = Item::ordered_cost_of<OrFlow>(my_data, strict); - EXPECT_LE(min_cost, my_cost); + EXPECT_LE(min_cost, my_cost + 1e-9); max_cost = std::max(max_cost, my_cost); }; each_perm(data, check); diff --git a/searchlib/src/vespa/searchlib/queryeval/flow.h b/searchlib/src/vespa/searchlib/queryeval/flow.h index 01a5922d864..8b354b7b9a9 100644 --- a/searchlib/src/vespa/searchlib/queryeval/flow.h +++ b/searchlib/src/vespa/searchlib/queryeval/flow.h @@ -67,16 +67,6 @@ struct MinOrCost { } }; -template <typename ADAPTER> -struct MinOrStrictCost { - // sort children to minimize total cost of strict OR flow - [[no_unique_address]] ADAPTER adapter; - MinOrStrictCost(ADAPTER adapter_in) noexcept : adapter(adapter_in) {} - bool operator () (const auto &a, const auto &b) const noexcept { - return adapter.estimate(a) * adapter.strict_cost(b) > adapter.estimate(b) * adapter.strict_cost(a); - } -}; - template <typename ADAPTER, typename T, typename F> double estimate_of(ADAPTER adapter, const T &children, F flow) { for (const auto &child: children) { @@ -195,18 +185,19 @@ public: class OrFlow : public FlowMixin<OrFlow>{ private: + double _full; double _flow; bool _strict; bool _first; public: OrFlow(double in, bool strict) noexcept - : _flow(in), _strict(strict), _first(true) {} + : _full(in), _flow(in), _strict(strict), _first(true) {} void add(double est) noexcept { _flow *= (1.0 - est); _first = false; } double flow() const noexcept { - return _flow; + return _strict ? _full : _flow; } bool strict() const noexcept { return _strict; @@ -215,9 +206,7 @@ public: return _first ? 0.0 : (1.0 - _flow); } static void sort(auto adapter, auto &children, bool strict) { - if (strict) { - flow::sort<flow::MinOrStrictCost>(adapter, children); - } else { + if (!strict) { flow::sort<flow::MinOrCost>(adapter, children); } } |