adjust strict OR flow

A simple heap-based strict OR is not able to benefit from short circuiting of the per-document evaluation. An 'optimal' strict OR would need more book-keeping and would probably be slower than the simple implementation in most normal cases.
author: Håvard Pettersen <havardpe@yahooinc.com> 2024-02-02 14:00:53 +0000
committer: Håvard Pettersen <havardpe@yahooinc.com> 2024-02-05 11:03:13 +0000
commit: 021d823d55362441e11c81224583bb9d46618a35 (patch)
tree: cbbbe136bb7a2d790e2fc2a02a1d9026bbd84e9b /searchlib/src
parent: 3da723264949a92244565b962c2a4f773203d1e8 (diff)
3 files changed, 18 insertions, 30 deletions
diff --git a/searchlib/src/tests/queryeval/blueprint/intermediate_blueprints_test.cpp b/searchlib/src/tests/queryeval/blueprint/intermediate_blueprints_test.cpp
index 241d6c67e0a..aed6f6e60f2 100644
--- a/searchlib/src/tests/queryeval/blueprint/intermediate_blueprints_test.cpp
+++ b/searchlib/src/tests/queryeval/blueprint/intermediate_blueprints_test.cpp
@@ -1345,7 +1345,7 @@ double calc_cost(std::vector<std::pair<double,double>> list) {
 TEST("cost for OR") {
     verify_cost(make::OR(),
                 calc_cost({{1.3, 0.5},{1.2, 0.7},{1.1, 0.8}}),
-                calc_cost({{0.2*1.1, 0.8},{0.3*1.2, 0.7},{1.3, 0.5}}));
+                0.2*1.1 + 0.3*1.2 + 1.3);
 }
 
 TEST("cost for AND") {
@@ -1384,7 +1384,7 @@ TEST("cost for ONEAR") {
 TEST("cost for WEAKAND") {
     verify_cost(make::WEAKAND(1000),
                 calc_cost({{1.3, 0.5},{1.2, 0.7},{1.1, 0.8}}),
-                calc_cost({{0.2*1.1, 0.8},{0.3*1.2, 0.7},{1.3, 0.5}}));
+                0.2*1.1 + 0.3*1.2 + 1.3);
 }
 
 TEST_MAIN() { TEST_DEBUG("lhs.out", "rhs.out"); TEST_RUN_ALL(); }
diff --git a/searchlib/src/tests/queryeval/flow/queryeval_flow_test.cpp b/searchlib/src/tests/queryeval/flow/queryeval_flow_test.cpp
index 9a9adeac2bc..ec3e0066134 100644
--- a/searchlib/src/tests/queryeval/flow/queryeval_flow_test.cpp
+++ b/searchlib/src/tests/queryeval/flow/queryeval_flow_test.cpp
@@ -130,10 +130,6 @@ TEST(FlowTest, or_ordering_is_strict_weak) {
     verify_ordering_is_strict_weak<flow::MinOrCost>();
 }
 
-TEST(FlowTest, strict_or_ordering_is_strict_weak) {
-    verify_ordering_is_strict_weak<flow::MinOrStrictCost>();
-}
-
 struct ExpectFlow {
     double flow;
     double est;
@@ -166,13 +162,16 @@ TEST(FlowTest, basic_and_flow) {
 
 TEST(FlowTest, basic_or_flow) {
     for (double in: {1.0, 0.5, 0.25}) {
-        for (bool strict: {false, true}) {
-            verify_flow(OrFlow(in, strict), {0.4, 0.7, 0.2},
-                        {{in, 0.0, strict},
-                         {in*0.6, 1.0-in*0.6, strict},
-                         {in*0.6*0.3, 1.0-in*0.6*0.3, strict},
-                         {in*0.6*0.3*0.8, 1.0-in*0.6*0.3*0.8, strict}});
-        }
+        verify_flow(OrFlow(in, false), {0.4, 0.7, 0.2},
+                    {{in, 0.0, false},
+                     {in*0.6, 1.0-in*0.6, false},
+                     {in*0.6*0.3, 1.0-in*0.6*0.3, false},
+                     {in*0.6*0.3*0.8, 1.0-in*0.6*0.3*0.8, false}});
+        verify_flow(OrFlow(in, true), {0.4, 0.7, 0.2},
+                    {{in, 0.0, true},
+                     {in, 1.0-in*0.6, true},
+                     {in, 1.0-in*0.6*0.3, true},
+                     {in, 1.0-in*0.6*0.3*0.8, true}});
     }
 }
 
@@ -193,7 +192,7 @@ TEST(FlowTest, flow_cost) {
     EXPECT_DOUBLE_EQ(Item::ordered_cost_of<AndFlow>(data, false), 1.1 + 0.4*1.2 + 0.4*0.7*1.3);
     EXPECT_DOUBLE_EQ(Item::ordered_cost_of<AndFlow>(data, true), 0.6 + 0.4*1.2 + 0.4*0.7*1.3);
     EXPECT_DOUBLE_EQ(Item::ordered_cost_of<OrFlow>(data, false), 1.1 + 0.6*1.2 + 0.6*0.3*1.3);
-    EXPECT_DOUBLE_EQ(Item::ordered_cost_of<OrFlow>(data, true), 0.6 + 0.6*0.5 + 0.6*0.3*0.4);
+    EXPECT_DOUBLE_EQ(Item::ordered_cost_of<OrFlow>(data, true), 0.6 + 0.5 + 0.4);
     EXPECT_DOUBLE_EQ(Item::ordered_cost_of<AndNotFlow>(data, false), 1.1 + 0.4*1.2 + 0.4*0.3*1.3);
     EXPECT_DOUBLE_EQ(Item::ordered_cost_of<AndNotFlow>(data, true), 0.6 + 0.4*1.2 + 0.4*0.3*1.3);
 }
@@ -232,7 +231,7 @@ TEST(FlowTest, optimal_or_flow) {
             EXPECT_EQ(Item::ordered_cost_of<OrFlow>(data, strict), min_cost);
             auto check = [&](const std::vector<Item> &my_data) noexcept {
                              double my_cost = Item::ordered_cost_of<OrFlow>(my_data, strict);
-                             EXPECT_LE(min_cost, my_cost);
+                             EXPECT_LE(min_cost, my_cost + 1e-9);
                              max_cost = std::max(max_cost, my_cost);
                          };
             each_perm(data, check);
diff --git a/searchlib/src/vespa/searchlib/queryeval/flow.h b/searchlib/src/vespa/searchlib/queryeval/flow.h
index 01a5922d864..8b354b7b9a9 100644
--- a/searchlib/src/vespa/searchlib/queryeval/flow.h
+++ b/searchlib/src/vespa/searchlib/queryeval/flow.h
@@ -67,16 +67,6 @@ struct MinOrCost {
     }
 };
 
-template <typename ADAPTER>
-struct MinOrStrictCost {
-    // sort children to minimize total cost of strict OR flow
-    [[no_unique_address]] ADAPTER adapter;
-    MinOrStrictCost(ADAPTER adapter_in) noexcept : adapter(adapter_in) {}
-    bool operator () (const auto &a, const auto &b) const noexcept {
-        return adapter.estimate(a) * adapter.strict_cost(b) > adapter.estimate(b) * adapter.strict_cost(a);
-    }
-};
-
 template <typename ADAPTER, typename T, typename F>
 double estimate_of(ADAPTER adapter, const T &children, F flow) {
     for (const auto &child: children) {
@@ -195,18 +185,19 @@ public:
 
 class OrFlow : public FlowMixin<OrFlow>{
 private:
+    double _full;
     double _flow;
     bool _strict;
     bool _first;
 public:
     OrFlow(double in, bool strict) noexcept
-      : _flow(in), _strict(strict), _first(true) {}
+      : _full(in), _flow(in), _strict(strict), _first(true) {}
     void add(double est) noexcept {
         _flow *= (1.0 - est);
         _first = false;
     }
     double flow() const noexcept {
-        return _flow;
+        return _strict ? _full : _flow;
     }
     bool strict() const noexcept {
         return _strict;
@@ -215,9 +206,7 @@ public:
         return _first ? 0.0 : (1.0 - _flow);
     }
     static void sort(auto adapter, auto &children, bool strict) {
-        if (strict) {
-            flow::sort<flow::MinOrStrictCost>(adapter, children);
-        } else {
+        if (!strict) {
             flow::sort<flow::MinOrCost>(adapter, children);
         }
     }
author	Håvard Pettersen <havardpe@yahooinc.com>	2024-02-02 14:00:53 +0000
committer	Håvard Pettersen <havardpe@yahooinc.com>	2024-02-05 11:03:13 +0000
commit	021d823d55362441e11c81224583bb9d46618a35 (patch)
tree	cbbbe136bb7a2d790e2fc2a02a1d9026bbd84e9b /searchlib/src
parent	3da723264949a92244565b962c2a4f773203d1e8 (diff)