From 51db1c06279843401604cc61f8e6d12c9cc32830 Mon Sep 17 00:00:00 2001
From: Håvard Pettersen <havardpe@yahooinc.com>
Date: Thu, 15 Feb 2024 09:50:53 +0000
Subject: use flow for fetchPostings

Note that only AND/OR are wired to their respective flows here, which
means only strict OR should change behavior (all children now get full in-flow).
---
 .../blueprint/intermediate_blueprints_test.cpp     | 20 ++++++--
 .../tests/queryeval/flow/queryeval_flow_test.cpp   | 58 ++++++++++++++++++++++
 .../src/vespa/searchlib/queryeval/blueprint.cpp    | 17 +++----
 .../src/vespa/searchlib/queryeval/blueprint.h      |  2 +-
 searchlib/src/vespa/searchlib/queryeval/flow.h     | 36 +++++++++++++-
 .../queryeval/intermediate_blueprints.cpp          | 18 ++++---
 .../searchlib/queryeval/intermediate_blueprints.h  |  4 +-
 7 files changed, 129 insertions(+), 26 deletions(-)
diff --git a/searchlib/src/tests/queryeval/blueprint/intermediate_blueprints_test.cpp b/searchlib/src/tests/queryeval/blueprint/intermediate_blueprints_test.cpp
index ee7c201f093..31db731a598 100644
--- a/searchlib/src/tests/queryeval/blueprint/intermediate_blueprints_test.cpp
+++ b/searchlib/src/tests/queryeval/blueprint/intermediate_blueprints_test.cpp
@@ -165,20 +165,30 @@ TEST("test Or propagates updated histestimate") {
     bp->addChild(ap(MyLeafSpec(800).create<RememberExecuteInfo>()->setSourceId(2)));
     bp->addChild(ap(MyLeafSpec(20).create<RememberExecuteInfo>()->setSourceId(2)));
     bp->setDocIdLimit(5000);
-    // sort OR as non-strict to get expected order. With strict OR,
-    // the order would be irrelevant since we use the relative
-    // estimate as strict_cost for leafs.
+    // NOTE: use non-strict OR ordering since strict OR ordering is non-deterministic
     optimize(bp, false);
-    bp->fetchPostings(ExecuteInfo::TRUE);
+    //--- execute info when non-strict:
+    bp->fetchPostings(ExecuteInfo::FALSE);
     EXPECT_EQUAL(4u, bp->childCnt());
     for (uint32_t i = 0; i < bp->childCnt(); i++) {
         const auto & child = dynamic_cast<const RememberExecuteInfo &>(bp->getChild(i));
-        EXPECT_TRUE(child.is_strict);
+        EXPECT_FALSE(child.is_strict);
     }
     EXPECT_EQUAL(1.0, dynamic_cast<const RememberExecuteInfo &>(bp->getChild(0)).hit_rate);
     EXPECT_APPROX(0.5, dynamic_cast<const RememberExecuteInfo &>(bp->getChild(1)).hit_rate, 1e-6);
     EXPECT_APPROX(0.5*3.0/5.0, dynamic_cast<const RememberExecuteInfo &>(bp->getChild(2)).hit_rate, 1e-6);
     EXPECT_APPROX(0.5*3.0*42.0/(5.0*50.0), dynamic_cast<const RememberExecuteInfo &>(bp->getChild(3)).hit_rate, 1e-6);
+    //--- execute info when strict:
+    bp->fetchPostings(ExecuteInfo::TRUE);
+    EXPECT_EQUAL(4u, bp->childCnt());
+    for (uint32_t i = 0; i < bp->childCnt(); i++) {
+        const auto & child = dynamic_cast<const RememberExecuteInfo &>(bp->getChild(i));
+        EXPECT_TRUE(child.is_strict);
+    }
+    EXPECT_EQUAL(1.0, dynamic_cast<const RememberExecuteInfo &>(bp->getChild(0)).hit_rate);
+    EXPECT_EQUAL(1.0, dynamic_cast<const RememberExecuteInfo &>(bp->getChild(1)).hit_rate);
+    EXPECT_EQUAL(1.0, dynamic_cast<const RememberExecuteInfo &>(bp->getChild(2)).hit_rate);
+    EXPECT_EQUAL(1.0, dynamic_cast<const RememberExecuteInfo &>(bp->getChild(3)).hit_rate);
 }
 
 TEST("test And Blueprint") {
diff --git a/searchlib/src/tests/queryeval/flow/queryeval_flow_test.cpp b/searchlib/src/tests/queryeval/flow/queryeval_flow_test.cpp
index 7a3950dbf1c..5009a15e438 100644
--- a/searchlib/src/tests/queryeval/flow/queryeval_flow_test.cpp
+++ b/searchlib/src/tests/queryeval/flow/queryeval_flow_test.cpp
@@ -126,6 +126,14 @@ void verify_flow(auto flow, const std::vector<double> &est_list, const std::vect
     }
 }
 
+void verify_flow_calc(FlowCalc flow_calc, const std::vector<double> &est_list, const std::vector<double> &expect) { // feeds est_list to flow_calc in order and checks each returned flow
+    ASSERT_EQ(est_list.size() + 1, expect.size()); // expect carries one extra entry for the final probe call below
+    for (size_t i = 0; i < est_list.size(); ++i) {
+        EXPECT_DOUBLE_EQ(flow_calc(est_list[i]), expect[i]); // expect[i] is the value returned when the i'th estimate is passed in
+    }
+    EXPECT_DOUBLE_EQ(flow_calc(0.5), expect.back()); // one more call with a fixed 0.5 estimate; its result is compared against the last expected value
+}
+
 TEST(FlowTest, full_and_flow) {
     for (bool strict: {false, true}) {
         verify_flow(AndFlow(strict), {0.4, 0.7, 0.2},
@@ -133,6 +141,8 @@ TEST(FlowTest, full_and_flow) {
                      {0.4, 0.4, false},
                      {0.4*0.7, 0.4*0.7, false},
                      {0.4*0.7*0.2, 0.4*0.7*0.2, false}});
+        verify_flow_calc(flow_calc<AndFlow>(strict, 1.0),
+                         {0.4, 0.7, 0.2}, {1.0, 0.4, 0.4*0.7, 0.4*0.7*0.2});
     }
 }
 
@@ -143,6 +153,8 @@ TEST(FlowTest, partial_and_flow) {
                      {in*0.4, in*0.4, false},
                      {in*0.4*0.7, in*0.4*0.7, false},
                      {in*0.4*0.7*0.2, in*0.4*0.7*0.2, false}});
+        verify_flow_calc(flow_calc<AndFlow>(false, in),
+                         {0.4, 0.7, 0.2}, {in*1.0, in*0.4, in*0.4*0.7, in*0.4*0.7*0.2});
     }
 }
 
@@ -152,11 +164,15 @@ TEST(FlowTest, full_or_flow) {
                  {0.6, 1.0-0.6, false},
                  {0.6*0.3, 1.0-0.6*0.3, false},
                  {0.6*0.3*0.8, 1.0-0.6*0.3*0.8, false}});
+    verify_flow_calc(flow_calc<OrFlow>(false, 1.0),
+                     {0.4, 0.7, 0.2}, {1.0, 0.6, 0.6*0.3, 0.6*0.3*0.8});
     verify_flow(OrFlow(true), {0.4, 0.7, 0.2},
                 {{1.0, 0.0, true},
                  {1.0, 1.0-0.6, true},
                  {1.0, 1.0-0.6*0.3, true},
                  {1.0, 1.0-0.6*0.3*0.8, true}});
+    verify_flow_calc(flow_calc<OrFlow>(true, 1.0),
+                     {0.4, 0.7, 0.2}, {1.0, 1.0, 1.0, 1.0});
 }
 
 TEST(FlowTest, partial_or_flow) {
@@ -166,6 +182,8 @@ TEST(FlowTest, partial_or_flow) {
                      {in*0.6, 1.0-in*0.6, false},
                      {in*0.6*0.3, 1.0-in*0.6*0.3, false},
                      {in*0.6*0.3*0.8, 1.0-in*0.6*0.3*0.8, false}});
+        verify_flow_calc(flow_calc<OrFlow>(false, in),
+                         {0.4, 0.7, 0.2}, {in, in*0.6, in*0.6*0.3, in*0.6*0.3*0.8});
     }
 }
 
@@ -176,6 +194,8 @@ TEST(FlowTest, full_and_not_flow) {
                      {0.4, 0.4, false},
                      {0.4*0.3, 0.4*0.3, false},
                      {0.4*0.3*0.8, 0.4*0.3*0.8, false}});
+        verify_flow_calc(flow_calc<AndNotFlow>(strict, 1.0),
+                         {0.4, 0.7, 0.2}, {1.0, 0.4, 0.4*0.3, 0.4*0.3*0.8});
     }
 }
 
@@ -186,9 +206,47 @@ TEST(FlowTest, partial_and_not_flow) {
                      {in*0.4, in*0.4, false},
                      {in*0.4*0.3, in*0.4*0.3, false},
                      {in*0.4*0.3*0.8, in*0.4*0.3*0.8, false}});
+        verify_flow_calc(flow_calc<AndNotFlow>(false, in),
+                         {0.4, 0.7, 0.2}, {in, in*0.4, in*0.4*0.3, in*0.4*0.3*0.8});
+    }
+}
+
+TEST(FlowTest, full_first_flow_calc) {
+    for (bool strict: {false, true}) { // with an in-flow of 1.0 the same values are expected for strict and non-strict
+        verify_flow_calc(first_flow_calc(strict, 1.0),
+                         {0.4, 0.7, 0.2}, {1.0, 0.4, 0.4, 0.4}); // only the first estimate (0.4) reduces the flow
+    }
+}
+
+TEST(FlowTest, partial_first_flow_calc) {
+    for (double in: {1.0, 0.5, 0.25}) { // non-strict in-flow values to start from
+        verify_flow_calc(first_flow_calc(false, in),
+                         {0.4, 0.7, 0.2}, {in, in*0.4, in*0.4, in*0.4}); // later estimates leave the flow at in*0.4
+    }
+}
+
+TEST(FlowTest, full_full_flow_calc) {
+    for (bool strict: {false, true}) { // with an in-flow of 1.0 the same values are expected for strict and non-strict
+        verify_flow_calc(full_flow_calc(strict, 1.0),
+                         {0.4, 0.7, 0.2}, {1.0, 1.0, 1.0, 1.0}); // the estimates never affect the returned flow
     }
 }
 
+TEST(FlowTest, partial_full_flow_calc) {
+    for (double in: {1.0, 0.5, 0.25}) { // non-strict in-flow values to start from
+        verify_flow_calc(full_flow_calc(false, in),
+                         {0.4, 0.7, 0.2}, {in, in, in, in}); // every call returns the unchanged in-flow
+    }
+}
+
+TEST(FlowTest, flow_calc_strictness_overrides_rate) { // each calc is built with strict=true and a 0.5 rate; the first returned flow must still be 1.0
+    EXPECT_EQ(flow_calc<AndFlow>(true, 0.5)(0.5), 1.0);
+    EXPECT_EQ(flow_calc<OrFlow>(true, 0.5)(0.5), 1.0);
+    EXPECT_EQ(flow_calc<AndNotFlow>(true, 0.5)(0.5), 1.0);
+    EXPECT_EQ(first_flow_calc(true, 0.5)(0.5), 1.0);
+    EXPECT_EQ(full_flow_calc(true, 0.5)(0.5), 1.0);
+}
+
 TEST(FlowTest, flow_cost) {
     std::vector<FlowStats> data = {{0.4, 1.1, 0.6}, {0.7, 1.2, 0.5}, {0.2, 1.3, 0.4}};
     EXPECT_DOUBLE_EQ(ordered_cost_of<AndFlow>(data, false), 1.1 + 0.4*1.2 + 0.4*0.7*1.3);
diff --git a/searchlib/src/vespa/searchlib/queryeval/blueprint.cpp b/searchlib/src/vespa/searchlib/queryeval/blueprint.cpp
index 2f69c45d418..f3539c6989a 100644
--- a/searchlib/src/vespa/searchlib/queryeval/blueprint.cpp
+++ b/searchlib/src/vespa/searchlib/queryeval/blueprint.cpp
@@ -481,6 +481,12 @@ IntermediateBlueprint::count_termwise_nodes(const UnpackInfo &unpack) const
     return termwise_nodes;
 }
 
+FlowCalc
+IntermediateBlueprint::make_flow_calc(bool strict, double flow) const
+{
+    return full_flow_calc(strict, flow); // default: the returned calc yields the same flow for every child estimate
+}
+
 IntermediateBlueprint::IndexList
 IntermediateBlueprint::find(const IPredicate & pred) const
 {
@@ -538,13 +544,6 @@ IntermediateBlueprint::calculateState() const
     return state;
 }
 
-double
-IntermediateBlueprint::computeNextHitRate(const Blueprint & child, double hit_rate) const
-{
-    (void) child;
-    return hit_rate;
-}
-
 bool
 IntermediateBlueprint::should_do_termwise_eval(const UnpackInfo &unpack, double match_limit) const
 {
@@ -648,11 +647,11 @@ IntermediateBlueprint::visitMembers(vespalib::ObjectVisitor &visitor) const
 void
 IntermediateBlueprint::fetchPostings(const ExecuteInfo &execInfo)
 {
-    double nextHitRate = execInfo.hit_rate();
+    FlowCalc flow_calc = make_flow_calc(execInfo.is_strict(), execInfo.hit_rate());
     for (size_t i = 0; i < _children.size(); ++i) {
         Blueprint & child = *_children[i];
+        double nextHitRate = flow_calc(child.estimate());
         child.fetchPostings(ExecuteInfo::create(execInfo.is_strict() && inheritStrict(i), nextHitRate, execInfo));
-        nextHitRate = computeNextHitRate(child, nextHitRate);
     }
 }
 
diff --git a/searchlib/src/vespa/searchlib/queryeval/blueprint.h b/searchlib/src/vespa/searchlib/queryeval/blueprint.h
index 439eff680ec..395512d84cc 100644
--- a/searchlib/src/vespa/searchlib/queryeval/blueprint.h
+++ b/searchlib/src/vespa/searchlib/queryeval/blueprint.h
@@ -362,7 +362,7 @@ private:
     bool infer_want_global_filter() const;
 
     size_t count_termwise_nodes(const UnpackInfo &unpack) const;
-    virtual double computeNextHitRate(const Blueprint & child, double hit_rate) const;
+    virtual FlowCalc make_flow_calc(bool strict, double flow) const; // builds the calc that fetchPostings uses to turn each child's estimate into that child's hit rate
 
 protected:
     // returns an empty collection if children have empty or
diff --git a/searchlib/src/vespa/searchlib/queryeval/flow.h b/searchlib/src/vespa/searchlib/queryeval/flow.h
index cfbb28b190f..f31122166d9 100644
--- a/searchlib/src/vespa/searchlib/queryeval/flow.h
+++ b/searchlib/src/vespa/searchlib/queryeval/flow.h
@@ -4,7 +4,7 @@
 #include <vespa/vespalib/util/small_vector.h>
 #include <cstddef>
 #include <algorithm>
-#include <cmath>
+#include <functional>
 
 // Model how boolean result decisions flow through intermediate nodes
 // of different types based on relative estimates for sub-expressions
@@ -280,4 +280,38 @@ public:
     }
 };
 
+using FlowCalc = std::function<double(double)>; // callable taking an estimate and returning a flow; the calcs below may carry state between calls
+
+template <typename FLOW>
+FlowCalc flow_calc(bool strict, double non_strict_rate) { // wraps a FLOW object in a FlowCalc; FLOW needs bool and double constructors plus flow() and add()
+    FLOW flow = strict ? FLOW(true) : FLOW(non_strict_rate); // non_strict_rate is only used when strict is false
+    return [flow](double est) mutable noexcept { // mutable: the captured copy of flow is updated on every call
+               double next_flow = flow.flow(); // read the flow before est is added; this is the value returned
+               flow.add(est);
+               return next_flow;
+           };
+}
+
+inline FlowCalc first_flow_calc(bool strict, double flow) { // FlowCalc where only the first estimate passed in reduces the flow
+    if (strict) {
+        flow = 1.0; // strict overrides the supplied flow
+    }
+    bool first = true;
+    return [flow,first](double est) mutable noexcept { // both captures are copies that persist between calls
+               double next_flow = flow; // value returned for this call, taken before any update
+               if (first) {
+                   flow *= est; // applied once; all later calls return this scaled flow
+                   first = false;
+               }
+               return next_flow;
+           };
+}
+
+inline FlowCalc full_flow_calc(bool strict, double flow) { // FlowCalc that returns the same flow on every call
+    if (strict) {
+        flow = 1.0; // strict overrides the supplied flow
+    }
+    return [flow](double) noexcept { return flow; }; // the estimate argument is ignored
+}
+
 }
diff --git a/searchlib/src/vespa/searchlib/queryeval/intermediate_blueprints.cpp b/searchlib/src/vespa/searchlib/queryeval/intermediate_blueprints.cpp
index 993639becf2..6faa4ddf147 100644
--- a/searchlib/src/vespa/searchlib/queryeval/intermediate_blueprints.cpp
+++ b/searchlib/src/vespa/searchlib/queryeval/intermediate_blueprints.cpp
@@ -300,14 +300,10 @@ AndBlueprint::createFilterSearch(bool strict, FilterConstraint constraint) const
     return create_and_filter(get_children(), strict, constraint);
 }
 
-double
-AndBlueprint::computeNextHitRate(const Blueprint & child, double hit_rate) const {
-    return hit_rate * child.estimate();
-}
-
-double
-OrBlueprint::computeNextHitRate(const Blueprint & child, double hit_rate) const {
-    return hit_rate * (1.0 - child.estimate());
+FlowCalc
+AndBlueprint::make_flow_calc(bool strict, double flow) const
+{
+    return flow_calc<AndFlow>(strict, flow); // per-child flow follows AndFlow, fed with each child's estimate in order
 }
 
 //-----------------------------------------------------------------------------
@@ -404,6 +400,12 @@ OrBlueprint::createFilterSearch(bool strict, FilterConstraint constraint) const
     return create_or_filter(get_children(), strict, constraint);
 }
 
+FlowCalc
+OrBlueprint::make_flow_calc(bool strict, double flow) const
+{
+    return flow_calc<OrFlow>(strict, flow); // per-child flow follows OrFlow, fed with each child's estimate in order
+}
+
 uint8_t
 OrBlueprint::calculate_cost_tier() const
 {
diff --git a/searchlib/src/vespa/searchlib/queryeval/intermediate_blueprints.h b/searchlib/src/vespa/searchlib/queryeval/intermediate_blueprints.h
index 1da70b4fa70..25586022535 100644
--- a/searchlib/src/vespa/searchlib/queryeval/intermediate_blueprints.h
+++ b/searchlib/src/vespa/searchlib/queryeval/intermediate_blueprints.h
@@ -56,7 +56,7 @@ public:
     SearchIterator::UP
     createFilterSearch(bool strict, FilterConstraint constraint) const override;
 private:
-    double computeNextHitRate(const Blueprint & child, double hit_rate) const override;
+    FlowCalc make_flow_calc(bool strict, double flow) const override; // selects the flow calc that fetchPostings applies to this node's children
 };
 
 //-----------------------------------------------------------------------------
@@ -81,7 +81,7 @@ public:
     SearchIterator::UP
     createFilterSearch(bool strict, FilterConstraint constraint) const override;
 private:
-    double computeNextHitRate(const Blueprint & child, double hit_rate) const override;
+    FlowCalc make_flow_calc(bool strict, double flow) const override; // selects the flow calc that fetchPostings applies to this node's children
     uint8_t calculate_cost_tier() const override;
 };
 
-- 
cgit v1.2.3