know the in-flow when sorting blueprints

Also added the option and tagging mechanism needed to force selected blueprints to be strict even in a non-strict context, as well as a calculation of how much time this saves.
author: Håvard Pettersen <havardpe@yahooinc.com> 2024-02-23 09:41:04 +0000
committer: Håvard Pettersen <havardpe@yahooinc.com> 2024-02-23 12:23:53 +0000
commit: fbc08038079b6079fcc12b1f54eba4d59acabe83 (patch)
tree: 218e277d83da7794b14ce449066df371ff4fc394 /searchlib/src/tests
parent: 10267c6347197b5e9731d0a1e9169d74608178f7 (diff)
7 files changed, 59 insertions, 42 deletions
diff --git a/searchlib/src/tests/nearsearch/nearsearch_test.cpp b/searchlib/src/tests/nearsearch/nearsearch_test.cpp
index 95701e59444..6f7cf85258b 100644
--- a/searchlib/src/tests/nearsearch/nearsearch_test.cpp
+++ b/searchlib/src/tests/nearsearch/nearsearch_test.cpp
@@ -229,7 +229,8 @@ Test::testNearSearch(MyQuery &query, uint32_t matchId)
         near_b->addChild(query.getTerm(i).make_blueprint(fieldId, i));
     }
     bp->setDocIdLimit(1000);
-    bp = search::queryeval::Blueprint::optimize_and_sort(std::move(bp), true, true);
+    auto opts = search::queryeval::Blueprint::Options::all();
+    bp = search::queryeval::Blueprint::optimize_and_sort(std::move(bp), true, opts);
     bp->fetchPostings(search::queryeval::ExecuteInfo::TRUE);
     search::fef::MatchData::UP md(layout.createMatchData());
     search::queryeval::SearchIterator::UP near = bp->createSearch(*md, true);
diff --git a/searchlib/src/tests/queryeval/blueprint/blueprint_test.cpp b/searchlib/src/tests/queryeval/blueprint/blueprint_test.cpp
index 2a21d66c090..1af9ee6cff7 100644
--- a/searchlib/src/tests/queryeval/blueprint/blueprint_test.cpp
+++ b/searchlib/src/tests/queryeval/blueprint/blueprint_test.cpp
@@ -17,13 +17,15 @@ using namespace search::fef;
 
 namespace {
 
+auto opts = Blueprint::Options::all();
+
 //-----------------------------------------------------------------------------
 
 class MyOr : public IntermediateBlueprint
 {
 private:
-    FlowCalc make_flow_calc(bool strict, double flow) const override {
-        return flow_calc<OrFlow>(strict, flow);
+    FlowCalc make_flow_calc(InFlow in_flow) const override {
+        return flow_calc<OrFlow>(in_flow);
     }
 public:
     FlowStats calculate_flow_stats(uint32_t) const final {
@@ -451,7 +453,7 @@ TEST_F("testChildAndNotCollapsing", Fixture)
                               );
     TEST_DO(f.check_not_equal(*sorted, *unsorted));
     unsorted->setDocIdLimit(1000);
-    unsorted = Blueprint::optimize_and_sort(std::move(unsorted), true, true);
+    unsorted = Blueprint::optimize_and_sort(std::move(unsorted), true, opts);
     TEST_DO(f.check_equal(*sorted, *unsorted));
 }
 
@@ -491,7 +493,7 @@ TEST_F("testChildAndCollapsing", Fixture)
 
     TEST_DO(f.check_not_equal(*sorted, *unsorted));
     unsorted->setDocIdLimit(1000);
-    unsorted = Blueprint::optimize_and_sort(std::move(unsorted), true, true);
+    unsorted = Blueprint::optimize_and_sort(std::move(unsorted), true, opts);
     TEST_DO(f.check_equal(*sorted, *unsorted));
 }
 
@@ -530,10 +532,9 @@ TEST_F("testChildOrCollapsing", Fixture)
                               );
     TEST_DO(f.check_not_equal(*sorted, *unsorted));
     unsorted->setDocIdLimit(1000);
-    // we sort non-strict here since the default costs of 1/est for
-    // non-strict/strict leaf iterators makes the order of iterators
-    // under a strict OR irrelevant.
-    unsorted = Blueprint::optimize_and_sort(std::move(unsorted), false, true);
+    // we sort non-strict here since a strict OR does not have a
+    // deterministic sort order.
+    unsorted = Blueprint::optimize_and_sort(std::move(unsorted), false, opts);
     TEST_DO(f.check_equal(*sorted, *unsorted));
 }
 
@@ -577,7 +578,7 @@ TEST_F("testChildSorting", Fixture)
 
     TEST_DO(f.check_not_equal(*sorted, *unsorted));
     unsorted->setDocIdLimit(1000);
-    unsorted = Blueprint::optimize_and_sort(std::move(unsorted), true, true);
+    unsorted = Blueprint::optimize_and_sort(std::move(unsorted), true, opts);
     TEST_DO(f.check_equal(*sorted, *unsorted));
 }
 
diff --git a/searchlib/src/tests/queryeval/blueprint/intermediate_blueprints_test.cpp b/searchlib/src/tests/queryeval/blueprint/intermediate_blueprints_test.cpp
index 31db731a598..f192ea93b0e 100644
--- a/searchlib/src/tests/queryeval/blueprint/intermediate_blueprints_test.cpp
+++ b/searchlib/src/tests/queryeval/blueprint/intermediate_blueprints_test.cpp
@@ -132,7 +132,8 @@ TEST("test AndNot Blueprint") {
 
 template <typename BP>
 void optimize(std::unique_ptr<BP> &ref, bool strict) {
-    auto optimized = Blueprint::optimize_and_sort(std::move(ref), strict, true);
+    auto opts = Blueprint::Options::all();
+    auto optimized = Blueprint::optimize_and_sort(std::move(ref), strict, opts);
     ref.reset(dynamic_cast<BP*>(optimized.get()));
     ASSERT_TRUE(ref);
     optimized.release();
@@ -568,9 +569,10 @@ optimize_and_compare(Blueprint::UP top, Blueprint::UP expect, bool strict = true
     top->setDocIdLimit(1000);
     expect->setDocIdLimit(1000);
     TEST_DO(compare(*top, *expect, false));
-    top = Blueprint::optimize_and_sort(std::move(top), strict, sort_by_cost);
+    auto opts = Blueprint::Options::all().sort_by_cost(sort_by_cost);
+    top = Blueprint::optimize_and_sort(std::move(top), strict, opts);
     TEST_DO(compare(*top, *expect, true));
-    expect = Blueprint::optimize_and_sort(std::move(expect), strict, sort_by_cost);
+    expect = Blueprint::optimize_and_sort(std::move(expect), strict, opts);
     TEST_DO(compare(*expect, *top, true));
 }
 
@@ -699,11 +701,12 @@ TEST("test empty root node optimization and safeness") {
 
     //-------------------------------------------------------------------------
     auto expect_up = std::make_unique<EmptyBlueprint>();
-    compare(*expect_up, *Blueprint::optimize_and_sort(std::move(top1), true, true), true);
-    compare(*expect_up, *Blueprint::optimize_and_sort(std::move(top2), true, true), true);
-    compare(*expect_up, *Blueprint::optimize_and_sort(std::move(top3), true, true), true);
-    compare(*expect_up, *Blueprint::optimize_and_sort(std::move(top4), true, true), true);
-    compare(*expect_up, *Blueprint::optimize_and_sort(std::move(top5), true, true), true);
+    auto opts = Blueprint::Options::all();
+    compare(*expect_up, *Blueprint::optimize_and_sort(std::move(top1), true, opts), true);
+    compare(*expect_up, *Blueprint::optimize_and_sort(std::move(top2), true, opts), true);
+    compare(*expect_up, *Blueprint::optimize_and_sort(std::move(top3), true, opts), true);
+    compare(*expect_up, *Blueprint::optimize_and_sort(std::move(top4), true, opts), true);
+    compare(*expect_up, *Blueprint::optimize_and_sort(std::move(top5), true, opts), true);
 }
 
 TEST("and with one empty child is optimized away") {
@@ -711,7 +714,8 @@ TEST("and with one empty child is optimized away") {
     Blueprint::UP top = ap((new SourceBlenderBlueprint(*selector))->
                            addChild(ap(MyLeafSpec(10).create())).
                            addChild(addLeafs(std::make_unique<AndBlueprint>(), {{0, true}, 10, 20})));
-    top = Blueprint::optimize_and_sort(std::move(top), true, true);
+    auto opts = Blueprint::Options::all();
+    top = Blueprint::optimize_and_sort(std::move(top), true, opts);
     Blueprint::UP expect_up(ap((new SourceBlenderBlueprint(*selector))->
                           addChild(ap(MyLeafSpec(10).create())).
                           addChild(std::make_unique<EmptyBlueprint>())));
@@ -888,8 +892,9 @@ TEST("require that replaced blueprints retain source id") {
                              addChild(ap(MyLeafSpec(30).create()->setSourceId(55)))));
     Blueprint::UP expect2_up(ap(MyLeafSpec(30).create()->setSourceId(42)));
     //-------------------------------------------------------------------------
-    top1_up = Blueprint::optimize_and_sort(std::move(top1_up), true, true);
-    top2_up = Blueprint::optimize_and_sort(std::move(top2_up), true, true);
+    auto opts = Blueprint::Options::all();
+    top1_up = Blueprint::optimize_and_sort(std::move(top1_up), true, opts);
+    top2_up = Blueprint::optimize_and_sort(std::move(top2_up), true, opts);
     compare(*expect1_up, *top1_up, true);
     compare(*expect2_up, *top2_up, true);
     EXPECT_EQUAL(13u, top1_up->getSourceId());
@@ -1204,7 +1209,8 @@ TEST("require_that_unpack_optimization_is_not_overruled_by_equiv") {
 TEST("require that ANDNOT without children is optimized to empty search") {
     Blueprint::UP top_up = std::make_unique<AndNotBlueprint>();
     auto expect_up = std::make_unique<EmptyBlueprint>();
-    top_up = Blueprint::optimize_and_sort(std::move(top_up), true, true);
+    auto opts = Blueprint::Options::all();
+    top_up = Blueprint::optimize_and_sort(std::move(top_up), true, opts);
     compare(*expect_up, *top_up, true);
 }
 
diff --git a/searchlib/src/tests/queryeval/filter_search/filter_search_test.cpp b/searchlib/src/tests/queryeval/filter_search/filter_search_test.cpp
index 3e8bc06bfd8..c4d34ab3565 100644
--- a/searchlib/src/tests/queryeval/filter_search/filter_search_test.cpp
+++ b/searchlib/src/tests/queryeval/filter_search/filter_search_test.cpp
@@ -49,7 +49,7 @@ concept ChildCollector = requires(T a, std::unique_ptr<Blueprint> bp) {
 struct DefaultBlueprint : Blueprint {
     FlowStats calculate_flow_stats(uint32_t) const override { abort(); }
     void optimize(Blueprint* &, OptimizePass) override { abort(); }
-    void sort(bool, bool) override { abort(); }
+    double sort(InFlow, const Options &) override { abort(); }
     const State &getState() const override { abort(); }
     void fetchPostings(const ExecuteInfo &) override { abort(); }
     void freeze() override { abort(); }
diff --git a/searchlib/src/tests/queryeval/flow/queryeval_flow_test.cpp b/searchlib/src/tests/queryeval/flow/queryeval_flow_test.cpp
index 5009a15e438..8b8b6c1282e 100644
--- a/searchlib/src/tests/queryeval/flow/queryeval_flow_test.cpp
+++ b/searchlib/src/tests/queryeval/flow/queryeval_flow_test.cpp
@@ -141,7 +141,7 @@ TEST(FlowTest, full_and_flow) {
                      {0.4, 0.4, false},
                      {0.4*0.7, 0.4*0.7, false},
                      {0.4*0.7*0.2, 0.4*0.7*0.2, false}});
-        verify_flow_calc(flow_calc<AndFlow>(strict, 1.0),
+        verify_flow_calc(flow_calc<AndFlow>(strict),
                          {0.4, 0.7, 0.2}, {1.0, 0.4, 0.4*0.7, 0.4*0.7*0.2});
     }
 }
@@ -153,7 +153,7 @@ TEST(FlowTest, partial_and_flow) {
                      {in*0.4, in*0.4, false},
                      {in*0.4*0.7, in*0.4*0.7, false},
                      {in*0.4*0.7*0.2, in*0.4*0.7*0.2, false}});
-        verify_flow_calc(flow_calc<AndFlow>(false, in),
+        verify_flow_calc(flow_calc<AndFlow>(in),
                          {0.4, 0.7, 0.2}, {in*1.0, in*0.4, in*0.4*0.7, in*0.4*0.7*0.2});
     }
 }
@@ -164,14 +164,14 @@ TEST(FlowTest, full_or_flow) {
                  {0.6, 1.0-0.6, false},
                  {0.6*0.3, 1.0-0.6*0.3, false},
                  {0.6*0.3*0.8, 1.0-0.6*0.3*0.8, false}});
-    verify_flow_calc(flow_calc<OrFlow>(false, 1.0),
+    verify_flow_calc(flow_calc<OrFlow>(1.0),
                      {0.4, 0.7, 0.2}, {1.0, 0.6, 0.6*0.3, 0.6*0.3*0.8});
     verify_flow(OrFlow(true), {0.4, 0.7, 0.2},
                 {{1.0, 0.0, true},
                  {1.0, 1.0-0.6, true},
                  {1.0, 1.0-0.6*0.3, true},
                  {1.0, 1.0-0.6*0.3*0.8, true}});
-    verify_flow_calc(flow_calc<OrFlow>(true, 1.0),
+    verify_flow_calc(flow_calc<OrFlow>(true),
                      {0.4, 0.7, 0.2}, {1.0, 1.0, 1.0, 1.0});
 }
 
@@ -182,7 +182,7 @@ TEST(FlowTest, partial_or_flow) {
                      {in*0.6, 1.0-in*0.6, false},
                      {in*0.6*0.3, 1.0-in*0.6*0.3, false},
                      {in*0.6*0.3*0.8, 1.0-in*0.6*0.3*0.8, false}});
-        verify_flow_calc(flow_calc<OrFlow>(false, in),
+        verify_flow_calc(flow_calc<OrFlow>(in),
                          {0.4, 0.7, 0.2}, {in, in*0.6, in*0.6*0.3, in*0.6*0.3*0.8});
     }
 }
@@ -194,7 +194,7 @@ TEST(FlowTest, full_and_not_flow) {
                      {0.4, 0.4, false},
                      {0.4*0.3, 0.4*0.3, false},
                      {0.4*0.3*0.8, 0.4*0.3*0.8, false}});
-        verify_flow_calc(flow_calc<AndNotFlow>(strict, 1.0),
+        verify_flow_calc(flow_calc<AndNotFlow>(strict),
                          {0.4, 0.7, 0.2}, {1.0, 0.4, 0.4*0.3, 0.4*0.3*0.8});
     }
 }
@@ -206,45 +206,52 @@ TEST(FlowTest, partial_and_not_flow) {
                      {in*0.4, in*0.4, false},
                      {in*0.4*0.3, in*0.4*0.3, false},
                      {in*0.4*0.3*0.8, in*0.4*0.3*0.8, false}});
-        verify_flow_calc(flow_calc<AndNotFlow>(false, in),
+        verify_flow_calc(flow_calc<AndNotFlow>(in),
                          {0.4, 0.7, 0.2}, {in, in*0.4, in*0.4*0.3, in*0.4*0.3*0.8});
     }
 }
 
 TEST(FlowTest, full_first_flow_calc) {
     for (bool strict: {false, true}) {
-        verify_flow_calc(first_flow_calc(strict, 1.0),
+        verify_flow_calc(first_flow_calc(strict),
                          {0.4, 0.7, 0.2}, {1.0, 0.4, 0.4, 0.4});
     }
 }
 
 TEST(FlowTest, partial_first_flow_calc) {
     for (double in: {1.0, 0.5, 0.25}) {
-        verify_flow_calc(first_flow_calc(false, in),
+        verify_flow_calc(first_flow_calc(in),
                          {0.4, 0.7, 0.2}, {in, in*0.4, in*0.4, in*0.4});
     }
 }
 
 TEST(FlowTest, full_full_flow_calc) {
     for (bool strict: {false, true}) {
-        verify_flow_calc(full_flow_calc(strict, 1.0),
+        verify_flow_calc(full_flow_calc(strict),
                          {0.4, 0.7, 0.2}, {1.0, 1.0, 1.0, 1.0});
     }
 }
 
 TEST(FlowTest, partial_full_flow_calc) {
     for (double in: {1.0, 0.5, 0.25}) {
-        verify_flow_calc(full_flow_calc(false, in),
+        verify_flow_calc(full_flow_calc(in),
                          {0.4, 0.7, 0.2}, {in, in, in, in});
     }
 }
 
-TEST(FlowTest, flow_calc_strictness_overrides_rate) {
-    EXPECT_EQ(flow_calc<AndFlow>(true, 0.5)(0.5), 1.0);
-    EXPECT_EQ(flow_calc<OrFlow>(true, 0.5)(0.5), 1.0);
-    EXPECT_EQ(flow_calc<AndNotFlow>(true, 0.5)(0.5), 1.0);
-    EXPECT_EQ(first_flow_calc(true, 0.5)(0.5), 1.0);
-    EXPECT_EQ(full_flow_calc(true, 0.5)(0.5), 1.0);
+TEST(FlowTest, in_flow_strict_vs_rate_interaction) {
+    EXPECT_EQ(InFlow(true).strict(), true);
+    EXPECT_EQ(InFlow(true).rate(), 1.0);
+    EXPECT_EQ(InFlow(false).strict(), false);
+    EXPECT_EQ(InFlow(false).rate(), 1.0);
+    EXPECT_EQ(InFlow(0.5).strict(), false);
+    EXPECT_EQ(InFlow(0.5).rate(), 0.5);
+    EXPECT_EQ(InFlow(true, 0.5).strict(), true);
+    EXPECT_EQ(InFlow(true, 0.5).rate(), 1.0);
+    EXPECT_EQ(InFlow(false, 0.5).strict(), false);
+    EXPECT_EQ(InFlow(false, 0.5).rate(), 0.5);
+    EXPECT_EQ(InFlow(-1.0).strict(), false);
+    EXPECT_EQ(InFlow(-1.0).rate(), 0.0);
 }
 
 TEST(FlowTest, flow_cost) {
diff --git a/searchlib/src/tests/queryeval/iterator_benchmark/iterator_benchmark_test.cpp b/searchlib/src/tests/queryeval/iterator_benchmark/iterator_benchmark_test.cpp
index 6747fed888c..bdc89363b22 100644
--- a/searchlib/src/tests/queryeval/iterator_benchmark/iterator_benchmark_test.cpp
+++ b/searchlib/src/tests/queryeval/iterator_benchmark/iterator_benchmark_test.cpp
@@ -342,7 +342,8 @@ non_strict_search(Blueprint& blueprint, MatchData& md, uint32_t docid_limit, dou
 BenchmarkResult
 benchmark_search(Blueprint::UP blueprint, uint32_t docid_limit, bool strict, double filter_hit_ratio)
 {
-    blueprint->sort(strict, true);
+    auto opts = Blueprint::Options::all();
+    blueprint->sort(strict, opts);
     blueprint->fetchPostings(ExecuteInfo::createForTest(strict));
     // Note: All blueprints get the same TermFieldMatchData instance.
     //       This is OK as long as we don't do unpacking and only use 1 thread.
diff --git a/searchlib/src/tests/queryeval/same_element/same_element_test.cpp b/searchlib/src/tests/queryeval/same_element/same_element_test.cpp
index c9fcb472b68..64f4fafd2d1 100644
--- a/searchlib/src/tests/queryeval/same_element/same_element_test.cpp
+++ b/searchlib/src/tests/queryeval/same_element/same_element_test.cpp
@@ -46,7 +46,8 @@ std::unique_ptr<SameElementBlueprint> make_blueprint(const std::vector<FakeResul
 }
 
 Blueprint::UP finalize(Blueprint::UP bp, bool strict) {
-    Blueprint::UP result = Blueprint::optimize_and_sort(std::move(bp), true, true);
+    auto opts = Blueprint::Options::all();
+    Blueprint::UP result = Blueprint::optimize_and_sort(std::move(bp), true, opts);
     result->fetchPostings(ExecuteInfo::createForTest(strict));
     result->freeze();
     return result;
author	Håvard Pettersen <havardpe@yahooinc.com>	2024-02-23 09:41:04 +0000
committer	Håvard Pettersen <havardpe@yahooinc.com>	2024-02-23 12:23:53 +0000
commit	fbc08038079b6079fcc12b1f54eba4d59acabe83 (patch)
tree	218e277d83da7794b14ce449066df371ff4fc394 /searchlib/src/tests
parent	10267c6347197b5e9731d0a1e9169d74608178f7 (diff)