benchmark effect of being strict in a non-strict context

author: Håvard Pettersen <havardpe@yahooinc.com> 2024-02-09 12:46:08 +0000
committer: Håvard Pettersen <havardpe@yahooinc.com> 2024-02-09 15:11:31 +0000
commit: e73569c0b7c3f97af98545df4381c3f76b2e0cae (patch)
tree: 309cbdbfc50de861c0b01bd836d3c9784e559ee5
parent: 2885e639279cb6b0b6d9cc44bc9c15d3969a522a (diff)
3 files changed, 58 insertions, 10 deletions
diff --git a/searchlib/src/tests/queryeval/or_speed/or_speed_test.cpp b/searchlib/src/tests/queryeval/or_speed/or_speed_test.cpp
index 502d2b39019..ac88515bb1b 100644
--- a/searchlib/src/tests/queryeval/or_speed/or_speed_test.cpp
+++ b/searchlib/src/tests/queryeval/or_speed/or_speed_test.cpp
@@ -120,9 +120,11 @@ struct OrSetup {
     std::vector<std::unique_ptr<TMD>> match_data;
     std::vector<BitVector::UP>        child_hits;
     std::vector<bool>                 use_array;
+    bool                              strict_bm = true;
     bool                              strict_or = true;
     bool                              strict_children = true;
     bool                              unwrap_single_child = true;
+    uint32_t                          docid_skip = 1;
     OrSetup(uint32_t docid_limit_in) noexcept : docid_limit(docid_limit_in) {}
     size_t per_child(double target, size_t child_cnt) {
         size_t result = (docid_limit * target) / child_cnt;
@@ -182,9 +184,9 @@ struct OrSetup {
         }
         return result;
     }
-    OrSetup &prepare_bm(size_t child_cnt, size_t hits_per_child) {
+    OrSetup &prepare_bm(size_t child_cnt, size_t hits_per_child, bool use_array_in) {
         for (size_t i = 0; i < child_cnt; ++i) {
-            add(hits_per_child, should_use_array(hits_per_child), false);
+            add(hits_per_child, use_array_in, false);
         }
         return *this;
     }
@@ -213,10 +215,11 @@ struct OrSetup {
         size_t seeks = 0;
         BenchmarkTimer timer(budget);
         while (timer.has_budget()) {
+            uint32_t delta = docid_skip;
             timer.before();
             seeks = 0;
             search.initRange(1, docid_limit);
-            for (uint32_t docid = 1; docid < docid_limit; ++docid) {
+            for (uint32_t docid = 1; docid < docid_limit; docid += delta) {
                 ++seeks;
                 search.seek(docid);
             }
@@ -225,7 +228,7 @@ struct OrSetup {
         return std::make_pair(seeks, timer.min_time() * 1000.0);
     }
     std::pair<size_t,double> bm_search_ms(Impl impl, bool optimized) {
-        if (strict_or) {
+        if (strict_bm) {
             return bm_strict(impl, optimized);
         } else {
             return bm_non_strict(impl, optimized);
@@ -378,6 +381,7 @@ TEST(OrSpeed, bm_array_vs_bitvector) {
         for (bool wrapped: {false, true}) {
             setup.unwrap_single_child = !wrapped;
             for (bool strict: {false, true}) {
+                setup.strict_bm = strict;
                 setup.strict_or = strict;
                 setup.strict_children = strict;
                 for (bool use_array: {false, true}) {
@@ -394,6 +398,46 @@ TEST(OrSpeed, bm_array_vs_bitvector) {
     }
 }
 
+TEST(OrSpeed, bm_strict_when_not_needed) {
+    if (!bench_mode) {
+        fprintf(stdout, "[ SKIPPING ] run with 'bench' parameter to activate\n");
+        return;
+    }
+    double target = 0.05;
+    size_t child_cnt = 200;
+    auto impl = Impl::HEAP;
+    bool optimize = false;
+    OrSetup setup(bench_docs);
+    size_t part = setup.per_child(target, child_cnt);
+    bool use_array = false;
+    setup.prepare_bm(child_cnt, part, use_array);
+    fprintf(stderr, "OR bench(%s, %s, children: %4zu, hits_per_child: %8zu %s)\n",
+            impl_str(impl), opt_str(optimize), child_cnt, part, leaf_str(use_array));
+    for (bool strict_bm: {false, true}) {
+        setup.strict_bm = strict_bm;
+        for (bool strict_or: {false, true}) {
+            setup.strict_or = strict_or;
+            for (bool strict_children: {false, true}) {
+                setup.strict_children = strict_children;
+                for (uint32_t skip = 1; skip < 500'000; skip *= 4) {
+                    setup.docid_skip = skip;
+                    bool conflict = (strict_bm && !strict_or) || (strict_or && !strict_children) || (strict_bm && skip > 1);
+                    if (!conflict) {
+                        auto result = setup.bm_search_ms(impl, optimize);
+                        auto flow_stats = FlowAdapter::stats(setup);
+                        double in_flow = 1.0 / double(skip); // NOTE: not multiplied with strict cost
+                        double ms_per_cost = result.second / (strict_or ? flow_stats.strict_cost : (in_flow * flow_stats.cost));
+                        fprintf(stderr, "loop: %s, skip: %8u, OR: %s, children: %s, "
+                                "seeks: %8zu, time: %10.3f ms, ns per seek: %10.3f, ms per cost: %10.3f\n",
+                                strict_str(strict_bm), skip, strict_str(strict_or), strict_str(strict_children), 
+                                result.first, result.second, ns_per(result), ms_per_cost);
+                    }
+                }
+            }
+        }
+    }
+}
+
 TEST(OrSpeed, bm_strict_or) {
     if (!bench_mode) {
         fprintf(stdout, "[ SKIPPING ] run with 'bench' parameter to activate\n");
@@ -406,9 +450,10 @@ TEST(OrSpeed, bm_strict_or) {
                 size_t part = setup.per_child(target, child_cnt);
                 bool use_array = setup.should_use_array(part);
                 if (part > 0 && (!use_array || !optimize)) {
-                    setup.prepare_bm(child_cnt, part);
+                    setup.prepare_bm(child_cnt, part, use_array);
                     for (auto impl: {Impl::PLAIN, Impl::HEAP}) {
                         for (bool strict: {false, true}) {
+                            setup.strict_bm = strict;
                             setup.strict_or = strict;
                             setup.strict_children = strict;
                             auto result = setup.bm_search_ms(impl, optimize);
diff --git a/searchlib/src/vespa/searchlib/queryeval/blueprint.h b/searchlib/src/vespa/searchlib/queryeval/blueprint.h
index 510351a4843..439eff680ec 100644
--- a/searchlib/src/vespa/searchlib/queryeval/blueprint.h
+++ b/searchlib/src/vespa/searchlib/queryeval/blueprint.h
@@ -265,12 +265,12 @@ public:
     // seen by optimize. When the calculate_flow_stats function is
     // called on a complex leaf, it can call the update_flow_stats
     // function directly (the function that is normally called by
-    // optimize) on interal blueprints to make these values available
+    // optimize) on internal blueprints to make these values available
     // before using them to calculate its own flow stats.
     //
     //    'estimate': relative estimate in the range [0,1]
-    //        'cost': per-document cost of non-strict evaluation
-    // 'strict_cost': per-document cost of strict evaluation
+    //        'cost': cost of non-strict evaluation: multiply by non-strict in-flow
+    // 'strict_cost': cost of strict evaluation: assuming strict in-flow of 1.0
     double estimate() const noexcept { return _flow_stats.estimate; }
     double cost() const noexcept { return _flow_stats.cost; }
     double strict_cost() const noexcept { return _flow_stats.strict_cost; }
diff --git a/searchlib/src/vespa/searchlib/queryeval/flow.h b/searchlib/src/vespa/searchlib/queryeval/flow.h
index 426aa077db2..1dc6e6aef55 100644
--- a/searchlib/src/vespa/searchlib/queryeval/flow.h
+++ b/searchlib/src/vespa/searchlib/queryeval/flow.h
@@ -91,8 +91,11 @@ template <typename ADAPTER, typename T, typename F>
 double ordered_cost_of(ADAPTER adapter, const T &children, F flow) {
     double cost = 0.0;
     for (const auto &child: children) {
-        double child_cost = flow.strict() ? adapter.strict_cost(child) : adapter.cost(child);
-        cost += flow.flow() * child_cost;
+        if (flow.strict()) {
+            cost += adapter.strict_cost(child);
+        } else {
+            cost += flow.flow() * adapter.cost(child);
+        }
         flow.add(adapter.estimate(child));
     }
     return cost;
author	Håvard Pettersen <havardpe@yahooinc.com>	2024-02-09 12:46:08 +0000
committer	Håvard Pettersen <havardpe@yahooinc.com>	2024-02-09 15:11:31 +0000
commit	e73569c0b7c3f97af98545df4381c3f76b2e0cae (patch)
tree	309cbdbfc50de861c0b01bd836d3c9784e559ee5
parent	2885e639279cb6b0b6d9cc44bc9c15d3969a522a (diff)