1 files changed, 128 insertions, 48 deletions
diff --git a/searchlib/src/tests/queryeval/iterator_benchmark/iterator_benchmark_test.cpp b/searchlib/src/tests/queryeval/iterator_benchmark/iterator_benchmark_test.cpp
index f4a1ade8a66..db0fe76b7af 100644
--- a/searchlib/src/tests/queryeval/iterator_benchmark/iterator_benchmark_test.cpp
+++ b/searchlib/src/tests/queryeval/iterator_benchmark/iterator_benchmark_test.cpp
@@ -26,6 +26,18 @@ using vespalib::make_string_short::fmt;
 const vespalib::string field_name = "myfield";
 double budget_sec = 1.0;
 
+double estimate_actual_cost(Blueprint &bp, InFlow in_flow) {
+    if (in_flow.strict()) {
+        assert(bp.strict());
+        return bp.strict_cost();
+    } else if (bp.strict()) {
+        auto stats = FlowStats::from(flow::DefaultAdapter(), &bp);
+        return flow::forced_strict_cost(stats, in_flow.rate());
+    } else {
+        return bp.cost() * in_flow.rate();
+    }
+}
+
 enum class PlanningAlgo {
     Order,
     Estimate,
@@ -236,7 +248,8 @@ strict_search(BenchmarkBlueprintFactory& factory, uint32_t docid_limit, Planning
         timer.after();
     }
     FlowStats flow(ctx.blueprint->estimate(), ctx.blueprint->cost(), ctx.blueprint->strict_cost());
-    return {timer.min_time() * 1000.0, hits + 1, hits, flow, flow.strict_cost, get_class_name(*ctx.iterator), factory.get_name(*ctx.blueprint)};
+    double actual_cost = estimate_actual_cost(*ctx.blueprint, InFlow(true));
+    return {timer.min_time() * 1000.0, hits + 1, hits, flow, actual_cost, get_class_name(*ctx.iterator), factory.get_name(*ctx.blueprint)};
 }
 
 template <bool do_unpack>
@@ -269,7 +282,7 @@ non_strict_search(BenchmarkBlueprintFactory& factory, uint32_t docid_limit, doub
         timer.after();
     }
     FlowStats flow(ctx.blueprint->estimate(), ctx.blueprint->cost(), ctx.blueprint->strict_cost());
-    double actual_cost = flow.cost * filter_hit_ratio;
+    double actual_cost = estimate_actual_cost(*ctx.blueprint, InFlow(filter_hit_ratio));
     return {timer.min_time() * 1000.0, seeks, hits, flow, actual_cost, get_class_name(*ctx.iterator), factory.get_name(*ctx.blueprint)};
 }
 
@@ -291,10 +304,6 @@ benchmark_search(BenchmarkBlueprintFactory& factory, uint32_t docid_limit, bool
     }
 }
 
-
-
-
-
 //-----------------------------------------------------------------------------
 
 double est_forced_strict_cost(double estimate, double strict_cost, double rate) {
@@ -317,26 +326,26 @@ struct Sample {
     }
 };
 
-double find_crossover(const char *type, const auto &calculate_at, double delta) {
+double find_crossover(const char *type, const char *a, const char *b, const auto &calculate_at, double delta) {
     double min = delta;
     double max = 1.0;
     fprintf(stderr, "looking for %s crossover in the range [%g, %g]...\n", type, min, max);
     auto at_min = calculate_at(min);
     auto at_max = calculate_at(max);
-    fprintf(stderr, "  before: [%s, %s], after: [%s, %s]\n",
-            at_min.first.str().c_str(), at_max.first.str().c_str(),
-            at_min.second.str().c_str(), at_max.second.str().c_str());
-    auto best_before = [](auto values) { return (values.first < values.second); };
-    if (best_before(at_min) == best_before(at_max)) {
+    fprintf(stderr, "  %s: [%s, %s], %s: [%s, %s]\n",
+            a, at_min.first.str().c_str(), at_max.first.str().c_str(),
+            b, at_min.second.str().c_str(), at_max.second.str().c_str());
+    auto a_best = [](auto values) { return (values.first < values.second); };
+    if (a_best(at_min) == a_best(at_max)) {
         fprintf(stderr, "  NO %s CROSSOVER FOUND\n", type);
         return 0.0;
     }
     while (max > (min + delta)) {
         double x = (min + max) / 2.0;
         auto at_x = calculate_at(x);
-        fprintf(stderr, "  best@%g: %s (%s vs %s)\n", x, best_before(at_x) ? "before" : "after",
+        fprintf(stderr, "  best@%g: %s (%s vs %s)\n", x, a_best(at_x) ? a : b,
                 at_x.first.str().c_str(), at_x.second.str().c_str());
-        if (best_before(at_min) == best_before(at_x)) {
+        if (a_best(at_min) == a_best(at_x)) {
             min = x;
             at_min = at_x;
         } else {
@@ -409,11 +418,11 @@ void analyze_crossover(BenchmarkBlueprintFactory &fixed, std::function<std::uniq
     std::vector<double> results;
     std::vector<const char *> names;
     names.push_back("time crossover");
-    results.push_back(find_crossover("TIME", combine(estimate_AND_time_ms), delta));
+    results.push_back(find_crossover("TIME", "before", "after", combine(estimate_AND_time_ms), delta));
     names.push_back("cost crossover");
-    results.push_back(find_crossover("COST", combine(calculate_AND_cost), delta));
+    results.push_back(find_crossover("COST", "before", "after", combine(calculate_AND_cost), delta));
     names.push_back("abs_est crossover");
-    results.push_back(find_crossover("ABS_EST", combine(first_abs_est), delta));
+    results.push_back(find_crossover("ABS_EST", "before", "after", combine(first_abs_est), delta));
     sample_at("COST", combine(calculate_AND_cost), results, names);
     sample_at("TIME", combine(estimate_AND_time_ms), results, names);
 }
@@ -429,21 +438,37 @@ to_string(bool val)
 void
 print_result_header()
 {
-    std::cout << "|   chn | f_ratio | o_ratio | a_ratio |   f.est |    f.cost | f.scost |     hits |    seeks |  time_ms |  act_cost | ns_per_seek | ms_per_act_cost | iterator | blueprint |" << std::endl;
+    std::cout << "| in_flow |   chn | o_ratio | a_ratio |   f.est |    f.cost | f.act_cost | f.scost | f.act_scost |     hits |    seeks |  time_ms |  act_cost | ns_per_seek | ms_per_act_cost | iterator | blueprint |" << std::endl;
+}
+
+std::ostream &operator<<(std::ostream &dst, InFlow in_flow) {
+    auto old_w = dst.width();
+    auto old_p = dst.precision();
+    dst << std::setw(7) << std::setprecision(5);
+    if (in_flow.strict()) {
+        dst << " STRICT";
+    } else {
+        dst << in_flow.rate();
+    }
+    dst << std::setw(old_w);
+    dst << std::setprecision(old_p);
+    return dst;
 }
 
 void
-print_result(const BenchmarkResult& res, uint32_t children, double op_hit_ratio, double filter_hit_ratio, uint32_t num_docs)
+print_result(const BenchmarkResult& res, uint32_t children, double op_hit_ratio, InFlow in_flow, uint32_t num_docs)
 {
     std::cout << std::fixed << std::setprecision(5)
-              << "| " << std::setw(5) << children
-              << " | " << std::setw(7) << filter_hit_ratio
+              << "| " << in_flow
+              << " | " << std::setw(5) << children
               << " | " << std::setw(7) << op_hit_ratio
               << " | " << std::setw(7) << ((double) res.hits / (double) num_docs)
               << " | " << std::setw(6) << res.flow.estimate
               << std::setprecision(4)
               << " | " << std::setw(9) << res.flow.cost
+              << " | " << std::setw(10) << (res.flow.cost * in_flow.rate())
               << " | " << std::setw(7) << res.flow.strict_cost
+              << " | " << std::setw(11) << (in_flow.strict() ? res.flow.strict_cost : flow::forced_strict_cost(res.flow, in_flow.rate()))
               << " | " << std::setw(8) << res.hits
               << " | " << std::setw(8) << res.seeks
               << std::setprecision(3)
@@ -640,7 +665,7 @@ run_benchmark_case(const BenchmarkCaseSetup& setup)
                 if (filter_hit_ratio * setup.filter_crossover_factor <= op_hit_ratio) {
                     auto res = benchmark_search(*factory, setup.num_docs + 1,
                                                 setup.bcase.strict_context, setup.bcase.force_strict, setup.bcase.unpack_iterator, filter_hit_ratio, PlanningAlgo::Cost);
-                    print_result(res, children, op_hit_ratio, filter_hit_ratio, setup.num_docs);
+                    print_result(res, children, op_hit_ratio, InFlow(setup.bcase.strict_context, filter_hit_ratio), setup.num_docs);
                     result.add(res);
                 }
             }
@@ -681,23 +706,25 @@ run_benchmarks(const BenchmarkSetup& setup)
 void
 print_intermediate_blueprint_result_header(size_t children)
 {
+    std::cout << "| in_flow";
     // This matches the naming scheme in IntermediateBlueprintFactory.
     char name = 'A';
     for (size_t i = 0; i < children; ++i) {
-        std::cout << "| " << name++ << ".ratio ";
+        std::cout << " | " << name++ << ".ratio";
     }
-    std::cout << "|  flow.cost | flow.scost | flow.est |   ratio |     hits |    seeks | ms_per_cost |  time_ms | algo | blueprint |" << std::endl;
+    std::cout << " |  flow.cost | flow.scost | flow.est |   ratio |     hits |    seeks | ms_per_cost |  time_ms | algo | blueprint |" << std::endl;
 }
 
 void
-print_intermediate_blueprint_result(const BenchmarkResult& res, const std::vector<double>& children_ratios, PlanningAlgo algo, uint32_t num_docs)
+print_intermediate_blueprint_result(const BenchmarkResult& res, const std::vector<double>& children_ratios, PlanningAlgo algo, InFlow in_flow, uint32_t num_docs)
 {
-    std::cout << std::fixed << std::setprecision(5);
+    std::cout << std::fixed << std::setprecision(5)
+              << "| " << in_flow;
     for (auto ratio : children_ratios) {
-        std::cout << "| " << std::setw(7) << ratio << " ";
+        std::cout << " | " << std::setw(7) << ratio;
     }
     std::cout << std::setprecision(5)
-              << "| " << std::setw(10) << res.flow.cost
+              << " | " << std::setw(10) << res.flow.cost
               << " | " << std::setw(10) << res.flow.strict_cost
               << " | " << std::setw(8) << res.flow.estimate
               << " | " << std::setw(7) << ((double) res.hits / (double) num_docs)
@@ -745,9 +772,8 @@ struct BlueprintFactorySetup {
 
 BlueprintFactorySetup::~BlueprintFactorySetup() = default;
 
-template <typename IntermediateBlueprintFactoryType>
 void
-run_intermediate_blueprint_benchmark(const BlueprintFactorySetup& a, const BlueprintFactorySetup& b, size_t num_docs)
+run_intermediate_blueprint_benchmark(auto factory_factory, std::vector<InFlow> in_flows, const BlueprintFactorySetup& a, const BlueprintFactorySetup& b, size_t num_docs)
 {
     print_intermediate_blueprint_result_header(2);
     double max_speedup = 0.0;
@@ -755,26 +781,28 @@ run_intermediate_blueprint_benchmark(const BlueprintFactorySetup& a, const Bluep
     for (double b_hit_ratio: b.op_hit_ratios) {
         auto b_factory = b.make_factory_shared(num_docs, b_hit_ratio);
         for (double a_hit_ratio : a.op_hit_ratios) {
-            IntermediateBlueprintFactoryType factory;
-            factory.add_child(a.make_factory(num_docs, a_hit_ratio));
-            factory.add_child(b_factory);
+            auto factory = factory_factory();
+            factory->add_child(a.make_factory(num_docs, a_hit_ratio));
+            factory->add_child(b_factory);
             double time_ms_esti = 0.0;
-            for (auto algo: {PlanningAlgo::Order, PlanningAlgo::Estimate, PlanningAlgo::Cost,
-                             PlanningAlgo::CostForceStrict}) {
-                auto res = benchmark_search(factory, num_docs + 1, true, false, false, 1.0, algo);
-                print_intermediate_blueprint_result(res, {a_hit_ratio, b_hit_ratio}, algo, num_docs);
-                if (algo == PlanningAlgo::Estimate) {
-                    time_ms_esti = res.time_ms;
-                }
-                if (algo == PlanningAlgo::CostForceStrict) {
-                    double speedup = time_ms_esti / res.time_ms;
-                    if (speedup > max_speedup) {
-                        max_speedup = speedup;
+            for (InFlow in_flow: in_flows) {
+                for (auto algo: {PlanningAlgo::Order, PlanningAlgo::Estimate, PlanningAlgo::Cost,
+                            PlanningAlgo::CostForceStrict}) {
+                    auto res = benchmark_search(*factory, num_docs + 1, in_flow.strict(), false, false, in_flow.rate(), algo);
+                    print_intermediate_blueprint_result(res, {a_hit_ratio, b_hit_ratio}, algo, in_flow, num_docs);
+                    if (algo == PlanningAlgo::Estimate) {
+                        time_ms_esti = res.time_ms;
                     }
-                    if (speedup < min_speedup) {
-                        min_speedup = speedup;
+                    if (algo == PlanningAlgo::CostForceStrict) {
+                        double speedup = time_ms_esti / res.time_ms;
+                        if (speedup > max_speedup) {
+                            max_speedup = speedup;
+                        }
+                        if (speedup < min_speedup) {
+                            min_speedup = speedup;
+                        }
+                        std::cout << "speedup (esti/forc)=" << std::setprecision(4) << speedup << std::endl;
                     }
-                    std::cout << "speedup (esti/forc)=" << std::setprecision(4) << speedup << std::endl;
                 }
             }
         }
@@ -786,7 +814,19 @@ void
 run_and_benchmark(const BlueprintFactorySetup& a, const BlueprintFactorySetup& b, size_t num_docs)
 {
     std::cout << "AND[A={" << a.to_string() << "},B={" << b.to_string() << "}]" << std::endl;
-    run_intermediate_blueprint_benchmark<AndBlueprintFactory>(a, b, num_docs);
+    run_intermediate_blueprint_benchmark([](){ return std::make_unique<AndBlueprintFactory>(); }, {true}, a, b, num_docs);
+}
+
+void
+run_source_blender_benchmark(const BlueprintFactorySetup& a, const BlueprintFactorySetup& b, size_t num_docs)
+{
+    std::cout << "SB[A={" << a.to_string() << "},B={" << b.to_string() << "}]" << std::endl;
+    auto factory_factory = [&](){
+                               auto factory = std::make_unique<SourceBlenderBlueprintFactory>();
+                               factory->init_selector([](uint32_t i){ return (i%10 == 0) ? 1 : 2; }, num_docs + 1);
+                               return factory;
+                           };
+    run_intermediate_blueprint_benchmark(factory_factory, {true, 0.75, 0.5, 0.25, 0.1, 0.01, 0.001}, a, b, num_docs);
 }
 
 //-------------------------------------------------------------------------------------
@@ -970,16 +1010,40 @@ TEST(IteratorBenchmark, analyze_AND_bitvector_vs_IN)
     }
 }
 
+TEST(IteratorBenchmark, analyze_strict_SOURCEBLENDER_memory_and_disk)
+{
+    for (double small_ratio: {0.001, 0.005, 0.01, 0.05}) {
+        run_source_blender_benchmark({str_fs, QueryOperator::Term, {small_ratio}},
+                                     {str_index, QueryOperator::Term, {small_ratio * 10}},
+                                     num_docs);
+    }
+}
+
 TEST(IteratorBenchmark, analyze_OR_non_strict_fs)
 {
     for (auto or_hit_ratio : {0.01, 0.1, 0.5}) {
         BenchmarkSetup setup(num_docs, {int32_fs}, {QueryOperator::Or}, {false}, {or_hit_ratio},
                              {2, 4, 6, 8, 10, 100, 1000});
+        //setup.force_strict = true;
         setup.filter_hit_ratios = gen_ratios(or_hit_ratio, 10.0, 13);
         run_benchmarks(setup);
     }
 }
 
+TEST(IteratorBenchmark, analyze_OR_non_strict_fs_child_est_adjust)
+{
+    for (auto or_hit_ratio : {0.01, 0.1, 0.5}) {
+        for (uint32_t children : {2, 4, 6, 8, 10, 100, 1000}) {
+            double child_est = or_hit_ratio / children;
+            BenchmarkSetup setup(num_docs, {int32_fs}, {QueryOperator::Or}, {false}, {or_hit_ratio},
+                                 {children});
+            //setup.force_strict = true;
+            setup.filter_hit_ratios = gen_ratios(child_est, 10.0, 13);
+            run_benchmarks(setup);
+        }
+    }
+}
+
 TEST(IteratorBenchmark, analyze_OR_non_strict_non_fs)
 {
     BenchmarkSetup setup(num_docs, {int32}, {QueryOperator::Or}, {false}, {0.1}, {2, 4, 6, 8, 10});
@@ -1008,6 +1072,22 @@ TEST(IteratorBenchmark, analyze_btree_vs_bitvector_iterators_strict)
     run_benchmarks(setup);
 }
 
+TEST(IteratorBenchmark, btree_vs_array_nonstrict_crossover) {
+    for (double hit_ratio: { 0.001, 0.002, 0.003, 0.004, 0.005, 0.006, 0.007, 0.008, 0.009,
+                              0.01,  0.02,  0.03,  0.04,  0.05,  0.06,  0.07,  0.08,  0.09,
+                               0.1,   0.2,   0.3,   0.4,   0.5,   0.6,   0.7,   0.8,   0.9,
+                              0.91,  0.92,  0.93,  0.94,  0.95,  0.96,  0.97,  0.98,  0.99, 1.0})
+    {
+        auto btree = make_blueprint_factory(int32_array_fs, QueryOperator::Term, num_docs, 0, hit_ratio, 1, false);
+        auto array = make_blueprint_factory(   int32_array, QueryOperator::Term, num_docs, 0, hit_ratio, 1, false);
+        auto time_ms = [&](auto &bpf, double in_flow) {
+                           return Sample(benchmark_search(bpf, num_docs + 1, false, false, false, in_flow, PlanningAlgo::Cost).time_ms);
+                       };
+        auto calculate_at = [&](double in_flow) { return std::make_pair(time_ms(*btree, in_flow), time_ms(*array, in_flow)); };
+        fprintf(stderr, "btree/array crossover@%5.3f: %8.6f\n", hit_ratio, find_crossover("TIME", "btree", "array", calculate_at, 0.0001));
+    }
+}
+
 int main(int argc, char **argv) {
     ::testing::InitGoogleTest(&argc, argv);
     int res = RUN_ALL_TESTS();