5 files changed, 133 insertions, 33 deletions
diff --git a/searchlib/src/tests/queryeval/iterator_benchmark/attribute_ctx_builder.cpp b/searchlib/src/tests/queryeval/iterator_benchmark/attribute_ctx_builder.cpp
index e776b6a9379..6a2b306522d 100644
--- a/searchlib/src/tests/queryeval/iterator_benchmark/attribute_ctx_builder.cpp
+++ b/searchlib/src/tests/queryeval/iterator_benchmark/attribute_ctx_builder.cpp
@@ -25,13 +25,13 @@ populate_attribute(AttributeType& attr, uint32_t docid_limit, const HitSpecs& hi
         docids->foreach_truebit([&](uint32_t docid) {
             if constexpr (is_string) {
                 if constexpr (is_multivalue) {
-                    attr.append(docid, std::to_string(spec.term_value), 1);
+                    attr.append(docid, std::to_string(spec.term_value), random_int(1, 100));
                 } else {
                     attr.update(docid, std::to_string(spec.term_value));
                 }
             } else {
                 if constexpr (is_multivalue) {
-                    attr.append(docid, spec.term_value, 1);
+                    attr.append(docid, spec.term_value, random_int(1, 100));
                 } else {
                     attr.update(docid, spec.term_value);
                 }
diff --git a/searchlib/src/tests/queryeval/iterator_benchmark/benchmark_blueprint_factory.cpp b/searchlib/src/tests/queryeval/iterator_benchmark/benchmark_blueprint_factory.cpp
index 15690fd71d5..516a819aae8 100644
--- a/searchlib/src/tests/queryeval/iterator_benchmark/benchmark_blueprint_factory.cpp
+++ b/searchlib/src/tests/queryeval/iterator_benchmark/benchmark_blueprint_factory.cpp
@@ -18,6 +18,7 @@ using search::query::Node;
 using search::query::SimpleDotProduct;
 using search::query::SimpleInTerm;
 using search::query::SimpleStringTerm;
+using search::query::SimpleWandTerm;
 using search::query::SimpleWeightedSetTerm;
 using search::query::Weight;
 
@@ -83,6 +84,17 @@ make_query_node(QueryOperator query_op, const TermVector& terms)
             res->addTerm(term, Weight(1));
         }
         return res;
+    } else if (query_op == QueryOperator::ParallelWeakAnd) {
+        // These config values match the defaults (see WandItem.java):
+        uint32_t target_hits = 100;
+        int64_t score_threshold = 0;
+        double threshold_boost_factor = 1.0;
+        auto res = std::make_unique<SimpleWandTerm>(terms.size(), field_name, 0, Weight(1),
+                                                    target_hits, score_threshold, threshold_boost_factor);
+        for (auto term : terms) {
+            res->addTerm(term, Weight(random_int(1, 100)));
+        }
+        return res;
     }
     return {};
 }
@@ -97,15 +109,18 @@ make_leaf_blueprint(const Node& node, BenchmarkSearchable& searchable, uint32_t
     return blueprint;
 }
 
-template <typename BlueprintType>
 Blueprint::UP
-make_intermediate_blueprint(BenchmarkSearchable& searchable, const TermVector& terms, uint32_t docid_limit)
+make_intermediate_blueprint(std::unique_ptr<IntermediateBlueprint> blueprint, BenchmarkSearchable& searchable, const TermVector& terms, uint32_t docid_limit)
 {
-    auto blueprint = std::make_unique<BlueprintType>();
+    auto* weak_and = blueprint->asWeakAnd();
     for (auto term : terms) {
         SimpleStringTerm sterm(std::to_string(term), field_name, 0, Weight(1));
         auto child = make_leaf_blueprint(sterm, searchable, docid_limit);
-        blueprint->addChild(std::move(child));
+        if (weak_and != nullptr) {
+            weak_and->addTerm(std::move(child), random_int(1, 100));
+        } else {
+            blueprint->addChild(std::move(child));
+        }
     }
     blueprint->setDocIdLimit(docid_limit);
     blueprint->update_flow_stats(docid_limit);
@@ -116,9 +131,12 @@ Blueprint::UP
 make_blueprint_helper(BenchmarkSearchable& searchable, QueryOperator query_op, const TermVector& terms, uint32_t docid_limit)
 {
     if (query_op == QueryOperator::And) {
-        return make_intermediate_blueprint<AndBlueprint>(searchable, terms, docid_limit);
+        return make_intermediate_blueprint(std::make_unique<AndBlueprint>(), searchable, terms, docid_limit);
     } else if (query_op == QueryOperator::Or) {
-        return make_intermediate_blueprint<OrBlueprint>(searchable, terms, docid_limit);
+        return make_intermediate_blueprint(std::make_unique<OrBlueprint>(), searchable, terms, docid_limit);
+    } else if (query_op == QueryOperator::WeakAnd) {
+        uint32_t target_hits = 100;
+        return make_intermediate_blueprint(std::make_unique<WeakAndBlueprint>(target_hits), searchable, terms, docid_limit);
     } else {
         auto query_node = make_query_node(query_op, terms);
         return make_leaf_blueprint(*query_node, searchable, docid_limit);
diff --git a/searchlib/src/tests/queryeval/iterator_benchmark/common.cpp b/searchlib/src/tests/queryeval/iterator_benchmark/common.cpp
index b937f6a2f00..f17403bd33a 100644
--- a/searchlib/src/tests/queryeval/iterator_benchmark/common.cpp
+++ b/searchlib/src/tests/queryeval/iterator_benchmark/common.cpp
@@ -35,6 +35,8 @@ to_string(QueryOperator query_op)
         case QueryOperator::DotProduct: return "DotProduct";
         case QueryOperator::And: return "And";
         case QueryOperator::Or: return "Or";
+        case QueryOperator::WeakAnd: return "WeakAnd";
+        case QueryOperator::ParallelWeakAnd: return "ParallelWeakAnd";
     }
     return "unknown";
 }
@@ -71,4 +73,11 @@ random_docids(uint32_t docid_limit, uint32_t count)
     return res;
 }
 
+int32_t
+random_int(int32_t a, int32_t b)
+{
+    std::uniform_int_distribution<int32_t> distr(a, b);
+    return distr(gen);
+}
+
 }
 \ No newline at end of file
diff --git a/searchlib/src/tests/queryeval/iterator_benchmark/common.h b/searchlib/src/tests/queryeval/iterator_benchmark/common.h
index 6d890910271..45fd82b091c 100644
--- a/searchlib/src/tests/queryeval/iterator_benchmark/common.h
+++ b/searchlib/src/tests/queryeval/iterator_benchmark/common.h
@@ -39,7 +39,9 @@ enum class QueryOperator {
     WeightedSet,
     DotProduct,
     And,
-    Or
+    Or,
+    WeakAnd,
+    ParallelWeakAnd
 };
 
 vespalib::string to_string(QueryOperator query_op);
@@ -78,4 +80,6 @@ public:
 
 BitVector::UP random_docids(uint32_t docid_limit, uint32_t count);
 
+int32_t random_int(int32_t a, int32_t b);
+
 }
diff --git a/searchlib/src/tests/queryeval/iterator_benchmark/iterator_benchmark_test.cpp b/searchlib/src/tests/queryeval/iterator_benchmark/iterator_benchmark_test.cpp
index 33c2e067255..5b061697220 100644
--- a/searchlib/src/tests/queryeval/iterator_benchmark/iterator_benchmark_test.cpp
+++ b/searchlib/src/tests/queryeval/iterator_benchmark/iterator_benchmark_test.cpp
@@ -153,40 +153,87 @@ get_class_name(const auto& obj)
     return res;
 }
 
+struct MatchLoopContext {
+    Blueprint::UP blueprint;
+    MatchData::UP match_data;
+    SearchIterator::UP iterator;
+    MatchLoopContext() : blueprint(), match_data(), iterator() {}
+    MatchLoopContext(Blueprint::UP blueprint_in,
+                     MatchData::UP match_data_in,
+                     SearchIterator::UP iterator_in)
+        : blueprint(std::move(blueprint_in)),
+          match_data(std::move(match_data_in)),
+          iterator(std::move(iterator_in))
+    {}
+    void operator=(MatchLoopContext&& rhs) {
+        blueprint = std::move(rhs.blueprint);
+        match_data = std::move(rhs.match_data);
+        iterator = std::move(rhs.iterator);
+    }
+    ~MatchLoopContext();
+};
+
+MatchLoopContext::~MatchLoopContext() = default;
+
+MatchLoopContext
+make_match_loop_context(BenchmarkBlueprintFactory& factory, bool strict, uint32_t docid_limit)
+{
+    auto blueprint = factory.make_blueprint();
+    assert(blueprint);
+    blueprint->basic_plan(strict, docid_limit);
+    blueprint->fetchPostings(ExecuteInfo::FULL);
+    // Note: All blueprints get the same TermFieldMatchData instance.
+    //       This is OK as long as we don't do unpacking and only use 1 thread.
+    auto md = MatchData::makeTestInstance(1, 1);
+    auto itr = blueprint->createSearch(*md);
+    assert(itr);
+    return {std::move(blueprint), std::move(md), std::move(itr)};
+}
+
+template <bool do_unpack>
 BenchmarkResult
-strict_search(Blueprint& blueprint, MatchData& md, uint32_t docid_limit)
+strict_search(BenchmarkBlueprintFactory& factory, uint32_t docid_limit)
 {
-    auto itr = blueprint.createSearch(md);
-    assert(itr.get());
     BenchmarkTimer timer(budget_sec);
     uint32_t hits = 0;
+    MatchLoopContext ctx;
     while (timer.has_budget()) {
+        ctx = make_match_loop_context(factory, true, docid_limit);
+        auto* itr = ctx.iterator.get();
         timer.before();
         hits = 0;
         itr->initRange(1, docid_limit);
         uint32_t docid = itr->seekFirst(1);
+        if constexpr (do_unpack) {
+            itr->unpack(docid);
+        }
         while (docid < docid_limit) {
             ++hits;
             docid = itr->seekNext(docid + 1);
+            if constexpr (do_unpack) {
+                itr->unpack(docid);
+            }
         }
         timer.after();
     }
-    FlowStats flow(blueprint.estimate(), blueprint.cost(), blueprint.strict_cost());
-    return {timer.min_time() * 1000.0, hits + 1, hits, flow, flow.strict_cost, flow.strict_cost, get_class_name(*itr), get_class_name(blueprint)};
+    FlowStats flow(ctx.blueprint->estimate(), ctx.blueprint->cost(), ctx.blueprint->strict_cost());
+    return {timer.min_time() * 1000.0, hits + 1, hits, flow, flow.strict_cost, flow.strict_cost, get_class_name(*ctx.iterator), get_class_name(*ctx.blueprint)};
 }
 
+template <bool do_unpack>
 BenchmarkResult
-non_strict_search(Blueprint& blueprint, MatchData& md, uint32_t docid_limit, double filter_hit_ratio)
+non_strict_search(BenchmarkBlueprintFactory& factory, uint32_t docid_limit, double filter_hit_ratio, bool force_strict)
 {
-    auto itr = blueprint.createSearch(md);
-    assert(itr.get());
     BenchmarkTimer timer(budget_sec);
     uint32_t seeks = 0;
     uint32_t hits = 0;
     // This simulates a filter that is evaluated before this iterator.
     // The filter returns 'filter_hit_ratio' amount of the document corpus.
     uint32_t docid_skip = 1.0 / filter_hit_ratio;
+    MatchLoopContext ctx;
     while (timer.has_budget()) {
+        ctx = make_match_loop_context(factory, force_strict, docid_limit);
+        auto* itr = ctx.iterator.get();
         timer.before();
         seeks = 0;
         hits = 0;
@@ -195,29 +242,35 @@ non_strict_search(Blueprint& blueprint, MatchData& md, uint32_t docid_limit, dou
             ++seeks;
             if (itr->seek(docid)) {
                 ++hits;
+                if constexpr (do_unpack) {
+                    itr->unpack(docid);
+                }
             }
         }
         timer.after();
     }
-    FlowStats flow(blueprint.estimate(), blueprint.cost(), blueprint.strict_cost());
+    FlowStats flow(ctx.blueprint->estimate(), ctx.blueprint->cost(), ctx.blueprint->strict_cost());
     double actual_cost = flow.cost * filter_hit_ratio;
     // This is an attempt to calculate an alternative actual cost for strict / posting list iterators that are used in a non-strict context.
     double alt_cost = flow.strict_cost + 0.5 * filter_hit_ratio;
-    return {timer.min_time() * 1000.0, seeks, hits, flow, actual_cost, alt_cost, get_class_name(*itr), get_class_name(blueprint)};
+    return {timer.min_time() * 1000.0, seeks, hits, flow, actual_cost, alt_cost, get_class_name(*ctx.iterator), get_class_name(*ctx.blueprint)};
 }
 
 BenchmarkResult
-benchmark_search(Blueprint::UP blueprint, uint32_t docid_limit, bool strict_context, bool force_strict, double filter_hit_ratio)
+benchmark_search(BenchmarkBlueprintFactory& factory, uint32_t docid_limit, bool strict_context, bool force_strict, bool unpack_iterator, double filter_hit_ratio)
 {
-    blueprint->basic_plan(strict_context || force_strict, docid_limit);
-    blueprint->fetchPostings(ExecuteInfo::FULL);
-    // Note: All blueprints get the same TermFieldMatchData instance.
-    //       This is OK as long as we don't do unpacking and only use 1 thread.
-    auto md = MatchData::makeTestInstance(1, 1);
     if (strict_context) {
-        return strict_search(*blueprint, *md, docid_limit);
+        if (unpack_iterator) {
+            return strict_search<true>(factory, docid_limit);
+        } else {
+            return strict_search<false>(factory, docid_limit);
+        }
     } else {
-        return non_strict_search(*blueprint, *md, docid_limit, filter_hit_ratio);
+        if (unpack_iterator) {
+            return non_strict_search<true>(factory, docid_limit, filter_hit_ratio, force_strict);
+        } else {
+            return non_strict_search<false>(factory, docid_limit, filter_hit_ratio, force_strict);
+        }
     }
 }
 
@@ -230,18 +283,19 @@ to_string(bool val)
 void
 print_result_header()
 {
-    std::cout << "|  chn | f_ratio | o_ratio | a_ratio |  f.est |  f.cost | f.scost |     hits |    seeks |  time_ms | act_cost | alt_cost | ns_per_seek | ms_per_act_cost | ms_per_alt_cost | iterator | blueprint |" << std::endl;
+    std::cout << "|  chn | f_ratio | o_ratio | a_ratio |   f.est |  f.cost | f.scost |     hits |    seeks |  time_ms | act_cost | alt_cost | ns_per_seek | ms_per_act_cost | ms_per_alt_cost | iterator | blueprint |" << std::endl;
 }
 
 void
 print_result(const BenchmarkResult& res, uint32_t children, double op_hit_ratio, double filter_hit_ratio, uint32_t num_docs)
 {
-    std::cout << std::fixed << std::setprecision(4)
+    std::cout << std::fixed << std::setprecision(5)
               << "| " << std::setw(4) << children
               << " | " << std::setw(7) << filter_hit_ratio
               << " | " << std::setw(7) << op_hit_ratio
               << " | " << std::setw(7) << ((double) res.hits / (double) num_docs)
               << " | " << std::setw(6) << res.flow.estimate
+              << std::setprecision(4)
               << " | " << std::setw(7) << res.flow.cost
               << " | " << std::setw(7) << res.flow.strict_cost
               << " | " << std::setw(8) << res.hits
@@ -274,11 +328,13 @@ struct BenchmarkCase {
     QueryOperator query_op;
     bool strict_context;
     bool force_strict;
+    bool unpack_iterator;
     BenchmarkCase(const FieldConfig& field_cfg_in, QueryOperator query_op_in, bool strict_context_in)
         : field_cfg(field_cfg_in),
           query_op(query_op_in),
           strict_context(strict_context_in),
-          force_strict(false)
+          force_strict(false),
+          unpack_iterator(false)
     {}
     vespalib::string to_string() const {
         return "op=" + search::queryeval::test::to_string(query_op) + ", cfg=" + field_cfg.to_string() +
@@ -359,7 +415,7 @@ struct BenchmarkCaseSetup {
           child_counts(child_counts_in),
           filter_hit_ratios({1.0}),
           default_values_per_document(0),
-          filter_crossover_factor(1.0)
+          filter_crossover_factor(0.0)
     {}
     ~BenchmarkCaseSetup() {}
 };
@@ -373,6 +429,7 @@ struct BenchmarkSetup {
     std::vector<uint32_t> child_counts;
     std::vector<double> filter_hit_ratios;
     bool force_strict;
+    bool unpack_iterator;
     uint32_t default_values_per_document;
     double filter_crossover_factor;
     BenchmarkSetup(uint32_t num_docs_in,
@@ -389,8 +446,9 @@ struct BenchmarkSetup {
           child_counts(child_counts_in),
           filter_hit_ratios({1.0}),
           force_strict(false),
+          unpack_iterator(false),
           default_values_per_document(0),
-          filter_crossover_factor(1.0)
+          filter_crossover_factor(0.0)
     {}
     BenchmarkSetup(uint32_t num_docs_in,
                    const std::vector<FieldConfig>& field_cfgs_in,
@@ -402,6 +460,7 @@ struct BenchmarkSetup {
     BenchmarkCaseSetup make_case_setup(const BenchmarkCase& bcase) const {
         BenchmarkCaseSetup res(num_docs, bcase, op_hit_ratios, child_counts);
         res.bcase.force_strict = force_strict;
+        res.bcase.unpack_iterator = unpack_iterator;
         res.default_values_per_document = default_values_per_document;
         if (!bcase.strict_context) {
             // Simulation of a filter is only relevant in a non-strict context.
@@ -431,7 +490,8 @@ run_benchmark_case(const BenchmarkCaseSetup& setup)
                                                   op_hit_ratio, children);
             for (double filter_hit_ratio : setup.filter_hit_ratios) {
                 if (filter_hit_ratio * setup.filter_crossover_factor <= op_hit_ratio) {
-                    auto res = benchmark_search(factory->make_blueprint(), setup.num_docs + 1, setup.bcase.strict_context, setup.bcase.force_strict, filter_hit_ratio);
+                    auto res = benchmark_search(*factory, setup.num_docs + 1,
+                                                setup.bcase.strict_context, setup.bcase.force_strict, setup.bcase.unpack_iterator, filter_hit_ratio);
                     print_result(res, children, op_hit_ratio, filter_hit_ratio, setup.num_docs);
                     result.add(res);
                 }
@@ -545,6 +605,15 @@ TEST(IteratorBenchmark, analyze_complex_leaf_operators)
     run_benchmarks(setup);
 }
 
+TEST(IteratorBenchmark, analyze_weak_and_operators)
+{
+    std::vector<FieldConfig> field_cfgs = {int32_wset_fs};
+    std::vector<QueryOperator> query_ops = {QueryOperator::WeakAnd, QueryOperator::ParallelWeakAnd};
+    BenchmarkSetup setup(num_docs, field_cfgs, query_ops, {true, false}, base_hit_ratios, {1, 2, 10, 100});
+    setup.unpack_iterator = true;
+    run_benchmarks(setup);
+}
+
 TEST(IteratorBenchmark, term_benchmark)
 {
     BenchmarkSetup setup(num_docs, {int32_fs}, {QueryOperator::Term}, {true, false}, base_hit_ratios);