From cfd38de6fb17cef4683c67b67ec0fc5cf499379c Mon Sep 17 00:00:00 2001
From: Geir Storli
Date: Wed, 14 Feb 2024 16:35:16 +0000
Subject: Analyze term search in attributes without fast-search.

This results in a breakdown of average "ms per cost" for the different
attribute types, with suggestions on how the blueprint costs can be
tuned to match the benchmark results.
---
 .../iterator_benchmark/iterator_benchmark_test.cpp | 320 +++++++++++++++++----
 1 file changed, 264 insertions(+), 56 deletions(-)

diff --git a/searchlib/src/tests/queryeval/iterator_benchmark/iterator_benchmark_test.cpp b/searchlib/src/tests/queryeval/iterator_benchmark/iterator_benchmark_test.cpp
index 54119722b69..9c084db4a5d 100644
--- a/searchlib/src/tests/queryeval/iterator_benchmark/iterator_benchmark_test.cpp
+++ b/searchlib/src/tests/queryeval/iterator_benchmark/iterator_benchmark_test.cpp
@@ -17,8 +17,8 @@
 #include 
 #include 
 #include 
-#include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -42,6 +42,11 @@ BitVector::UP
 random_docids(uint32_t docid_limit, uint32_t count)
 {
     auto res = BitVector::create(docid_limit);
+    if ((count + 1) == docid_limit) {
+        res->notSelf();
+        res->clearBit(0);
+        return res;
+    }
     uint32_t docids_left = count;
     // Bit 0 is never set since it is reserved as docid 0.
     // All other docids have equal probability to be set.
@@ -63,17 +68,32 @@ struct HitSpec {
     HitSpec(uint32_t term_value_in, uint32_t num_hits_in) : term_value(term_value_in), num_hits(num_hits_in) {}
 };
-using HitSpecs = std::vector<HitSpec>;
+namespace benchmark {
+using TermVector = std::vector<uint32_t>;
+}
 
-HitSpecs
-make_hit_specs(uint32_t num_terms, uint32_t hits_per_term, uint32_t first_term_value)
-{
-    HitSpecs res;
-    for (uint32_t i = 0; i < num_terms; ++i) {
-        res.push_back({first_term_value + i, hits_per_term});
+class HitSpecs {
+private:
+    std::vector<HitSpec> _specs;
+    uint32_t _next_term_value;
+
+public:
+    HitSpecs(uint32_t first_term_value)
+        : _specs(), _next_term_value(first_term_value)
+    {
     }
-    return res;
-}
+    benchmark::TermVector add(uint32_t num_terms, uint32_t hits_per_term) {
+        benchmark::TermVector res;
+        for (uint32_t i = 0; i < num_terms; ++i) {
+            uint32_t term_value = _next_term_value++;
+            _specs.push_back({term_value, hits_per_term});
+            res.push_back(term_value);
+        }
+        return res;
+    }
+    auto begin() const { return _specs.begin(); }
+    auto end() const { return _specs.end(); }
+};
 
 template <typename AttributeType>
 void
@@ -96,7 +116,6 @@ populate_attribute(AttributeType& attr, uint32_t docid_limit, const HitSpecs& hi
             }
         }
     });
-    attr.commit(true);
 }
 
 }
@@ -124,6 +143,7 @@ make_attribute(const Config& cfg, uint32_t num_docs, const HitSpecs& hit_specs)
         populate_attribute(real, docid_limit, hit_specs);
         }
     }
+    attr->commit(true);
     return attr;
 }
 
@@ -146,8 +166,82 @@ struct BenchmarkResult {
     BenchmarkResult() : BenchmarkResult(0, 0, 0, 0, 0, "") {}
     BenchmarkResult(double time_ms_in, uint32_t seeks_in, uint32_t hits_in, double estimate_in, double cost_in, const vespalib::string& iterator_name_in)
         : time_ms(time_ms_in), seeks(seeks_in), hits(hits_in), estimate(estimate_in), cost(cost_in), iterator_name(iterator_name_in) {}
-    double time_per_seek_ns() const { return (time_ms / seeks) * 1000.0 * 1000.0; }
-    double time_per_cost_ms() const { return (time_ms / cost); }
+    double ns_per_seek() const { return (time_ms / seeks) * 1000.0 * 1000.0; }
+    double ms_per_cost() const { return (time_ms / cost); }
+};
+
+struct Stats {
+    double average;
+    double median;
+    double std_dev;
+    Stats() : average(0.0), median(0.0), std_dev(0.0) {}
+    Stats(double average_in, double median_in, double std_dev_in)
+        : average(average_in), median(median_in), std_dev(std_dev_in)
+    {}
+    vespalib::string to_string() const {
+        std::ostringstream oss;
+        oss << std::fixed << std::setprecision(3);
+        oss << "{average=" << average << ", median=" << median << ", std_dev=" << std_dev << "}";
+        return oss.str();
+    }
+};
+
+class BenchmarkResults {
+private:
+    std::vector<BenchmarkResult> _results;
+
+    template <typename F>
+    std::vector<double> extract_sorted_values(F func) const {
+        std::vector<double> values;
+        for (const auto& res: _results) {
+            values.push_back(func(res));
+        }
+        std::sort(values.begin(), values.end());
+        return values;
+    }
+
+    double calc_median(const std::vector<double>& values) const {
+        size_t middle = values.size() / 2;
+        if (values.size() % 2 == 0) {
+            return (values[middle - 1] + values[middle]) / 2;
+        } else {
+            return values[middle];
+        }
+    }
+
+    double calc_standard_deviation(const std::vector<double>& values, double average) const {
+        double deviations = 0.0;
+        for (double val : values) {
+            double diff = val - average;
+            deviations += (diff * diff);
+        }
+        double variance = deviations / values.size();
+        return std::sqrt(variance);
+    }
+
+    template <typename F>
+    Stats calc_stats(F func) const {
+        auto values = extract_sorted_values(func);
+        double average = std::accumulate(values.begin(), values.end(), 0.0) / values.size();
+        double median = calc_median(values);
+        double std_dev = calc_standard_deviation(values, average);
+        return {average, median, std_dev};
+    }
+
+public:
+    BenchmarkResults(): _results() {}
+    void add(const BenchmarkResult& res) {
+        _results.push_back(res);
+    }
+    Stats time_ms_stats() const {
+        return calc_stats([](const auto& res){ return res.time_ms; });
+    }
+    Stats ns_per_seek_stats() const {
+        return calc_stats([](const auto& res){ return res.ns_per_seek(); });
+    }
+    Stats ms_per_cost_stats() const {
+        return calc_stats([](const auto& res){ return res.ms_per_cost(); });
+    }
 };
 
 BenchmarkResult
@@ -245,28 +339,39 @@ to_string(QueryOperator query_op)
     return "unknown";
 }
 
+vespalib::string
+to_string(const Config& attr_config)
+{
+    auto col_type = attr_config.collectionType();
+    auto basic_type = attr_config.basicType();
+    if (col_type == CollectionType::SINGLE) {
+        return basic_type.asString();
+    }
+    return vespalib::string(col_type.asString()) + "<" + vespalib::string(basic_type.asString()) + ">";
+}
+
 std::unique_ptr<Node>
-make_query_node(QueryOperator query_op, const HitSpecs& hit_specs)
+make_query_node(QueryOperator query_op, const benchmark::TermVector& terms)
 {
     if (query_op == QueryOperator::Term) {
-        assert(hit_specs.size() == 1);
-        return std::make_unique<SimpleNumberTerm>(std::to_string(hit_specs[0].term_value), field, 0, Weight(1));
+        assert(terms.size() == 1);
+        return std::make_unique<SimpleNumberTerm>(std::to_string(terms[0]), field, 0, Weight(1));
     } else if (query_op == QueryOperator::In) {
-        auto terms = std::make_unique<IntegerTermVector>(hit_specs.size());
-        for (auto spec : hit_specs) {
-            terms->addTerm(spec.term_value);
+        auto termv = std::make_unique<IntegerTermVector>(terms.size());
+        for (auto term : terms) {
+            termv->addTerm(term);
         }
-        return std::make_unique<SimpleInTerm>(std::move(terms), MultiTerm::Type::INTEGER, field, 0, Weight(1));
+        return std::make_unique<SimpleInTerm>(std::move(termv), MultiTerm::Type::INTEGER, field, 0, Weight(1));
     } else if (query_op == QueryOperator::WeightedSet) {
-        auto res = std::make_unique<SimpleWeightedSetTerm>(hit_specs.size(), field, 0, Weight(1));
-        for (auto spec : hit_specs) {
-            res->addTerm(spec.term_value, Weight(1));
+        auto res = std::make_unique<SimpleWeightedSetTerm>(terms.size(), field, 0, Weight(1));
+        for (auto term : terms) {
+            res->addTerm(term, Weight(1));
         }
         return res;
     } else if (query_op == QueryOperator::DotProduct) {
-        auto res = std::make_unique<SimpleDotProduct>(hit_specs.size(), field, 0, Weight(1));
-        for (auto spec : hit_specs) {
-            res->addTerm(spec.term_value, Weight(1));
+        auto res = std::make_unique<SimpleDotProduct>(terms.size(), field, 0, Weight(1));
+        for (auto term : terms) {
+            res->addTerm(term, Weight(1));
         }
         return res;
     }
@@ -275,12 +380,12 @@ make_query_node(QueryOperator query_op, const HitSpecs& hit_specs)
 
 template <typename BlueprintType>
 Blueprint::UP
-make_intermediate_blueprint(IAttributeContext& attr_ctx, const HitSpecs& hit_specs, uint32_t docid_limit)
+make_intermediate_blueprint(IAttributeContext& attr_ctx, const benchmark::TermVector& terms, uint32_t docid_limit)
 {
     auto blueprint = std::make_unique<BlueprintType>();
-    for (auto spec : hit_specs) {
-        SimpleNumberTerm term(std::to_string(spec.term_value), field, 0, Weight(1));
-        auto child = make_leaf_blueprint(term, attr_ctx, docid_limit);
+    for (auto term : terms) {
+        SimpleNumberTerm sterm(std::to_string(term), field, 0, Weight(1));
+        auto child = make_leaf_blueprint(sterm, attr_ctx, docid_limit);
         blueprint->addChild(std::move(child));
     }
     blueprint->setDocIdLimit(docid_limit);
@@ -289,41 +394,111 @@ make_intermediate_blueprint(IAttributeContext& attr_ctx, const HitSpecs& hit_spe
 }
 
 BenchmarkResult
-run_benchmark(IAttributeContext& attr_ctx, QueryOperator query_op, const HitSpecs& hit_specs, uint32_t docid_limit, bool strict)
+run_benchmark(IAttributeContext& attr_ctx, QueryOperator query_op, const benchmark::TermVector& terms, uint32_t docid_limit, bool strict)
 {
     if (query_op == QueryOperator::And) {
-        return benchmark_search(make_intermediate_blueprint<AndBlueprint>(attr_ctx, hit_specs, docid_limit), docid_limit, strict);
+        return benchmark_search(make_intermediate_blueprint<AndBlueprint>(attr_ctx, terms, docid_limit), docid_limit, strict);
     } else if (query_op == QueryOperator::Or) {
-        return benchmark_search(make_intermediate_blueprint<OrBlueprint>(attr_ctx, hit_specs, docid_limit), docid_limit, strict);
+        return benchmark_search(make_intermediate_blueprint<OrBlueprint>(attr_ctx, terms, docid_limit), docid_limit, strict);
     } else {
-        auto query_node = make_query_node(query_op, hit_specs);
+        auto query_node = make_query_node(query_op, terms);
         auto blueprint = make_leaf_blueprint(*query_node, attr_ctx, docid_limit);
         return benchmark_search(std::move(blueprint), docid_limit, strict);
     }
 }
 
 void
-run_benchmark(const Config& cfg, uint32_t num_docs, const HitSpecs& hit_specs, double hit_ratio, QueryOperator query_op, bool strict)
+print_result(const BenchmarkResult& res, const benchmark::TermVector& terms, double hit_ratio, uint32_t num_docs)
 {
-    auto attr_ctx = make_attribute_context(cfg, num_docs, hit_specs);
-    BenchmarkResult res = run_benchmark(*attr_ctx, query_op, hit_specs, num_docs + 1, strict);
+    std::cout << std::fixed << std::setprecision(3)
+              << "children=" << std::setw(4) << terms.size()
+              << ", t_ratio=" << std::setw(5) << hit_ratio
+              << ", a_ratio=" << std::setw(5) << ((double) res.hits / (double) num_docs)
+              << ", est=" << std::setw(5) << res.estimate
+              << ", hits=" << std::setw(7) << res.hits
+              << ", seeks=" << std::setw(8) << res.seeks
+              << std::setprecision(2)
+              << ", time_ms=" << std::setw(8) << res.time_ms
+              << std::setprecision(3)
+              << ", cost=" << std::setw(8) << res.cost
+              << std::setprecision(2)
+              << ", ns_per_seek=" << std::setw(8) << res.ns_per_seek()
+              << ", ms_per_cost=" << std::setw(7) << res.ms_per_cost()
+              << ", itr=" << res.iterator_name << std::endl;
+}
+
+void
+print_results(const BenchmarkResults& results)
+{
     std::cout << std::fixed << std::setprecision(3)
-              << "children=" << std::setw(4) << hit_specs.size()
-              << ", strict=" << std::setw(5) << (strict ? "true" : "false")
-              << ", t_ratio=" << std::setw(5) << hit_ratio
-              << ", a_ratio=" << std::setw(5) << ((double)res.hits / (double)num_docs)
-              << ", est=" << std::setw(5) << res.estimate
-              << ", hits=" << std::setw(7) << res.hits
-              << ", seeks=" << std::setw(8) << res.seeks
-              << std::setprecision(2)
-              << ", time_ms:" << std::setw(8) << res.time_ms
-              << std::setprecision(3)
-              << ", cost=" << std::setw(8) << res.cost
-              << std::setprecision(2)
-              << ", ns_per_seek=" << std::setw(8) << res.time_per_seek_ns()
-              << ", ms_per_cost=" << std::setw(7) << res.time_per_cost_ms()
-              << ", itr=" << res.iterator_name << std::endl;
+              << "statistics summary: time_ms=" << results.time_ms_stats().to_string()
+              << ", ns_per_seek=" << results.ns_per_seek_stats().to_string()
+              << ", ms_per_cost=" << results.ms_per_cost_stats().to_string() << std::endl << std::endl;
+}
+
+struct BenchmarkCase {
+    Config attr_cfg;
+    QueryOperator query_op;
+    bool strict;
+    BenchmarkResults results;
+    double scaled_cost;
+    BenchmarkCase(const Config& attr_cfg_in, QueryOperator query_op_in, bool strict_in, const BenchmarkResults& results_in)
+        : attr_cfg(attr_cfg_in),
+          query_op(query_op_in),
+          strict(strict_in),
+          results(results_in),
+          scaled_cost(1.0)
+    {}
+    BenchmarkCase(const BenchmarkCase&);
+    BenchmarkCase& operator=(const BenchmarkCase&);
+    ~BenchmarkCase();
+};
+
+BenchmarkCase::BenchmarkCase(const BenchmarkCase&) = default;
+BenchmarkCase& BenchmarkCase::operator=(const BenchmarkCase&) = default;
+BenchmarkCase::~BenchmarkCase() = default;
+
+class BenchmarkSummary {
+private:
+    std::vector<BenchmarkCase> _cases;
+    double _baseline_ms_per_cost;
+
+public:
+    BenchmarkSummary(double baseline_ms_per_cost_in)
+        : _cases(),
+          _baseline_ms_per_cost(baseline_ms_per_cost_in)
+    {}
+    double baseline_ms_per_cost() const { return _baseline_ms_per_cost; }
+    void add(const Config& attr_cfg, QueryOperator query_op, bool strict, const BenchmarkResults& results) {
+        _cases.emplace_back(attr_cfg, query_op, strict, results);
+    }
+    void calc_scaled_costs() {
+        std::sort(_cases.begin(), _cases.end(), [](const auto& lhs, const auto& rhs) {
+            return lhs.results.ms_per_cost_stats().average < rhs.results.ms_per_cost_stats().average;
+        });
+        for (auto& c : _cases) {
+            c.scaled_cost = c.results.ms_per_cost_stats().average / _baseline_ms_per_cost;
+        }
+    }
+    const std::vector<BenchmarkCase>& cases() const { return _cases; }
+};
+
+vespalib::string
+to_string(QueryOperator query_op, const Config& attr_cfg, bool strict)
+{
+    return "op=" + to_string(query_op) + ", cfg=" + to_string(attr_cfg) + ", strict=" + (strict ? "true" : "false");
+}
+
+void
+print_summary(const BenchmarkSummary& summary)
+{
+    std::cout << "-------- benchmark summary (baseline_ms_per_cost=" << summary.baseline_ms_per_cost() << ") --------" << std::endl;
+    for (const auto& c : summary.cases()) {
+        std::cout << std::fixed << std::setprecision(3) << ""
                  << std::setw(40) << std::left << to_string(c.query_op, c.attr_cfg, c.strict) << ": "
                  << "ms_per_cost=" << std::setw(7) << std::right << c.results.ms_per_cost_stats().to_string()
                  << ", scaled_cost=" << std::setw(7) << c.scaled_cost << std::endl;
+    }
 }
 
 struct BenchmarkSetup {
@@ -333,6 +508,7 @@ struct BenchmarkSetup {
     std::vector<double> hit_ratios;
     std::vector<uint32_t> child_counts;
    std::vector<bool> strictness;
+    uint32_t default_values_per_document;
     BenchmarkSetup(uint32_t num_docs_in, Config attr_cfg_in, QueryOperator query_op_in,
@@ -344,7 +520,8 @@ struct BenchmarkSetup {
       query_op(query_op_in),
       hit_ratios(hit_ratios_in),
       child_counts(child_counts_in),
-      strictness(strictness_in)
+      strictness(strictness_in),
+      default_values_per_document(0)
     {}
     ~BenchmarkSetup() {}
 };
@@ -361,18 +538,27 @@ calc_hits_per_term(uint32_t num_docs, double target_hit_ratio, uint32_t children
     }
 }
 
-void
+BenchmarkResults
 run_benchmarks(const BenchmarkSetup& setup)
 {
-    std::cout << "-------- run_benchmarks: " << to_string(setup.query_op) << " --------" << std::endl;
+    BenchmarkResults results;
     for (bool strict : setup.strictness) {
+        std::cout << "-------- run_benchmarks: " << to_string(setup.query_op, setup.attr_cfg, strict) << " --------" << std::endl;
         for (double hit_ratio : setup.hit_ratios) {
            for (uint32_t children : setup.child_counts) {
                uint32_t hits_per_term = calc_hits_per_term(setup.num_docs, hit_ratio, children, setup.query_op);
-                run_benchmark(setup.attr_cfg, setup.num_docs, make_hit_specs(children, hits_per_term, 55555), hit_ratio, setup.query_op, strict);
+                HitSpecs hit_specs(55555);
+                hit_specs.add(setup.default_values_per_document, setup.num_docs);
+                auto terms = hit_specs.add(children, hits_per_term);
+                auto attr_ctx = make_attribute_context(setup.attr_cfg, setup.num_docs, hit_specs);
+                auto res = run_benchmark(*attr_ctx, setup.query_op, terms, setup.num_docs + 1, strict);
+                print_result(res, terms, hit_ratio, setup.num_docs);
+                results.add(res);
            }
        }
    }
+    print_results(results);
+    return results;
 }
 
 Config
@@ -386,9 +572,31 @@ make_config(BasicType basic_type, CollectionType col_type, bool fast_search)
 constexpr uint32_t num_docs = 10'000'000;
 const std::vector<double> hit_ratios = {0.001, 0.01, 0.1, 0.5};
 const std::vector<uint32_t> child_counts = {1, 10, 100, 1000};
+const Config int32 = make_config(BasicType::INT32, CollectionType::SINGLE, false);
 const Config int32_fs = make_config(BasicType::INT32, CollectionType::SINGLE, true);
+const Config int32_array = make_config(BasicType::INT32, CollectionType::ARRAY, false);
 const Config int32_array_fs = make_config(BasicType::INT32, CollectionType::ARRAY, true);
 const Config int32_wset_fs = make_config(BasicType::INT32, CollectionType::WSET, true);
+const Config str = make_config(BasicType::STRING, CollectionType::SINGLE, false);
+const Config str_array = make_config(BasicType::STRING, CollectionType::ARRAY, false);
+
+
+TEST(IteratorBenchmark, analyze_term_search_in_attributes_without_fast_search)
+{
+    std::vector<Config> attr_cfgs = {int32, int32_array, str, str_array};
+    const std::vector<double> my_hit_ratios = {0.001, 0.005, 0.01, 0.05, 0.1, 0.3, 0.5, 0.7, 0.9};
+    BenchmarkSummary summary(25.0);
+    for (const auto& attr_cfg : attr_cfgs) {
+        for (bool strict : {true, false}) {
+            BenchmarkSetup setup(num_docs, attr_cfg, QueryOperator::Term, my_hit_ratios, {1}, {strict});
+            setup.default_values_per_document = 1;
+            auto results = run_benchmarks(setup);
+            summary.add(attr_cfg, QueryOperator::Term, strict, results);
+        }
+    }
+    summary.calc_scaled_costs();
+    print_summary(summary);
+}
 
 TEST(IteratorBenchmark, term_benchmark)
 {
-- 
cgit v1.2.3
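
For context, the scaled costs printed by the new print_summary() boil down to dividing each case's average "ms per cost" by a chosen baseline (25.0 in the test above). The following standalone sketch is not part of the patch; the CaseCost struct, labels, and sample numbers are illustrative only, and it simply mirrors the calculation done in BenchmarkSummary::calc_scaled_costs():

#include <algorithm>
#include <cstdio>
#include <vector>

// Illustrative stand-in for one benchmark case: the measured average
// "ms per cost" for a given (attribute config, query operator, strictness).
struct CaseCost {
    const char* label;
    double avg_ms_per_cost;
    double scaled_cost = 1.0;
};

int main() {
    const double baseline_ms_per_cost = 25.0; // same baseline value as used in the test
    std::vector<CaseCost> cases = {
        {"op=Term, cfg=int32, strict=true", 25.0},           // hypothetical measurement
        {"op=Term, cfg=string, strict=true", 40.0},          // hypothetical measurement
        {"op=Term, cfg=array<string>, strict=false", 55.0},  // hypothetical measurement
    };
    // Sort by average ms_per_cost, then express each case relative to the baseline.
    // A scaled_cost of e.g. 2.2 suggests tuning the blueprint cost for that case
    // to roughly 2.2x the baseline cost.
    std::sort(cases.begin(), cases.end(), [](const auto& lhs, const auto& rhs) {
        return lhs.avg_ms_per_cost < rhs.avg_ms_per_cost;
    });
    for (auto& c : cases) {
        c.scaled_cost = c.avg_ms_per_cost / baseline_ms_per_cost;
        std::printf("%-45s scaled_cost=%.3f\n", c.label, c.scaled_cost);
    }
    return 0;
}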