diff options
author | Geir Storli <geirst@yahooinc.com> | 2024-02-27 14:10:06 +0000 |
---|---|---|
committer | Geir Storli <geirst@yahooinc.com> | 2024-02-29 14:14:37 +0000 |
commit | a96032d35739cf137d30dbff701d5f3b6f69cf30 (patch) | |
tree | 89117156106d11fdcede453e06cb7b97e0a39196 | |
parent | 6a749a1a167dced0c8450ccd891ccdac30407401 (diff) |
Extend benchmark with more test cases and options.
- Option to force an iterator to be strict in a non-strict context.
- Option to test a subset of the combinations between a simulated filter and operator.
-rw-r--r-- | searchlib/src/tests/queryeval/iterator_benchmark/iterator_benchmark_test.cpp | 98 |
1 files changed, 66 insertions, 32 deletions
diff --git a/searchlib/src/tests/queryeval/iterator_benchmark/iterator_benchmark_test.cpp b/searchlib/src/tests/queryeval/iterator_benchmark/iterator_benchmark_test.cpp index bdc89363b22..d1fa9209dd0 100644 --- a/searchlib/src/tests/queryeval/iterator_benchmark/iterator_benchmark_test.cpp +++ b/searchlib/src/tests/queryeval/iterator_benchmark/iterator_benchmark_test.cpp @@ -309,9 +309,9 @@ strict_search(Blueprint& blueprint, MatchData& md, uint32_t docid_limit) } BenchmarkResult -non_strict_search(Blueprint& blueprint, MatchData& md, uint32_t docid_limit, double filter_hit_ratio) +non_strict_search(Blueprint& blueprint, MatchData& md, uint32_t docid_limit, double filter_hit_ratio, bool force_strict) { - auto itr = blueprint.createSearch(md, false); + auto itr = blueprint.createSearch(md, force_strict); assert(itr.get()); BenchmarkTimer timer(budget_sec); uint32_t seeks = 0; @@ -340,18 +340,18 @@ non_strict_search(Blueprint& blueprint, MatchData& md, uint32_t docid_limit, dou } BenchmarkResult -benchmark_search(Blueprint::UP blueprint, uint32_t docid_limit, bool strict, double filter_hit_ratio) +benchmark_search(Blueprint::UP blueprint, uint32_t docid_limit, bool strict_context, bool force_strict, double filter_hit_ratio) { auto opts = Blueprint::Options::all(); - blueprint->sort(strict, opts); - blueprint->fetchPostings(ExecuteInfo::createForTest(strict)); + blueprint->sort(strict_context || force_strict, opts); + blueprint->fetchPostings(ExecuteInfo::createForTest(strict_context || force_strict)); // Note: All blueprints get the same TermFieldMatchData instance. // This is OK as long as we don't do unpacking and only use 1 thread. 
auto md = MatchData::makeTestInstance(1, 1); - if (strict) { + if (strict_context) { return strict_search(*blueprint, *md, docid_limit); } else { - return non_strict_search(*blueprint, *md, docid_limit, filter_hit_ratio); + return non_strict_search(*blueprint, *md, docid_limit, filter_hit_ratio, force_strict); } } @@ -407,6 +407,12 @@ to_string(const Config& attr_config) return oss.str(); } +vespalib::string +to_string(bool val) +{ + return val ? "true" : "false"; +} + std::unique_ptr<Node> make_query_node(QueryOperator query_op, const benchmark::TermVector& terms) { @@ -451,23 +457,23 @@ make_intermediate_blueprint(IAttributeContext& attr_ctx, const benchmark::TermVe } BenchmarkResult -run_benchmark(IAttributeContext& attr_ctx, QueryOperator query_op, const benchmark::TermVector& terms, uint32_t docid_limit, bool strict, double filter_hit_ratio) +run_benchmark(IAttributeContext& attr_ctx, QueryOperator query_op, const benchmark::TermVector& terms, uint32_t docid_limit, bool strict_context, bool force_strict, double filter_hit_ratio) { if (query_op == QueryOperator::And) { - return benchmark_search(make_intermediate_blueprint<AndBlueprint>(attr_ctx, terms, docid_limit), docid_limit, strict, filter_hit_ratio); + return benchmark_search(make_intermediate_blueprint<AndBlueprint>(attr_ctx, terms, docid_limit), docid_limit, strict_context, force_strict, filter_hit_ratio); } else if (query_op == QueryOperator::Or) { - return benchmark_search(make_intermediate_blueprint<OrBlueprint>(attr_ctx, terms, docid_limit), docid_limit, strict, filter_hit_ratio); + return benchmark_search(make_intermediate_blueprint<OrBlueprint>(attr_ctx, terms, docid_limit), docid_limit, strict_context, force_strict, filter_hit_ratio); } else { auto query_node = make_query_node(query_op, terms); auto blueprint = make_leaf_blueprint(*query_node, attr_ctx, docid_limit); - return benchmark_search(std::move(blueprint), docid_limit, strict, filter_hit_ratio); + return 
benchmark_search(std::move(blueprint), docid_limit, strict_context, force_strict, filter_hit_ratio); } } void print_result_header() { - std::cout << "| chn | f_ratio | o_ratio | a_ratio | f.est | f.cost | f.scost | hits | seeks | time_ms | act_cost | alt_cost | ns_per_seek | ms_per_act_cost | ms_per_alt_cost | iterator | blueprint |" << std::endl; + std::cout << "| chn | f_ratio | o_ratio | a_ratio | f.est | f.cost | f.scost | hits | seeks | time_ms | act_cost | alt_cost | ns_per_seek | ms_per_act_cost | ms_per_alt_cost | iterator | blueprint |" << std::endl; } void @@ -479,13 +485,13 @@ print_result(const BenchmarkResult& res, const benchmark::TermVector& terms, dou << " | " << std::setw(7) << op_hit_ratio << " | " << std::setw(7) << ((double) res.hits / (double) num_docs) << " | " << std::setw(6) << res.flow.estimate - << " | " << std::setw(6) << res.flow.cost + << " | " << std::setw(7) << res.flow.cost << " | " << std::setw(7) << res.flow.strict_cost << " | " << std::setw(8) << res.hits << " | " << std::setw(8) << res.seeks - << std::setprecision(2) - << " | " << std::setw(8) << res.time_ms << std::setprecision(3) + << " | " << std::setw(8) << res.time_ms + << std::setprecision(4) << " | " << std::setw(8) << res.actual_cost << " | " << std::setw(8) << res.alt_cost << std::setprecision(2) @@ -509,14 +515,17 @@ print_result(const BenchmarkCaseResult& result) struct BenchmarkCase { Config attr_cfg; QueryOperator query_op; - bool strict; - BenchmarkCase(const Config& attr_cfg_in, QueryOperator query_op_in, bool strict_in) + bool strict_context; + bool force_strict; + BenchmarkCase(const Config& attr_cfg_in, QueryOperator query_op_in, bool strict_context_in) : attr_cfg(attr_cfg_in), query_op(query_op_in), - strict(strict_in) + strict_context(strict_context_in), + force_strict(false) {} vespalib::string to_string() const { - return "op=" + ::to_string(query_op) + ", cfg=" + ::to_string(attr_cfg) + ", strict=" + (strict ? 
"true" : "false"); + return "op=" + ::to_string(query_op) + ", cfg=" + ::to_string(attr_cfg) + + ", strict_context=" + ::to_string(strict_context) + ", force_strict=" + ::to_string(force_strict); } }; @@ -582,6 +591,7 @@ struct BenchmarkCaseSetup { std::vector<uint32_t> child_counts; std::vector<double> filter_hit_ratios; uint32_t default_values_per_document; + double filter_crossover_factor; BenchmarkCaseSetup(uint32_t num_docs_in, const BenchmarkCase& bcase_in, const std::vector<double>& op_hit_ratios_in, @@ -591,7 +601,8 @@ struct BenchmarkCaseSetup { op_hit_ratios(op_hit_ratios_in), child_counts(child_counts_in), filter_hit_ratios({1.0}), - default_values_per_document(0) + default_values_per_document(0), + filter_crossover_factor(1.0) {} ~BenchmarkCaseSetup() {} }; @@ -604,7 +615,9 @@ struct BenchmarkSetup { std::vector<double> op_hit_ratios; std::vector<uint32_t> child_counts; std::vector<double> filter_hit_ratios; + bool force_strict; uint32_t default_values_per_document; + double filter_crossover_factor; BenchmarkSetup(uint32_t num_docs_in, const std::vector<Config>& attr_cfgs_in, const std::vector<QueryOperator>& query_ops_in, @@ -618,7 +631,9 @@ struct BenchmarkSetup { op_hit_ratios(op_hit_ratios_in), child_counts(child_counts_in), filter_hit_ratios({1.0}), - default_values_per_document(0) + force_strict(false), + default_values_per_document(0), + filter_crossover_factor(1.0) {} BenchmarkSetup(uint32_t num_docs_in, const std::vector<Config>& attr_cfgs_in, @@ -629,12 +644,15 @@ struct BenchmarkSetup { {} BenchmarkCaseSetup make_case_setup(const BenchmarkCase& bcase) const { BenchmarkCaseSetup res(num_docs, bcase, op_hit_ratios, child_counts); + res.bcase.force_strict = force_strict; res.default_values_per_document = default_values_per_document; - if (!bcase.strict) { + if (!bcase.strict_context) { // Simulation of a filter is only relevant in a non-strict context. 
res.filter_hit_ratios = filter_hit_ratios; + res.filter_crossover_factor = filter_crossover_factor; } else { res.filter_hit_ratios = {1.0}; + res.filter_crossover_factor = 0.0; } return res; } @@ -669,10 +687,12 @@ run_benchmark_case(const BenchmarkCaseSetup& setup) auto terms = hit_specs.add(children, hits_per_term); auto attr_ctx = make_attribute_context(setup.bcase.attr_cfg, setup.num_docs, hit_specs); for (double filter_hit_ratio : setup.filter_hit_ratios) { - auto res = run_benchmark(*attr_ctx, setup.bcase.query_op, terms, setup.num_docs + 1, - setup.bcase.strict, filter_hit_ratio); - print_result(res, terms, op_hit_ratio, filter_hit_ratio, setup.num_docs); - result.add(res); + if (filter_hit_ratio * setup.filter_crossover_factor <= op_hit_ratio) { + auto res = run_benchmark(*attr_ctx, setup.bcase.query_op, terms, setup.num_docs + 1, + setup.bcase.strict_context, setup.bcase.force_strict, filter_hit_ratio); + print_result(res, terms, op_hit_ratio, filter_hit_ratio, setup.num_docs); + result.add(res); + } } } } @@ -712,29 +732,43 @@ const Config int32 = make_config(BasicType::INT32, CollectionType::SINGLE, false const Config int32_fs = make_config(BasicType::INT32, CollectionType::SINGLE, true); const Config int32_array = make_config(BasicType::INT32, CollectionType::ARRAY, false); const Config int32_array_fs = make_config(BasicType::INT32, CollectionType::ARRAY, true); +const Config int32_wset = make_config(BasicType::INT32, CollectionType::WSET, false); const Config int32_wset_fs = make_config(BasicType::INT32, CollectionType::WSET, true); const Config str = make_config(BasicType::STRING, CollectionType::SINGLE, false); const Config str_fs = make_config(BasicType::STRING, CollectionType::SINGLE, true); const Config str_array = make_config(BasicType::STRING, CollectionType::ARRAY, false); const Config str_array_fs = make_config(BasicType::STRING, CollectionType::ARRAY, true); +const Config str_wset = make_config(BasicType::STRING, CollectionType::WSET, false); 
TEST(IteratorBenchmark, analyze_term_search_in_attributes_without_fast_search) { - std::vector<Config> attr_cfgs = {int32, int32_array, str, str_array}; - const std::vector<double> hit_ratios = {0.001, 0.01, 0.1, 0.2, 0.4, 0.6, 0.8, 1.0}; - BenchmarkSetup setup(num_docs, attr_cfgs, {QueryOperator::Term}, {false}, hit_ratios); + std::vector<Config> attr_cfgs = {int32, int32_array, int32_wset, str, str_array, str_wset}; + const std::vector<double> hit_ratios = {0.001, 0.01, 0.1, 0.5, 1.0}; + BenchmarkSetup setup(num_docs, attr_cfgs, {QueryOperator::Term}, {true, false}, hit_ratios); setup.default_values_per_document = 1; - setup.filter_hit_ratios = {0.01, 0.1, 0.5, 1.0}; + setup.filter_hit_ratios = {0.00001, 0.00005, 0.0001, 0.0005, 0.001, 0.005, 0.01, 0.05, 0.1, 0.2, 0.5, 1.0}; run_benchmarks(setup); } TEST(IteratorBenchmark, analyze_term_search_in_attributes_with_fast_search) { std::vector<Config> attr_cfgs = {int32_fs, int32_array_fs, str_fs, str_array_fs}; - const std::vector<double> hit_ratios = {0.001, 0.01, 0.1, 0.2, 0.4, 0.6, 0.8, 1.0}; + const std::vector<double> hit_ratios = {0.001, 0.01, 0.1, 0.5, 1.0}; BenchmarkSetup setup(num_docs, attr_cfgs, {QueryOperator::Term}, {true, false}, hit_ratios); - setup.filter_hit_ratios = {0.01, 0.1, 0.5, 1.0}; + setup.filter_hit_ratios = {0.00001, 0.00005, 0.0001, 0.0005, 0.001, 0.005, 0.01, 0.05, 0.1, 0.2, 0.5, 1.0}; + run_benchmarks(setup); +} + +TEST(IteratorBenchmark, analyze_term_search_in_attributes_combined) +{ + // Note: all fast-search attributes have similar performance, so only one needs to be included. 
+ std::vector<Config> attr_cfgs = {int32_fs, int32, int32_array, int32_wset, str, str_array, str_wset}; + const std::vector<double> hit_ratios = {0.001, 0.01, 0.1, 0.5, 1.0}; + BenchmarkSetup setup(num_docs, attr_cfgs, {QueryOperator::Term}, {true, false}, hit_ratios); + setup.default_values_per_document = 1; + setup.filter_hit_ratios = {0.00001, 0.00005, 0.0001, 0.0005, 0.001, 0.005, 0.01, 0.05, 0.1, 0.2, 0.5, 1.0}; + setup.filter_crossover_factor = 1.0; run_benchmarks(setup); } |