diff options
Diffstat (limited to 'searchlib/src/tests/queryeval/iterator_benchmark/iterator_benchmark_test.cpp')
-rw-r--r-- | searchlib/src/tests/queryeval/iterator_benchmark/iterator_benchmark_test.cpp | 60 |
1 files changed, 35 insertions, 25 deletions
diff --git a/searchlib/src/tests/queryeval/iterator_benchmark/iterator_benchmark_test.cpp b/searchlib/src/tests/queryeval/iterator_benchmark/iterator_benchmark_test.cpp index b08fde50d7c..c6dae52fd69 100644 --- a/searchlib/src/tests/queryeval/iterator_benchmark/iterator_benchmark_test.cpp +++ b/searchlib/src/tests/queryeval/iterator_benchmark/iterator_benchmark_test.cpp @@ -31,25 +31,22 @@ struct BenchmarkResult { uint32_t hits; FlowStats flow; double actual_cost; - double alt_cost; vespalib::string iterator_name; vespalib::string blueprint_name; - BenchmarkResult() : BenchmarkResult(0, 0, 0, {0, 0, 0}, 0, 0, "", "") {} - BenchmarkResult(double time_ms_in, uint32_t seeks_in, uint32_t hits_in, FlowStats flow_in, double actual_cost_in, double alt_cost_in, + BenchmarkResult() : BenchmarkResult(0, 0, 0, {0, 0, 0}, 0, "", "") {} + BenchmarkResult(double time_ms_in, uint32_t seeks_in, uint32_t hits_in, FlowStats flow_in, double actual_cost_in, const vespalib::string& iterator_name_in, const vespalib::string& blueprint_name_in) : time_ms(time_ms_in), seeks(seeks_in), hits(hits_in), flow(flow_in), actual_cost(actual_cost_in), - alt_cost(alt_cost_in), iterator_name(iterator_name_in), blueprint_name(blueprint_name_in) {} ~BenchmarkResult(); double ns_per_seek() const { return (time_ms / seeks) * 1000.0 * 1000.0; } double ms_per_actual_cost() const { return (time_ms / actual_cost); } - double ms_per_alt_cost() const { return (time_ms / alt_cost); } }; BenchmarkResult::~BenchmarkResult() = default; @@ -128,9 +125,6 @@ public: Stats ms_per_actual_cost_stats() const { return calc_stats([](const auto& res){ return res.ms_per_actual_cost(); }); } - Stats ms_per_alt_cost_stats() const { - return calc_stats([](const auto& res){ return res.ms_per_alt_cost(); }); - } }; std::string @@ -222,7 +216,7 @@ strict_search(BenchmarkBlueprintFactory& factory, uint32_t docid_limit) timer.after(); } FlowStats flow(ctx.blueprint->estimate(), ctx.blueprint->cost(), ctx.blueprint->strict_cost()); - return {timer.min_time() * 1000.0, hits + 1, hits, flow, flow.strict_cost, flow.strict_cost, get_class_name(*ctx.iterator), get_class_name(*ctx.blueprint)}; + return {timer.min_time() * 1000.0, hits + 1, hits, flow, flow.strict_cost, get_class_name(*ctx.iterator), get_class_name(*ctx.blueprint)}; } template <bool do_unpack> @@ -256,9 +250,7 @@ non_strict_search(BenchmarkBlueprintFactory& factory, uint32_t docid_limit, doub } FlowStats flow(ctx.blueprint->estimate(), ctx.blueprint->cost(), ctx.blueprint->strict_cost()); double actual_cost = flow.cost * filter_hit_ratio; - // This is an attempt to calculate an alternative actual cost for strict / posting list iterators that are used in a non-strict context. - double alt_cost = flow.strict_cost + 0.5 * filter_hit_ratio; - return {timer.min_time() * 1000.0, seeks, hits, flow, actual_cost, alt_cost, get_class_name(*ctx.iterator), get_class_name(*ctx.blueprint)}; + return {timer.min_time() * 1000.0, seeks, hits, flow, actual_cost, get_class_name(*ctx.iterator), get_class_name(*ctx.blueprint)}; } BenchmarkResult @@ -413,32 +405,30 @@ to_string(bool val) void print_result_header() { - std::cout << "| chn | f_ratio | o_ratio | a_ratio | f.est | f.cost | f.scost | hits | seeks | time_ms | act_cost | alt_cost | ns_per_seek | ms_per_act_cost | ms_per_alt_cost | iterator | blueprint |" << std::endl; + std::cout << "| chn | f_ratio | o_ratio | a_ratio | f.est | f.cost | f.scost | hits | seeks | time_ms | act_cost | ns_per_seek | ms_per_act_cost | iterator | blueprint |" << std::endl; } void print_result(const BenchmarkResult& res, uint32_t children, double op_hit_ratio, double filter_hit_ratio, uint32_t num_docs) { std::cout << std::fixed << std::setprecision(5) - << "| " << std::setw(4) << children + << "| " << std::setw(5) << children << " | " << std::setw(7) << filter_hit_ratio << " | " << std::setw(7) << op_hit_ratio << " | " << std::setw(7) << ((double) res.hits / (double) num_docs) << " | " << std::setw(6) << res.flow.estimate << std::setprecision(4) - << " | " << std::setw(7) << res.flow.cost + << " | " << std::setw(9) << res.flow.cost << " | " << std::setw(7) << res.flow.strict_cost << " | " << std::setw(8) << res.hits << " | " << std::setw(8) << res.seeks << std::setprecision(3) << " | " << std::setw(8) << res.time_ms << std::setprecision(4) - << " | " << std::setw(8) << res.actual_cost - << " | " << std::setw(8) << res.alt_cost + << " | " << std::setw(9) << res.actual_cost << std::setprecision(2) << " | " << std::setw(11) << res.ns_per_seek() << " | " << std::setw(15) << res.ms_per_actual_cost() - << " | " << std::setw(15) << res.ms_per_alt_cost() << " | " << res.iterator_name << " | " << res.blueprint_name << " |" << std::endl; } @@ -449,8 +439,7 @@ print_result(const BenchmarkCaseResult& result) std::cout << std::fixed << std::setprecision(3) << "summary: time_ms=" << result.time_ms_stats().to_string() << std::endl << " ns_per_seek=" << result.ns_per_seek_stats().to_string() << std::endl - << " ms_per_act_cost=" << result.ms_per_actual_cost_stats().to_string() << std::endl - << " ms_per_alt_cost=" << result.ms_per_alt_cost_stats().to_string() << std::endl << std::endl; + << " ms_per_act_cost=" << result.ms_per_actual_cost_stats().to_string() << std::endl << std::endl; } struct BenchmarkCase { @@ -534,6 +523,7 @@ struct BenchmarkCaseSetup { std::vector<uint32_t> child_counts; std::vector<double> filter_hit_ratios; uint32_t default_values_per_document; + bool disjunct_children; double filter_crossover_factor; BenchmarkCaseSetup(uint32_t num_docs_in, const BenchmarkCase& bcase_in, @@ -545,6 +535,7 @@ struct BenchmarkCaseSetup { child_counts(child_counts_in), filter_hit_ratios({1.0}), default_values_per_document(0), + disjunct_children(false), filter_crossover_factor(0.0) {} ~BenchmarkCaseSetup() {} @@ -561,6 +552,7 @@ struct BenchmarkSetup { bool force_strict; bool unpack_iterator; uint32_t default_values_per_document; + bool disjunct_children; double filter_crossover_factor; BenchmarkSetup(uint32_t num_docs_in, const std::vector<FieldConfig>& field_cfgs_in, @@ -578,6 +570,7 @@ struct BenchmarkSetup { force_strict(false), unpack_iterator(false), default_values_per_document(0), + disjunct_children(false), filter_crossover_factor(0.0) {} BenchmarkSetup(uint32_t num_docs_in, @@ -592,6 +585,7 @@ struct BenchmarkSetup { res.bcase.force_strict = force_strict; res.bcase.unpack_iterator = unpack_iterator; res.default_values_per_document = default_values_per_document; + res.disjunct_children = disjunct_children; if (!bcase.strict_context) { // Simulation of a filter is only relevant in a non-strict context. res.filter_hit_ratios = filter_hit_ratios; @@ -617,7 +611,7 @@ run_benchmark_case(const BenchmarkCaseSetup& setup) for (uint32_t children : setup.child_counts) { auto factory = make_blueprint_factory(setup.bcase.field_cfg, setup.bcase.query_op, setup.num_docs, setup.default_values_per_document, - op_hit_ratio, children); + op_hit_ratio, children, setup.disjunct_children); for (double filter_hit_ratio : setup.filter_hit_ratios) { if (filter_hit_ratio * setup.filter_crossover_factor <= op_hit_ratio) { auto res = benchmark_search(*factory, setup.num_docs + 1, @@ -726,6 +720,22 @@ TEST(IteratorBenchmark, analyze_term_search_in_fast_search_attributes) run_benchmarks(setup, global_summary); } +TEST(IteratorBenchmark, analyze_in_operator_non_strict) +{ + const std::vector<double> hit_ratios = {0.001, 0.01, 0.1, 0.2, 0.4, 0.6, 0.8}; + BenchmarkSetup setup(num_docs, {int32_fs}, {QueryOperator::In}, {false}, hit_ratios, {5, 9, 10, 100, 1000, 10000}); + setup.disjunct_children = true; + run_benchmarks(setup); +} + +TEST(IteratorBenchmark, analyze_in_operator_strict) +{ + const std::vector<double> hit_ratios = {0.001, 0.01, 0.1, 0.2, 0.4, 0.6, 0.8}; + BenchmarkSetup setup(num_docs, {int32_fs}, {QueryOperator::In}, {true}, hit_ratios, {5, 9, 10, 100, 1000, 10000}); + setup.disjunct_children = true; + run_benchmarks(setup); +} + TEST(IteratorBenchmark, analyze_complex_leaf_operators) { std::vector<FieldConfig> field_cfgs = {int32_array_fs}; @@ -764,18 +774,18 @@ TEST(IteratorBenchmark, or_benchmark) TEST(IteratorBenchmark, or_vs_filter_crossover) { - auto fixed_or = make_blueprint_factory(int32_array_fs, QueryOperator::Or, num_docs, 0, 0.1, 100); + auto fixed_or = make_blueprint_factory(int32_array_fs, QueryOperator::Or, num_docs, 0, 0.1, 100, false); auto variable_term = [](double rate) { - return make_blueprint_factory(int32_array_fs, QueryOperator::Term, num_docs, 0, rate, 1); + return make_blueprint_factory(int32_array_fs, QueryOperator::Term, num_docs, 0, rate, 1, false); }; analyze_crossover(*fixed_or, variable_term, num_docs + 1, false, 0.0001); } TEST(IteratorBenchmark, or_vs_filter_crossover_with_allow_force_strict) { - auto fixed_or = make_blueprint_factory(int32_array_fs, QueryOperator::Or, num_docs, 0, 0.1, 100); + auto fixed_or = make_blueprint_factory(int32_array_fs, QueryOperator::Or, num_docs, 0, 0.1, 100, false); auto variable_term = [](double rate) { - return make_blueprint_factory(int32_array_fs, QueryOperator::Term, num_docs, 0, rate, 1); + return make_blueprint_factory(int32_array_fs, QueryOperator::Term, num_docs, 0, rate, 1, false); }; analyze_crossover(*fixed_or, variable_term, num_docs + 1, true, 0.0001); } |