aboutsummaryrefslogtreecommitdiffstats
path: root/searchlib/src/tests/queryeval/iterator_benchmark/iterator_benchmark_test.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'searchlib/src/tests/queryeval/iterator_benchmark/iterator_benchmark_test.cpp')
-rw-r--r--searchlib/src/tests/queryeval/iterator_benchmark/iterator_benchmark_test.cpp60
1 files changed, 35 insertions, 25 deletions
diff --git a/searchlib/src/tests/queryeval/iterator_benchmark/iterator_benchmark_test.cpp b/searchlib/src/tests/queryeval/iterator_benchmark/iterator_benchmark_test.cpp
index b08fde50d7c..c6dae52fd69 100644
--- a/searchlib/src/tests/queryeval/iterator_benchmark/iterator_benchmark_test.cpp
+++ b/searchlib/src/tests/queryeval/iterator_benchmark/iterator_benchmark_test.cpp
@@ -31,25 +31,22 @@ struct BenchmarkResult {
uint32_t hits;
FlowStats flow;
double actual_cost;
- double alt_cost;
vespalib::string iterator_name;
vespalib::string blueprint_name;
- BenchmarkResult() : BenchmarkResult(0, 0, 0, {0, 0, 0}, 0, 0, "", "") {}
- BenchmarkResult(double time_ms_in, uint32_t seeks_in, uint32_t hits_in, FlowStats flow_in, double actual_cost_in, double alt_cost_in,
+ BenchmarkResult() : BenchmarkResult(0, 0, 0, {0, 0, 0}, 0, "", "") {}
+ BenchmarkResult(double time_ms_in, uint32_t seeks_in, uint32_t hits_in, FlowStats flow_in, double actual_cost_in,
const vespalib::string& iterator_name_in, const vespalib::string& blueprint_name_in)
: time_ms(time_ms_in),
seeks(seeks_in),
hits(hits_in),
flow(flow_in),
actual_cost(actual_cost_in),
- alt_cost(alt_cost_in),
iterator_name(iterator_name_in),
blueprint_name(blueprint_name_in)
{}
~BenchmarkResult();
double ns_per_seek() const { return (time_ms / seeks) * 1000.0 * 1000.0; }
double ms_per_actual_cost() const { return (time_ms / actual_cost); }
- double ms_per_alt_cost() const { return (time_ms / alt_cost); }
};
BenchmarkResult::~BenchmarkResult() = default;
@@ -128,9 +125,6 @@ public:
Stats ms_per_actual_cost_stats() const {
return calc_stats([](const auto& res){ return res.ms_per_actual_cost(); });
}
- Stats ms_per_alt_cost_stats() const {
- return calc_stats([](const auto& res){ return res.ms_per_alt_cost(); });
- }
};
std::string
@@ -222,7 +216,7 @@ strict_search(BenchmarkBlueprintFactory& factory, uint32_t docid_limit)
timer.after();
}
FlowStats flow(ctx.blueprint->estimate(), ctx.blueprint->cost(), ctx.blueprint->strict_cost());
- return {timer.min_time() * 1000.0, hits + 1, hits, flow, flow.strict_cost, flow.strict_cost, get_class_name(*ctx.iterator), get_class_name(*ctx.blueprint)};
+ return {timer.min_time() * 1000.0, hits + 1, hits, flow, flow.strict_cost, get_class_name(*ctx.iterator), get_class_name(*ctx.blueprint)};
}
template <bool do_unpack>
@@ -256,9 +250,7 @@ non_strict_search(BenchmarkBlueprintFactory& factory, uint32_t docid_limit, doub
}
FlowStats flow(ctx.blueprint->estimate(), ctx.blueprint->cost(), ctx.blueprint->strict_cost());
double actual_cost = flow.cost * filter_hit_ratio;
- // This is an attempt to calculate an alternative actual cost for strict / posting list iterators that are used in a non-strict context.
- double alt_cost = flow.strict_cost + 0.5 * filter_hit_ratio;
- return {timer.min_time() * 1000.0, seeks, hits, flow, actual_cost, alt_cost, get_class_name(*ctx.iterator), get_class_name(*ctx.blueprint)};
+ return {timer.min_time() * 1000.0, seeks, hits, flow, actual_cost, get_class_name(*ctx.iterator), get_class_name(*ctx.blueprint)};
}
BenchmarkResult
@@ -413,32 +405,30 @@ to_string(bool val)
void
print_result_header()
{
- std::cout << "| chn | f_ratio | o_ratio | a_ratio | f.est | f.cost | f.scost | hits | seeks | time_ms | act_cost | alt_cost | ns_per_seek | ms_per_act_cost | ms_per_alt_cost | iterator | blueprint |" << std::endl;
+ std::cout << "| chn | f_ratio | o_ratio | a_ratio | f.est | f.cost | f.scost | hits | seeks | time_ms | act_cost | ns_per_seek | ms_per_act_cost | iterator | blueprint |" << std::endl;
}
void
print_result(const BenchmarkResult& res, uint32_t children, double op_hit_ratio, double filter_hit_ratio, uint32_t num_docs)
{
std::cout << std::fixed << std::setprecision(5)
- << "| " << std::setw(4) << children
+ << "| " << std::setw(5) << children
<< " | " << std::setw(7) << filter_hit_ratio
<< " | " << std::setw(7) << op_hit_ratio
<< " | " << std::setw(7) << ((double) res.hits / (double) num_docs)
<< " | " << std::setw(6) << res.flow.estimate
<< std::setprecision(4)
- << " | " << std::setw(7) << res.flow.cost
+ << " | " << std::setw(9) << res.flow.cost
<< " | " << std::setw(7) << res.flow.strict_cost
<< " | " << std::setw(8) << res.hits
<< " | " << std::setw(8) << res.seeks
<< std::setprecision(3)
<< " | " << std::setw(8) << res.time_ms
<< std::setprecision(4)
- << " | " << std::setw(8) << res.actual_cost
- << " | " << std::setw(8) << res.alt_cost
+ << " | " << std::setw(9) << res.actual_cost
<< std::setprecision(2)
<< " | " << std::setw(11) << res.ns_per_seek()
<< " | " << std::setw(15) << res.ms_per_actual_cost()
- << " | " << std::setw(15) << res.ms_per_alt_cost()
<< " | " << res.iterator_name
<< " | " << res.blueprint_name << " |" << std::endl;
}
@@ -449,8 +439,7 @@ print_result(const BenchmarkCaseResult& result)
std::cout << std::fixed << std::setprecision(3)
<< "summary: time_ms=" << result.time_ms_stats().to_string() << std::endl
<< " ns_per_seek=" << result.ns_per_seek_stats().to_string() << std::endl
- << " ms_per_act_cost=" << result.ms_per_actual_cost_stats().to_string() << std::endl
- << " ms_per_alt_cost=" << result.ms_per_alt_cost_stats().to_string() << std::endl << std::endl;
+ << " ms_per_act_cost=" << result.ms_per_actual_cost_stats().to_string() << std::endl << std::endl;
}
struct BenchmarkCase {
@@ -534,6 +523,7 @@ struct BenchmarkCaseSetup {
std::vector<uint32_t> child_counts;
std::vector<double> filter_hit_ratios;
uint32_t default_values_per_document;
+ bool disjunct_children;
double filter_crossover_factor;
BenchmarkCaseSetup(uint32_t num_docs_in,
const BenchmarkCase& bcase_in,
@@ -545,6 +535,7 @@ struct BenchmarkCaseSetup {
child_counts(child_counts_in),
filter_hit_ratios({1.0}),
default_values_per_document(0),
+ disjunct_children(false),
filter_crossover_factor(0.0)
{}
~BenchmarkCaseSetup() {}
@@ -561,6 +552,7 @@ struct BenchmarkSetup {
bool force_strict;
bool unpack_iterator;
uint32_t default_values_per_document;
+ bool disjunct_children;
double filter_crossover_factor;
BenchmarkSetup(uint32_t num_docs_in,
const std::vector<FieldConfig>& field_cfgs_in,
@@ -578,6 +570,7 @@ struct BenchmarkSetup {
force_strict(false),
unpack_iterator(false),
default_values_per_document(0),
+ disjunct_children(false),
filter_crossover_factor(0.0)
{}
BenchmarkSetup(uint32_t num_docs_in,
@@ -592,6 +585,7 @@ struct BenchmarkSetup {
res.bcase.force_strict = force_strict;
res.bcase.unpack_iterator = unpack_iterator;
res.default_values_per_document = default_values_per_document;
+ res.disjunct_children = disjunct_children;
if (!bcase.strict_context) {
// Simulation of a filter is only relevant in a non-strict context.
res.filter_hit_ratios = filter_hit_ratios;
@@ -617,7 +611,7 @@ run_benchmark_case(const BenchmarkCaseSetup& setup)
for (uint32_t children : setup.child_counts) {
auto factory = make_blueprint_factory(setup.bcase.field_cfg, setup.bcase.query_op,
setup.num_docs, setup.default_values_per_document,
- op_hit_ratio, children);
+ op_hit_ratio, children, setup.disjunct_children);
for (double filter_hit_ratio : setup.filter_hit_ratios) {
if (filter_hit_ratio * setup.filter_crossover_factor <= op_hit_ratio) {
auto res = benchmark_search(*factory, setup.num_docs + 1,
@@ -726,6 +720,22 @@ TEST(IteratorBenchmark, analyze_term_search_in_fast_search_attributes)
run_benchmarks(setup, global_summary);
}
+TEST(IteratorBenchmark, analyze_in_operator_non_strict)
+{
+ const std::vector<double> hit_ratios = {0.001, 0.01, 0.1, 0.2, 0.4, 0.6, 0.8};
+ BenchmarkSetup setup(num_docs, {int32_fs}, {QueryOperator::In}, {false}, hit_ratios, {5, 9, 10, 100, 1000, 10000});
+ setup.disjunct_children = true;
+ run_benchmarks(setup);
+}
+
+TEST(IteratorBenchmark, analyze_in_operator_strict)
+{
+ const std::vector<double> hit_ratios = {0.001, 0.01, 0.1, 0.2, 0.4, 0.6, 0.8};
+ BenchmarkSetup setup(num_docs, {int32_fs}, {QueryOperator::In}, {true}, hit_ratios, {5, 9, 10, 100, 1000, 10000});
+ setup.disjunct_children = true;
+ run_benchmarks(setup);
+}
+
TEST(IteratorBenchmark, analyze_complex_leaf_operators)
{
std::vector<FieldConfig> field_cfgs = {int32_array_fs};
@@ -764,18 +774,18 @@ TEST(IteratorBenchmark, or_benchmark)
TEST(IteratorBenchmark, or_vs_filter_crossover)
{
- auto fixed_or = make_blueprint_factory(int32_array_fs, QueryOperator::Or, num_docs, 0, 0.1, 100);
+ auto fixed_or = make_blueprint_factory(int32_array_fs, QueryOperator::Or, num_docs, 0, 0.1, 100, false);
auto variable_term = [](double rate) {
- return make_blueprint_factory(int32_array_fs, QueryOperator::Term, num_docs, 0, rate, 1);
+ return make_blueprint_factory(int32_array_fs, QueryOperator::Term, num_docs, 0, rate, 1, false);
};
analyze_crossover(*fixed_or, variable_term, num_docs + 1, false, 0.0001);
}
TEST(IteratorBenchmark, or_vs_filter_crossover_with_allow_force_strict)
{
- auto fixed_or = make_blueprint_factory(int32_array_fs, QueryOperator::Or, num_docs, 0, 0.1, 100);
+ auto fixed_or = make_blueprint_factory(int32_array_fs, QueryOperator::Or, num_docs, 0, 0.1, 100, false);
auto variable_term = [](double rate) {
- return make_blueprint_factory(int32_array_fs, QueryOperator::Term, num_docs, 0, rate, 1);
+ return make_blueprint_factory(int32_array_fs, QueryOperator::Term, num_docs, 0, rate, 1, false);
};
analyze_crossover(*fixed_or, variable_term, num_docs + 1, true, 0.0001);
}