aboutsummaryrefslogtreecommitdiffstats
path: root/searchlib/src/tests/queryeval/iterator_benchmark/iterator_benchmark_test.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'searchlib/src/tests/queryeval/iterator_benchmark/iterator_benchmark_test.cpp')
-rw-r--r--searchlib/src/tests/queryeval/iterator_benchmark/iterator_benchmark_test.cpp176
1 files changed, 128 insertions, 48 deletions
diff --git a/searchlib/src/tests/queryeval/iterator_benchmark/iterator_benchmark_test.cpp b/searchlib/src/tests/queryeval/iterator_benchmark/iterator_benchmark_test.cpp
index f4a1ade8a66..db0fe76b7af 100644
--- a/searchlib/src/tests/queryeval/iterator_benchmark/iterator_benchmark_test.cpp
+++ b/searchlib/src/tests/queryeval/iterator_benchmark/iterator_benchmark_test.cpp
@@ -26,6 +26,18 @@ using vespalib::make_string_short::fmt;
const vespalib::string field_name = "myfield";
double budget_sec = 1.0;
+double estimate_actual_cost(Blueprint &bp, InFlow in_flow) {
+ if (in_flow.strict()) {
+ assert(bp.strict());
+ return bp.strict_cost();
+ } else if (bp.strict()) {
+ auto stats = FlowStats::from(flow::DefaultAdapter(), &bp);
+ return flow::forced_strict_cost(stats, in_flow.rate());
+ } else {
+ return bp.cost() * in_flow.rate();
+ }
+}
+
enum class PlanningAlgo {
Order,
Estimate,
@@ -236,7 +248,8 @@ strict_search(BenchmarkBlueprintFactory& factory, uint32_t docid_limit, Planning
timer.after();
}
FlowStats flow(ctx.blueprint->estimate(), ctx.blueprint->cost(), ctx.blueprint->strict_cost());
- return {timer.min_time() * 1000.0, hits + 1, hits, flow, flow.strict_cost, get_class_name(*ctx.iterator), factory.get_name(*ctx.blueprint)};
+ double actual_cost = estimate_actual_cost(*ctx.blueprint, InFlow(true));
+ return {timer.min_time() * 1000.0, hits + 1, hits, flow, actual_cost, get_class_name(*ctx.iterator), factory.get_name(*ctx.blueprint)};
}
template <bool do_unpack>
@@ -269,7 +282,7 @@ non_strict_search(BenchmarkBlueprintFactory& factory, uint32_t docid_limit, doub
timer.after();
}
FlowStats flow(ctx.blueprint->estimate(), ctx.blueprint->cost(), ctx.blueprint->strict_cost());
- double actual_cost = flow.cost * filter_hit_ratio;
+ double actual_cost = estimate_actual_cost(*ctx.blueprint, InFlow(filter_hit_ratio));
return {timer.min_time() * 1000.0, seeks, hits, flow, actual_cost, get_class_name(*ctx.iterator), factory.get_name(*ctx.blueprint)};
}
@@ -291,10 +304,6 @@ benchmark_search(BenchmarkBlueprintFactory& factory, uint32_t docid_limit, bool
}
}
-
-
-
-
//-----------------------------------------------------------------------------
double est_forced_strict_cost(double estimate, double strict_cost, double rate) {
@@ -317,26 +326,26 @@ struct Sample {
}
};
-double find_crossover(const char *type, const auto &calculate_at, double delta) {
+double find_crossover(const char *type, const char *a, const char *b, const auto &calculate_at, double delta) {
double min = delta;
double max = 1.0;
fprintf(stderr, "looking for %s crossover in the range [%g, %g]...\n", type, min, max);
auto at_min = calculate_at(min);
auto at_max = calculate_at(max);
- fprintf(stderr, " before: [%s, %s], after: [%s, %s]\n",
- at_min.first.str().c_str(), at_max.first.str().c_str(),
- at_min.second.str().c_str(), at_max.second.str().c_str());
- auto best_before = [](auto values) { return (values.first < values.second); };
- if (best_before(at_min) == best_before(at_max)) {
+ fprintf(stderr, " %s: [%s, %s], %s: [%s, %s]\n",
+ a, at_min.first.str().c_str(), at_max.first.str().c_str(),
+ b, at_min.second.str().c_str(), at_max.second.str().c_str());
+ auto a_best = [](auto values) { return (values.first < values.second); };
+ if (a_best(at_min) == a_best(at_max)) {
fprintf(stderr, " NO %s CROSSOVER FOUND\n", type);
return 0.0;
}
while (max > (min + delta)) {
double x = (min + max) / 2.0;
auto at_x = calculate_at(x);
- fprintf(stderr, " best@%g: %s (%s vs %s)\n", x, best_before(at_x) ? "before" : "after",
+ fprintf(stderr, " best@%g: %s (%s vs %s)\n", x, a_best(at_x) ? a : b,
at_x.first.str().c_str(), at_x.second.str().c_str());
- if (best_before(at_min) == best_before(at_x)) {
+ if (a_best(at_min) == a_best(at_x)) {
min = x;
at_min = at_x;
} else {
@@ -409,11 +418,11 @@ void analyze_crossover(BenchmarkBlueprintFactory &fixed, std::function<std::uniq
std::vector<double> results;
std::vector<const char *> names;
names.push_back("time crossover");
- results.push_back(find_crossover("TIME", combine(estimate_AND_time_ms), delta));
+ results.push_back(find_crossover("TIME", "before", "after", combine(estimate_AND_time_ms), delta));
names.push_back("cost crossover");
- results.push_back(find_crossover("COST", combine(calculate_AND_cost), delta));
+ results.push_back(find_crossover("COST", "before", "after", combine(calculate_AND_cost), delta));
names.push_back("abs_est crossover");
- results.push_back(find_crossover("ABS_EST", combine(first_abs_est), delta));
+ results.push_back(find_crossover("ABS_EST", "before", "after", combine(first_abs_est), delta));
sample_at("COST", combine(calculate_AND_cost), results, names);
sample_at("TIME", combine(estimate_AND_time_ms), results, names);
}
@@ -429,21 +438,37 @@ to_string(bool val)
void
print_result_header()
{
- std::cout << "| chn | f_ratio | o_ratio | a_ratio | f.est | f.cost | f.scost | hits | seeks | time_ms | act_cost | ns_per_seek | ms_per_act_cost | iterator | blueprint |" << std::endl;
+ std::cout << "| in_flow | chn | o_ratio | a_ratio | f.est | f.cost | f.act_cost | f.scost | f.act_scost | hits | seeks | time_ms | act_cost | ns_per_seek | ms_per_act_cost | iterator | blueprint |" << std::endl;
+}
+
+std::ostream &operator<<(std::ostream &dst, InFlow in_flow) {
+ auto old_w = dst.width();
+ auto old_p = dst.precision();
+ dst << std::setw(7) << std::setprecision(5);
+ if (in_flow.strict()) {
+ dst << " STRICT";
+ } else {
+ dst << in_flow.rate();
+ }
+ dst << std::setw(old_w);
+ dst << std::setprecision(old_p);
+ return dst;
}
void
-print_result(const BenchmarkResult& res, uint32_t children, double op_hit_ratio, double filter_hit_ratio, uint32_t num_docs)
+print_result(const BenchmarkResult& res, uint32_t children, double op_hit_ratio, InFlow in_flow, uint32_t num_docs)
{
std::cout << std::fixed << std::setprecision(5)
- << "| " << std::setw(5) << children
- << " | " << std::setw(7) << filter_hit_ratio
+ << "| " << in_flow
+ << " | " << std::setw(5) << children
<< " | " << std::setw(7) << op_hit_ratio
<< " | " << std::setw(7) << ((double) res.hits / (double) num_docs)
<< " | " << std::setw(6) << res.flow.estimate
<< std::setprecision(4)
<< " | " << std::setw(9) << res.flow.cost
+ << " | " << std::setw(10) << (res.flow.cost * in_flow.rate())
<< " | " << std::setw(7) << res.flow.strict_cost
+ << " | " << std::setw(11) << (in_flow.strict() ? res.flow.strict_cost : flow::forced_strict_cost(res.flow, in_flow.rate()))
<< " | " << std::setw(8) << res.hits
<< " | " << std::setw(8) << res.seeks
<< std::setprecision(3)
@@ -640,7 +665,7 @@ run_benchmark_case(const BenchmarkCaseSetup& setup)
if (filter_hit_ratio * setup.filter_crossover_factor <= op_hit_ratio) {
auto res = benchmark_search(*factory, setup.num_docs + 1,
setup.bcase.strict_context, setup.bcase.force_strict, setup.bcase.unpack_iterator, filter_hit_ratio, PlanningAlgo::Cost);
- print_result(res, children, op_hit_ratio, filter_hit_ratio, setup.num_docs);
+ print_result(res, children, op_hit_ratio, InFlow(setup.bcase.strict_context, filter_hit_ratio), setup.num_docs);
result.add(res);
}
}
@@ -681,23 +706,25 @@ run_benchmarks(const BenchmarkSetup& setup)
void
print_intermediate_blueprint_result_header(size_t children)
{
+ std::cout << "| in_flow";
// This matches the naming scheme in IntermediateBlueprintFactory.
char name = 'A';
for (size_t i = 0; i < children; ++i) {
- std::cout << "| " << name++ << ".ratio ";
+ std::cout << " | " << name++ << ".ratio";
}
- std::cout << "| flow.cost | flow.scost | flow.est | ratio | hits | seeks | ms_per_cost | time_ms | algo | blueprint |" << std::endl;
+ std::cout << " | flow.cost | flow.scost | flow.est | ratio | hits | seeks | ms_per_cost | time_ms | algo | blueprint |" << std::endl;
}
void
-print_intermediate_blueprint_result(const BenchmarkResult& res, const std::vector<double>& children_ratios, PlanningAlgo algo, uint32_t num_docs)
+print_intermediate_blueprint_result(const BenchmarkResult& res, const std::vector<double>& children_ratios, PlanningAlgo algo, InFlow in_flow, uint32_t num_docs)
{
- std::cout << std::fixed << std::setprecision(5);
+ std::cout << std::fixed << std::setprecision(5)
+ << "| " << in_flow;
for (auto ratio : children_ratios) {
- std::cout << "| " << std::setw(7) << ratio << " ";
+ std::cout << " | " << std::setw(7) << ratio;
}
std::cout << std::setprecision(5)
- << "| " << std::setw(10) << res.flow.cost
+ << " | " << std::setw(10) << res.flow.cost
<< " | " << std::setw(10) << res.flow.strict_cost
<< " | " << std::setw(8) << res.flow.estimate
<< " | " << std::setw(7) << ((double) res.hits / (double) num_docs)
@@ -745,9 +772,8 @@ struct BlueprintFactorySetup {
BlueprintFactorySetup::~BlueprintFactorySetup() = default;
-template <typename IntermediateBlueprintFactoryType>
void
-run_intermediate_blueprint_benchmark(const BlueprintFactorySetup& a, const BlueprintFactorySetup& b, size_t num_docs)
+run_intermediate_blueprint_benchmark(auto factory_factory, std::vector<InFlow> in_flows, const BlueprintFactorySetup& a, const BlueprintFactorySetup& b, size_t num_docs)
{
print_intermediate_blueprint_result_header(2);
double max_speedup = 0.0;
@@ -755,26 +781,28 @@ run_intermediate_blueprint_benchmark(const BlueprintFactorySetup& a, const Bluep
for (double b_hit_ratio: b.op_hit_ratios) {
auto b_factory = b.make_factory_shared(num_docs, b_hit_ratio);
for (double a_hit_ratio : a.op_hit_ratios) {
- IntermediateBlueprintFactoryType factory;
- factory.add_child(a.make_factory(num_docs, a_hit_ratio));
- factory.add_child(b_factory);
+ auto factory = factory_factory();
+ factory->add_child(a.make_factory(num_docs, a_hit_ratio));
+ factory->add_child(b_factory);
double time_ms_esti = 0.0;
- for (auto algo: {PlanningAlgo::Order, PlanningAlgo::Estimate, PlanningAlgo::Cost,
- PlanningAlgo::CostForceStrict}) {
- auto res = benchmark_search(factory, num_docs + 1, true, false, false, 1.0, algo);
- print_intermediate_blueprint_result(res, {a_hit_ratio, b_hit_ratio}, algo, num_docs);
- if (algo == PlanningAlgo::Estimate) {
- time_ms_esti = res.time_ms;
- }
- if (algo == PlanningAlgo::CostForceStrict) {
- double speedup = time_ms_esti / res.time_ms;
- if (speedup > max_speedup) {
- max_speedup = speedup;
+ for (InFlow in_flow: in_flows) {
+ for (auto algo: {PlanningAlgo::Order, PlanningAlgo::Estimate, PlanningAlgo::Cost,
+ PlanningAlgo::CostForceStrict}) {
+ auto res = benchmark_search(*factory, num_docs + 1, in_flow.strict(), false, false, in_flow.rate(), algo);
+ print_intermediate_blueprint_result(res, {a_hit_ratio, b_hit_ratio}, algo, in_flow, num_docs);
+ if (algo == PlanningAlgo::Estimate) {
+ time_ms_esti = res.time_ms;
}
- if (speedup < min_speedup) {
- min_speedup = speedup;
+ if (algo == PlanningAlgo::CostForceStrict) {
+ double speedup = time_ms_esti / res.time_ms;
+ if (speedup > max_speedup) {
+ max_speedup = speedup;
+ }
+ if (speedup < min_speedup) {
+ min_speedup = speedup;
+ }
+ std::cout << "speedup (esti/forc)=" << std::setprecision(4) << speedup << std::endl;
}
- std::cout << "speedup (esti/forc)=" << std::setprecision(4) << speedup << std::endl;
}
}
}
@@ -786,7 +814,19 @@ void
run_and_benchmark(const BlueprintFactorySetup& a, const BlueprintFactorySetup& b, size_t num_docs)
{
std::cout << "AND[A={" << a.to_string() << "},B={" << b.to_string() << "}]" << std::endl;
- run_intermediate_blueprint_benchmark<AndBlueprintFactory>(a, b, num_docs);
+ run_intermediate_blueprint_benchmark([](){ return std::make_unique<AndBlueprintFactory>(); }, {true}, a, b, num_docs);
+}
+
+void
+run_source_blender_benchmark(const BlueprintFactorySetup& a, const BlueprintFactorySetup& b, size_t num_docs)
+{
+ std::cout << "SB[A={" << a.to_string() << "},B={" << b.to_string() << "}]" << std::endl;
+ auto factory_factory = [&](){
+ auto factory = std::make_unique<SourceBlenderBlueprintFactory>();
+ factory->init_selector([](uint32_t i){ return (i%10 == 0) ? 1 : 2; }, num_docs + 1);
+ return factory;
+ };
+ run_intermediate_blueprint_benchmark(factory_factory, {true, 0.75, 0.5, 0.25, 0.1, 0.01, 0.001}, a, b, num_docs);
}
//-------------------------------------------------------------------------------------
@@ -970,16 +1010,40 @@ TEST(IteratorBenchmark, analyze_AND_bitvector_vs_IN)
}
}
+TEST(IteratorBenchmark, analyze_strict_SOURCEBLENDER_memory_and_disk)
+{
+ for (double small_ratio: {0.001, 0.005, 0.01, 0.05}) {
+ run_source_blender_benchmark({str_fs, QueryOperator::Term, {small_ratio}},
+ {str_index, QueryOperator::Term, {small_ratio * 10}},
+ num_docs);
+ }
+}
+
TEST(IteratorBenchmark, analyze_OR_non_strict_fs)
{
for (auto or_hit_ratio : {0.01, 0.1, 0.5}) {
BenchmarkSetup setup(num_docs, {int32_fs}, {QueryOperator::Or}, {false}, {or_hit_ratio},
{2, 4, 6, 8, 10, 100, 1000});
+ //setup.force_strict = true;
setup.filter_hit_ratios = gen_ratios(or_hit_ratio, 10.0, 13);
run_benchmarks(setup);
}
}
+TEST(IteratorBenchmark, analyze_OR_non_strict_fs_child_est_adjust)
+{
+ for (auto or_hit_ratio : {0.01, 0.1, 0.5}) {
+ for (uint32_t children : {2, 4, 6, 8, 10, 100, 1000}) {
+ double child_est = or_hit_ratio / children;
+ BenchmarkSetup setup(num_docs, {int32_fs}, {QueryOperator::Or}, {false}, {or_hit_ratio},
+ {children});
+ //setup.force_strict = true;
+ setup.filter_hit_ratios = gen_ratios(child_est, 10.0, 13);
+ run_benchmarks(setup);
+ }
+ }
+}
+
TEST(IteratorBenchmark, analyze_OR_non_strict_non_fs)
{
BenchmarkSetup setup(num_docs, {int32}, {QueryOperator::Or}, {false}, {0.1}, {2, 4, 6, 8, 10});
@@ -1008,6 +1072,22 @@ TEST(IteratorBenchmark, analyze_btree_vs_bitvector_iterators_strict)
run_benchmarks(setup);
}
+TEST(IteratorBenchmark, btree_vs_array_nonstrict_crossover) {
+ for (double hit_ratio: { 0.001, 0.002, 0.003, 0.004, 0.005, 0.006, 0.007, 0.008, 0.009,
+ 0.01, 0.02, 0.03, 0.04, 0.05, 0.06, 0.07, 0.08, 0.09,
+ 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9,
+ 0.91, 0.92, 0.93, 0.94, 0.95, 0.96, 0.97, 0.98, 0.99, 1.0})
+ {
+ auto btree = make_blueprint_factory(int32_array_fs, QueryOperator::Term, num_docs, 0, hit_ratio, 1, false);
+ auto array = make_blueprint_factory( int32_array, QueryOperator::Term, num_docs, 0, hit_ratio, 1, false);
+ auto time_ms = [&](auto &bpf, double in_flow) {
+ return Sample(benchmark_search(bpf, num_docs + 1, false, false, false, in_flow, PlanningAlgo::Cost).time_ms);
+ };
+ auto calculate_at = [&](double in_flow) { return std::make_pair(time_ms(*btree, in_flow), time_ms(*array, in_flow)); };
+ fprintf(stderr, "btree/array crossover@%5.3f: %8.6f\n", hit_ratio, find_crossover("TIME", "btree", "array", calculate_at, 0.0001));
+ }
+}
+
int main(int argc, char **argv) {
::testing::InitGoogleTest(&argc, argv);
int res = RUN_ALL_TESTS();