diff options
author | Henning Baldersheim <balder@yahoo-inc.com> | 2024-05-03 19:19:15 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2024-05-03 19:19:15 +0200 |
commit | dc46e0ea3fa20f5d12f26f8ad1fc99f394dcd5d6 (patch) | |
tree | 0425d1d27e830461c6a16878ebe897268568aeae | |
parent | ac654e7eb268e5af6c1a1e9eb5c4726b61b7e4a6 (diff) | |
parent | aa226470becd5dfb048065edddf2dfae86615131 (diff) |
Merge pull request #31105 from vespa-engine/havardpe/flow-stats-tuning
test and adjust some stuff
6 files changed, 52 insertions, 7 deletions
diff --git a/searchlib/src/tests/queryeval/blueprint/intermediate_blueprints_test.cpp b/searchlib/src/tests/queryeval/blueprint/intermediate_blueprints_test.cpp index 48ddeed47e9..490f221d1d8 100644 --- a/searchlib/src/tests/queryeval/blueprint/intermediate_blueprints_test.cpp +++ b/searchlib/src/tests/queryeval/blueprint/intermediate_blueprints_test.cpp @@ -576,7 +576,9 @@ void compare(const Blueprint &bp1, const Blueprint &bp2, bool expect_eq) { bp1.asSlime(SlimeInserter(a)); bp2.asSlime(SlimeInserter(b)); if (expect_eq) { - EXPECT_TRUE(vespalib::slime::are_equal(a.get(), b.get(), cmp_hook)); + if(!EXPECT_TRUE(vespalib::slime::are_equal(a.get(), b.get(), cmp_hook))) { + fprintf(stderr, "a: %s\n\nb: %s\n\n", bp1.asString().c_str(), bp2.asString().c_str()); + } } else { EXPECT_FALSE(vespalib::slime::are_equal(a.get(), b.get(), cmp_hook)); } @@ -614,7 +616,6 @@ TEST_F("test SourceBlender below AND partial optimization", SourceBlenderTestFix auto expect = std::make_unique<AndBlueprint>(); addLeafs(*expect, {1,2,3}); - expect->addChild(addLeafsWithSourceId(std::make_unique<SourceBlenderBlueprint>(f.selector_2), {{10, 1}, {20, 2}})); auto blender = std::make_unique<SourceBlenderBlueprint>(f.selector_1); blender->addChild(addLeafsWithSourceId(3, std::make_unique<AndBlueprint>(), {{30, 3}, {300, 3}})); @@ -622,6 +623,8 @@ TEST_F("test SourceBlender below AND partial optimization", SourceBlenderTestFix blender->addChild(addLeafsWithSourceId(1, std::make_unique<AndBlueprint>(), {{10, 1}, {100, 1}, {1000, 1}})); expect->addChild(std::move(blender)); + expect->addChild(addLeafsWithSourceId(std::make_unique<SourceBlenderBlueprint>(f.selector_2), {{10, 1}, {20, 2}})); + optimize_and_compare(std::move(top), std::move(expect)); } @@ -1402,7 +1405,7 @@ TEST("cost for ANDNOT") { TEST("cost for SB") { InvalidSelector sel; - verify_cost(make::SB(sel), 1.3, 1.3); // max + verify_cost(make::SB(sel), 1.3+1.0, 1.3+(1.0-0.8*0.7*0.5)); // max, non_strict+1.0, strict+est } TEST("cost for NEAR") { diff --git a/searchlib/src/tests/queryeval/flow/queryeval_flow_test.cpp b/searchlib/src/tests/queryeval/flow/queryeval_flow_test.cpp index 57fddb0a819..d6008136d73 100644 --- a/searchlib/src/tests/queryeval/flow/queryeval_flow_test.cpp +++ b/searchlib/src/tests/queryeval/flow/queryeval_flow_test.cpp @@ -1,6 +1,7 @@ // Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. #include <vespa/searchlib/queryeval/flow.h> +#include <vespa/searchlib/queryeval/flow_tuning.h> #include <vespa/vespalib/gtest/gtest.h> #include <vector> #include <random> @@ -349,6 +350,35 @@ TEST(FlowTest, blender_flow_cost_accumulation_is_max) { } } +double my_non_strict_cost(double est, double adjust) { + return (1.0/adjust) * flow::forced_strict_cost(FlowStats(est, 0.0, est), adjust); +} + +TEST(FlowTest, non_strict_btree_cost) { + for (double est: {0.001, 0.01, 0.1, 0.2, 0.3, 0.5, 0.75, 1.0}) { + auto prev = FlowStats(est, 1.0, est); + auto base = FlowStats(est, flow::non_strict_cost_of_strict_iterator(est, est), est); + auto opt05 = FlowStats(est, my_non_strict_cost(est, 0.5), est); + auto opt02 = FlowStats(est, my_non_strict_cost(est, 0.2), est); + auto opt01 = FlowStats(est, my_non_strict_cost(est, 0.1), est); + auto opt005 = FlowStats(est, my_non_strict_cost(est, 0.05), est); + auto opt003 = FlowStats(est, my_non_strict_cost(est, 0.03), est); + EXPECT_NEAR(strict_crossover(opt05), 0.5, 1e-6); + EXPECT_NEAR(strict_crossover(opt02), 0.2, 1e-6); + EXPECT_NEAR(strict_crossover(opt01), 0.1, 1e-6); + EXPECT_NEAR(strict_crossover(opt005), 0.05, 1e-6); + EXPECT_NEAR(strict_crossover(opt003), 0.03, 1e-6); + fprintf(stderr, "est: %5.3f\n", est); + fprintf(stderr, " prev crossover: %6.4f (cost: %6.4f)\n", strict_crossover(prev), prev.cost); + fprintf(stderr, " base crossover: %6.4f (cost: %6.4f)\n", strict_crossover(base), base.cost); + fprintf(stderr, " 0.5 crossover: %6.4f (cost: %6.4f)\n", strict_crossover(opt05), opt05.cost); + fprintf(stderr, " 0.2 crossover: %6.4f (cost: %6.4f)\n", strict_crossover(opt02), opt02.cost); + fprintf(stderr, " 0.1 crossover: %6.4f (cost: %6.4f)\n", strict_crossover(opt01), opt01.cost); + fprintf(stderr, " 0.05 crossover: %6.4f (cost: %6.4f)\n", strict_crossover(opt005), opt005.cost); + fprintf(stderr, " 0.03 crossover: %6.4f (cost: %6.4f)\n", strict_crossover(opt003), opt003.cost); + } +} + TEST(FlowTest, optimal_and_flow) { for (size_t i = 0; i < loop_cnt; ++i) { for (bool strict: {false, true}) { diff --git a/searchlib/src/vespa/searchlib/queryeval/flow_tuning.h b/searchlib/src/vespa/searchlib/queryeval/flow_tuning.h index cf1d1a8c09f..22faa920bc0 100644 --- a/searchlib/src/vespa/searchlib/queryeval/flow_tuning.h +++ b/searchlib/src/vespa/searchlib/queryeval/flow_tuning.h @@ -90,7 +90,7 @@ inline double lookup_strict_cost(size_t num_indirections) { * as the latency (time) penalty is higher if choosing wrong. */ inline double non_strict_cost_of_strict_iterator(double estimate, double strict_cost) { - return strict_cost + strict_cost_diff(estimate, 1.0); + return 2.0 * (strict_cost + strict_cost_diff(estimate, 0.5)); } // Strict cost of matching in a btree posting list (e.g. fast-search attribute or memory index field). diff --git a/searchlib/src/vespa/searchlib/queryeval/intermediate_blueprints.cpp b/searchlib/src/vespa/searchlib/queryeval/intermediate_blueprints.cpp index 99f7604e1a3..b0b3b302e82 100644 --- a/searchlib/src/vespa/searchlib/queryeval/intermediate_blueprints.cpp +++ b/searchlib/src/vespa/searchlib/queryeval/intermediate_blueprints.cpp @@ -758,7 +758,18 @@ SourceBlenderBlueprint::calculate_flow_stats(uint32_t) const { my_cost = std::max(my_cost, child->cost()); my_strict_cost = std::max(my_strict_cost, child->strict_cost()); } - return {OrFlow::estimate_of(get_children()), my_cost, my_strict_cost}; + double my_est = OrFlow::estimate_of(get_children()); + return {my_est, my_cost + 1.0, my_strict_cost + my_est}; +} + +double +SourceBlenderBlueprint::estimate_self_cost(InFlow in_flow) const noexcept +{ + if (in_flow.strict()) { + return estimate(); + } else { + return in_flow.rate(); + } } Blueprint::HitEstimate diff --git a/searchlib/src/vespa/searchlib/queryeval/intermediate_blueprints.h b/searchlib/src/vespa/searchlib/queryeval/intermediate_blueprints.h index 7f4796c5f43..f7eeace3e8b 100644 --- a/searchlib/src/vespa/searchlib/queryeval/intermediate_blueprints.h +++ b/searchlib/src/vespa/searchlib/queryeval/intermediate_blueprints.h @@ -201,6 +201,7 @@ public: explicit SourceBlenderBlueprint(const ISourceSelector &selector) noexcept; ~SourceBlenderBlueprint() override; FlowStats calculate_flow_stats(uint32_t docid_limit) const final; + double estimate_self_cost(InFlow in_flow) const noexcept override; HitEstimate combine(const std::vector<HitEstimate> &data) const override; FieldSpecBaseList exposeFields() const override; void sort(Children &children, InFlow in_flow) const override; diff --git a/searchlib/src/vespa/searchlib/queryeval/leaf_blueprints.cpp b/searchlib/src/vespa/searchlib/queryeval/leaf_blueprints.cpp index d825c9e1a20..9d19ba87af7 100644 --- a/searchlib/src/vespa/searchlib/queryeval/leaf_blueprints.cpp +++ b/searchlib/src/vespa/searchlib/queryeval/leaf_blueprints.cpp @@ -11,9 +11,9 @@ namespace search::queryeval { //----------------------------------------------------------------------------- FlowStats -EmptyBlueprint::calculate_flow_stats(uint32_t docid_limit) const +EmptyBlueprint::calculate_flow_stats(uint32_t) const { - return default_flow_stats(docid_limit, 0, 0); + return {0.0, 0.2, 0.0}; } SearchIterator::UP |