From 25e5bf351de0c227c35a8451d5da7e475f2e6f70 Mon Sep 17 00:00:00 2001 From: Tor Egge Date: Thu, 23 Sep 2021 11:39:02 +0200 Subject: Add estimate moved docs ratio unit test. --- .../vespa_redistribute_bm.cpp | 94 +++++----------------- 1 file changed, 19 insertions(+), 75 deletions(-) (limited to 'searchcore/src/apps/vespa-redistribute-bm/vespa_redistribute_bm.cpp') diff --git a/searchcore/src/apps/vespa-redistribute-bm/vespa_redistribute_bm.cpp b/searchcore/src/apps/vespa-redistribute-bm/vespa_redistribute_bm.cpp index e5c3959d2d4..d22754736ce 100644 --- a/searchcore/src/apps/vespa-redistribute-bm/vespa_redistribute_bm.cpp +++ b/searchcore/src/apps/vespa-redistribute-bm/vespa_redistribute_bm.cpp @@ -17,6 +17,8 @@ #include #include #include +#include +#include #include #include #include @@ -51,6 +53,8 @@ using search::bmcluster::BmNode; using search::bmcluster::BmNodeStatsReporter; using search::bmcluster::BmRange; using search::bmcluster::BucketSelector; +using search::bmcluster::CalculateMovedDocsRatio; +using search::bmcluster::EstimateMovedDocsRatio; using search::index::DummyFileHeaderContext; using storage::lib::State; @@ -100,76 +104,6 @@ vespalib::string& get_mode_name(Mode mode) { return (i < mode_names.size()) ? mode_names[i] : bad_mode_name; } -double -estimate_lost_docs_base_ratio(uint32_t redundancy, uint32_t lost_nodes, uint32_t num_nodes) -{ - if (redundancy > lost_nodes) { - return 0.0; - } - double loss_ratio = 1.0; - for (uint32_t i = 0; i < redundancy; ++i) { - loss_ratio *= ((double) (lost_nodes - i)) / (num_nodes - i); - } - LOG(info, "estimated lost docs base ratio: %4.2f", loss_ratio); - return loss_ratio; -} - -double -estimate_moved_docs_ratio_grow(uint32_t redundancy, uint32_t added_nodes, uint32_t num_nodes) -{ - double new_redundancy = redundancy; - double new_per_node_doc_ratio = new_redundancy / num_nodes; - double moved_ratio = new_per_node_doc_ratio * added_nodes; - LOG(info, "estimated_moved_docs_ratio_grow(%u,%u,%u)=%4.2f", redundancy, added_nodes, num_nodes, moved_ratio); - return moved_ratio; -} - -double -estimate_moved_docs_ratio_shrink(uint32_t redundancy, uint32_t retired_nodes, uint32_t num_nodes) -{ - double old_redundancy = redundancy; - double old_per_node_doc_ratio = old_redundancy / num_nodes; - uint32_t new_nodes = num_nodes - retired_nodes; - double new_redundancy = std::min(redundancy, new_nodes); - double new_per_node_doc_ratio = new_redundancy / new_nodes; - double moved_ratio = (new_per_node_doc_ratio - old_per_node_doc_ratio) * new_nodes; - LOG(info, "estimated_moved_docs_ratio_shrink(%u,%u,%u)=%4.2f", redundancy, retired_nodes, num_nodes, moved_ratio); - return moved_ratio; -} - -double -estimate_moved_docs_ratio_crash(uint32_t redundancy, uint32_t crashed_nodes, uint32_t num_nodes) -{ - double old_redundancy = redundancy; - double old_per_node_doc_ratio = old_redundancy / num_nodes; - uint32_t new_nodes = num_nodes - crashed_nodes; - double new_redundancy = std::min(redundancy, new_nodes); - double new_per_node_doc_ratio = new_redundancy / new_nodes; - double lost_docs_ratio = estimate_lost_docs_base_ratio(redundancy, crashed_nodes, num_nodes) * new_redundancy; - double moved_ratio = (new_per_node_doc_ratio - old_per_node_doc_ratio) * new_nodes - lost_docs_ratio; - LOG(info, "estimated_moved_docs_ratio_crash(%u,%u,%u)=%4.2f", redundancy, crashed_nodes, num_nodes, moved_ratio); - return moved_ratio; -} - -double -estimate_moved_docs_ratio_replace(uint32_t redundancy, uint32_t added_nodes, uint32_t retired_nodes, uint32_t num_nodes) -{ - uint32_t old_nodes = num_nodes - added_nodes; - double old_redundancy = std::min(redundancy, old_nodes); - double old_per_node_doc_ratio = old_redundancy / old_nodes; - uint32_t new_nodes = num_nodes - retired_nodes; - double new_redundancy = std::min(redundancy, new_nodes); - double new_per_node_doc_ratio = new_redundancy / new_nodes; - double moved_ratio = new_per_node_doc_ratio * added_nodes; - uint32_t stable_nodes = num_nodes - added_nodes - retired_nodes; - // Account for extra documents moved from retired nodes to stable nodes - double extra_per_stable_node_doc_ratio = new_per_node_doc_ratio * added_nodes / old_nodes; - double extra_moved_ratio = (std::min(1.0, new_per_node_doc_ratio + extra_per_stable_node_doc_ratio) - old_per_node_doc_ratio) * stable_nodes; - moved_ratio += extra_moved_ratio; - LOG(info, "estimated_moved_docs_ratio_replace(%u,%u,%u,%u)=%4.2f, (of which %4.2f extra)", redundancy, added_nodes, retired_nodes, num_nodes, moved_ratio, extra_moved_ratio); - return moved_ratio; -} - class BMParams : public BmClusterParams, public BmFeedParams { @@ -391,7 +325,7 @@ Benchmark::estimate_lost_docs() case Mode::TEMP_CRASH: { double new_redundancy = std::min(_params.get_redundancy(), _params.get_num_nodes() - _params.get_flip_nodes()); - auto lost_docs_ratio = estimate_lost_docs_base_ratio(_params.get_redundancy(), _params.get_flip_nodes(), _params.get_num_nodes()) * new_redundancy; + auto lost_docs_ratio = EstimateMovedDocsRatio().estimate_lost_docs_base_ratio(_params.get_redundancy(), _params.get_flip_nodes(), _params.get_num_nodes()) * new_redundancy; return _params.get_documents() * lost_docs_ratio; } default: @@ -404,14 +338,24 @@ Benchmark::estimate_moved_docs() { switch(_params.get_mode()) { case Mode::GROW: - return _params.get_documents() * estimate_moved_docs_ratio_grow(_params.get_redundancy(), _params.get_flip_nodes(), _params.get_num_nodes()); + return _params.get_documents() * EstimateMovedDocsRatio().estimate_moved_docs_ratio_grow(_params.get_redundancy(), _params.get_flip_nodes(), _params.get_num_nodes()); case Mode::SHRINK: - return _params.get_documents() * estimate_moved_docs_ratio_shrink(_params.get_redundancy(), _params.get_flip_nodes(), _params.get_num_nodes()); + return _params.get_documents() * EstimateMovedDocsRatio().estimate_moved_docs_ratio_shrink(_params.get_redundancy(), _params.get_flip_nodes(), _params.get_num_nodes()); case Mode::PERM_CRASH: case Mode::TEMP_CRASH: - return _params.get_documents() * estimate_moved_docs_ratio_crash(_params.get_redundancy(), _params.get_flip_nodes(), _params.get_num_nodes()); + return _params.get_documents() * EstimateMovedDocsRatio().estimate_moved_docs_ratio_crash(_params.get_redundancy(), _params.get_flip_nodes(), _params.get_num_nodes()); case Mode::REPLACE: - return _params.get_documents() * estimate_moved_docs_ratio_replace(_params.get_redundancy(), _params.get_flip_nodes(), _params.get_flip_nodes(), _params.get_num_nodes()); + if (_params.get_num_nodes() < 10) { + // Calculate better estimate for moved docs ratio with brute force + uint32_t old_placement_mask = (1u << _params.get_num_nodes()) - (1u << _params.get_flip_nodes()); + uint32_t new_placement_mask = (1u << _params.get_num_nodes()) - (((1u << _params.get_flip_nodes()) - 1) << _params.get_flip_nodes()) - 1; + uint32_t new_up_mask = (1u << _params.get_num_nodes()) - 1u; + CalculateMovedDocsRatio scanner(_params.get_num_nodes(), _params.get_redundancy(), old_placement_mask, new_placement_mask, new_up_mask); + scanner.scan(); + return _params.get_documents() * scanner.get_moved_docs_ratio(); + } else { + return _params.get_documents() * EstimateMovedDocsRatio().estimate_moved_docs_ratio_replace(_params.get_redundancy(), _params.get_flip_nodes(), _params.get_flip_nodes(), _params.get_num_nodes()); + } default: return 0.0; } -- cgit v1.2.3 From ede35f912e2d2505cef07d8a580f2b1d9abd5abf Mon Sep 17 00:00:00 2001 From: Tor Egge Date: Thu, 23 Sep 2021 14:58:14 +0200 Subject: Use static helper methods to instantiate CalculateMovedDocsRatio. --- .../vespa_redistribute_bm.cpp | 5 +-- .../estimate_moved_docs_ratio_test.cpp | 24 +++--------- .../bmcluster/calculate_moved_docs_ratio.cpp | 43 ++++++++++++++++++++++ .../bmcluster/calculate_moved_docs_ratio.h | 4 ++ 4 files changed, 53 insertions(+), 23 deletions(-) (limited to 'searchcore/src/apps/vespa-redistribute-bm/vespa_redistribute_bm.cpp') diff --git a/searchcore/src/apps/vespa-redistribute-bm/vespa_redistribute_bm.cpp b/searchcore/src/apps/vespa-redistribute-bm/vespa_redistribute_bm.cpp index d22754736ce..0227d9539d2 100644 --- a/searchcore/src/apps/vespa-redistribute-bm/vespa_redistribute_bm.cpp +++ b/searchcore/src/apps/vespa-redistribute-bm/vespa_redistribute_bm.cpp @@ -347,10 +347,7 @@ Benchmark::estimate_moved_docs() case Mode::REPLACE: if (_params.get_num_nodes() < 10) { // Calculate better estimate for moved docs ratio with brute force - uint32_t old_placement_mask = (1u << _params.get_num_nodes()) - (1u << _params.get_flip_nodes()); - uint32_t new_placement_mask = (1u << _params.get_num_nodes()) - (((1u << _params.get_flip_nodes()) - 1) << _params.get_flip_nodes()) - 1; - uint32_t new_up_mask = (1u << _params.get_num_nodes()) - 1u; - CalculateMovedDocsRatio scanner(_params.get_num_nodes(), _params.get_redundancy(), old_placement_mask, new_placement_mask, new_up_mask); + auto scanner = CalculateMovedDocsRatio::make_replace_calculator(_params.get_redundancy(), _params.get_flip_nodes(), _params.get_flip_nodes(), _params.get_num_nodes()); scanner.scan(); return _params.get_documents() * scanner.get_moved_docs_ratio(); } else { diff --git a/searchcore/src/tests/bmcluster/estimate_moved_docs_ratio/estimate_moved_docs_ratio_test.cpp b/searchcore/src/tests/bmcluster/estimate_moved_docs_ratio/estimate_moved_docs_ratio_test.cpp index 94cc04ab09a..79af31e3247 100644 --- a/searchcore/src/tests/bmcluster/estimate_moved_docs_ratio/estimate_moved_docs_ratio_test.cpp +++ b/searchcore/src/tests/bmcluster/estimate_moved_docs_ratio/estimate_moved_docs_ratio_test.cpp @@ -20,9 +20,7 @@ TEST(EstimateMovedDocsRatioTest, estimate_lost_docs_ratio) for (uint32_t nodes = 1; nodes < 2; ++nodes) { for (uint32_t redundancy = 1; redundancy <= nodes; ++redundancy) { for (uint32_t lost_nodes = 0; lost_nodes <= nodes; ++lost_nodes) { - uint32_t old_placement_mask = (1u << nodes) - 1u; - uint32_t new_placement_mask = (1u << nodes) - (1u << lost_nodes); - CalculateMovedDocsRatio scanner(nodes, redundancy, old_placement_mask, new_placement_mask, new_placement_mask); + auto scanner = CalculateMovedDocsRatio::make_crash_calculator(redundancy, lost_nodes, nodes); scanner.scan(); double lost_docs_base_ratio = scanner.get_lost_docs_base_ratio(); double estimated_lost_docs_base_ratio = EstimateMovedDocsRatio().estimate_lost_docs_base_ratio(redundancy, lost_nodes, nodes); @@ -37,9 +35,7 @@ TEST(EstimateMovedDocsRatioTest, estimate_moved_docs_ratio_grow) for (uint32_t nodes = 1; nodes < 10; ++nodes) { for (uint32_t redundancy = 1; redundancy <= nodes; ++redundancy) { for (uint32_t added_nodes = 0; added_nodes <= nodes; ++added_nodes) { - uint32_t old_placement_mask = (1u << nodes) - (1u << added_nodes); - uint32_t new_placement_mask = (1u << nodes) - 1; - CalculateMovedDocsRatio scanner(nodes, redundancy, old_placement_mask, new_placement_mask, new_placement_mask); + auto scanner = CalculateMovedDocsRatio::make_grow_calculator(redundancy, added_nodes, nodes); scanner.scan(); double moved_docs_ratio = scanner.get_moved_docs_ratio(); double estimated_moved_docs_ratio = EstimateMovedDocsRatio().estimate_moved_docs_ratio_grow(redundancy, added_nodes, nodes); @@ -54,9 +50,7 @@ TEST(EstimateMovedDocsRatioTest, estimate_moved_docs_ratio_shrink) for (uint32_t nodes = 1; nodes < 10; ++nodes) { for (uint32_t redundancy = 1; redundancy <= nodes; ++redundancy) { for (uint32_t retired_nodes = 0; retired_nodes <= nodes; ++retired_nodes) { - uint32_t old_placement_mask = (1u << nodes) - 1; - uint32_t new_placement_mask = (1u << nodes) - (1u << retired_nodes); - CalculateMovedDocsRatio scanner(nodes, redundancy, old_placement_mask, new_placement_mask, old_placement_mask); + auto scanner = CalculateMovedDocsRatio::make_shrink_calculator(redundancy, retired_nodes, nodes); scanner.scan(); double moved_docs_ratio = scanner.get_moved_docs_ratio(); double estimated_moved_docs_ratio = EstimateMovedDocsRatio().estimate_moved_docs_ratio_shrink(redundancy, retired_nodes, nodes); @@ -72,9 +66,7 @@ TEST(EstimateMovedDocsRatioTest, estimate_moved_docs_ratio_crash) for (uint32_t nodes = 1; nodes < 10; ++nodes) { for (uint32_t redundancy = 1; redundancy <= nodes; ++redundancy) { for (uint32_t crashed_nodes = 0; crashed_nodes <= nodes; ++crashed_nodes) { - uint32_t old_placement_mask = (1u << nodes) - 1; - uint32_t new_placement_mask = (1u << nodes) - (1u << crashed_nodes); - CalculateMovedDocsRatio scanner(nodes, redundancy, old_placement_mask, new_placement_mask, new_placement_mask); + auto scanner = CalculateMovedDocsRatio::make_crash_calculator(redundancy, crashed_nodes, nodes); scanner.scan(); double moved_docs_ratio = scanner.get_moved_docs_ratio(); double estimated_moved_docs_ratio = EstimateMovedDocsRatio().estimate_moved_docs_ratio_crash(redundancy, crashed_nodes, nodes); @@ -96,13 +88,7 @@ TEST(EstimateMovedDocsRatioTest, estimate_moved_docs_ratio_replace) for (uint32_t retired_nodes = 0; retired_nodes <= nodes; ++retired_nodes) { for (uint32_t added_nodes = 0; added_nodes <= nodes - retired_nodes; ++added_nodes) { // std::cout << "Estimate moved docs ratio replace " << retired_nodes << " of " << nodes << " retired, added " << added_nodes << " nodes ,redundancy " << redundancy << std::endl; - uint32_t old_placement_mask = (1u << nodes) - (1u << added_nodes); - uint32_t new_placement_mask = (1u << nodes) - (((1u << retired_nodes) - 1) << added_nodes) - 1; - uint32_t new_up_mask = (1u << nodes) - 1u; - if (verbose) { - std::cout << "0x" << std::hex << old_placement_mask << ", 0x" << std::hex << new_placement_mask << ", 0x" << new_up_mask << std::dec << std::endl; - } - CalculateMovedDocsRatio scanner(nodes, redundancy, old_placement_mask, new_placement_mask, new_up_mask); + auto scanner = CalculateMovedDocsRatio::make_replace_calculator(redundancy, added_nodes, retired_nodes, nodes); scanner.scan(); double moved_docs_ratio = scanner.get_moved_docs_ratio(); double estimated_moved_docs_ratio = EstimateMovedDocsRatio(verbose).estimate_moved_docs_ratio_replace(redundancy, added_nodes, retired_nodes, nodes); diff --git a/searchcore/src/vespa/searchcore/bmcluster/calculate_moved_docs_ratio.cpp b/searchcore/src/vespa/searchcore/bmcluster/calculate_moved_docs_ratio.cpp index 6f6e6143814..63f23476a95 100644 --- a/searchcore/src/vespa/searchcore/bmcluster/calculate_moved_docs_ratio.cpp +++ b/searchcore/src/vespa/searchcore/bmcluster/calculate_moved_docs_ratio.cpp @@ -5,6 +5,15 @@ namespace search::bmcluster { +namespace { + +uint32_t make_bit_range(uint32_t low, uint32_t high) +{ + return (1u << high) - (1u << low); +} + +} + struct CalculateMovedDocsRatio::Placements { uint32_t _mask; @@ -57,6 +66,40 @@ CalculateMovedDocsRatio::CalculateMovedDocsRatio(uint32_t nodes, uint32_t redund CalculateMovedDocsRatio::~CalculateMovedDocsRatio() = default; +CalculateMovedDocsRatio +CalculateMovedDocsRatio::make_grow_calculator(uint32_t redundancy, uint32_t added_nodes, uint32_t nodes) +{ + uint32_t old_placement_mask = make_bit_range(added_nodes, nodes); + uint32_t new_placement_mask = make_bit_range(0, nodes); + return CalculateMovedDocsRatio(nodes, redundancy, old_placement_mask, new_placement_mask, new_placement_mask); +} + +CalculateMovedDocsRatio +CalculateMovedDocsRatio::make_shrink_calculator(uint32_t redundancy, uint32_t retired_nodes, uint32_t nodes) +{ + uint32_t old_placement_mask = make_bit_range(0, nodes); + uint32_t new_placement_mask = make_bit_range(retired_nodes, nodes); + return CalculateMovedDocsRatio(nodes, redundancy, old_placement_mask, new_placement_mask, old_placement_mask); +} + +CalculateMovedDocsRatio +CalculateMovedDocsRatio::make_crash_calculator(uint32_t redundancy, uint32_t crashed_nodes, uint32_t nodes) +{ + uint32_t old_placement_mask = make_bit_range(0, nodes); + uint32_t new_placement_mask = make_bit_range(crashed_nodes, nodes); + return CalculateMovedDocsRatio(nodes, redundancy, old_placement_mask, new_placement_mask, new_placement_mask); + +} + +CalculateMovedDocsRatio +CalculateMovedDocsRatio::make_replace_calculator(uint32_t redundancy, uint32_t added_nodes, uint32_t retired_nodes, uint32_t nodes) +{ + uint32_t old_placement_mask = make_bit_range(added_nodes, nodes); + uint32_t new_placement_mask = make_bit_range(added_nodes + retired_nodes, nodes) | make_bit_range(0, added_nodes); + uint32_t new_up_mask = make_bit_range(0, nodes); + return CalculateMovedDocsRatio(nodes, redundancy, old_placement_mask, new_placement_mask, new_up_mask); +} + void CalculateMovedDocsRatio::scan(Placements selected, Placements old_placement, Placements new_placement) { diff --git a/searchcore/src/vespa/searchcore/bmcluster/calculate_moved_docs_ratio.h b/searchcore/src/vespa/searchcore/bmcluster/calculate_moved_docs_ratio.h index 65907e6eae3..cd161177246 100644 --- a/searchcore/src/vespa/searchcore/bmcluster/calculate_moved_docs_ratio.h +++ b/searchcore/src/vespa/searchcore/bmcluster/calculate_moved_docs_ratio.h @@ -31,6 +31,10 @@ class CalculateMovedDocsRatio public: CalculateMovedDocsRatio(uint32_t nodes, uint32_t redundancy, uint32_t old_placement_mask, uint32_t new_placement_mask, uint32_t new_up_mask); ~CalculateMovedDocsRatio(); + static CalculateMovedDocsRatio make_grow_calculator(uint32_t redundancy, uint32_t added_nodes, uint32_t nodes); + static CalculateMovedDocsRatio make_shrink_calculator(uint32_t redundancy, uint32_t retired_nodes, uint32_t nodes); + static CalculateMovedDocsRatio make_crash_calculator(uint32_t redundancy, uint32_t crashed_nodes, uint32_t nodes); + static CalculateMovedDocsRatio make_replace_calculator(uint32_t redundancy, uint32_t added_nodes, uint32_t retired_nodes, uint32_t nodes); void scan(); uint32_t get_lost_docs_base() const noexcept { return _lost_docs_base; } uint32_t get_checked_states() const noexcept { return _checked_states; } -- cgit v1.2.3