diff options
Diffstat (limited to 'searchcore')
9 files changed, 2 insertions, 527 deletions
diff --git a/searchcore/CMakeLists.txt b/searchcore/CMakeLists.txt index 36ef347b7ac..c0353747e80 100644 --- a/searchcore/CMakeLists.txt +++ b/searchcore/CMakeLists.txt @@ -54,7 +54,6 @@ vespa_define_module( src/apps/vespa-transactionlog-inspect TESTS - src/tests/bmcluster/estimate_moved_docs_ratio src/tests/grouping src/tests/proton/attribute src/tests/proton/attribute/attribute_aspect_delayer diff --git a/searchcore/src/apps/vespa-redistribute-bm/vespa_redistribute_bm.cpp b/searchcore/src/apps/vespa-redistribute-bm/vespa_redistribute_bm.cpp index 43346c366c0..d9c2c1eda3e 100644 --- a/searchcore/src/apps/vespa-redistribute-bm/vespa_redistribute_bm.cpp +++ b/searchcore/src/apps/vespa-redistribute-bm/vespa_redistribute_bm.cpp @@ -18,8 +18,6 @@ #include <vespa/searchcore/bmcluster/bm_range.h> #include <vespa/searchcore/bmcluster/bucket_db_snapshot_vector.h> #include <vespa/searchcore/bmcluster/bucket_selector.h> -#include <vespa/searchcore/bmcluster/calculate_moved_docs_ratio.h> -#include <vespa/searchcore/bmcluster/estimate_moved_docs_ratio.h> #include <vespa/searchcore/bmcluster/spi_bm_feed_handler.h> #include <vespa/searchlib/index/dummyfileheadercontext.h> #include <vespa/vespalib/io/fileutil.h> @@ -54,8 +52,6 @@ using search::bmcluster::BmNode; using search::bmcluster::BmNodeStatsReporter; using search::bmcluster::BmRange; using search::bmcluster::BucketSelector; -using search::bmcluster::CalculateMovedDocsRatio; -using search::bmcluster::EstimateMovedDocsRatio; using search::index::DummyFileHeaderContext; using storage::lib::State; @@ -179,8 +175,6 @@ class Benchmark { void adjust_cluster_state_before_feed(); void adjust_cluster_state_after_feed(); void adjust_cluster_state_after_first_redistribution(); - double estimate_lost_unique_docs(); - double estimate_moved_docs(); void feed(); std::chrono::duration<double> redistribute(); @@ -318,46 +312,6 @@ Benchmark::redistribute() return reporter.get_change_time() - before; } -double -Benchmark::estimate_lost_unique_docs() -{ - switch (_params.get_mode()) { - case Mode::PERM_CRASH: - case Mode::TEMP_CRASH: - { - auto lost_docs_ratio = EstimateMovedDocsRatio().estimate_lost_docs_base_ratio(_params.get_redundancy(), _params.get_flip_nodes(), _params.get_num_nodes()); - return _params.get_documents() * lost_docs_ratio; - } - default: - return 0.0; - } -} - -double -Benchmark::estimate_moved_docs() -{ - switch(_params.get_mode()) { - case Mode::GROW: - return _params.get_documents() * EstimateMovedDocsRatio().estimate_moved_docs_ratio_grow(_params.get_redundancy(), _params.get_flip_nodes(), _params.get_num_nodes()); - case Mode::SHRINK: - return _params.get_documents() * EstimateMovedDocsRatio().estimate_moved_docs_ratio_shrink(_params.get_redundancy(), _params.get_flip_nodes(), _params.get_num_nodes()); - case Mode::PERM_CRASH: - case Mode::TEMP_CRASH: - return _params.get_documents() * EstimateMovedDocsRatio().estimate_moved_docs_ratio_crash(_params.get_redundancy(), _params.get_flip_nodes(), _params.get_num_nodes()); - case Mode::REPLACE: - if (_params.get_num_nodes() < 10) { - // Calculate better estimate for moved docs ratio with brute force - auto scanner = CalculateMovedDocsRatio::make_replace_calculator(_params.get_redundancy(), _params.get_flip_nodes(), _params.get_flip_nodes(), _params.get_num_nodes()); - scanner.scan(); - return _params.get_documents() * scanner.get_moved_docs_ratio(); - } else { - return _params.get_documents() * EstimateMovedDocsRatio().estimate_moved_docs_ratio_replace(_params.get_redundancy(), _params.get_flip_nodes(), _params.get_flip_nodes(), _params.get_num_nodes()); - } - default: - return 0.0; - } -} - void Benchmark::run() { @@ -368,12 +322,10 @@ Benchmark::run() auto old_snapshot = _cluster->get_bucket_db_snapshots(); adjust_cluster_state_after_feed(); auto elapsed = redistribute(); - double estimated_moved_docs = estimate_moved_docs(); - double estimated_lost_unique_docs = estimate_lost_unique_docs(); auto new_snapshot = _cluster->get_bucket_db_snapshots(); uint32_t moved_docs = new_snapshot.count_moved_documents(old_snapshot); uint32_t lost_unique_docs = new_snapshot.count_lost_unique_documents(old_snapshot); - LOG(info, "Redistributed (estimated %4.2f) %u docs in %5.3f seconds, %4.2f docs/s, (estimated %4.2f) %u lost unique docs", estimated_moved_docs, moved_docs, elapsed.count(), moved_docs / elapsed.count(), estimated_lost_unique_docs, lost_unique_docs); + LOG(info, "Redistributed %u docs in %5.3f seconds, %4.2f docs/s, %u lost unique docs", moved_docs, elapsed.count(), moved_docs / elapsed.count(), lost_unique_docs); if (_params.get_mode() == Mode::TEMP_CRASH) { if (_params.get_use_feed_settle()) { LOG(info, "Settling redistribution"); @@ -381,7 +333,7 @@ Benchmark::run() } adjust_cluster_state_after_first_redistribution(); elapsed = redistribute(); - LOG(info, "Cleanup of (estimated %4.2f) %u docs in %5.3f seconds, %4.2f docs/s, (estimated %4.2f) %u refound unique docs", estimated_moved_docs, moved_docs, elapsed.count(), moved_docs / elapsed.count(), estimated_lost_unique_docs, lost_unique_docs); + LOG(info, "Cleanup of %u docs in %5.3f seconds, %4.2f docs/s, %u refound unique docs", moved_docs, elapsed.count(), moved_docs / elapsed.count(), lost_unique_docs); } _cluster->stop(); } diff --git a/searchcore/src/tests/bmcluster/estimate_moved_docs_ratio/CMakeLists.txt b/searchcore/src/tests/bmcluster/estimate_moved_docs_ratio/CMakeLists.txt deleted file mode 100644 index 23f72e4f1fa..00000000000 --- a/searchcore/src/tests/bmcluster/estimate_moved_docs_ratio/CMakeLists.txt +++ /dev/null @@ -1,11 +0,0 @@ -# Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -vespa_add_executable(searchcore_bmcluster_estimate_moved_docs_ratio_test_app TEST - SOURCES - estimate_moved_docs_ratio_test.cpp - DEPENDS - searchcore_bmcluster - GTest::GTest -) - -vespa_add_test(NAME searchcore_bmcluster_estimate_moved_docs_ratio_test_app - COMMAND searchcore_bmcluster_estimate_moved_docs_ratio_test_app) diff --git a/searchcore/src/tests/bmcluster/estimate_moved_docs_ratio/estimate_moved_docs_ratio_test.cpp b/searchcore/src/tests/bmcluster/estimate_moved_docs_ratio/estimate_moved_docs_ratio_test.cpp deleted file mode 100644 index 79af31e3247..00000000000 --- a/searchcore/src/tests/bmcluster/estimate_moved_docs_ratio/estimate_moved_docs_ratio_test.cpp +++ /dev/null @@ -1,134 +0,0 @@ -// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. - -#include <vespa/vespalib/gtest/gtest.h> -#include <vespa/searchcore/bmcluster/calculate_moved_docs_ratio.h> -#include <vespa/searchcore/bmcluster/estimate_moved_docs_ratio.h> -#include <iostream> - -#include <vespa/log/log.h> -LOG_SETUP("estimate_moved_docs_ratio_test"); - -using search::bmcluster::CalculateMovedDocsRatio; -using search::bmcluster::EstimateMovedDocsRatio; - -namespace { - -bool verbose; - -TEST(EstimateMovedDocsRatioTest, estimate_lost_docs_ratio) -{ - for (uint32_t nodes = 1; nodes < 2; ++nodes) { - for (uint32_t redundancy = 1; redundancy <= nodes; ++redundancy) { - for (uint32_t lost_nodes = 0; lost_nodes <= nodes; ++lost_nodes) { - auto scanner = CalculateMovedDocsRatio::make_crash_calculator(redundancy, lost_nodes, nodes); - scanner.scan(); - double lost_docs_base_ratio = scanner.get_lost_docs_base_ratio(); - double estimated_lost_docs_base_ratio = EstimateMovedDocsRatio().estimate_lost_docs_base_ratio(redundancy, lost_nodes, nodes); - EXPECT_DOUBLE_EQ(lost_docs_base_ratio, estimated_lost_docs_base_ratio); - } - } - } -} - -TEST(EstimateMovedDocsRatioTest, estimate_moved_docs_ratio_grow) -{ - for (uint32_t nodes = 1; nodes < 10; ++nodes) { - for (uint32_t redundancy = 1; redundancy <= nodes; ++redundancy) { - for (uint32_t added_nodes = 0; added_nodes <= nodes; ++added_nodes) { - auto scanner = CalculateMovedDocsRatio::make_grow_calculator(redundancy, added_nodes, nodes); - scanner.scan(); - double moved_docs_ratio = scanner.get_moved_docs_ratio(); - double estimated_moved_docs_ratio = EstimateMovedDocsRatio().estimate_moved_docs_ratio_grow(redundancy, added_nodes, nodes); - EXPECT_DOUBLE_EQ(moved_docs_ratio, estimated_moved_docs_ratio); - } - } - } -} - -TEST(EstimateMovedDocsRatioTest, estimate_moved_docs_ratio_shrink) -{ - for (uint32_t nodes = 1; nodes < 10; ++nodes) { - for (uint32_t redundancy = 1; redundancy <= nodes; ++redundancy) { - for (uint32_t retired_nodes = 0; retired_nodes <= nodes; ++retired_nodes) { - auto scanner = CalculateMovedDocsRatio::make_shrink_calculator(redundancy, retired_nodes, nodes); - scanner.scan(); - double moved_docs_ratio = scanner.get_moved_docs_ratio(); - double estimated_moved_docs_ratio = EstimateMovedDocsRatio().estimate_moved_docs_ratio_shrink(redundancy, retired_nodes, nodes); - EXPECT_DOUBLE_EQ(moved_docs_ratio, estimated_moved_docs_ratio); - } - } - } -} - -TEST(EstimateMovedDocsRatioTest, estimate_moved_docs_ratio_crash) -{ - double epsilon = 1e-15; - for (uint32_t nodes = 1; nodes < 10; ++nodes) { - for (uint32_t redundancy = 1; redundancy <= nodes; ++redundancy) { - for (uint32_t crashed_nodes = 0; crashed_nodes <= nodes; ++crashed_nodes) { - auto scanner = CalculateMovedDocsRatio::make_crash_calculator(redundancy, crashed_nodes, nodes); - scanner.scan(); - double moved_docs_ratio = scanner.get_moved_docs_ratio(); - double estimated_moved_docs_ratio = EstimateMovedDocsRatio().estimate_moved_docs_ratio_crash(redundancy, crashed_nodes, nodes); - EXPECT_NEAR(moved_docs_ratio, estimated_moved_docs_ratio, epsilon); - } - } - } -} - -TEST(EstimateMovedDocsRatioTest, estimate_moved_docs_ratio_replace) -{ - uint32_t bad_cases = 0; - uint32_t really_bad_cases = 0; - if (verbose) { - std::cout << "Summary: HDR Red A Ret N Act Est ScaleMv ScaleEs States" << std::endl; - } - for (uint32_t nodes = 1; nodes < 6; ++nodes) { - for (uint32_t redundancy = 1; redundancy <= nodes; ++redundancy) { - for (uint32_t retired_nodes = 0; retired_nodes <= nodes; ++retired_nodes) { - for (uint32_t added_nodes = 0; added_nodes <= nodes - retired_nodes; ++added_nodes) { - // std::cout << "Estimate moved docs ratio replace " << retired_nodes << " of " << nodes << " retired, added " << added_nodes << " nodes ,redundancy " << redundancy << std::endl; - auto scanner = CalculateMovedDocsRatio::make_replace_calculator(redundancy, added_nodes, retired_nodes, nodes); - scanner.scan(); - double moved_docs_ratio = scanner.get_moved_docs_ratio(); - double estimated_moved_docs_ratio = EstimateMovedDocsRatio(verbose).estimate_moved_docs_ratio_replace(redundancy, added_nodes, retired_nodes, nodes); - double error_ratio = abs(moved_docs_ratio - estimated_moved_docs_ratio); - bool bad = error_ratio > 1e-8; - bool really_bad = error_ratio > 0.2 * estimated_moved_docs_ratio + 1e-8; - if (bad) { - ++bad_cases; - } - if (really_bad) { - ++really_bad_cases; - } - if (verbose) { - double scaled_moved = moved_docs_ratio * scanner.get_checked_states(); - double scaled_estimated_moved = estimated_moved_docs_ratio * scanner.get_checked_states(); - std::cout << "Summary: " << (bad ? "BAD" : "OK ") << std::setw(4) << redundancy << std::setw(4) << added_nodes << std::setw(4) << retired_nodes << std::setw(4) << nodes << " " << std::setw(12) << std::setprecision(5) << std::fixed << moved_docs_ratio << " " << std::setw(12) << std::setprecision(5) << std::fixed << estimated_moved_docs_ratio << " " << std::setw(12) << std::setprecision(5) << std::fixed << scaled_moved << " " << std::setw(12) << std::setprecision(5) << std::fixed << scaled_estimated_moved << std::setw(8) << scanner.get_checked_states(); - std::cout << " ["; - for (uint32_t node_idx = 0; node_idx < nodes; ++node_idx) { - std::cout << std::setw(8) << scanner.get_moved_docs_per_node()[node_idx]; - } - std::cout << " ]" << std::endl; - } - // TODO: Fix calculation so we get zero bad cases. - // EXPECT_DOUBLE_EQ(moved_docs_ratio, estimated_moved_docs_ratio); - } - } - } - } - EXPECT_LE(6, bad_cases); - EXPECT_LE(1, really_bad_cases); -} - -} // namespace - -int -main(int argc, char* argv[]) -{ - ::testing::InitGoogleTest(&argc, argv); - if (argc > 1 && std::string("--verbose") == argv[1]) { - verbose = true; - } - return RUN_ALL_TESTS(); -} diff --git a/searchcore/src/vespa/searchcore/bmcluster/CMakeLists.txt b/searchcore/src/vespa/searchcore/bmcluster/CMakeLists.txt index 501c4af447b..0f2299958c3 100644 --- a/searchcore/src/vespa/searchcore/bmcluster/CMakeLists.txt +++ b/searchcore/src/vespa/searchcore/bmcluster/CMakeLists.txt @@ -21,9 +21,7 @@ vespa_add_library(searchcore_bmcluster STATIC bucket_db_snapshot.cpp bucket_db_snapshot_vector.cpp bucket_info_queue.cpp - calculate_moved_docs_ratio.cpp document_api_message_bus_bm_feed_handler.cpp - estimate_moved_docs_ratio.cpp pending_tracker.cpp pending_tracker_hash.cpp spi_bm_feed_handler.cpp diff --git a/searchcore/src/vespa/searchcore/bmcluster/calculate_moved_docs_ratio.cpp b/searchcore/src/vespa/searchcore/bmcluster/calculate_moved_docs_ratio.cpp deleted file mode 100644 index 63f23476a95..00000000000 --- a/searchcore/src/vespa/searchcore/bmcluster/calculate_moved_docs_ratio.cpp +++ /dev/null @@ -1,142 +0,0 @@ -// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. - -#include "calculate_moved_docs_ratio.h" -#include <cassert> - -namespace search::bmcluster { - -namespace { - -uint32_t make_bit_range(uint32_t low, uint32_t high) -{ - return (1u << high) - (1u << low); -} - -} - -struct CalculateMovedDocsRatio::Placements -{ - uint32_t _mask; - uint32_t _count; - - Placements() - : _mask(0u), - _count(0u) - { - } - - Placements(uint32_t mask, uint32_t count) noexcept - : _mask(mask), - _count(count) - { - } - - Placements add(uint32_t idx) const noexcept { - return Placements(_mask | (1u << idx), _count + 1); - } - - Placements add(uint32_t idx, uint32_t mask, uint32_t redundancy) const noexcept { - return (((_count < redundancy) && ((1u << idx) & mask) != 0)) ? Placements(_mask | (1u << idx), _count + 1) : *this; - } -}; - - -CalculateMovedDocsRatio::CalculateMovedDocsRatio(uint32_t nodes, uint32_t redundancy, uint32_t old_placement_mask, uint32_t new_placement_mask, uint32_t new_up_mask) - : _num_states(nodes + 1), - _nodes(nodes), - _old_placement_mask(old_placement_mask), - _new_placement_mask(new_placement_mask), - _new_up_mask(new_up_mask), - _moved_docs(0u), - _moved_docs_per_node(nodes), - _checked_states(0u), - _lost_docs_base(0u), - _redundancy(redundancy), - _old_redundancy(std::min(redundancy, (uint32_t)__builtin_popcount(old_placement_mask))), - _new_redundancy(std::min(redundancy, (uint32_t)__builtin_popcount(new_placement_mask))) -{ - assert((new_placement_mask & ~new_up_mask) == 0u); - uint32_t states = 1; - for (uint32_t level = nodes; level > 0; --level) { - states *= std::max(1u, nodes - level); - _num_states[level] = states; - } - _num_states[0] = states * nodes; -} - -CalculateMovedDocsRatio::~CalculateMovedDocsRatio() = default; - -CalculateMovedDocsRatio -CalculateMovedDocsRatio::make_grow_calculator(uint32_t redundancy, uint32_t added_nodes, uint32_t nodes) -{ - uint32_t old_placement_mask = make_bit_range(added_nodes, nodes); - uint32_t new_placement_mask = make_bit_range(0, nodes); - return CalculateMovedDocsRatio(nodes, redundancy, old_placement_mask, new_placement_mask, new_placement_mask); -} - -CalculateMovedDocsRatio -CalculateMovedDocsRatio::make_shrink_calculator(uint32_t redundancy, uint32_t retired_nodes, uint32_t nodes) -{ - uint32_t old_placement_mask = make_bit_range(0, nodes); - uint32_t new_placement_mask = make_bit_range(retired_nodes, nodes); - return CalculateMovedDocsRatio(nodes, redundancy, old_placement_mask, new_placement_mask, old_placement_mask); -} - -CalculateMovedDocsRatio -CalculateMovedDocsRatio::make_crash_calculator(uint32_t redundancy, uint32_t crashed_nodes, uint32_t nodes) -{ - uint32_t old_placement_mask = make_bit_range(0, nodes); - uint32_t new_placement_mask = make_bit_range(crashed_nodes, nodes); - return CalculateMovedDocsRatio(nodes, redundancy, old_placement_mask, new_placement_mask, new_placement_mask); - -} - -CalculateMovedDocsRatio -CalculateMovedDocsRatio::make_replace_calculator(uint32_t redundancy, uint32_t added_nodes, uint32_t retired_nodes, uint32_t nodes) -{ - uint32_t old_placement_mask = make_bit_range(added_nodes, nodes); - uint32_t new_placement_mask = make_bit_range(added_nodes + retired_nodes, nodes) | make_bit_range(0, added_nodes); - uint32_t new_up_mask = make_bit_range(0, nodes); - return CalculateMovedDocsRatio(nodes, redundancy, old_placement_mask, new_placement_mask, new_up_mask); -} - -void -CalculateMovedDocsRatio::scan(Placements selected, Placements old_placement, Placements new_placement) -{ - if (old_placement._count >= _old_redundancy) { - if ((old_placement._mask & _new_up_mask) == 0) { - _lost_docs_base += _num_states[selected._count]; - _checked_states += _num_states[selected._count]; - return; - } - if (new_placement._count >= _new_redundancy) { - _checked_states += _num_states[selected._count]; - uint32_t only_new_mask = new_placement._mask & ~old_placement._mask; - if (only_new_mask != 0) { - _moved_docs += _num_states[selected._count] * (uint32_t)__builtin_popcount(only_new_mask); - for (uint32_t node_idx = 0; node_idx < _nodes; ++node_idx) { - if ((only_new_mask & (1u << node_idx)) != 0) { - _moved_docs_per_node[node_idx] += _num_states[selected._count]; - } - } - } - return; - } - } - assert(selected._count < _nodes); - for (uint32_t node_idx = 0; node_idx < _nodes; ++node_idx) { - if ((selected._mask & (1u << node_idx)) != 0) { - continue; - } - scan(selected.add(node_idx), old_placement.add(node_idx, _old_placement_mask, _old_redundancy), new_placement.add(node_idx, _new_placement_mask, _new_redundancy)); - } -} - -void -CalculateMovedDocsRatio::scan() -{ - scan(Placements(), Placements(), Placements()); - assert(_checked_states == _num_states[0]); -} - -} diff --git a/searchcore/src/vespa/searchcore/bmcluster/calculate_moved_docs_ratio.h b/searchcore/src/vespa/searchcore/bmcluster/calculate_moved_docs_ratio.h deleted file mode 100644 index cd161177246..00000000000 --- a/searchcore/src/vespa/searchcore/bmcluster/calculate_moved_docs_ratio.h +++ /dev/null @@ -1,48 +0,0 @@ -// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. - -#pragma once - -#include <cstdint> -#include <vector> - -namespace search::bmcluster { - -/* - * Class for calculating moved docs ratio during - * document redistribution. - */ -class CalculateMovedDocsRatio -{ - class Placements; - std::vector<uint32_t> _num_states; - uint32_t _nodes; - uint32_t _old_placement_mask; - uint32_t _new_placement_mask; - uint32_t _new_up_mask; - uint32_t _moved_docs; - std::vector<uint32_t> _moved_docs_per_node; - uint32_t _checked_states; - uint32_t _lost_docs_base; - uint32_t _redundancy; - uint32_t _old_redundancy; - uint32_t _new_redundancy; - - void scan(Placements selected, Placements old_placement, Placements new_placement); -public: - CalculateMovedDocsRatio(uint32_t nodes, uint32_t redundancy, uint32_t old_placement_mask, uint32_t new_placement_mask, uint32_t new_up_mask); - ~CalculateMovedDocsRatio(); - static CalculateMovedDocsRatio make_grow_calculator(uint32_t redundancy, uint32_t added_nodes, uint32_t nodes); - static CalculateMovedDocsRatio make_shrink_calculator(uint32_t redundancy, uint32_t retired_nodes, uint32_t nodes); - static CalculateMovedDocsRatio make_crash_calculator(uint32_t redundancy, uint32_t crashed_nodes, uint32_t nodes); - static CalculateMovedDocsRatio make_replace_calculator(uint32_t redundancy, uint32_t added_nodes, uint32_t retired_nodes, uint32_t nodes); - void scan(); - uint32_t get_lost_docs_base() const noexcept { return _lost_docs_base; } - uint32_t get_checked_states() const noexcept { return _checked_states; } - uint32_t get_new_redundancy() const noexcept { return _new_redundancy; } - uint32_t get_moved_docs() const noexcept { return _moved_docs; } - const std::vector<uint32_t>& get_moved_docs_per_node() const noexcept { return _moved_docs_per_node; } - double get_lost_docs_base_ratio() const noexcept { return ((double) _lost_docs_base) / _checked_states; } - double get_moved_docs_ratio() const noexcept { return ((double) _moved_docs) / _checked_states; } -}; - -} diff --git a/searchcore/src/vespa/searchcore/bmcluster/estimate_moved_docs_ratio.cpp b/searchcore/src/vespa/searchcore/bmcluster/estimate_moved_docs_ratio.cpp deleted file mode 100644 index cffa6fc79e9..00000000000 --- a/searchcore/src/vespa/searchcore/bmcluster/estimate_moved_docs_ratio.cpp +++ /dev/null @@ -1,114 +0,0 @@ -// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. - -#include "estimate_moved_docs_ratio.h" - -#include <vespa/log/log.h> -LOG_SETUP(".bmcluster.estimate_moved_docs_ratio"); - -namespace search::bmcluster { - -EstimateMovedDocsRatio::EstimateMovedDocsRatio() - : EstimateMovedDocsRatio(false) -{ -} - -EstimateMovedDocsRatio::EstimateMovedDocsRatio(bool verbose) - : _verbose(verbose) -{ -} - -double -EstimateMovedDocsRatio::estimate_lost_docs_base_ratio(uint32_t redundancy, uint32_t lost_nodes, uint32_t num_nodes) -{ - if (redundancy > lost_nodes) { - return 0.0; - } - double loss_ratio = 1.0; - for (uint32_t i = 0; i < redundancy; ++i) { - loss_ratio *= ((double) (lost_nodes - i)) / (num_nodes - i); - } - if (_verbose) { - LOG(info, "estimated lost docs base ratio: %4.2f", loss_ratio); - } - return loss_ratio; -} - -double -EstimateMovedDocsRatio::estimate_moved_docs_ratio_grow(uint32_t redundancy, uint32_t added_nodes, uint32_t num_nodes) -{ - if (added_nodes == num_nodes) { - return 0.0; - } - double new_redundancy = redundancy; - double new_per_node_doc_ratio = new_redundancy / num_nodes; - double moved_ratio = new_per_node_doc_ratio * added_nodes; - if (_verbose) { - LOG(info, "estimated_moved_docs_ratio_grow(%u,%u,%u)=%4.2f", redundancy, added_nodes, num_nodes, moved_ratio); - } - return moved_ratio; -} - -double -EstimateMovedDocsRatio::estimate_moved_docs_ratio_shrink(uint32_t redundancy, uint32_t retired_nodes, uint32_t num_nodes) -{ - if (retired_nodes == num_nodes) { - return 0.0; - } - double old_redundancy = redundancy; - double old_per_node_doc_ratio = old_redundancy / num_nodes; - uint32_t new_nodes = num_nodes - retired_nodes; - double new_redundancy = std::min(redundancy, new_nodes); - double new_per_node_doc_ratio = new_redundancy / new_nodes; - double moved_ratio = (new_per_node_doc_ratio - old_per_node_doc_ratio) * new_nodes; - if (_verbose) { - LOG(info, "estimated_moved_docs_ratio_shrink(%u,%u,%u)=%4.2f", redundancy, retired_nodes, num_nodes, moved_ratio); - } - return moved_ratio; -} - -double -EstimateMovedDocsRatio::estimate_moved_docs_ratio_crash(uint32_t redundancy, uint32_t crashed_nodes, uint32_t num_nodes) -{ - if (crashed_nodes == num_nodes) { - return 0.0; - } - double old_redundancy = redundancy; - double old_per_node_doc_ratio = old_redundancy / num_nodes; - uint32_t new_nodes = num_nodes - crashed_nodes; - double new_redundancy = std::min(redundancy, new_nodes); - double new_per_node_doc_ratio = new_redundancy / new_nodes; - double lost_docs_ratio = estimate_lost_docs_base_ratio(redundancy, crashed_nodes, num_nodes) * new_redundancy; - double moved_ratio = (new_per_node_doc_ratio - old_per_node_doc_ratio) * new_nodes - lost_docs_ratio; - if (_verbose) { - LOG(info, "estimated_moved_docs_ratio_crash(%u,%u,%u)=%4.2f", redundancy, crashed_nodes, num_nodes, moved_ratio); - } - return moved_ratio; -} - -double -EstimateMovedDocsRatio::estimate_moved_docs_ratio_replace(uint32_t redundancy, uint32_t added_nodes, uint32_t retired_nodes, uint32_t num_nodes) -{ - if (added_nodes == num_nodes || retired_nodes == num_nodes) { - return 0.0; - } - uint32_t old_nodes = num_nodes - added_nodes; - double old_redundancy = std::min(redundancy, old_nodes); - [[maybe_unused]] double old_per_node_doc_ratio = old_redundancy / old_nodes; - uint32_t new_nodes = num_nodes - retired_nodes; - double new_redundancy = std::min(redundancy, new_nodes); - double new_per_node_doc_ratio = new_redundancy / new_nodes; - double moved_ratio = new_per_node_doc_ratio * added_nodes; - uint32_t stable_nodes = num_nodes - added_nodes - retired_nodes; - // Account for extra documents moved from retired nodes to stable nodes - // TODO: Fix calculation - double baseline_per_node_doc_ratio = ((double) redundancy) / num_nodes; - double extra_per_stable_node_doc_ratio = std::min(baseline_per_node_doc_ratio * retired_nodes / new_nodes, 1.0 - old_per_node_doc_ratio); - double extra_moved_ratio = extra_per_stable_node_doc_ratio * stable_nodes; - moved_ratio += extra_moved_ratio; - if (_verbose) { - LOG(info, "estimated_moved_docs_ratio_replace(%u,%u,%u,%u)=%4.2f, (of which %4.2f extra)", redundancy, added_nodes, retired_nodes, num_nodes, moved_ratio, extra_moved_ratio); - } - return moved_ratio; -} - -} diff --git a/searchcore/src/vespa/searchcore/bmcluster/estimate_moved_docs_ratio.h b/searchcore/src/vespa/searchcore/bmcluster/estimate_moved_docs_ratio.h deleted file mode 100644 index fb14e80098d..00000000000 --- a/searchcore/src/vespa/searchcore/bmcluster/estimate_moved_docs_ratio.h +++ /dev/null @@ -1,25 +0,0 @@ -// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. - -#pragma once - -#include <cstdint> - -namespace search::bmcluster { - -/* - * Class for estimating moved docs ratio during - * document redistribution. - */ -class EstimateMovedDocsRatio { - bool _verbose; -public: - EstimateMovedDocsRatio(); - explicit EstimateMovedDocsRatio(bool verbose); - double estimate_lost_docs_base_ratio(uint32_t redundancy, uint32_t lost_nodes, uint32_t num_nodes); - double estimate_moved_docs_ratio_grow(uint32_t redundancy, uint32_t added_nodes, uint32_t num_nodes); - double estimate_moved_docs_ratio_shrink(uint32_t redundancy, uint32_t retired_nodes, uint32_t num_nodes); - double estimate_moved_docs_ratio_crash(uint32_t redundancy, uint32_t crashed_nodes, uint32_t num_nodes); - double estimate_moved_docs_ratio_replace(uint32_t redundancy, uint32_t added_nodes, uint32_t retired_nodes, uint32_t num_nodes); -}; - -} |