summaryrefslogtreecommitdiffstats
path: root/searchcore
diff options
context:
space:
mode:
authorTor Egge <Tor.Egge@online.no>2021-09-29 10:45:07 +0200
committerTor Egge <Tor.Egge@online.no>2021-09-29 10:45:07 +0200
commit460eb04019c7c28b055332f12b145bc759dfb2cd (patch)
treee78debd81ca9f7b9081764aa14a2b59ca8399d1a /searchcore
parent8923accf7e72d147d6d57185eecc4faf2b4adeb7 (diff)
Remove estimates for moved documents during document redistribution.
Diffstat (limited to 'searchcore')
-rw-r--r--searchcore/CMakeLists.txt1
-rw-r--r--searchcore/src/apps/vespa-redistribute-bm/vespa_redistribute_bm.cpp52
-rw-r--r--searchcore/src/tests/bmcluster/estimate_moved_docs_ratio/CMakeLists.txt11
-rw-r--r--searchcore/src/tests/bmcluster/estimate_moved_docs_ratio/estimate_moved_docs_ratio_test.cpp134
-rw-r--r--searchcore/src/vespa/searchcore/bmcluster/CMakeLists.txt2
-rw-r--r--searchcore/src/vespa/searchcore/bmcluster/calculate_moved_docs_ratio.cpp142
-rw-r--r--searchcore/src/vespa/searchcore/bmcluster/calculate_moved_docs_ratio.h48
-rw-r--r--searchcore/src/vespa/searchcore/bmcluster/estimate_moved_docs_ratio.cpp114
-rw-r--r--searchcore/src/vespa/searchcore/bmcluster/estimate_moved_docs_ratio.h25
9 files changed, 2 insertions, 527 deletions
diff --git a/searchcore/CMakeLists.txt b/searchcore/CMakeLists.txt
index 36ef347b7ac..c0353747e80 100644
--- a/searchcore/CMakeLists.txt
+++ b/searchcore/CMakeLists.txt
@@ -54,7 +54,6 @@ vespa_define_module(
src/apps/vespa-transactionlog-inspect
TESTS
- src/tests/bmcluster/estimate_moved_docs_ratio
src/tests/grouping
src/tests/proton/attribute
src/tests/proton/attribute/attribute_aspect_delayer
diff --git a/searchcore/src/apps/vespa-redistribute-bm/vespa_redistribute_bm.cpp b/searchcore/src/apps/vespa-redistribute-bm/vespa_redistribute_bm.cpp
index 43346c366c0..d9c2c1eda3e 100644
--- a/searchcore/src/apps/vespa-redistribute-bm/vespa_redistribute_bm.cpp
+++ b/searchcore/src/apps/vespa-redistribute-bm/vespa_redistribute_bm.cpp
@@ -18,8 +18,6 @@
#include <vespa/searchcore/bmcluster/bm_range.h>
#include <vespa/searchcore/bmcluster/bucket_db_snapshot_vector.h>
#include <vespa/searchcore/bmcluster/bucket_selector.h>
-#include <vespa/searchcore/bmcluster/calculate_moved_docs_ratio.h>
-#include <vespa/searchcore/bmcluster/estimate_moved_docs_ratio.h>
#include <vespa/searchcore/bmcluster/spi_bm_feed_handler.h>
#include <vespa/searchlib/index/dummyfileheadercontext.h>
#include <vespa/vespalib/io/fileutil.h>
@@ -54,8 +52,6 @@ using search::bmcluster::BmNode;
using search::bmcluster::BmNodeStatsReporter;
using search::bmcluster::BmRange;
using search::bmcluster::BucketSelector;
-using search::bmcluster::CalculateMovedDocsRatio;
-using search::bmcluster::EstimateMovedDocsRatio;
using search::index::DummyFileHeaderContext;
using storage::lib::State;
@@ -179,8 +175,6 @@ class Benchmark {
void adjust_cluster_state_before_feed();
void adjust_cluster_state_after_feed();
void adjust_cluster_state_after_first_redistribution();
- double estimate_lost_unique_docs();
- double estimate_moved_docs();
void feed();
std::chrono::duration<double> redistribute();
@@ -318,46 +312,6 @@ Benchmark::redistribute()
return reporter.get_change_time() - before;
}
-double
-Benchmark::estimate_lost_unique_docs()
-{
- switch (_params.get_mode()) {
- case Mode::PERM_CRASH:
- case Mode::TEMP_CRASH:
- {
- auto lost_docs_ratio = EstimateMovedDocsRatio().estimate_lost_docs_base_ratio(_params.get_redundancy(), _params.get_flip_nodes(), _params.get_num_nodes());
- return _params.get_documents() * lost_docs_ratio;
- }
- default:
- return 0.0;
- }
-}
-
-double
-Benchmark::estimate_moved_docs()
-{
- switch(_params.get_mode()) {
- case Mode::GROW:
- return _params.get_documents() * EstimateMovedDocsRatio().estimate_moved_docs_ratio_grow(_params.get_redundancy(), _params.get_flip_nodes(), _params.get_num_nodes());
- case Mode::SHRINK:
- return _params.get_documents() * EstimateMovedDocsRatio().estimate_moved_docs_ratio_shrink(_params.get_redundancy(), _params.get_flip_nodes(), _params.get_num_nodes());
- case Mode::PERM_CRASH:
- case Mode::TEMP_CRASH:
- return _params.get_documents() * EstimateMovedDocsRatio().estimate_moved_docs_ratio_crash(_params.get_redundancy(), _params.get_flip_nodes(), _params.get_num_nodes());
- case Mode::REPLACE:
- if (_params.get_num_nodes() < 10) {
- // Calculate better estimate for moved docs ratio with brute force
- auto scanner = CalculateMovedDocsRatio::make_replace_calculator(_params.get_redundancy(), _params.get_flip_nodes(), _params.get_flip_nodes(), _params.get_num_nodes());
- scanner.scan();
- return _params.get_documents() * scanner.get_moved_docs_ratio();
- } else {
- return _params.get_documents() * EstimateMovedDocsRatio().estimate_moved_docs_ratio_replace(_params.get_redundancy(), _params.get_flip_nodes(), _params.get_flip_nodes(), _params.get_num_nodes());
- }
- default:
- return 0.0;
- }
-}
-
void
Benchmark::run()
{
@@ -368,12 +322,10 @@ Benchmark::run()
auto old_snapshot = _cluster->get_bucket_db_snapshots();
adjust_cluster_state_after_feed();
auto elapsed = redistribute();
- double estimated_moved_docs = estimate_moved_docs();
- double estimated_lost_unique_docs = estimate_lost_unique_docs();
auto new_snapshot = _cluster->get_bucket_db_snapshots();
uint32_t moved_docs = new_snapshot.count_moved_documents(old_snapshot);
uint32_t lost_unique_docs = new_snapshot.count_lost_unique_documents(old_snapshot);
- LOG(info, "Redistributed (estimated %4.2f) %u docs in %5.3f seconds, %4.2f docs/s, (estimated %4.2f) %u lost unique docs", estimated_moved_docs, moved_docs, elapsed.count(), moved_docs / elapsed.count(), estimated_lost_unique_docs, lost_unique_docs);
+ LOG(info, "Redistributed %u docs in %5.3f seconds, %4.2f docs/s, %u lost unique docs", moved_docs, elapsed.count(), moved_docs / elapsed.count(), lost_unique_docs);
if (_params.get_mode() == Mode::TEMP_CRASH) {
if (_params.get_use_feed_settle()) {
LOG(info, "Settling redistribution");
@@ -381,7 +333,7 @@ Benchmark::run()
}
adjust_cluster_state_after_first_redistribution();
elapsed = redistribute();
- LOG(info, "Cleanup of (estimated %4.2f) %u docs in %5.3f seconds, %4.2f docs/s, (estimated %4.2f) %u refound unique docs", estimated_moved_docs, moved_docs, elapsed.count(), moved_docs / elapsed.count(), estimated_lost_unique_docs, lost_unique_docs);
+ LOG(info, "Cleanup of %u docs in %5.3f seconds, %4.2f docs/s, %u refound unique docs", moved_docs, elapsed.count(), moved_docs / elapsed.count(), lost_unique_docs);
}
_cluster->stop();
}
diff --git a/searchcore/src/tests/bmcluster/estimate_moved_docs_ratio/CMakeLists.txt b/searchcore/src/tests/bmcluster/estimate_moved_docs_ratio/CMakeLists.txt
deleted file mode 100644
index 23f72e4f1fa..00000000000
--- a/searchcore/src/tests/bmcluster/estimate_moved_docs_ratio/CMakeLists.txt
+++ /dev/null
@@ -1,11 +0,0 @@
-# Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
-vespa_add_executable(searchcore_bmcluster_estimate_moved_docs_ratio_test_app TEST
- SOURCES
- estimate_moved_docs_ratio_test.cpp
- DEPENDS
- searchcore_bmcluster
- GTest::GTest
-)
-
-vespa_add_test(NAME searchcore_bmcluster_estimate_moved_docs_ratio_test_app
- COMMAND searchcore_bmcluster_estimate_moved_docs_ratio_test_app)
diff --git a/searchcore/src/tests/bmcluster/estimate_moved_docs_ratio/estimate_moved_docs_ratio_test.cpp b/searchcore/src/tests/bmcluster/estimate_moved_docs_ratio/estimate_moved_docs_ratio_test.cpp
deleted file mode 100644
index 79af31e3247..00000000000
--- a/searchcore/src/tests/bmcluster/estimate_moved_docs_ratio/estimate_moved_docs_ratio_test.cpp
+++ /dev/null
@@ -1,134 +0,0 @@
-// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
-
-#include <vespa/vespalib/gtest/gtest.h>
-#include <vespa/searchcore/bmcluster/calculate_moved_docs_ratio.h>
-#include <vespa/searchcore/bmcluster/estimate_moved_docs_ratio.h>
-#include <iostream>
-
-#include <vespa/log/log.h>
-LOG_SETUP("estimate_moved_docs_ratio_test");
-
-using search::bmcluster::CalculateMovedDocsRatio;
-using search::bmcluster::EstimateMovedDocsRatio;
-
-namespace {
-
-bool verbose;
-
-TEST(EstimateMovedDocsRatioTest, estimate_lost_docs_ratio)
-{
- for (uint32_t nodes = 1; nodes < 2; ++nodes) {
- for (uint32_t redundancy = 1; redundancy <= nodes; ++redundancy) {
- for (uint32_t lost_nodes = 0; lost_nodes <= nodes; ++lost_nodes) {
- auto scanner = CalculateMovedDocsRatio::make_crash_calculator(redundancy, lost_nodes, nodes);
- scanner.scan();
- double lost_docs_base_ratio = scanner.get_lost_docs_base_ratio();
- double estimated_lost_docs_base_ratio = EstimateMovedDocsRatio().estimate_lost_docs_base_ratio(redundancy, lost_nodes, nodes);
- EXPECT_DOUBLE_EQ(lost_docs_base_ratio, estimated_lost_docs_base_ratio);
- }
- }
- }
-}
-
-TEST(EstimateMovedDocsRatioTest, estimate_moved_docs_ratio_grow)
-{
- for (uint32_t nodes = 1; nodes < 10; ++nodes) {
- for (uint32_t redundancy = 1; redundancy <= nodes; ++redundancy) {
- for (uint32_t added_nodes = 0; added_nodes <= nodes; ++added_nodes) {
- auto scanner = CalculateMovedDocsRatio::make_grow_calculator(redundancy, added_nodes, nodes);
- scanner.scan();
- double moved_docs_ratio = scanner.get_moved_docs_ratio();
- double estimated_moved_docs_ratio = EstimateMovedDocsRatio().estimate_moved_docs_ratio_grow(redundancy, added_nodes, nodes);
- EXPECT_DOUBLE_EQ(moved_docs_ratio, estimated_moved_docs_ratio);
- }
- }
- }
-}
-
-TEST(EstimateMovedDocsRatioTest, estimate_moved_docs_ratio_shrink)
-{
- for (uint32_t nodes = 1; nodes < 10; ++nodes) {
- for (uint32_t redundancy = 1; redundancy <= nodes; ++redundancy) {
- for (uint32_t retired_nodes = 0; retired_nodes <= nodes; ++retired_nodes) {
- auto scanner = CalculateMovedDocsRatio::make_shrink_calculator(redundancy, retired_nodes, nodes);
- scanner.scan();
- double moved_docs_ratio = scanner.get_moved_docs_ratio();
- double estimated_moved_docs_ratio = EstimateMovedDocsRatio().estimate_moved_docs_ratio_shrink(redundancy, retired_nodes, nodes);
- EXPECT_DOUBLE_EQ(moved_docs_ratio, estimated_moved_docs_ratio);
- }
- }
- }
-}
-
-TEST(EstimateMovedDocsRatioTest, estimate_moved_docs_ratio_crash)
-{
- double epsilon = 1e-15;
- for (uint32_t nodes = 1; nodes < 10; ++nodes) {
- for (uint32_t redundancy = 1; redundancy <= nodes; ++redundancy) {
- for (uint32_t crashed_nodes = 0; crashed_nodes <= nodes; ++crashed_nodes) {
- auto scanner = CalculateMovedDocsRatio::make_crash_calculator(redundancy, crashed_nodes, nodes);
- scanner.scan();
- double moved_docs_ratio = scanner.get_moved_docs_ratio();
- double estimated_moved_docs_ratio = EstimateMovedDocsRatio().estimate_moved_docs_ratio_crash(redundancy, crashed_nodes, nodes);
- EXPECT_NEAR(moved_docs_ratio, estimated_moved_docs_ratio, epsilon);
- }
- }
- }
-}
-
-TEST(EstimateMovedDocsRatioTest, estimate_moved_docs_ratio_replace)
-{
- uint32_t bad_cases = 0;
- uint32_t really_bad_cases = 0;
- if (verbose) {
- std::cout << "Summary: HDR Red A Ret N Act Est ScaleMv ScaleEs States" << std::endl;
- }
- for (uint32_t nodes = 1; nodes < 6; ++nodes) {
- for (uint32_t redundancy = 1; redundancy <= nodes; ++redundancy) {
- for (uint32_t retired_nodes = 0; retired_nodes <= nodes; ++retired_nodes) {
- for (uint32_t added_nodes = 0; added_nodes <= nodes - retired_nodes; ++added_nodes) {
- // std::cout << "Estimate moved docs ratio replace " << retired_nodes << " of " << nodes << " retired, added " << added_nodes << " nodes ,redundancy " << redundancy << std::endl;
- auto scanner = CalculateMovedDocsRatio::make_replace_calculator(redundancy, added_nodes, retired_nodes, nodes);
- scanner.scan();
- double moved_docs_ratio = scanner.get_moved_docs_ratio();
- double estimated_moved_docs_ratio = EstimateMovedDocsRatio(verbose).estimate_moved_docs_ratio_replace(redundancy, added_nodes, retired_nodes, nodes);
- double error_ratio = abs(moved_docs_ratio - estimated_moved_docs_ratio);
- bool bad = error_ratio > 1e-8;
- bool really_bad = error_ratio > 0.2 * estimated_moved_docs_ratio + 1e-8;
- if (bad) {
- ++bad_cases;
- }
- if (really_bad) {
- ++really_bad_cases;
- }
- if (verbose) {
- double scaled_moved = moved_docs_ratio * scanner.get_checked_states();
- double scaled_estimated_moved = estimated_moved_docs_ratio * scanner.get_checked_states();
- std::cout << "Summary: " << (bad ? "BAD" : "OK ") << std::setw(4) << redundancy << std::setw(4) << added_nodes << std::setw(4) << retired_nodes << std::setw(4) << nodes << " " << std::setw(12) << std::setprecision(5) << std::fixed << moved_docs_ratio << " " << std::setw(12) << std::setprecision(5) << std::fixed << estimated_moved_docs_ratio << " " << std::setw(12) << std::setprecision(5) << std::fixed << scaled_moved << " " << std::setw(12) << std::setprecision(5) << std::fixed << scaled_estimated_moved << std::setw(8) << scanner.get_checked_states();
- std::cout << " [";
- for (uint32_t node_idx = 0; node_idx < nodes; ++node_idx) {
- std::cout << std::setw(8) << scanner.get_moved_docs_per_node()[node_idx];
- }
- std::cout << " ]" << std::endl;
- }
- // TODO: Fix calculation so we get zero bad cases.
- // EXPECT_DOUBLE_EQ(moved_docs_ratio, estimated_moved_docs_ratio);
- }
- }
- }
- }
- EXPECT_LE(6, bad_cases);
- EXPECT_LE(1, really_bad_cases);
-}
-
-} // namespace
-
-int
-main(int argc, char* argv[])
-{
- ::testing::InitGoogleTest(&argc, argv);
- if (argc > 1 && std::string("--verbose") == argv[1]) {
- verbose = true;
- }
- return RUN_ALL_TESTS();
-}
diff --git a/searchcore/src/vespa/searchcore/bmcluster/CMakeLists.txt b/searchcore/src/vespa/searchcore/bmcluster/CMakeLists.txt
index 501c4af447b..0f2299958c3 100644
--- a/searchcore/src/vespa/searchcore/bmcluster/CMakeLists.txt
+++ b/searchcore/src/vespa/searchcore/bmcluster/CMakeLists.txt
@@ -21,9 +21,7 @@ vespa_add_library(searchcore_bmcluster STATIC
bucket_db_snapshot.cpp
bucket_db_snapshot_vector.cpp
bucket_info_queue.cpp
- calculate_moved_docs_ratio.cpp
document_api_message_bus_bm_feed_handler.cpp
- estimate_moved_docs_ratio.cpp
pending_tracker.cpp
pending_tracker_hash.cpp
spi_bm_feed_handler.cpp
diff --git a/searchcore/src/vespa/searchcore/bmcluster/calculate_moved_docs_ratio.cpp b/searchcore/src/vespa/searchcore/bmcluster/calculate_moved_docs_ratio.cpp
deleted file mode 100644
index 63f23476a95..00000000000
--- a/searchcore/src/vespa/searchcore/bmcluster/calculate_moved_docs_ratio.cpp
+++ /dev/null
@@ -1,142 +0,0 @@
-// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
-
-#include "calculate_moved_docs_ratio.h"
-#include <cassert>
-
-namespace search::bmcluster {
-
-namespace {
-
-uint32_t make_bit_range(uint32_t low, uint32_t high)
-{
- return (1u << high) - (1u << low);
-}
-
-}
-
-struct CalculateMovedDocsRatio::Placements
-{
- uint32_t _mask;
- uint32_t _count;
-
- Placements()
- : _mask(0u),
- _count(0u)
- {
- }
-
- Placements(uint32_t mask, uint32_t count) noexcept
- : _mask(mask),
- _count(count)
- {
- }
-
- Placements add(uint32_t idx) const noexcept {
- return Placements(_mask | (1u << idx), _count + 1);
- }
-
- Placements add(uint32_t idx, uint32_t mask, uint32_t redundancy) const noexcept {
- return (((_count < redundancy) && ((1u << idx) & mask) != 0)) ? Placements(_mask | (1u << idx), _count + 1) : *this;
- }
-};
-
-
-CalculateMovedDocsRatio::CalculateMovedDocsRatio(uint32_t nodes, uint32_t redundancy, uint32_t old_placement_mask, uint32_t new_placement_mask, uint32_t new_up_mask)
- : _num_states(nodes + 1),
- _nodes(nodes),
- _old_placement_mask(old_placement_mask),
- _new_placement_mask(new_placement_mask),
- _new_up_mask(new_up_mask),
- _moved_docs(0u),
- _moved_docs_per_node(nodes),
- _checked_states(0u),
- _lost_docs_base(0u),
- _redundancy(redundancy),
- _old_redundancy(std::min(redundancy, (uint32_t)__builtin_popcount(old_placement_mask))),
- _new_redundancy(std::min(redundancy, (uint32_t)__builtin_popcount(new_placement_mask)))
-{
- assert((new_placement_mask & ~new_up_mask) == 0u);
- uint32_t states = 1;
- for (uint32_t level = nodes; level > 0; --level) {
- states *= std::max(1u, nodes - level);
- _num_states[level] = states;
- }
- _num_states[0] = states * nodes;
-}
-
-CalculateMovedDocsRatio::~CalculateMovedDocsRatio() = default;
-
-CalculateMovedDocsRatio
-CalculateMovedDocsRatio::make_grow_calculator(uint32_t redundancy, uint32_t added_nodes, uint32_t nodes)
-{
- uint32_t old_placement_mask = make_bit_range(added_nodes, nodes);
- uint32_t new_placement_mask = make_bit_range(0, nodes);
- return CalculateMovedDocsRatio(nodes, redundancy, old_placement_mask, new_placement_mask, new_placement_mask);
-}
-
-CalculateMovedDocsRatio
-CalculateMovedDocsRatio::make_shrink_calculator(uint32_t redundancy, uint32_t retired_nodes, uint32_t nodes)
-{
- uint32_t old_placement_mask = make_bit_range(0, nodes);
- uint32_t new_placement_mask = make_bit_range(retired_nodes, nodes);
- return CalculateMovedDocsRatio(nodes, redundancy, old_placement_mask, new_placement_mask, old_placement_mask);
-}
-
-CalculateMovedDocsRatio
-CalculateMovedDocsRatio::make_crash_calculator(uint32_t redundancy, uint32_t crashed_nodes, uint32_t nodes)
-{
- uint32_t old_placement_mask = make_bit_range(0, nodes);
- uint32_t new_placement_mask = make_bit_range(crashed_nodes, nodes);
- return CalculateMovedDocsRatio(nodes, redundancy, old_placement_mask, new_placement_mask, new_placement_mask);
-
-}
-
-CalculateMovedDocsRatio
-CalculateMovedDocsRatio::make_replace_calculator(uint32_t redundancy, uint32_t added_nodes, uint32_t retired_nodes, uint32_t nodes)
-{
- uint32_t old_placement_mask = make_bit_range(added_nodes, nodes);
- uint32_t new_placement_mask = make_bit_range(added_nodes + retired_nodes, nodes) | make_bit_range(0, added_nodes);
- uint32_t new_up_mask = make_bit_range(0, nodes);
- return CalculateMovedDocsRatio(nodes, redundancy, old_placement_mask, new_placement_mask, new_up_mask);
-}
-
-void
-CalculateMovedDocsRatio::scan(Placements selected, Placements old_placement, Placements new_placement)
-{
- if (old_placement._count >= _old_redundancy) {
- if ((old_placement._mask & _new_up_mask) == 0) {
- _lost_docs_base += _num_states[selected._count];
- _checked_states += _num_states[selected._count];
- return;
- }
- if (new_placement._count >= _new_redundancy) {
- _checked_states += _num_states[selected._count];
- uint32_t only_new_mask = new_placement._mask & ~old_placement._mask;
- if (only_new_mask != 0) {
- _moved_docs += _num_states[selected._count] * (uint32_t)__builtin_popcount(only_new_mask);
- for (uint32_t node_idx = 0; node_idx < _nodes; ++node_idx) {
- if ((only_new_mask & (1u << node_idx)) != 0) {
- _moved_docs_per_node[node_idx] += _num_states[selected._count];
- }
- }
- }
- return;
- }
- }
- assert(selected._count < _nodes);
- for (uint32_t node_idx = 0; node_idx < _nodes; ++node_idx) {
- if ((selected._mask & (1u << node_idx)) != 0) {
- continue;
- }
- scan(selected.add(node_idx), old_placement.add(node_idx, _old_placement_mask, _old_redundancy), new_placement.add(node_idx, _new_placement_mask, _new_redundancy));
- }
-}
-
-void
-CalculateMovedDocsRatio::scan()
-{
- scan(Placements(), Placements(), Placements());
- assert(_checked_states == _num_states[0]);
-}
-
-}
diff --git a/searchcore/src/vespa/searchcore/bmcluster/calculate_moved_docs_ratio.h b/searchcore/src/vespa/searchcore/bmcluster/calculate_moved_docs_ratio.h
deleted file mode 100644
index cd161177246..00000000000
--- a/searchcore/src/vespa/searchcore/bmcluster/calculate_moved_docs_ratio.h
+++ /dev/null
@@ -1,48 +0,0 @@
-// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
-
-#pragma once
-
-#include <cstdint>
-#include <vector>
-
-namespace search::bmcluster {
-
-/*
- * Class for calculating moved docs ratio during
- * document redistribution.
- */
-class CalculateMovedDocsRatio
-{
- class Placements;
- std::vector<uint32_t> _num_states;
- uint32_t _nodes;
- uint32_t _old_placement_mask;
- uint32_t _new_placement_mask;
- uint32_t _new_up_mask;
- uint32_t _moved_docs;
- std::vector<uint32_t> _moved_docs_per_node;
- uint32_t _checked_states;
- uint32_t _lost_docs_base;
- uint32_t _redundancy;
- uint32_t _old_redundancy;
- uint32_t _new_redundancy;
-
- void scan(Placements selected, Placements old_placement, Placements new_placement);
-public:
- CalculateMovedDocsRatio(uint32_t nodes, uint32_t redundancy, uint32_t old_placement_mask, uint32_t new_placement_mask, uint32_t new_up_mask);
- ~CalculateMovedDocsRatio();
- static CalculateMovedDocsRatio make_grow_calculator(uint32_t redundancy, uint32_t added_nodes, uint32_t nodes);
- static CalculateMovedDocsRatio make_shrink_calculator(uint32_t redundancy, uint32_t retired_nodes, uint32_t nodes);
- static CalculateMovedDocsRatio make_crash_calculator(uint32_t redundancy, uint32_t crashed_nodes, uint32_t nodes);
- static CalculateMovedDocsRatio make_replace_calculator(uint32_t redundancy, uint32_t added_nodes, uint32_t retired_nodes, uint32_t nodes);
- void scan();
- uint32_t get_lost_docs_base() const noexcept { return _lost_docs_base; }
- uint32_t get_checked_states() const noexcept { return _checked_states; }
- uint32_t get_new_redundancy() const noexcept { return _new_redundancy; }
- uint32_t get_moved_docs() const noexcept { return _moved_docs; }
- const std::vector<uint32_t>& get_moved_docs_per_node() const noexcept { return _moved_docs_per_node; }
- double get_lost_docs_base_ratio() const noexcept { return ((double) _lost_docs_base) / _checked_states; }
- double get_moved_docs_ratio() const noexcept { return ((double) _moved_docs) / _checked_states; }
-};
-
-}
diff --git a/searchcore/src/vespa/searchcore/bmcluster/estimate_moved_docs_ratio.cpp b/searchcore/src/vespa/searchcore/bmcluster/estimate_moved_docs_ratio.cpp
deleted file mode 100644
index cffa6fc79e9..00000000000
--- a/searchcore/src/vespa/searchcore/bmcluster/estimate_moved_docs_ratio.cpp
+++ /dev/null
@@ -1,114 +0,0 @@
-// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
-
-#include "estimate_moved_docs_ratio.h"
-
-#include <vespa/log/log.h>
-LOG_SETUP(".bmcluster.estimate_moved_docs_ratio");
-
-namespace search::bmcluster {
-
-EstimateMovedDocsRatio::EstimateMovedDocsRatio()
- : EstimateMovedDocsRatio(false)
-{
-}
-
-EstimateMovedDocsRatio::EstimateMovedDocsRatio(bool verbose)
- : _verbose(verbose)
-{
-}
-
-double
-EstimateMovedDocsRatio::estimate_lost_docs_base_ratio(uint32_t redundancy, uint32_t lost_nodes, uint32_t num_nodes)
-{
- if (redundancy > lost_nodes) {
- return 0.0;
- }
- double loss_ratio = 1.0;
- for (uint32_t i = 0; i < redundancy; ++i) {
- loss_ratio *= ((double) (lost_nodes - i)) / (num_nodes - i);
- }
- if (_verbose) {
- LOG(info, "estimated lost docs base ratio: %4.2f", loss_ratio);
- }
- return loss_ratio;
-}
-
-double
-EstimateMovedDocsRatio::estimate_moved_docs_ratio_grow(uint32_t redundancy, uint32_t added_nodes, uint32_t num_nodes)
-{
- if (added_nodes == num_nodes) {
- return 0.0;
- }
- double new_redundancy = redundancy;
- double new_per_node_doc_ratio = new_redundancy / num_nodes;
- double moved_ratio = new_per_node_doc_ratio * added_nodes;
- if (_verbose) {
- LOG(info, "estimated_moved_docs_ratio_grow(%u,%u,%u)=%4.2f", redundancy, added_nodes, num_nodes, moved_ratio);
- }
- return moved_ratio;
-}
-
-double
-EstimateMovedDocsRatio::estimate_moved_docs_ratio_shrink(uint32_t redundancy, uint32_t retired_nodes, uint32_t num_nodes)
-{
- if (retired_nodes == num_nodes) {
- return 0.0;
- }
- double old_redundancy = redundancy;
- double old_per_node_doc_ratio = old_redundancy / num_nodes;
- uint32_t new_nodes = num_nodes - retired_nodes;
- double new_redundancy = std::min(redundancy, new_nodes);
- double new_per_node_doc_ratio = new_redundancy / new_nodes;
- double moved_ratio = (new_per_node_doc_ratio - old_per_node_doc_ratio) * new_nodes;
- if (_verbose) {
- LOG(info, "estimated_moved_docs_ratio_shrink(%u,%u,%u)=%4.2f", redundancy, retired_nodes, num_nodes, moved_ratio);
- }
- return moved_ratio;
-}
-
-double
-EstimateMovedDocsRatio::estimate_moved_docs_ratio_crash(uint32_t redundancy, uint32_t crashed_nodes, uint32_t num_nodes)
-{
- if (crashed_nodes == num_nodes) {
- return 0.0;
- }
- double old_redundancy = redundancy;
- double old_per_node_doc_ratio = old_redundancy / num_nodes;
- uint32_t new_nodes = num_nodes - crashed_nodes;
- double new_redundancy = std::min(redundancy, new_nodes);
- double new_per_node_doc_ratio = new_redundancy / new_nodes;
- double lost_docs_ratio = estimate_lost_docs_base_ratio(redundancy, crashed_nodes, num_nodes) * new_redundancy;
- double moved_ratio = (new_per_node_doc_ratio - old_per_node_doc_ratio) * new_nodes - lost_docs_ratio;
- if (_verbose) {
- LOG(info, "estimated_moved_docs_ratio_crash(%u,%u,%u)=%4.2f", redundancy, crashed_nodes, num_nodes, moved_ratio);
- }
- return moved_ratio;
-}
-
-double
-EstimateMovedDocsRatio::estimate_moved_docs_ratio_replace(uint32_t redundancy, uint32_t added_nodes, uint32_t retired_nodes, uint32_t num_nodes)
-{
- if (added_nodes == num_nodes || retired_nodes == num_nodes) {
- return 0.0;
- }
- uint32_t old_nodes = num_nodes - added_nodes;
- double old_redundancy = std::min(redundancy, old_nodes);
- [[maybe_unused]] double old_per_node_doc_ratio = old_redundancy / old_nodes;
- uint32_t new_nodes = num_nodes - retired_nodes;
- double new_redundancy = std::min(redundancy, new_nodes);
- double new_per_node_doc_ratio = new_redundancy / new_nodes;
- double moved_ratio = new_per_node_doc_ratio * added_nodes;
- uint32_t stable_nodes = num_nodes - added_nodes - retired_nodes;
- // Account for extra documents moved from retired nodes to stable nodes
- // TODO: Fix calculation
- double baseline_per_node_doc_ratio = ((double) redundancy) / num_nodes;
- double extra_per_stable_node_doc_ratio = std::min(baseline_per_node_doc_ratio * retired_nodes / new_nodes, 1.0 - old_per_node_doc_ratio);
- double extra_moved_ratio = extra_per_stable_node_doc_ratio * stable_nodes;
- moved_ratio += extra_moved_ratio;
- if (_verbose) {
- LOG(info, "estimated_moved_docs_ratio_replace(%u,%u,%u,%u)=%4.2f, (of which %4.2f extra)", redundancy, added_nodes, retired_nodes, num_nodes, moved_ratio, extra_moved_ratio);
- }
- return moved_ratio;
-}
-
-}
diff --git a/searchcore/src/vespa/searchcore/bmcluster/estimate_moved_docs_ratio.h b/searchcore/src/vespa/searchcore/bmcluster/estimate_moved_docs_ratio.h
deleted file mode 100644
index fb14e80098d..00000000000
--- a/searchcore/src/vespa/searchcore/bmcluster/estimate_moved_docs_ratio.h
+++ /dev/null
@@ -1,25 +0,0 @@
-// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
-
-#pragma once
-
-#include <cstdint>
-
-namespace search::bmcluster {
-
-/*
- * Class for estimating moved docs ratio during
- * document redistribution.
- */
-class EstimateMovedDocsRatio {
- bool _verbose;
-public:
- EstimateMovedDocsRatio();
- explicit EstimateMovedDocsRatio(bool verbose);
- double estimate_lost_docs_base_ratio(uint32_t redundancy, uint32_t lost_nodes, uint32_t num_nodes);
- double estimate_moved_docs_ratio_grow(uint32_t redundancy, uint32_t added_nodes, uint32_t num_nodes);
- double estimate_moved_docs_ratio_shrink(uint32_t redundancy, uint32_t retired_nodes, uint32_t num_nodes);
- double estimate_moved_docs_ratio_crash(uint32_t redundancy, uint32_t crashed_nodes, uint32_t num_nodes);
- double estimate_moved_docs_ratio_replace(uint32_t redundancy, uint32_t added_nodes, uint32_t retired_nodes, uint32_t num_nodes);
-};
-
-}