summaryrefslogtreecommitdiffstats
path: root/searchcore/src/apps/vespa-redistribute-bm/vespa_redistribute_bm.cpp
diff options
context:
space:
mode:
authorJon Bratseth <bratseth@oath.com>2021-09-27 23:08:47 +0200
committerGitHub <noreply@github.com>2021-09-27 23:08:47 +0200
commit8f3fb1a105ded07144f6de527266a438e48a1766 (patch)
treed2923a45682e91d80e7011c60cfb301e05acead3 /searchcore/src/apps/vespa-redistribute-bm/vespa_redistribute_bm.cpp
parent0a858cc0f3425fc792d978b6221ef4f3c3bc6067 (diff)
parent037f756caf4cfb99bcd988174839d7bc385267b9 (diff)
Merge branch 'master' into bratseth/linguistics-components
Diffstat (limited to 'searchcore/src/apps/vespa-redistribute-bm/vespa_redistribute_bm.cpp')
-rw-r--r--searchcore/src/apps/vespa-redistribute-bm/vespa_redistribute_bm.cpp91
1 files changed, 16 insertions, 75 deletions
diff --git a/searchcore/src/apps/vespa-redistribute-bm/vespa_redistribute_bm.cpp b/searchcore/src/apps/vespa-redistribute-bm/vespa_redistribute_bm.cpp
index e5c3959d2d4..0227d9539d2 100644
--- a/searchcore/src/apps/vespa-redistribute-bm/vespa_redistribute_bm.cpp
+++ b/searchcore/src/apps/vespa-redistribute-bm/vespa_redistribute_bm.cpp
@@ -17,6 +17,8 @@
#include <vespa/searchcore/bmcluster/bm_node_stats_reporter.h>
#include <vespa/searchcore/bmcluster/bm_range.h>
#include <vespa/searchcore/bmcluster/bucket_selector.h>
+#include <vespa/searchcore/bmcluster/calculate_moved_docs_ratio.h>
+#include <vespa/searchcore/bmcluster/estimate_moved_docs_ratio.h>
#include <vespa/searchcore/bmcluster/spi_bm_feed_handler.h>
#include <vespa/searchlib/index/dummyfileheadercontext.h>
#include <vespa/vespalib/io/fileutil.h>
@@ -51,6 +53,8 @@ using search::bmcluster::BmNode;
using search::bmcluster::BmNodeStatsReporter;
using search::bmcluster::BmRange;
using search::bmcluster::BucketSelector;
+using search::bmcluster::CalculateMovedDocsRatio;
+using search::bmcluster::EstimateMovedDocsRatio;
using search::index::DummyFileHeaderContext;
using storage::lib::State;
@@ -100,76 +104,6 @@ vespalib::string& get_mode_name(Mode mode) {
return (i < mode_names.size()) ? mode_names[i] : bad_mode_name;
}
-double
-estimate_lost_docs_base_ratio(uint32_t redundancy, uint32_t lost_nodes, uint32_t num_nodes)
-{
- if (redundancy > lost_nodes) {
- return 0.0;
- }
- double loss_ratio = 1.0;
- for (uint32_t i = 0; i < redundancy; ++i) {
- loss_ratio *= ((double) (lost_nodes - i)) / (num_nodes - i);
- }
- LOG(info, "estimated lost docs base ratio: %4.2f", loss_ratio);
- return loss_ratio;
-}
-
-double
-estimate_moved_docs_ratio_grow(uint32_t redundancy, uint32_t added_nodes, uint32_t num_nodes)
-{
- double new_redundancy = redundancy;
- double new_per_node_doc_ratio = new_redundancy / num_nodes;
- double moved_ratio = new_per_node_doc_ratio * added_nodes;
- LOG(info, "estimated_moved_docs_ratio_grow(%u,%u,%u)=%4.2f", redundancy, added_nodes, num_nodes, moved_ratio);
- return moved_ratio;
-}
-
-double
-estimate_moved_docs_ratio_shrink(uint32_t redundancy, uint32_t retired_nodes, uint32_t num_nodes)
-{
- double old_redundancy = redundancy;
- double old_per_node_doc_ratio = old_redundancy / num_nodes;
- uint32_t new_nodes = num_nodes - retired_nodes;
- double new_redundancy = std::min(redundancy, new_nodes);
- double new_per_node_doc_ratio = new_redundancy / new_nodes;
- double moved_ratio = (new_per_node_doc_ratio - old_per_node_doc_ratio) * new_nodes;
- LOG(info, "estimated_moved_docs_ratio_shrink(%u,%u,%u)=%4.2f", redundancy, retired_nodes, num_nodes, moved_ratio);
- return moved_ratio;
-}
-
-double
-estimate_moved_docs_ratio_crash(uint32_t redundancy, uint32_t crashed_nodes, uint32_t num_nodes)
-{
- double old_redundancy = redundancy;
- double old_per_node_doc_ratio = old_redundancy / num_nodes;
- uint32_t new_nodes = num_nodes - crashed_nodes;
- double new_redundancy = std::min(redundancy, new_nodes);
- double new_per_node_doc_ratio = new_redundancy / new_nodes;
- double lost_docs_ratio = estimate_lost_docs_base_ratio(redundancy, crashed_nodes, num_nodes) * new_redundancy;
- double moved_ratio = (new_per_node_doc_ratio - old_per_node_doc_ratio) * new_nodes - lost_docs_ratio;
- LOG(info, "estimated_moved_docs_ratio_crash(%u,%u,%u)=%4.2f", redundancy, crashed_nodes, num_nodes, moved_ratio);
- return moved_ratio;
-}
-
-double
-estimate_moved_docs_ratio_replace(uint32_t redundancy, uint32_t added_nodes, uint32_t retired_nodes, uint32_t num_nodes)
-{
- uint32_t old_nodes = num_nodes - added_nodes;
- double old_redundancy = std::min(redundancy, old_nodes);
- double old_per_node_doc_ratio = old_redundancy / old_nodes;
- uint32_t new_nodes = num_nodes - retired_nodes;
- double new_redundancy = std::min(redundancy, new_nodes);
- double new_per_node_doc_ratio = new_redundancy / new_nodes;
- double moved_ratio = new_per_node_doc_ratio * added_nodes;
- uint32_t stable_nodes = num_nodes - added_nodes - retired_nodes;
- // Account for extra documents moved from retired nodes to stable nodes
- double extra_per_stable_node_doc_ratio = new_per_node_doc_ratio * added_nodes / old_nodes;
- double extra_moved_ratio = (std::min(1.0, new_per_node_doc_ratio + extra_per_stable_node_doc_ratio) - old_per_node_doc_ratio) * stable_nodes;
- moved_ratio += extra_moved_ratio;
- LOG(info, "estimated_moved_docs_ratio_replace(%u,%u,%u,%u)=%4.2f, (of which %4.2f extra)", redundancy, added_nodes, retired_nodes, num_nodes, moved_ratio, extra_moved_ratio);
- return moved_ratio;
-}
-
class BMParams : public BmClusterParams,
public BmFeedParams
{
@@ -391,7 +325,7 @@ Benchmark::estimate_lost_docs()
case Mode::TEMP_CRASH:
{
double new_redundancy = std::min(_params.get_redundancy(), _params.get_num_nodes() - _params.get_flip_nodes());
- auto lost_docs_ratio = estimate_lost_docs_base_ratio(_params.get_redundancy(), _params.get_flip_nodes(), _params.get_num_nodes()) * new_redundancy;
+ auto lost_docs_ratio = EstimateMovedDocsRatio().estimate_lost_docs_base_ratio(_params.get_redundancy(), _params.get_flip_nodes(), _params.get_num_nodes()) * new_redundancy;
return _params.get_documents() * lost_docs_ratio;
}
default:
@@ -404,14 +338,21 @@ Benchmark::estimate_moved_docs()
{
switch(_params.get_mode()) {
case Mode::GROW:
- return _params.get_documents() * estimate_moved_docs_ratio_grow(_params.get_redundancy(), _params.get_flip_nodes(), _params.get_num_nodes());
+ return _params.get_documents() * EstimateMovedDocsRatio().estimate_moved_docs_ratio_grow(_params.get_redundancy(), _params.get_flip_nodes(), _params.get_num_nodes());
case Mode::SHRINK:
- return _params.get_documents() * estimate_moved_docs_ratio_shrink(_params.get_redundancy(), _params.get_flip_nodes(), _params.get_num_nodes());
+ return _params.get_documents() * EstimateMovedDocsRatio().estimate_moved_docs_ratio_shrink(_params.get_redundancy(), _params.get_flip_nodes(), _params.get_num_nodes());
case Mode::PERM_CRASH:
case Mode::TEMP_CRASH:
- return _params.get_documents() * estimate_moved_docs_ratio_crash(_params.get_redundancy(), _params.get_flip_nodes(), _params.get_num_nodes());
+ return _params.get_documents() * EstimateMovedDocsRatio().estimate_moved_docs_ratio_crash(_params.get_redundancy(), _params.get_flip_nodes(), _params.get_num_nodes());
case Mode::REPLACE:
- return _params.get_documents() * estimate_moved_docs_ratio_replace(_params.get_redundancy(), _params.get_flip_nodes(), _params.get_flip_nodes(), _params.get_num_nodes());
+ if (_params.get_num_nodes() < 10) {
+ // Calculate better estimate for moved docs ratio with brute force
+ auto scanner = CalculateMovedDocsRatio::make_replace_calculator(_params.get_redundancy(), _params.get_flip_nodes(), _params.get_flip_nodes(), _params.get_num_nodes());
+ scanner.scan();
+ return _params.get_documents() * scanner.get_moved_docs_ratio();
+ } else {
+ return _params.get_documents() * EstimateMovedDocsRatio().estimate_moved_docs_ratio_replace(_params.get_redundancy(), _params.get_flip_nodes(), _params.get_flip_nodes(), _params.get_num_nodes());
+ }
default:
return 0.0;
}