summaryrefslogtreecommitdiffstats
path: root/storage
diff options
context:
space:
mode:
author Tor Brede Vekterli <vekterli@oath.com> 2017-12-04 10:59:46 +0100
committer GitHub <noreply@github.com> 2017-12-04 10:59:46 +0100
commit d164fbb93e277ef23ab610320a7cf8556e3c036e (patch)
tree 525da75cd979d3fab30b5dbc7f6a7a3542d54086 /storage
parent 0fc8970a2e11d5e2374f85e969098b114d014394 (diff)
parent c5a85e80e30700f3a14d58fa4c7b3fde4b928e1a (diff)
Merge pull request #4328 from vespa-engine/vekterli/add-global-distribution-transformation-utility
Add global distribution config transformation utility
Diffstat (limited to 'storage')
-rw-r--r-- storage/src/tests/common/CMakeLists.txt 1
-rw-r--r-- storage/src/tests/common/global_bucket_space_distribution_converter_test.cpp 385
-rw-r--r-- storage/src/vespa/storage/common/CMakeLists.txt 1
-rw-r--r-- storage/src/vespa/storage/common/global_bucket_space_distribution_converter.cpp 157
-rw-r--r-- storage/src/vespa/storage/common/global_bucket_space_distribution_converter.h 16
5 files changed, 560 insertions, 0 deletions
diff --git a/storage/src/tests/common/CMakeLists.txt b/storage/src/tests/common/CMakeLists.txt
index 8dd4e969d04..991726c935b 100644
--- a/storage/src/tests/common/CMakeLists.txt
+++ b/storage/src/tests/common/CMakeLists.txt
@@ -2,6 +2,7 @@
vespa_add_library(storage_testcommon TEST
SOURCES
dummystoragelink.cpp
+ global_bucket_space_distribution_converter_test.cpp
metricstest.cpp
storagelinktest.cpp
testhelper.cpp
diff --git a/storage/src/tests/common/global_bucket_space_distribution_converter_test.cpp b/storage/src/tests/common/global_bucket_space_distribution_converter_test.cpp
new file mode 100644
index 00000000000..d34bed304a0
--- /dev/null
+++ b/storage/src/tests/common/global_bucket_space_distribution_converter_test.cpp
@@ -0,0 +1,385 @@
+// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/storage/common/global_bucket_space_distribution_converter.h>
+#include <vespa/vdstestlib/cppunit/macros.h>
+#include <vespa/config/config.h>
+#include <vespa/config/print/asciiconfigwriter.h>
+#include <vespa/config/print/asciiconfigreader.h>
+#include <vespa/vdslib/state/clusterstate.h>
+#include <random>
+
+namespace storage {
+
+// CppUnit fixture for GlobalBucketSpaceDistributionConverter. Every test case registered
+// here is declared as a member function below and defined further down in this file.
+struct GlobalBucketSpaceDistributionConverterTest : public CppUnit::TestFixture {
+ CPPUNIT_TEST_SUITE(GlobalBucketSpaceDistributionConverterTest);
+ CPPUNIT_TEST(can_transform_flat_cluster_config);
+ CPPUNIT_TEST(can_transform_single_level_multi_group_config);
+ CPPUNIT_TEST(can_transform_multi_level_multi_group_config);
+ CPPUNIT_TEST(can_transform_heterogenous_multi_group_config);
+ CPPUNIT_TEST(config_retired_state_is_propagated);
+ CPPUNIT_TEST(group_capacities_are_propagated);
+ CPPUNIT_TEST(global_distribution_has_same_owner_distributors_as_default);
+ CPPUNIT_TEST_SUITE_END();
+
+ // Full serialized-output comparisons for different group topologies.
+ void can_transform_flat_cluster_config();
+ void can_transform_single_level_multi_group_config();
+ void can_transform_multi_level_multi_group_config();
+ void can_transform_heterogenous_multi_group_config();
+ // Targeted checks of individual fields and of distributor ownership.
+ void config_retired_state_is_propagated();
+ void group_capacities_are_propagated();
+ void global_distribution_has_same_owner_distributors_as_default();
+};
+
+CPPUNIT_TEST_SUITE_REGISTRATION(GlobalBucketSpaceDistributionConverterTest);
+
+using DistributionConfig = vespa::config::content::StorDistributionConfig;
+
+namespace {
+
+// Parses `cfg`, given in the ASCII config text format, into a distribution config object.
+std::unique_ptr<DistributionConfig> string_to_config(const vespalib::string& cfg) {
+    vespalib::asciistream input(cfg);
+    config::AsciiConfigReader<DistributionConfig> ascii_reader(input);
+    return ascii_reader.read();
+}
+
+// Serializes `cfg` through the ASCII config writer and returns the resulting text.
+vespalib::string config_to_string(const DistributionConfig& cfg) {
+    vespalib::asciistream serialized;
+    config::AsciiConfigWriter ascii_writer(serialized);
+    ascii_writer.write(cfg);
+    return serialized.str();
+}
+
+// Round trip used by the serialized-output tests: parse the default config text, run it
+// through the converter and return the converted config as text again.
+vespalib::string default_to_global_config(const vespalib::string& default_config) {
+    const auto parsed_default = string_to_config(default_config);
+    const auto converted = GlobalBucketSpaceDistributionConverter::convert_to_global(*parsed_default);
+    return config_to_string(*converted);
+}
+
+}
+
+// Flat cluster: a single root group that directly contains three nodes. The converted
+// config is expected to have redundancy and ready_copies equal to the node count (3) and
+// a root partition spec of "*". The complete serialized output is compared.
+void GlobalBucketSpaceDistributionConverterTest::can_transform_flat_cluster_config() {
+ vespalib::string default_config(
+R"(redundancy 1
+group[1]
+group[0].name "invalid"
+group[0].index "invalid"
+group[0].partitions 1|*
+group[0].nodes[3]
+group[0].nodes[0].index 0
+group[0].nodes[1].index 1
+group[0].nodes[2].index 2
+)");
+
+ vespalib::string expected_global_config(
+R"(redundancy 3
+initial_redundancy 0
+ensure_primary_persisted true
+ready_copies 3
+active_per_leaf_group true
+distributor_auto_ownership_transfer_on_whole_group_down true
+group[0].index "invalid"
+group[0].name "invalid"
+group[0].capacity 1
+group[0].partitions "*"
+group[0].nodes[0].index 0
+group[0].nodes[0].retired false
+group[0].nodes[1].index 1
+group[0].nodes[1].retired false
+group[0].nodes[2].index 2
+group[0].nodes[2].retired false
+disk_distribution MODULO_BID
+)");
+ CPPUNIT_ASSERT_EQUAL(expected_global_config, default_to_global_config(default_config));
+}
+
+
+// One level of grouping: a root with two leaf groups of three nodes each. The converted
+// config is expected to have redundancy 6 and a root partition spec of "3|3|*", while the
+// leaf groups end up with an empty partition spec.
+void GlobalBucketSpaceDistributionConverterTest::can_transform_single_level_multi_group_config() {
+ vespalib::string default_config(
+R"(redundancy 2
+group[3]
+group[0].name "invalid"
+group[0].index "invalid"
+group[0].partitions 1|*
+group[0].nodes[0]
+group[1].name rack0
+group[1].index 0
+group[1].nodes[3]
+group[1].nodes[0].index 0
+group[1].nodes[1].index 1
+group[1].nodes[2].index 2
+group[2].name rack1
+group[2].index 1
+group[2].nodes[3]
+group[2].nodes[0].index 3
+group[2].nodes[1].index 4
+group[2].nodes[2].index 5
+)");
+
+ // The config converter cannot distinguish between default values
+ // and explicitly set ones, so we get a few more entries in our output
+ // config string.
+ // The most crucial parts of the transformed config are the root redundancy
+ // and the new partition config. We test _all_ config fields here so that
+ // we catch any state we fail to transfer.
+ vespalib::string expected_global_config(
+R"(redundancy 6
+initial_redundancy 0
+ensure_primary_persisted true
+ready_copies 6
+active_per_leaf_group true
+distributor_auto_ownership_transfer_on_whole_group_down true
+group[0].index "invalid"
+group[0].name "invalid"
+group[0].capacity 1
+group[0].partitions "3|3|*"
+group[1].index "0"
+group[1].name "rack0"
+group[1].capacity 1
+group[1].partitions ""
+group[1].nodes[0].index 0
+group[1].nodes[0].retired false
+group[1].nodes[1].index 1
+group[1].nodes[1].retired false
+group[1].nodes[2].index 2
+group[1].nodes[2].retired false
+group[2].index "1"
+group[2].name "rack1"
+group[2].capacity 1
+group[2].partitions ""
+group[2].nodes[0].index 3
+group[2].nodes[0].retired false
+group[2].nodes[1].index 4
+group[2].nodes[1].retired false
+group[2].nodes[2].index 5
+group[2].nodes[2].retired false
+disk_distribution MODULO_BID
+)");
+ CPPUNIT_ASSERT_EQUAL(expected_global_config, default_to_global_config(default_config));
+}
+
+// Two levels of grouping: root -> two inner groups -> two single-node leaf groups each.
+// The converted config is expected to have redundancy 4, a root partition spec of "2|2|*"
+// and "1|1|*" on each inner group.
+// NOTE(review): the input declares "group[5]" but lists entries group[0]..group[6], and
+// group[4] reuses the name "switch0" (possibly meant to be "switch1"). The expected output
+// mirrors both, so presumably the config reader does not enforce the declared array
+// size -- TODO confirm.
+void GlobalBucketSpaceDistributionConverterTest::can_transform_multi_level_multi_group_config() {
+ vespalib::string default_config(
+R"(redundancy 2
+group[5]
+group[0].name "invalid"
+group[0].index "invalid"
+group[0].partitions *|*
+group[0].nodes[0]
+group[1].name switch0
+group[1].index 0
+group[1].partitions 1|*
+group[1].nodes[0]
+group[2].name rack0
+group[2].index 0.0
+group[2].nodes[1]
+group[2].nodes[0].index 0
+group[3].name rack1
+group[3].index 0.1
+group[3].nodes[1]
+group[3].nodes[0].index 1
+group[4].name switch0
+group[4].index 1
+group[4].partitions *
+group[4].nodes[0]
+group[5].name rack0
+group[5].index 1.0
+group[5].nodes[1]
+group[5].nodes[0].index 2
+group[6].name rack1
+group[6].index 1.1
+group[6].nodes[1]
+group[6].nodes[0].index 3
+)");
+
+ // Note: leaf groups do not have a partition spec, only inner groups.
+ vespalib::string expected_global_config(
+R"(redundancy 4
+initial_redundancy 0
+ensure_primary_persisted true
+ready_copies 4
+active_per_leaf_group true
+distributor_auto_ownership_transfer_on_whole_group_down true
+group[0].index "invalid"
+group[0].name "invalid"
+group[0].capacity 1
+group[0].partitions "2|2|*"
+group[1].index "0"
+group[1].name "switch0"
+group[1].capacity 1
+group[1].partitions "1|1|*"
+group[2].index "0.0"
+group[2].name "rack0"
+group[2].capacity 1
+group[2].partitions ""
+group[2].nodes[0].index 0
+group[2].nodes[0].retired false
+group[3].index "0.1"
+group[3].name "rack1"
+group[3].capacity 1
+group[3].partitions ""
+group[3].nodes[0].index 1
+group[3].nodes[0].retired false
+group[4].index "1"
+group[4].name "switch0"
+group[4].capacity 1
+group[4].partitions "1|1|*"
+group[5].index "1.0"
+group[5].name "rack0"
+group[5].capacity 1
+group[5].partitions ""
+group[5].nodes[0].index 2
+group[5].nodes[0].retired false
+group[6].index "1.1"
+group[6].name "rack1"
+group[6].capacity 1
+group[6].partitions ""
+group[6].nodes[0].index 3
+group[6].nodes[0].retired false
+disk_distribution MODULO_BID
+)");
+ CPPUNIT_ASSERT_EQUAL(expected_global_config, default_to_global_config(default_config));
+}
+
+// Leaf groups of different sizes: rack0 has one node and rack1 has two. The converted
+// config is expected to have redundancy 3 and a root partition spec of "1|2|*", i.e. one
+// entry per leaf group carrying that group's node count.
+void GlobalBucketSpaceDistributionConverterTest::can_transform_heterogenous_multi_group_config() {
+ vespalib::string default_config(
+R"(redundancy 2
+ready_copies 2
+group[3]
+group[0].name "invalid"
+group[0].index "invalid"
+group[0].partitions 1|*
+group[0].nodes[0]
+group[1].name rack0
+group[1].index 0
+group[1].nodes[1]
+group[1].nodes[0].index 0
+group[2].name rack1
+group[2].index 1
+group[2].nodes[2]
+group[2].nodes[0].index 1
+group[2].nodes[1].index 2
+)");
+
+ vespalib::string expected_global_config(
+R"(redundancy 3
+initial_redundancy 0
+ensure_primary_persisted true
+ready_copies 3
+active_per_leaf_group true
+distributor_auto_ownership_transfer_on_whole_group_down true
+group[0].index "invalid"
+group[0].name "invalid"
+group[0].capacity 1
+group[0].partitions "1|2|*"
+group[1].index "0"
+group[1].name "rack0"
+group[1].capacity 1
+group[1].partitions ""
+group[1].nodes[0].index 0
+group[1].nodes[0].retired false
+group[2].index "1"
+group[2].name "rack1"
+group[2].capacity 1
+group[2].partitions ""
+group[2].nodes[0].index 1
+group[2].nodes[0].retired false
+group[2].nodes[1].index 2
+group[2].nodes[1].retired false
+disk_distribution MODULO_BID
+)");
+ CPPUNIT_ASSERT_EQUAL(expected_global_config, default_to_global_config(default_config));
+}
+
+// Verifies that the per-node "retired" flag of the source config is carried over
+// unchanged to the converted config (checked on the config object, not on its text form).
+void GlobalBucketSpaceDistributionConverterTest::config_retired_state_is_propagated() {
+ vespalib::string default_config(
+R"(redundancy 1
+group[1]
+group[0].name "invalid"
+group[0].index "invalid"
+group[0].partitions 1|*
+group[0].nodes[3]
+group[0].nodes[0].index 0
+group[0].nodes[0].retired false
+group[0].nodes[1].index 1
+group[0].nodes[1].retired true
+group[0].nodes[2].index 2
+group[0].nodes[2].retired true
+)");
+
+ auto default_cfg = string_to_config(default_config);
+ auto as_global = GlobalBucketSpaceDistributionConverter::convert_to_global(*default_cfg);
+
+ CPPUNIT_ASSERT_EQUAL(size_t(1), as_global->group.size());
+ CPPUNIT_ASSERT_EQUAL(size_t(3), as_global->group[0].nodes.size());
+ CPPUNIT_ASSERT_EQUAL(false, as_global->group[0].nodes[0].retired);
+ CPPUNIT_ASSERT_EQUAL(true, as_global->group[0].nodes[1].retired);
+ CPPUNIT_ASSERT_EQUAL(true, as_global->group[0].nodes[2].retired);
+}
+
+// Verifies that explicitly configured group capacities (5, 2 and 3 in the input) are
+// carried over unchanged to the corresponding groups of the converted config.
+void GlobalBucketSpaceDistributionConverterTest::group_capacities_are_propagated() {
+ vespalib::string default_config(
+R"(redundancy 2
+group[3]
+group[0].name "invalid"
+group[0].index "invalid"
+group[0].partitions 1|*
+group[0].capacity 5
+group[0].nodes[0]
+group[1].name rack0
+group[1].index 0
+group[1].capacity 2
+group[1].nodes[1]
+group[1].nodes[0].index 0
+group[2].name rack1
+group[2].capacity 3
+group[2].index 1
+group[2].nodes[1]
+group[2].nodes[0].index 1
+)");
+ auto default_cfg = string_to_config(default_config);
+ auto as_global = GlobalBucketSpaceDistributionConverter::convert_to_global(*default_cfg);
+
+ CPPUNIT_ASSERT_EQUAL(size_t(3), as_global->group.size());
+ CPPUNIT_ASSERT_DOUBLES_EQUAL(5.0, as_global->group[0].capacity, 0.00001);
+ CPPUNIT_ASSERT_DOUBLES_EQUAL(2.0, as_global->group[1].capacity, 0.00001);
+ CPPUNIT_ASSERT_DOUBLES_EQUAL(3.0, as_global->group[2].capacity, 0.00001);
+}
+
+// Verifies that conversion does not change which distributor is the ideal one for a
+// bucket: for 100 generated bucket ids, the node returned by getIdealDistributorNode()
+// must be the same for the default distribution and for the converted one.
+void GlobalBucketSpaceDistributionConverterTest::global_distribution_has_same_owner_distributors_as_default() {
+ vespalib::string default_config(
+R"(redundancy 2
+ready_copies 2
+group[3]
+group[0].name "invalid"
+group[0].index "invalid"
+group[0].partitions 1|*
+group[0].nodes[0]
+group[1].name rack0
+group[1].index 0
+group[1].nodes[1]
+group[1].nodes[0].index 0
+group[2].name rack1
+group[2].index 1
+group[2].nodes[2]
+group[2].nodes[0].index 1
+group[2].nodes[1].index 2
+)");
+
+ auto default_cfg = string_to_config(default_config);
+ auto global_cfg = GlobalBucketSpaceDistributionConverter::convert_to_global(*default_cfg);
+
+ lib::Distribution default_distr(*default_cfg);
+ lib::Distribution global_distr(*global_cfg);
+ lib::ClusterState state("distributor:6 storage:6");
+
+ // Default-constructed engine, i.e. fixed default seed: same bucket sequence on every run.
+ std::mt19937 rng;
+ std::uniform_int_distribution<uint64_t> d(0, UINT64_MAX);
+ for (int i = 0; i < 100; ++i) {
+ document::BucketId bucket(16, d(rng));
+ const auto default_index = default_distr.getIdealDistributorNode(state, bucket, "ui");
+ const auto global_index = global_distr.getIdealDistributorNode(state, bucket, "ui");
+ CPPUNIT_ASSERT_EQUAL(default_index, global_index);
+ }
+}
+
+}
\ No newline at end of file
diff --git a/storage/src/vespa/storage/common/CMakeLists.txt b/storage/src/vespa/storage/common/CMakeLists.txt
index b98058b3c3d..c53aead2ba2 100644
--- a/storage/src/vespa/storage/common/CMakeLists.txt
+++ b/storage/src/vespa/storage/common/CMakeLists.txt
@@ -6,6 +6,7 @@ vespa_add_library(storage_common OBJECT
content_bucket_space.cpp
content_bucket_space_repo.cpp
distributorcomponent.cpp
+ global_bucket_space_distribution_converter.cpp
messagebucket.cpp
messagesender.cpp
servicelayercomponent.cpp
diff --git a/storage/src/vespa/storage/common/global_bucket_space_distribution_converter.cpp b/storage/src/vespa/storage/common/global_bucket_space_distribution_converter.cpp
new file mode 100644
index 00000000000..d8a3dd4780f
--- /dev/null
+++ b/storage/src/vespa/storage/common/global_bucket_space_distribution_converter.cpp
@@ -0,0 +1,157 @@
+// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include "global_bucket_space_distribution_converter.h"
+#include <vespa/vespalib/util/stringfmt.h>
+#include <vespa/vdslib/distribution/distribution_config_util.h>
+#include <vespa/vespalib/stllike/asciistream.h>
+#include <cassert>
+#include <map>
+#include <memory>
+
+namespace storage {
+
+using DistributionConfig = vespa::config::content::StorDistributionConfig;
+using DistributionConfigBuilder = vespa::config::content::StorDistributionConfigBuilder;
+
+namespace {
+
+// Node in the in-memory group tree that is built from the flat config group array.
+struct Group {
+ // Number of config nodes in this group plus those of every group inserted beneath it.
+ uint16_t nested_leaf_count{0};
+ // Immediate child groups, keyed by the child's own component of the group index path.
+ std::map<uint16_t, std::unique_ptr<Group>> sub_groups;
+};
+
+// Fills in the builder fields that do not depend on the group topology. Only the disk
+// distribution is taken from `source`; the remaining fields are set to fixed values.
+void set_distribution_invariant_config_fields(DistributionConfigBuilder& builder, const DistributionConfig& source) {
+    builder.initialRedundancy = 0;
+    // TODO consider how to best support n-of-m replication for global docs
+    builder.ensurePrimaryPersisted = true;
+    builder.activePerLeafGroup = true;
+    builder.distributorAutoOwnershipTransferOnWholeGroupDown = true;
+    builder.diskDistribution = source.diskDistribution;
+}
+
+// Resolves a group index string to its node in the parsed group tree by descending one
+// level from `root` per component of the group path. Every component is required to
+// exist in the tree; this is only enforced by assert.
+const Group& find_non_root_group_by_index(const vespalib::string& index, const Group& root) {
+    const auto components = lib::DistributionConfigUtil::getGroupPath(index);
+    const Group* current = &root;
+    for (const auto component : components) {
+        const auto found = current->sub_groups.find(component);
+        assert(found != current->sub_groups.end());
+        current = found->second.get();
+    }
+    return *current;
+}
+
+// Builds the partition spec for `parent`: the nested leaf count of each immediate
+// sub group followed by '|', in sub group key order, terminated by a single '*'.
+vespalib::string sub_groups_to_partition_spec(const Group& parent) {
+    vespalib::asciistream spec;
+    // A group without sub groups (e.g. the root of a flat cluster config) yields just
+    // '*'. That is fine: it basically means "put all replicas in this group", which is
+    // exactly what we want.
+    for (auto it = parent.sub_groups.begin(); it != parent.sub_groups.end(); ++it) {
+        spec << it->second->nested_leaf_count << '|';
+    }
+    spec << '*';
+    return spec.str();
+}
+
+// A config group counts as a leaf group when it has at least one node of its own.
+bool is_leaf_group(const DistributionConfigBuilder::Group& g) noexcept {
+    return g.nodes.size() > 0;
+}
+
+// Attaches `new_group` to the tree rooted at `root`, at the position given by the index
+// path of `config_source_group`. Every group visited on the way down, starting with the
+// root, has its nested leaf count increased by the node count of the added group.
+// All ancestors must already be present and the final path component must not be taken
+// yet; both conditions are only enforced by assert.
+void insert_new_group_into_tree(
+        std::unique_ptr<Group> new_group,
+        const DistributionConfigBuilder::Group& config_source_group,
+        Group& root) {
+    const auto path = lib::DistributionConfigUtil::getGroupPath(config_source_group.index);
+    assert(!path.empty());
+
+    const auto added_leaves = config_source_group.nodes.size(); // Zero if added group is not a leaf.
+    Group* cursor = &root;
+    for (size_t depth = 0; depth < path.size(); ++depth) {
+        cursor->nested_leaf_count += added_leaves;
+        const auto existing = cursor->sub_groups.find(path[depth]);
+        if (existing == cursor->sub_groups.end()) {
+            // A missing entry is only valid for the last item in the path.
+            assert(depth == path.size() - 1);
+            cursor->sub_groups.emplace(path.back(), std::move(new_group));
+        } else {
+            assert(depth != path.size() - 1);
+            cursor = existing->second.get();
+        }
+    }
+}
+
+// Appends a copy of the source root group to the builder, with its partition spec
+// replaced by the one derived from the parsed tree root's immediate sub groups.
+void build_transformed_root_group(DistributionConfigBuilder& builder,
+                                  const DistributionConfigBuilder::Group& config_source_root,
+                                  const Group& parsed_root) {
+    DistributionConfigBuilder::Group transformed(config_source_root);
+    transformed.partitions = sub_groups_to_partition_spec(parsed_root);
+    builder.group.emplace_back(std::move(transformed));
+}
+
+// Appends a copy of a non-root source group to the builder. Groups that are not leaf
+// groups get a partition spec derived from their node in the parsed tree; leaf groups
+// keep whatever partition spec the source group carried.
+void build_transformed_non_root_group(DistributionConfigBuilder& builder,
+                                      const DistributionConfigBuilder::Group& config_source_group,
+                                      const Group& parsed_root) {
+    DistributionConfigBuilder::Group transformed(config_source_group);
+    const bool is_inner_group = !is_leaf_group(config_source_group);
+    if (is_inner_group) { // Partition specs only apply to inner nodes
+        const auto& tree_node = find_non_root_group_by_index(config_source_group.index, parsed_root);
+        transformed.partitions = sub_groups_to_partition_spec(tree_node);
+    }
+    builder.group.emplace_back(std::move(transformed));
+}
+
+// Builds the group tree from the flat config group array. The first config group becomes
+// the root; every following group is inserted below it according to its index path.
+// Returns a null pointer if `source` has no groups at all.
+std::unique_ptr<Group> create_group_tree_from_config(const DistributionConfig& source) {
+    std::unique_ptr<Group> tree_root;
+    for (const auto& cfg_group : source.group) {
+        auto parsed = std::make_unique<Group>();
+        assert(cfg_group.nodes.size() < UINT16_MAX);
+        parsed->nested_leaf_count = static_cast<uint16_t>(cfg_group.nodes.size());
+        if (!tree_root) {
+            tree_root = std::move(parsed);
+            continue;
+        }
+        insert_new_group_into_tree(std::move(parsed), cfg_group, *tree_root);
+    }
+    return tree_root;
+}
+
+/* Even though groups are inherently hierarchical, the config is a flat array with a
+ * hierarchy bolted on through the use of (more or less) "multi-dimensional" index strings.
+ * Index string of root group is always "invalid" (or possibly some other string that cannot
+ * be interpreted as a dot-separated tree node path). Other groups have an index of the
+ * form "X.Y.Z", where Z is the group's own index within its immediate parent, Y is that
+ * parent's index within its own parent, and so on. Just
+ * stating Z itself is not sufficient to uniquely identify the group, as group indices are
+ * not unique _across_ groups. For indices "0.1" and "1.1", the trailing "1" refers to 2
+ * distinct groups, as they have different parents.
+ *
+ * It may be noted that the group index strings do _not_ include the root group, so we
+ * have to always implicitly include it ourselves.
+ *
+ * Config groups are ordered so that when a group is encountered, all its parents (and
+ * transitively, its parents again etc) have already been processed. This directly
+ * implies that the root group is always the first group present in the config.
+ */
+void build_global_groups(DistributionConfigBuilder& builder, const DistributionConfig& source) {
+    assert(!source.group.empty()); // TODO gracefully handle empty config?
+    const auto parsed_root = create_group_tree_from_config(source);
+
+    // The first config entry is the root group; all entries after it are non-root groups.
+    bool at_root = true;
+    for (const auto& cfg_group : source.group) {
+        if (at_root) {
+            build_transformed_root_group(builder, cfg_group, *parsed_root);
+            at_root = false;
+        } else {
+            build_transformed_non_root_group(builder, cfg_group, *parsed_root);
+        }
+    }
+
+    // Redundancy and ready copies both become the node count accumulated on the tree root.
+    builder.redundancy = parsed_root->nested_leaf_count;
+    builder.readyCopies = builder.redundancy;
+}
+
+} // anon ns
+
+// Builds a new distribution config from `source`: the groups, redundancy and ready
+// copies are derived from the source group topology, the remaining fields are set by
+// set_distribution_invariant_config_fields(). `source` itself is not modified.
+std::shared_ptr<DistributionConfig>
+GlobalBucketSpaceDistributionConverter::convert_to_global(const DistributionConfig& source) {
+    DistributionConfigBuilder global_builder;
+    // The two steps below write disjoint builder fields, so their order does not matter.
+    build_global_groups(global_builder, source);
+    set_distribution_invariant_config_fields(global_builder, source);
+    return std::make_shared<DistributionConfig>(global_builder);
+}
+
+}
\ No newline at end of file
diff --git a/storage/src/vespa/storage/common/global_bucket_space_distribution_converter.h b/storage/src/vespa/storage/common/global_bucket_space_distribution_converter.h
new file mode 100644
index 00000000000..32a43b3081e
--- /dev/null
+++ b/storage/src/vespa/storage/common/global_bucket_space_distribution_converter.h
@@ -0,0 +1,16 @@
+// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <vespa/vdslib/distribution/distribution.h>
+#include <vespa/config-stor-distribution.h>
+#include <memory>
+
+namespace storage {
+
+/**
+ * Stateless utility for converting a distribution config into its "global" counterpart
+ * (presumably the config used for the global bucket space, going by the name).
+ */
+struct GlobalBucketSpaceDistributionConverter {
+ using DistributionConfig = vespa::config::content::StorDistributionConfig;
+ // Returns a newly allocated config derived from the given source config, which is
+ // taken by const reference.
+ static std::shared_ptr<DistributionConfig> convert_to_global(const DistributionConfig&);
+};
+
+}
\ No newline at end of file