From c5a85e80e30700f3a14d58fa4c7b3fde4b928e1a Mon Sep 17 00:00:00 2001 From: Tor Brede Vekterli Date: Thu, 30 Nov 2017 16:25:08 +0000 Subject: Add utility for transforming default distribution config to global Global distribution config in this context means a config that places a replica of every bucket on every node in the group hierarchy. --- storage/src/tests/common/CMakeLists.txt | 1 + ...al_bucket_space_distribution_converter_test.cpp | 385 +++++++++++++++++++++ storage/src/vespa/storage/common/CMakeLists.txt | 1 + .../global_bucket_space_distribution_converter.cpp | 157 +++++++++ .../global_bucket_space_distribution_converter.h | 16 + 5 files changed, 560 insertions(+) create mode 100644 storage/src/tests/common/global_bucket_space_distribution_converter_test.cpp create mode 100644 storage/src/vespa/storage/common/global_bucket_space_distribution_converter.cpp create mode 100644 storage/src/vespa/storage/common/global_bucket_space_distribution_converter.h (limited to 'storage') diff --git a/storage/src/tests/common/CMakeLists.txt b/storage/src/tests/common/CMakeLists.txt index 8dd4e969d04..991726c935b 100644 --- a/storage/src/tests/common/CMakeLists.txt +++ b/storage/src/tests/common/CMakeLists.txt @@ -2,6 +2,7 @@ vespa_add_library(storage_testcommon TEST SOURCES dummystoragelink.cpp + global_bucket_space_distribution_converter_test.cpp metricstest.cpp storagelinktest.cpp testhelper.cpp diff --git a/storage/src/tests/common/global_bucket_space_distribution_converter_test.cpp b/storage/src/tests/common/global_bucket_space_distribution_converter_test.cpp new file mode 100644 index 00000000000..d34bed304a0 --- /dev/null +++ b/storage/src/tests/common/global_bucket_space_distribution_converter_test.cpp @@ -0,0 +1,385 @@ +// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+ +#include +#include +#include +#include +#include +#include +#include + +namespace storage { + +/* CppUnit fixture for GlobalBucketSpaceDistributionConverter::convert_to_global. The cases cover the rewritten redundancy and partition specs for several group topologies, propagation of node retired flags and group capacities, and that the ideal distributor of a bucket is the same before and after conversion. */ struct GlobalBucketSpaceDistributionConverterTest : public CppUnit::TestFixture { + CPPUNIT_TEST_SUITE(GlobalBucketSpaceDistributionConverterTest); + CPPUNIT_TEST(can_transform_flat_cluster_config); + CPPUNIT_TEST(can_transform_single_level_multi_group_config); + CPPUNIT_TEST(can_transform_multi_level_multi_group_config); + CPPUNIT_TEST(can_transform_heterogenous_multi_group_config); + CPPUNIT_TEST(config_retired_state_is_propagated); + CPPUNIT_TEST(group_capacities_are_propagated); + CPPUNIT_TEST(global_distribution_has_same_owner_distributors_as_default); + CPPUNIT_TEST_SUITE_END(); + + void can_transform_flat_cluster_config(); + void can_transform_single_level_multi_group_config(); + void can_transform_multi_level_multi_group_config(); + void can_transform_heterogenous_multi_group_config(); + void config_retired_state_is_propagated(); + void group_capacities_are_propagated(); + void global_distribution_has_same_owner_distributors_as_default(); +}; + +CPPUNIT_TEST_SUITE_REGISTRATION(GlobalBucketSpaceDistributionConverterTest); + +using DistributionConfig = vespa::config::content::StorDistributionConfig; + +namespace { + +/* Parses a textual distribution config into a config object via AsciiConfigReader. */ std::unique_ptr string_to_config(const vespalib::string& cfg) { + vespalib::asciistream iss(cfg); + config::AsciiConfigReader reader(iss); + return reader.read(); +} + +/* Serializes a distribution config to text via AsciiConfigWriter. */ vespalib::string config_to_string(const DistributionConfig& cfg) { + vespalib::asciistream ost; + config::AsciiConfigWriter writer(ost); + writer.write(cfg); + return ost.str(); +} + +/* Test helper: parses default_config, runs it through convert_to_global and returns the converted config as text. */ vespalib::string default_to_global_config(const vespalib::string& default_config) { + auto default_cfg = string_to_config(default_config); + auto as_global = GlobalBucketSpaceDistributionConverter::convert_to_global(*default_cfg); + return config_to_string(*as_global); +} + +} + +/* Flat cluster: a single group holding nodes 0-2. Expects redundancy and ready_copies of 3 and a root partition spec of "*". */ void GlobalBucketSpaceDistributionConverterTest::can_transform_flat_cluster_config() { + vespalib::string default_config( 
+R"(redundancy 1 +group[1] +group[0].name "invalid" +group[0].index "invalid" +group[0].partitions 1|* +group[0].nodes[3] +group[0].nodes[0].index 0 +group[0].nodes[1].index 1 +group[0].nodes[2].index 2 +)"); + + vespalib::string expected_global_config( +R"(redundancy 3 +initial_redundancy 0 +ensure_primary_persisted true +ready_copies 3 +active_per_leaf_group true +distributor_auto_ownership_transfer_on_whole_group_down true +group[0].index "invalid" +group[0].name "invalid" +group[0].capacity 1 +group[0].partitions "*" +group[0].nodes[0].index 0 +group[0].nodes[0].retired false +group[0].nodes[1].index 1 +group[0].nodes[1].retired false +group[0].nodes[2].index 2 +group[0].nodes[2].retired false +disk_distribution MODULO_BID +)"); + CPPUNIT_ASSERT_EQUAL(expected_global_config, default_to_global_config(default_config)); +} + + +/* One level of grouping: two leaf groups of 3 nodes each below the root. Expects redundancy and ready_copies of 6 and a root partition spec of "3|3|*". */ void GlobalBucketSpaceDistributionConverterTest::can_transform_single_level_multi_group_config() { + vespalib::string default_config( +R"(redundancy 2 +group[3] +group[0].name "invalid" +group[0].index "invalid" +group[0].partitions 1|* +group[0].nodes[0] +group[1].name rack0 +group[1].index 0 +group[1].nodes[3] +group[1].nodes[0].index 0 +group[1].nodes[1].index 1 +group[1].nodes[2].index 2 +group[2].name rack1 +group[2].index 1 +group[2].nodes[3] +group[2].nodes[0].index 3 +group[2].nodes[1].index 4 +group[2].nodes[2].index 5 +)"); + + // The config converter cannot distinguish between default values + // and explicitly set ones, so we get a few more entries in our output + // config string. + // Most crucial parts of the transformed config is the root redundancy + // and the new partition config. We test _all_ config fields here so that + // we catch anything we miss transferring state of. 
+ vespalib::string expected_global_config( +R"(redundancy 6 +initial_redundancy 0 +ensure_primary_persisted true +ready_copies 6 +active_per_leaf_group true +distributor_auto_ownership_transfer_on_whole_group_down true +group[0].index "invalid" +group[0].name "invalid" +group[0].capacity 1 +group[0].partitions "3|3|*" +group[1].index "0" +group[1].name "rack0" +group[1].capacity 1 +group[1].partitions "" +group[1].nodes[0].index 0 +group[1].nodes[0].retired false +group[1].nodes[1].index 1 +group[1].nodes[1].retired false +group[1].nodes[2].index 2 +group[1].nodes[2].retired false +group[2].index "1" +group[2].name "rack1" +group[2].capacity 1 +group[2].partitions "" +group[2].nodes[0].index 3 +group[2].nodes[0].retired false +group[2].nodes[1].index 4 +group[2].nodes[1].retired false +group[2].nodes[2].index 5 +group[2].nodes[2].retired false +disk_distribution MODULO_BID +)"); + CPPUNIT_ASSERT_EQUAL(expected_global_config, default_to_global_config(default_config)); +} + +/* Two levels of grouping: two inner groups, each with two single-node leaf groups. Expects redundancy and ready_copies of 4, a root partition spec of "2|2|*" and "1|1|*" on each inner group. NOTE(review): the input declares group[5] although entries group[0] through group[6] are listed, and group[4] reuses the name switch0 of group[1]; the expected output mirrors both, so presumably the declared size is not significant to the reader - confirm. */ void GlobalBucketSpaceDistributionConverterTest::can_transform_multi_level_multi_group_config() { + vespalib::string default_config( +R"(redundancy 2 +group[5] +group[0].name "invalid" +group[0].index "invalid" +group[0].partitions *|* +group[0].nodes[0] +group[1].name switch0 +group[1].index 0 +group[1].partitions 1|* +group[1].nodes[0] +group[2].name rack0 +group[2].index 0.0 +group[2].nodes[1] +group[2].nodes[0].index 0 +group[3].name rack1 +group[3].index 0.1 +group[3].nodes[1] +group[3].nodes[0].index 1 +group[4].name switch0 +group[4].index 1 +group[4].partitions * +group[4].nodes[0] +group[5].name rack0 +group[5].index 1.0 +group[5].nodes[1] +group[5].nodes[0].index 2 +group[6].name rack1 +group[6].index 1.1 +group[6].nodes[1] +group[6].nodes[0].index 3 +)"); + + // Note: leaf groups do not have a partition spec, only inner groups. 
+ vespalib::string expected_global_config( +R"(redundancy 4 +initial_redundancy 0 +ensure_primary_persisted true +ready_copies 4 +active_per_leaf_group true +distributor_auto_ownership_transfer_on_whole_group_down true +group[0].index "invalid" +group[0].name "invalid" +group[0].capacity 1 +group[0].partitions "2|2|*" +group[1].index "0" +group[1].name "switch0" +group[1].capacity 1 +group[1].partitions "1|1|*" +group[2].index "0.0" +group[2].name "rack0" +group[2].capacity 1 +group[2].partitions "" +group[2].nodes[0].index 0 +group[2].nodes[0].retired false +group[3].index "0.1" +group[3].name "rack1" +group[3].capacity 1 +group[3].partitions "" +group[3].nodes[0].index 1 +group[3].nodes[0].retired false +group[4].index "1" +group[4].name "switch0" +group[4].capacity 1 +group[4].partitions "1|1|*" +group[5].index "1.0" +group[5].name "rack0" +group[5].capacity 1 +group[5].partitions "" +group[5].nodes[0].index 2 +group[5].nodes[0].retired false +group[6].index "1.1" +group[6].name "rack1" +group[6].capacity 1 +group[6].partitions "" +group[6].nodes[0].index 3 +group[6].nodes[0].retired false +disk_distribution MODULO_BID +)"); + CPPUNIT_ASSERT_EQUAL(expected_global_config, default_to_global_config(default_config)); +} + +/* Leaf groups of unequal size (1 node and 2 nodes). Expects redundancy and ready_copies of 3 and a root partition spec of "1|2|*". */ void GlobalBucketSpaceDistributionConverterTest::can_transform_heterogenous_multi_group_config() { + vespalib::string default_config( +R"(redundancy 2 +ready_copies 2 +group[3] +group[0].name "invalid" +group[0].index "invalid" +group[0].partitions 1|* +group[0].nodes[0] +group[1].name rack0 +group[1].index 0 +group[1].nodes[1] +group[1].nodes[0].index 0 +group[2].name rack1 +group[2].index 1 +group[2].nodes[2] +group[2].nodes[0].index 1 +group[2].nodes[1].index 2 +)"); + + vespalib::string expected_global_config( +R"(redundancy 3 +initial_redundancy 0 +ensure_primary_persisted true +ready_copies 3 +active_per_leaf_group true +distributor_auto_ownership_transfer_on_whole_group_down true +group[0].index "invalid" +group[0].name "invalid" 
+group[0].capacity 1 +group[0].partitions "1|2|*" +group[1].index "0" +group[1].name "rack0" +group[1].capacity 1 +group[1].partitions "" +group[1].nodes[0].index 0 +group[1].nodes[0].retired false +group[2].index "1" +group[2].name "rack1" +group[2].capacity 1 +group[2].partitions "" +group[2].nodes[0].index 1 +group[2].nodes[0].retired false +group[2].nodes[1].index 2 +group[2].nodes[1].retired false +disk_distribution MODULO_BID +)"); + CPPUNIT_ASSERT_EQUAL(expected_global_config, default_to_global_config(default_config)); +} + +/* The retired flag of each node (false, true, true) must be present unchanged in the converted config. */ void GlobalBucketSpaceDistributionConverterTest::config_retired_state_is_propagated() { + vespalib::string default_config( +R"(redundancy 1 +group[1] +group[0].name "invalid" +group[0].index "invalid" +group[0].partitions 1|* +group[0].nodes[3] +group[0].nodes[0].index 0 +group[0].nodes[0].retired false +group[0].nodes[1].index 1 +group[0].nodes[1].retired true +group[0].nodes[2].index 2 +group[0].nodes[2].retired true +)"); + + auto default_cfg = string_to_config(default_config); + auto as_global = GlobalBucketSpaceDistributionConverter::convert_to_global(*default_cfg); + + CPPUNIT_ASSERT_EQUAL(size_t(1), as_global->group.size()); + CPPUNIT_ASSERT_EQUAL(size_t(3), as_global->group[0].nodes.size()); + CPPUNIT_ASSERT_EQUAL(false, as_global->group[0].nodes[0].retired); + CPPUNIT_ASSERT_EQUAL(true, as_global->group[0].nodes[1].retired); + CPPUNIT_ASSERT_EQUAL(true, as_global->group[0].nodes[2].retired); +} + +/* The capacity of each group (5, 2 and 3) must be present unchanged in the converted config. */ void GlobalBucketSpaceDistributionConverterTest::group_capacities_are_propagated() { + vespalib::string default_config( +R"(redundancy 2 +group[3] +group[0].name "invalid" +group[0].index "invalid" +group[0].partitions 1|* +group[0].capacity 5 +group[0].nodes[0] +group[1].name rack0 +group[1].index 0 +group[1].capacity 2 +group[1].nodes[1] +group[1].nodes[0].index 0 +group[2].name rack1 +group[2].capacity 3 +group[2].index 1 +group[2].nodes[1] +group[2].nodes[0].index 1 +)"); + auto default_cfg = string_to_config(default_config); + 
auto as_global = GlobalBucketSpaceDistributionConverter::convert_to_global(*default_cfg); + + CPPUNIT_ASSERT_EQUAL(size_t(3), as_global->group.size()); + CPPUNIT_ASSERT_DOUBLES_EQUAL(5.0, as_global->group[0].capacity, 0.00001); + CPPUNIT_ASSERT_DOUBLES_EQUAL(2.0, as_global->group[1].capacity, 0.00001); + CPPUNIT_ASSERT_DOUBLES_EQUAL(3.0, as_global->group[2].capacity, 0.00001); +} + +/* Builds a Distribution from both the default and the converted config and checks, for 100 bucket ids drawn from a default-constructed std::mt19937, that getIdealDistributorNode returns the same node for both. */ void GlobalBucketSpaceDistributionConverterTest::global_distribution_has_same_owner_distributors_as_default() { + vespalib::string default_config( +R"(redundancy 2 +ready_copies 2 +group[3] +group[0].name "invalid" +group[0].index "invalid" +group[0].partitions 1|* +group[0].nodes[0] +group[1].name rack0 +group[1].index 0 +group[1].nodes[1] +group[1].nodes[0].index 0 +group[2].name rack1 +group[2].index 1 +group[2].nodes[2] +group[2].nodes[0].index 1 +group[2].nodes[1].index 2 +)"); + + auto default_cfg = string_to_config(default_config); + auto global_cfg = GlobalBucketSpaceDistributionConverter::convert_to_global(*default_cfg); + + lib::Distribution default_distr(*default_cfg); + lib::Distribution global_distr(*global_cfg); + lib::ClusterState state("distributor:6 storage:6"); + + std::mt19937 rng; + std::uniform_int_distribution d(0, UINT64_MAX); + for (int i = 0; i < 100; ++i) { + document::BucketId bucket(16, d(rng)); + const auto default_index = default_distr.getIdealDistributorNode(state, bucket, "ui"); + const auto global_index = global_distr.getIdealDistributorNode(state, bucket, "ui"); + CPPUNIT_ASSERT_EQUAL(default_index, global_index); + } +} + +} \ No newline at end of file diff --git a/storage/src/vespa/storage/common/CMakeLists.txt b/storage/src/vespa/storage/common/CMakeLists.txt index b98058b3c3d..c53aead2ba2 100644 --- a/storage/src/vespa/storage/common/CMakeLists.txt +++ b/storage/src/vespa/storage/common/CMakeLists.txt @@ -6,6 +6,7 @@ vespa_add_library(storage_common OBJECT content_bucket_space.cpp content_bucket_space_repo.cpp distributorcomponent.cpp + 
global_bucket_space_distribution_converter.cpp messagebucket.cpp messagesender.cpp servicelayercomponent.cpp diff --git a/storage/src/vespa/storage/common/global_bucket_space_distribution_converter.cpp b/storage/src/vespa/storage/common/global_bucket_space_distribution_converter.cpp new file mode 100644 index 00000000000..d8a3dd4780f --- /dev/null +++ b/storage/src/vespa/storage/common/global_bucket_space_distribution_converter.cpp @@ -0,0 +1,157 @@ +// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include "global_bucket_space_distribution_converter.h" +#include +#include +#include +#include +#include +#include + +namespace storage { + +using DistributionConfig = vespa::config::content::StorDistributionConfig; +using DistributionConfigBuilder = vespa::config::content::StorDistributionConfigBuilder; + +namespace { + +/* Node of the group tree parsed from the flat config array. nested_leaf_count starts as the number of nodes configured on the group itself and is increased by the node count of every group inserted below it; sub_groups holds the direct children, keyed by the last component of their index path. */ struct Group { + uint16_t nested_leaf_count{0}; + std::map> sub_groups; +}; + +/* Sets the builder fields that do not depend on the group topology. Only diskDistribution is copied from source; the other fields are assigned fixed values. */ void set_distribution_invariant_config_fields(DistributionConfigBuilder& builder, const DistributionConfig& source) { + builder.diskDistribution = source.diskDistribution; + builder.distributorAutoOwnershipTransferOnWholeGroupDown = true; + builder.activePerLeafGroup = true; + // TODO consider how to best support n-of-m replication for global docs + builder.ensurePrimaryPersisted = true; + builder.initialRedundancy = 0; +} + +/* Walks the parsed tree from root along the path obtained from the index string and returns the group reached. Every path component must exist in the tree; this is only checked by assert. */ const Group& find_non_root_group_by_index(const vespalib::string& index, const Group& root) { + auto path = lib::DistributionConfigUtil::getGroupPath(index); + auto* node = &root; + for (auto idx : path) { + auto child_iter = node->sub_groups.find(idx); + assert(child_iter != node->sub_groups.end()); + node = child_iter->second.get(); + } + return *node; +} + +/* Builds the partition spec for parent: the nested_leaf_count of each direct sub group in map key order, each followed by '|', terminated by '*'. */ vespalib::string sub_groups_to_partition_spec(const Group& parent) { + vespalib::asciistream partitions; + // In case of a flat cluster config, this ends up with a partition spec of '*', + // which is fine. 
It basically means "put all replicas in this group", which + // happens to be exactly what we want. + for (auto& child : parent.sub_groups) { + partitions << child.second->nested_leaf_count << '|'; + } + partitions << '*'; + return partitions.str(); +} + +/* A config group is treated as a leaf iff it has at least one node configured directly on it. */ bool is_leaf_group(const DistributionConfigBuilder::Group& g) noexcept { + return !g.nodes.empty(); +} + +/* Inserts new_group into the tree below root at the position given by config_source_group.index. While descending, the node count of the source group is added to nested_leaf_count of root and of every ancestor passed. All ancestors must already be in the tree and the group itself must not be; both are only checked by assert. */ void insert_new_group_into_tree( + std::unique_ptr new_group, + const DistributionConfigBuilder::Group& config_source_group, + Group& root) { + const auto path = lib::DistributionConfigUtil::getGroupPath(config_source_group.index); + assert(!path.empty()); + + Group* parent = &root; + for (size_t i = 0; i < path.size(); ++i) { + const auto idx = path[i]; + parent->nested_leaf_count += config_source_group.nodes.size(); // Empty if added group is not a leaf. + auto g_iter = parent->sub_groups.find(idx); + if (g_iter != parent->sub_groups.end()) { + assert(i != path.size() - 1); + parent = g_iter->second.get(); + } else { + assert(i == path.size() - 1); // Only valid case for last item in path. 
+ parent->sub_groups.emplace(path.back(), std::move(new_group)); + } + } +} + +/* Appends a copy of the source root group to builder.group, with its partition spec replaced by the one derived from the parsed tree root. */ void build_transformed_root_group(DistributionConfigBuilder& builder, + const DistributionConfigBuilder::Group& config_source_root, + const Group& parsed_root) { + DistributionConfigBuilder::Group new_root(config_source_root); + new_root.partitions = sub_groups_to_partition_spec(parsed_root); + builder.group.emplace_back(std::move(new_root)); +} + +/* Appends a copy of a non-root source group to builder.group. Groups without nodes get a partition spec recomputed from the parsed tree; groups with nodes keep the copied value. */ void build_transformed_non_root_group(DistributionConfigBuilder& builder, + const DistributionConfigBuilder::Group& config_source_group, + const Group& parsed_root) { + DistributionConfigBuilder::Group new_group(config_source_group); + if (!is_leaf_group(config_source_group)) { // Partition specs only apply to inner nodes + const auto& g = find_non_root_group_by_index(config_source_group.index, parsed_root); + new_group.partitions = sub_groups_to_partition_spec(g); + } + builder.group.emplace_back(std::move(new_group)); +} + +/* Builds the Group tree from the flat config group array. The first config group becomes the root and every later group is inserted by its index path. Returns a null pointer when source has no groups. */ std::unique_ptr create_group_tree_from_config(const DistributionConfig& source) { + std::unique_ptr root; + for (auto& g : source.group) { + auto new_group = std::make_unique(); + assert(g.nodes.size() < UINT16_MAX); + new_group->nested_leaf_count = static_cast(g.nodes.size()); + if (root) { + insert_new_group_into_tree(std::move(new_group), g, *root); + } else { + root = std::move(new_group); + } + } + return root; +} + +/* Even though groups are inherently hierarchical, the config is a flat array with a + * hierarchy bolted on through the use of (more or less) "multi-dimensional" index strings. + * Index string of root group is always "invalid" (or possibly some other string that cannot + * be interpreted as a dot-separated tree node path). Other groups have an index of the + * form "X.Y.Z", where Z is the group's own index within its immediate parent, Y is that parent's index within its own parent and so on. Just + * stating Z itself is not sufficient to uniquely identify the group, as group indices are + * not unique _across_ groups. 
For indices "0.1" and "1.1", the trailing "1" refers to 2 + * distinct groups, as they have different parents. + * + * It may be noted that the group index strings do _not_ include the root group, so we + * have to always implicitly include it ourselves. + * + * Config groups are ordered so that when a group is encountered, all its parents (and + * transitively, its parents again etc) have already been processed. This directly + * implies that the root group is always the first group present in the config. + */ +/* Emits the transformed root group followed by all transformed non-root groups into builder, then sets redundancy and readyCopies to the nested leaf count of the root. An empty group list is only guarded by assert. */ void build_global_groups(DistributionConfigBuilder& builder, const DistributionConfig& source) { + assert(!source.group.empty()); // TODO gracefully handle empty config? + auto root = create_group_tree_from_config(source); + + auto g_iter = source.group.begin(); + const auto g_end = source.group.end(); + build_transformed_root_group(builder, *g_iter, *root); + ++g_iter; + for (; g_iter != g_end; ++g_iter) { + build_transformed_non_root_group(builder, *g_iter, *root); + } + + builder.redundancy = root->nested_leaf_count; + builder.readyCopies = builder.redundancy; +} + +} // anon ns + +/* Returns a newly built config derived from source: topology-independent fields are set first, then the groups, redundancy and ready copies are derived from the source group hierarchy. source itself is not modified. */ std::shared_ptr +GlobalBucketSpaceDistributionConverter::convert_to_global(const DistributionConfig& source) { + DistributionConfigBuilder builder; + set_distribution_invariant_config_fields(builder, source); + build_global_groups(builder, source); + return std::make_shared(builder); +} + +} \ No newline at end of file diff --git a/storage/src/vespa/storage/common/global_bucket_space_distribution_converter.h b/storage/src/vespa/storage/common/global_bucket_space_distribution_converter.h new file mode 100644 index 00000000000..32a43b3081e --- /dev/null +++ b/storage/src/vespa/storage/common/global_bucket_space_distribution_converter.h @@ -0,0 +1,16 @@ +// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+ +#pragma once + +#include +#include +#include + +namespace storage { + +/* Stateless utility that derives a "global" distribution config from a default one, i.e. a config that places a replica of every bucket on every node in the group hierarchy. */ struct GlobalBucketSpaceDistributionConverter { + using DistributionConfig = vespa::config::content::StorDistributionConfig; + /* Returns the global config derived from the given default config as a new shared object; the argument is taken by const reference. */ static std::shared_ptr convert_to_global(const DistributionConfig&); +}; + +} \ No newline at end of file -- cgit v1.2.3