From a5f49846238184c86a58147053e0f4aa2edd9cd7 Mon Sep 17 00:00:00 2001 From: Tor Brede Vekterli Date: Tue, 12 Feb 2019 15:32:20 +0000 Subject: Derive correct distribution partition spec for grouped clusters Simplify code by emitting wildcards for all groups instead of using explicit leaf counts. Distribution code will distribute replicas evenly across all wildcarded groups. This fixes #8475 --- ...al_bucket_space_distribution_converter_test.cpp | 38 +++++++++++----------- .../global_bucket_space_distribution_converter.cpp | 20 +++++++----- vdslib/src/tests/state/grouptest.cpp | 14 ++++++++ 3 files changed, 45 insertions(+), 27 deletions(-) diff --git a/storage/src/tests/common/global_bucket_space_distribution_converter_test.cpp b/storage/src/tests/common/global_bucket_space_distribution_converter_test.cpp index 60e54dbec39..5afea9cd3cd 100644 --- a/storage/src/tests/common/global_bucket_space_distribution_converter_test.cpp +++ b/storage/src/tests/common/global_bucket_space_distribution_converter_test.cpp @@ -4,7 +4,6 @@ #include #include #include -#include namespace storage { @@ -119,7 +118,7 @@ distributor_auto_ownership_transfer_on_whole_group_down true group[0].index "invalid" group[0].name "invalid" group[0].capacity 1 -group[0].partitions "3|3|*" +group[0].partitions "*|*" group[1].index "0" group[1].name "rack0" group[1].capacity 1 @@ -165,7 +164,7 @@ group[3].name rack1 group[3].index 0.1 group[3].nodes[1] group[3].nodes[0].index 1 -group[4].name switch0 +group[4].name switch1 group[4].index 1 group[4].partitions * group[4].nodes[0] @@ -190,11 +189,11 @@ distributor_auto_ownership_transfer_on_whole_group_down true group[0].index "invalid" group[0].name "invalid" group[0].capacity 1 -group[0].partitions "2|2|*" +group[0].partitions "*|*" group[1].index "0" group[1].name "switch0" group[1].capacity 1 -group[1].partitions "1|1|*" +group[1].partitions "*|*" group[2].index "0.0" group[2].name "rack0" group[2].capacity 1 @@ -208,9 +207,9 @@ group[3].partitions "" group[3].nodes[0].index 1 group[3].nodes[0].retired false group[4].index "1" -group[4].name "switch0" +group[4].name "switch1" group[4].capacity 1 -group[4].partitions "1|1|*" +group[4].partitions "*|*" group[5].index "1.0" group[5].name "rack0" group[5].capacity 1 @@ -228,6 +227,9 @@ disk_distribution MODULO_BID CPPUNIT_ASSERT_EQUAL(expected_global_config, default_to_global_config(default_config)); } +// FIXME partition specs are order-invariant with regards to groups, so heterogenous +// setups will not produce the expected replica distribution. +// TODO Consider disallowing entirely when using global docs. void GlobalBucketSpaceDistributionConverterTest::can_transform_heterogenous_multi_group_config() { vespalib::string default_config( R"(redundancy 2 @@ -235,16 +237,16 @@ ready_copies 2 group[3] group[0].name "invalid" group[0].index "invalid" -group[0].partitions 1|* +group[0].partitions "1|*" group[0].nodes[0] group[1].name rack0 group[1].index 0 -group[1].nodes[1] +group[1].nodes[2] group[1].nodes[0].index 0 +group[1].nodes[1].index 1 group[2].name rack1 group[2].index 1 -group[2].nodes[2] -group[2].nodes[0].index 1 +group[2].nodes[1] group[2].nodes[1].index 2 )"); @@ -258,21 +260,21 @@ distributor_auto_ownership_transfer_on_whole_group_down true group[0].index "invalid" group[0].name "invalid" group[0].capacity 1 -group[0].partitions "1|2|*" +group[0].partitions "*|*" group[1].index "0" group[1].name "rack0" group[1].capacity 1 group[1].partitions "" group[1].nodes[0].index 0 group[1].nodes[0].retired false +group[1].nodes[1].index 1 +group[1].nodes[1].retired false group[2].index "1" group[2].name "rack1" group[2].capacity 1 group[2].partitions "" -group[2].nodes[0].index 1 +group[2].nodes[0].index 2 group[2].nodes[0].retired false -group[2].nodes[1].index 2 -group[2].nodes[1].retired false disk_distribution MODULO_BID )"); CPPUNIT_ASSERT_EQUAL(expected_global_config, default_to_global_config(default_config)); @@ -367,10 +369,8 @@ group[2].nodes[1].index 2 lib::Distribution global_distr(*global_cfg); lib::ClusterState state("distributor:6 storage:6"); - std::mt19937 rng; - std::uniform_int_distribution d(0, UINT64_MAX); - for (int i = 0; i < 100; ++i) { - document::BucketId bucket(16, d(rng)); + for (unsigned int i = 0; i < UINT16_MAX; ++i) { + document::BucketId bucket(16, i); const auto default_index = default_distr.getIdealDistributorNode(state, bucket, "ui"); const auto global_index = global_distr.getIdealDistributorNode(state, bucket, "ui"); CPPUNIT_ASSERT_EQUAL(default_index, global_index); diff --git a/storage/src/vespa/storage/common/global_bucket_space_distribution_converter.cpp b/storage/src/vespa/storage/common/global_bucket_space_distribution_converter.cpp index 5734fb6bb51..534644458bc 100644 --- a/storage/src/vespa/storage/common/global_bucket_space_distribution_converter.cpp +++ b/storage/src/vespa/storage/common/global_bucket_space_distribution_converter.cpp @@ -44,15 +44,19 @@ const Group& find_non_root_group_by_index(const vespalib::string& index, const G } vespalib::string sub_groups_to_partition_spec(const Group& parent) { - vespalib::asciistream partitions; - // In case of a flat cluster config, this ends up with a partition spec of '*', - // which is fine. It basically means "put all replicas in this group", which - // happens to be exactly what we want. - for (auto& child : parent.sub_groups) { - partitions << child.second->nested_leaf_count << '|'; + if (parent.sub_groups.empty()) { + return "*"; } - partitions << '*'; - return partitions.str(); + vespalib::asciistream spec; + // We simplify the generated partition spec by only emitting wildcard entries. + // These will have replicas evenly divided amongst them. + for (size_t i = 0; i < parent.sub_groups.size(); ++i) { + if (i != 0) { + spec << '|'; + } + spec << '*'; + } + return spec.str(); } bool is_leaf_group(const DistributionConfigBuilder::Group& g) noexcept { diff --git a/vdslib/src/tests/state/grouptest.cpp b/vdslib/src/tests/state/grouptest.cpp index 282e91e860f..94694154b83 100644 --- a/vdslib/src/tests/state/grouptest.cpp +++ b/vdslib/src/tests/state/grouptest.cpp @@ -180,6 +180,12 @@ GroupTest::testOperators() void GroupTest::testStarConversion() { + { + MAKEGROUP(g, "group", 0, "*"); + std::vector distribution = g.getDistribution(3); + CPPUNIT_ASSERT_EQUAL((size_t) 1, distribution.size()); + CPPUNIT_ASSERT_EQUAL((double) 3, distribution[0]); + } { MAKEGROUP(g, "group", 0, "1|*|*"); std::vector distribution = g.getDistribution(5); @@ -244,6 +250,14 @@ GroupTest::testStarConversion() CPPUNIT_ASSERT_EQUAL((double) 2, distribution[1]); CPPUNIT_ASSERT_EQUAL((double) 1, distribution[2]); } + { + MAKEGROUP(g, "group", 0, "*|*|*"); + std::vector distribution = g.getDistribution(12); // Shall be evenly divided + CPPUNIT_ASSERT_EQUAL((size_t) 3, distribution.size()); + CPPUNIT_ASSERT_EQUAL((double) 4, distribution[0]); + CPPUNIT_ASSERT_EQUAL((double) 4, distribution[1]); + CPPUNIT_ASSERT_EQUAL((double) 4, distribution[2]); + } { MAKEGROUP(g, "group", 0, "*|*|*|*"); std::vector distribution = g.getDistribution(5); -- cgit v1.2.3