diff options
author | Tor Brede Vekterli <vekterli@verizonmedia.com> | 2020-06-25 15:12:39 +0000 |
---|---|---|
committer | Tor Brede Vekterli <vekterli@verizonmedia.com> | 2020-06-29 11:14:25 +0000 |
commit | bfd64c5fd361d8d6a8bc6c217073f7786a6e045f (patch) | |
tree | 191c14a5c2745e44fb3421c82b27678817a264cb /vdslib | |
parent | e8eb7c3df52e9e3c8068d5f2932b6d255ba90868 (diff) |
Only sort once during ideal group calculations
Avoids invoking `std::sort` O(n) times in favor of just once.
Benchmark for 150 groups of 1 node each:
* Before: 0.0004381478 seconds per invocation
* After: 0.0000377917 seconds per invocation
Diffstat (limited to 'vdslib')
-rw-r--r-- | vdslib/src/tests/distribution/distributiontest.cpp | 106 | ||||
-rw-r--r-- | vdslib/src/vespa/vdslib/distribution/distribution.cpp | 45 |
2 files changed, 127 insertions, 24 deletions
diff --git a/vdslib/src/tests/distribution/distributiontest.cpp b/vdslib/src/tests/distribution/distributiontest.cpp index f0b48faebef..5a337433bdb 100644 --- a/vdslib/src/tests/distribution/distributiontest.cpp +++ b/vdslib/src/tests/distribution/distributiontest.cpp @@ -13,10 +13,14 @@ #include <vespa/vespalib/io/fileutil.h> #include <vespa/vespalib/stllike/lexical_cast.h> #include <vespa/vespalib/text/stringtokenizer.h> +#include <vespa/vespalib/util/benchmark_timer.h> +#include <gmock/gmock.h> #include <chrono> #include <thread> #include <fstream> +using namespace ::testing; + namespace storage::lib { template <typename T> @@ -1143,4 +1147,106 @@ TEST(DistributionTest, test_hierarchical_distribute_less_than_redundancy) } } +TEST(DistributionTest, wildcard_top_level_distribution_gives_expected_node_results) { + std::string raw_config = R"(redundancy 2 +initial_redundancy 2 +ensure_primary_persisted true +ready_copies 2 +active_per_leaf_group false +distributor_auto_ownership_transfer_on_whole_group_down true +group[0].index "invalid" +group[0].name "invalid" +group[0].capacity 5 +group[0].partitions "*" +group[1].index "0" +group[1].name "switch0" +group[1].capacity 3 +group[1].partitions "" +group[1].nodes[0].index 0 +group[1].nodes[0].retired false +group[1].nodes[1].index 1 +group[1].nodes[1].retired false +group[1].nodes[2].index 2 +group[1].nodes[2].retired false +group[2].index "1" +group[2].name "switch1" +group[2].capacity 2 +group[2].partitions "" +group[2].nodes[0].index 3 +group[2].nodes[0].retired false +group[2].nodes[1].index 4 +group[2].nodes[1].retired false +disk_distribution "MODULO_BID" +)"; + Distribution distr(raw_config); + ClusterState state("version:1 distributor:5 storage:5"); + + auto nodes_of = [&](uint32_t bucket){ + std::vector<uint16_t> actual; + distr.getIdealNodes(NodeType::STORAGE, state, document::BucketId(16, bucket), actual); + return actual; + }; + + EXPECT_THAT(nodes_of(1), ElementsAre(0, 2)); + EXPECT_THAT(nodes_of(2), ElementsAre(2, 0)); + EXPECT_THAT(nodes_of(3), ElementsAre(4, 3)); + EXPECT_THAT(nodes_of(4), ElementsAre(3, 4)); + EXPECT_THAT(nodes_of(5), ElementsAre(4, 3)); + EXPECT_THAT(nodes_of(6), ElementsAre(1, 0)); + EXPECT_THAT(nodes_of(7), ElementsAre(2, 0)); +} + +namespace { + +std::string generate_config_with_n_1node_groups(int n_groups) { + std::ostringstream config_os; + std::ostringstream partition_os; + for (int i = 0; i < n_groups - 1; ++i) { + partition_os << "1|"; + } + partition_os << '*'; + config_os << "redundancy " << n_groups << "\n" + << "initial_redundancy " << n_groups << "\n" + << "ensure_primary_persisted true\n" + << "ready_copies " << n_groups << "\n" + << "active_per_leaf_group true\n" + << "distributor_auto_ownership_transfer_on_whole_group_down true\n" + << "group[0].index \"invalid\"\n" + << "group[0].name \"invalid\"\n" + << "group[0].capacity " << n_groups << "\n" + << "group[0].partitions \"" << partition_os.str() << "\"\n"; + + for (int i = 0; i < n_groups; ++i) { + int g = i + 1; + config_os << "group[" << g << "].index \"" << i << "\"\n" + << "group[" << g << "].name \"group" << g << "\"\n" + << "group[" << g << "].capacity 1\n" + << "group[" << g << "].partitions \"\"\n" + << "group[" << g << "].nodes[0].index \"" << i << "\"\n" + << "group[" << g << "].nodes[0].retired false\n"; + } + return config_os.str(); +} + +std::string generate_state_with_n_nodes_up(int n_nodes) { + std::ostringstream state_os; + state_os << "version:1 bits:8 distributor:" << n_nodes << " storage:" << n_nodes; + return state_os.str(); +} + +} + +TEST(DistributionTest, DISABLED_benchmark_ideal_state_for_many_groups) { + const int n_groups = 150; + Distribution distr(generate_config_with_n_1node_groups(n_groups)); + ClusterState state(generate_state_with_n_nodes_up(n_groups)); + + std::vector<uint16_t> actual; + uint32_t bucket = 0; + auto min_time = vespalib::BenchmarkTimer::benchmark([&]{ + distr.getIdealNodes(NodeType::STORAGE, state, document::BucketId(16, (bucket++ & 0xffffU)), actual); + }, 5.0); + fprintf(stderr, "%.10f seconds\n", min_time); +} + } diff --git a/vdslib/src/vespa/vdslib/distribution/distribution.cpp b/vdslib/src/vespa/vdslib/distribution/distribution.cpp index 0abb99e17d2..4c74923c58b 100644 --- a/vdslib/src/vespa/vdslib/distribution/distribution.cpp +++ b/vdslib/src/vespa/vdslib/distribution/distribution.cpp @@ -351,6 +351,7 @@ namespace { const Group* _group; double _score; + ScoredGroup() : _group(nullptr), _score(0) {} ScoredGroup(const Group* group, double score) : _group(group), _score(score) {} @@ -430,40 +431,36 @@ Distribution::getIdealGroups(const document::BucketId& bucket, std::vector<ResultGroup>& results) const { if (parent.isLeafGroup()) { - results.push_back(ResultGroup(parent, redundancy)); + results.emplace_back(parent, redundancy); return; } - const Group::Distribution& redundancyArray( - parent.getDistribution(redundancy)); - std::vector<ScoredGroup> tmpResults(redundancyArray.size(), - ScoredGroup(0, 0)); - uint32_t seed(getGroupSeed(bucket, clusterState, parent)); + const Group::Distribution& redundancyArray = parent.getDistribution(redundancy); + uint32_t seed = getGroupSeed(bucket, clusterState, parent); RandomGen random(seed); uint32_t currentIndex = 0; - const std::map<uint16_t, Group*>& subGroups(parent.getSubGroups()); - for (std::map<uint16_t, Group*>::const_iterator it = subGroups.begin(); - it != subGroups.end(); ++it) - { - while (it->first < currentIndex++) random.nextDouble(); - double score = random.nextDouble(); - if (it->second->getCapacity() != 1) { - // Capacity shouldn't possibly be 0. - // Verified in Group::setCapacity() - score = std::pow(score, 1.0 / it->second->getCapacity().getValue()); + const auto& subGroups = parent.getSubGroups(); + std::vector<ScoredGroup> tmpResults; + tmpResults.reserve(subGroups.size()); + for (const auto& g : subGroups) { + while (g.first < currentIndex++) { + random.nextDouble(); } - if (score > tmpResults.back()._score) { - tmpResults.push_back(ScoredGroup(it->second, score)); - std::sort(tmpResults.begin(), tmpResults.end()); - tmpResults.pop_back(); + double score = random.nextDouble(); + if (g.second->getCapacity() != 1) { + // Capacity shouldn't possibly be 0. + // Verified in Group::setCapacity() + score = std::pow(score, 1.0 / g.second->getCapacity().getValue()); } + tmpResults.emplace_back(g.second, score); } - while (tmpResults.back()._group == nullptr) { - tmpResults.pop_back(); + std::sort(tmpResults.begin(), tmpResults.end()); + if (tmpResults.size() > redundancyArray.size()) { + tmpResults.resize(redundancyArray.size()); } for (uint32_t i=0, n=tmpResults.size(); i<n; ++i) { ScoredGroup& group(tmpResults[i]); - // This should never happen. Config should verify that each group - // has enough groups beneath them. + // This should never happen. Config should verify that each group + // has enough groups beneath them. assert(group._group != nullptr); getIdealGroups(bucket, clusterState, *group._group, redundancyArray[i], results); |