diff options
author | Tor Brede Vekterli <vekterli@verizonmedia.com> | 2020-06-25 15:12:39 +0000 |
---|---|---|
committer | Tor Brede Vekterli <vekterli@verizonmedia.com> | 2020-06-25 15:49:09 +0000 |
commit | 24d0b26fc28276aa5bf1ee4545eb02d2bf6d77ca (patch) | |
tree | 58aa216bafa4605833fff09eac63d22c1588d89a /vdslib | |
parent | 0e441d4aa6ee951367dfbb9aa684b4b556039a13 (diff) |
Only sort once during ideal group calculations
Invokes `std::sort` once, after all subgroups have been scored, instead of up to O(n) times inside the scoring loop.
Benchmark for 150 groups of 1 node each:
* Before: 0.0004381478 seconds per invocation
* After: 0.0000377917 seconds per invocation
Diffstat (limited to 'vdslib')
-rw-r--r-- | vdslib/src/tests/distribution/distributiontest.cpp | 54 | ||||
-rw-r--r-- | vdslib/src/vespa/vdslib/distribution/distribution.cpp | 45 |
2 files changed, 75 insertions, 24 deletions
diff --git a/vdslib/src/tests/distribution/distributiontest.cpp b/vdslib/src/tests/distribution/distributiontest.cpp index f0b48faebef..387146accfc 100644 --- a/vdslib/src/tests/distribution/distributiontest.cpp +++ b/vdslib/src/tests/distribution/distributiontest.cpp @@ -13,6 +13,7 @@ #include <vespa/vespalib/io/fileutil.h> #include <vespa/vespalib/stllike/lexical_cast.h> #include <vespa/vespalib/text/stringtokenizer.h> +#include <vespa/vespalib/util/benchmark_timer.h> #include <chrono> #include <thread> #include <fstream> @@ -1143,4 +1144,57 @@ TEST(DistributionTest, test_hierarchical_distribute_less_than_redundancy) } } +namespace { + +std::string generate_config_with_n_1node_groups(int n_groups) { + std::ostringstream config_os; + std::ostringstream partition_os; + for (int i = 0; i < n_groups - 1; ++i) { + partition_os << "1|"; + } + partition_os << '*'; + config_os << "redundancy " << n_groups << "\n" + << "initial_redundancy " << n_groups << "\n" + << "ensure_primary_persisted true\n" + << "ready_copies " << n_groups << "\n" + << "active_per_leaf_group true\n" + << "distributor_auto_ownership_transfer_on_whole_group_down true\n" + << "group[0].index \"invalid\"\n" + << "group[0].name \"invalid\"\n" + << "group[0].capacity " << n_groups << "\n" + << "group[0].partitions \"" << partition_os.str() << "\"\n"; + + for (int i = 0; i < n_groups; ++i) { + int g = i + 1; + config_os << "group[" << g << "].index \"" << i << "\"\n" + << "group[" << g << "].name \"group" << g << "\"\n" + << "group[" << g << "].capacity 1\n" + << "group[" << g << "].partitions \"\"\n" + << "group[" << g << "].nodes[0].index \"" << i << "\"\n" + << "group[" << g << "].nodes[0].retired false\n"; + } + return config_os.str(); +} + +std::string generate_state_with_n_nodes_up(int n_nodes) { + std::ostringstream state_os; + state_os << "version:1 bits:8 distributor:" << n_nodes << " storage:" << n_nodes; + return state_os.str(); +} + +} + +TEST(DistributionTest, 
DISABLED_benchmark_ideal_state_for_many_groups) { + const int n_groups = 150; + Distribution distr(generate_config_with_n_1node_groups(n_groups)); + ClusterState state(generate_state_with_n_nodes_up(n_groups)); + + std::vector<uint16_t> actual; + uint32_t bucket = 0; + auto min_time = vespalib::BenchmarkTimer::benchmark([&]{ + distr.getIdealNodes(NodeType::STORAGE, state, document::BucketId(16, (bucket++ & 0xffffU)), actual); + }, 5.0); + fprintf(stderr, "%.10f seconds\n", min_time); +} + } diff --git a/vdslib/src/vespa/vdslib/distribution/distribution.cpp b/vdslib/src/vespa/vdslib/distribution/distribution.cpp index 52d523071e6..3bdee0447c0 100644 --- a/vdslib/src/vespa/vdslib/distribution/distribution.cpp +++ b/vdslib/src/vespa/vdslib/distribution/distribution.cpp @@ -345,6 +345,7 @@ namespace { const Group* _group; double _score; + ScoredGroup() : _group(nullptr), _score(0) {} ScoredGroup(const Group* group, double score) : _group(group), _score(score) {} @@ -424,40 +425,36 @@ Distribution::getIdealGroups(const document::BucketId& bucket, std::vector<ResultGroup>& results) const { if (parent.isLeafGroup()) { - results.push_back(ResultGroup(parent, redundancy)); + results.emplace_back(parent, redundancy); return; } - const Group::Distribution& redundancyArray( - parent.getDistribution(redundancy)); - std::vector<ScoredGroup> tmpResults(redundancyArray.size(), - ScoredGroup(0, 0)); - uint32_t seed(getGroupSeed(bucket, clusterState, parent)); + const Group::Distribution& redundancyArray = parent.getDistribution(redundancy); + std::vector<ScoredGroup> tmpResults; + tmpResults.reserve(redundancyArray.size()); + uint32_t seed = getGroupSeed(bucket, clusterState, parent); RandomGen random(seed); uint32_t currentIndex = 0; - const std::map<uint16_t, Group*>& subGroups(parent.getSubGroups()); - for (std::map<uint16_t, Group*>::const_iterator it = subGroups.begin(); - it != subGroups.end(); ++it) - { - while (it->first < currentIndex++) random.nextDouble(); - double score 
= random.nextDouble(); - if (it->second->getCapacity() != 1) { - // Capacity shouldn't possibly be 0. - // Verified in Group::setCapacity() - score = std::pow(score, 1.0 / it->second->getCapacity().getValue()); + const auto& subGroups = parent.getSubGroups(); + for (const auto& g : subGroups) { + while (g.first < currentIndex++) { + random.nextDouble(); } - if (score > tmpResults.back()._score) { - tmpResults.push_back(ScoredGroup(it->second, score)); - std::sort(tmpResults.begin(), tmpResults.end()); - tmpResults.pop_back(); + double score = random.nextDouble(); + if (g.second->getCapacity() != 1) { + // Capacity shouldn't possibly be 0. + // Verified in Group::setCapacity() + score = std::pow(score, 1.0 / g.second->getCapacity().getValue()); } + tmpResults.emplace_back(g.second, score); } - while (tmpResults.back()._group == nullptr) { - tmpResults.pop_back(); + std::sort(tmpResults.begin(), tmpResults.end()); + if (tmpResults.size() > redundancy) { + tmpResults.resize(redundancy); } for (uint32_t i=0, n=tmpResults.size(); i<n; ++i) { ScoredGroup& group(tmpResults[i]); - // This should never happen. Config should verify that each group - // has enough groups beneath them. + // This should never happen. Config should verify that each group + // has enough groups beneath them. assert(group._group != nullptr); getIdealGroups(bucket, clusterState, *group._group, redundancyArray[i], results); |