summary refs log tree commit diff stats
path: root/vdslib
diff options
context:
space:
mode:
author Tor Brede Vekterli <vekterli@verizonmedia.com> 2020-06-25 15:12:39 +0000
committer Tor Brede Vekterli <vekterli@verizonmedia.com> 2020-06-25 15:49:09 +0000
commit 24d0b26fc28276aa5bf1ee4545eb02d2bf6d77ca (patch)
tree 58aa216bafa4605833fff09eac63d22c1588d89a /vdslib
parent 0e441d4aa6ee951367dfbb9aa684b4b556039a13 (diff)
Only sort once during ideal group calculations
Avoids invoking `std::sort` O(n) times in favor of just once. Benchmark for 150 groups of 1 node each: * Before: 0.0004381478 seconds per invocation * After: 0.0000377917 seconds per invocation
Diffstat (limited to 'vdslib')
-rw-r--r-- vdslib/src/tests/distribution/distributiontest.cpp 54
-rw-r--r-- vdslib/src/vespa/vdslib/distribution/distribution.cpp 45
2 files changed, 75 insertions, 24 deletions
diff --git a/vdslib/src/tests/distribution/distributiontest.cpp b/vdslib/src/tests/distribution/distributiontest.cpp
index f0b48faebef..387146accfc 100644
--- a/vdslib/src/tests/distribution/distributiontest.cpp
+++ b/vdslib/src/tests/distribution/distributiontest.cpp
@@ -13,6 +13,7 @@
#include <vespa/vespalib/io/fileutil.h>
#include <vespa/vespalib/stllike/lexical_cast.h>
#include <vespa/vespalib/text/stringtokenizer.h>
+#include <vespa/vespalib/util/benchmark_timer.h>
#include <chrono>
#include <thread>
#include <fstream>
@@ -1143,4 +1144,57 @@ TEST(DistributionTest, test_hierarchical_distribute_less_than_redundancy)
}
}
+namespace {
+// Builds distribution config text describing n_groups leaf groups that each hold exactly one node.
+std::string generate_config_with_n_1node_groups(int n_groups) {
+ std::ostringstream config_os;
+ std::ostringstream partition_os;
+ for (int i = 0; i < n_groups - 1; ++i) { // Partition spec is (n_groups - 1) repetitions of "1|" followed by '*'.
+ partition_os << "1|";
+ }
+ partition_os << '*';
+ config_os << "redundancy " << n_groups << "\n" // redundancy, initial_redundancy and ready_copies are all set to n_groups.
+ << "initial_redundancy " << n_groups << "\n"
+ << "ensure_primary_persisted true\n"
+ << "ready_copies " << n_groups << "\n"
+ << "active_per_leaf_group true\n"
+ << "distributor_auto_ownership_transfer_on_whole_group_down true\n"
+ << "group[0].index \"invalid\"\n" // group[0] carries the partition spec; presumably the root group - confirm against the config schema.
+ << "group[0].name \"invalid\"\n"
+ << "group[0].capacity " << n_groups << "\n"
+ << "group[0].partitions \"" << partition_os.str() << "\"\n";
+// One config entry per leaf group: entry g = i + 1 is given group index i and a single non-retired node with index i.
+ for (int i = 0; i < n_groups; ++i) {
+ int g = i + 1;
+ config_os << "group[" << g << "].index \"" << i << "\"\n"
+ << "group[" << g << "].name \"group" << g << "\"\n"
+ << "group[" << g << "].capacity 1\n"
+ << "group[" << g << "].partitions \"\"\n"
+ << "group[" << g << "].nodes[0].index \"" << i << "\"\n"
+ << "group[" << g << "].nodes[0].retired false\n";
+ }
+ return config_os.str();
+}
+// Builds a cluster state string with version:1, bits:8 and both distributor: and storage: set to n_nodes.
+std::string generate_state_with_n_nodes_up(int n_nodes) {
+ std::ostringstream state_os;
+ state_os << "version:1 bits:8 distributor:" << n_nodes << " storage:" << n_nodes;
+ return state_os.str();
+}
+
+} // anonymous namespace
+
+TEST(DistributionTest, DISABLED_benchmark_ideal_state_for_many_groups) { // Manual benchmark; the DISABLED_ name prefix keeps GoogleTest from running it by default.
+ const int n_groups = 150;
+ Distribution distr(generate_config_with_n_1node_groups(n_groups));
+ ClusterState state(generate_state_with_n_nodes_up(n_groups));
+// Times getIdealNodes() for storage nodes and prints the value returned by BenchmarkTimer::benchmark() to stderr.
+ std::vector<uint16_t> actual;
+ uint32_t bucket = 0;
+ auto min_time = vespalib::BenchmarkTimer::benchmark([&]{
+ distr.getIdealNodes(NodeType::STORAGE, state, document::BucketId(16, (bucket++ & 0xffffU)), actual); // Each invocation uses the next counter value, masked to its low 16 bits.
+ }, 5.0); // NOTE(review): 5.0 is presumably the benchmark time budget in seconds - confirm against BenchmarkTimer.
+ fprintf(stderr, "%.10f seconds\n", min_time);
+}
+
}
diff --git a/vdslib/src/vespa/vdslib/distribution/distribution.cpp b/vdslib/src/vespa/vdslib/distribution/distribution.cpp
index 52d523071e6..3bdee0447c0 100644
--- a/vdslib/src/vespa/vdslib/distribution/distribution.cpp
+++ b/vdslib/src/vespa/vdslib/distribution/distribution.cpp
@@ -345,6 +345,7 @@ namespace {
const Group* _group;
double _score;
+ ScoredGroup() : _group(nullptr), _score(0) {}
ScoredGroup(const Group* group, double score)
: _group(group), _score(score) {}
@@ -424,40 +425,36 @@ Distribution::getIdealGroups(const document::BucketId& bucket,
std::vector<ResultGroup>& results) const
{
if (parent.isLeafGroup()) {
- results.push_back(ResultGroup(parent, redundancy));
+ results.emplace_back(parent, redundancy);
return;
}
- const Group::Distribution& redundancyArray(
- parent.getDistribution(redundancy));
- std::vector<ScoredGroup> tmpResults(redundancyArray.size(),
- ScoredGroup(0, 0));
- uint32_t seed(getGroupSeed(bucket, clusterState, parent));
+ const Group::Distribution& redundancyArray = parent.getDistribution(redundancy);
+ std::vector<ScoredGroup> tmpResults;
+ tmpResults.reserve(redundancyArray.size());
+ uint32_t seed = getGroupSeed(bucket, clusterState, parent);
RandomGen random(seed);
uint32_t currentIndex = 0;
- const std::map<uint16_t, Group*>& subGroups(parent.getSubGroups());
- for (std::map<uint16_t, Group*>::const_iterator it = subGroups.begin();
- it != subGroups.end(); ++it)
- {
- while (it->first < currentIndex++) random.nextDouble();
- double score = random.nextDouble();
- if (it->second->getCapacity() != 1) {
- // Capacity shouldn't possibly be 0.
- // Verified in Group::setCapacity()
- score = std::pow(score, 1.0 / it->second->getCapacity().getValue());
+ const auto& subGroups = parent.getSubGroups();
+ for (const auto& g : subGroups) {
+ while (g.first < currentIndex++) {
+ random.nextDouble();
}
- if (score > tmpResults.back()._score) {
- tmpResults.push_back(ScoredGroup(it->second, score));
- std::sort(tmpResults.begin(), tmpResults.end());
- tmpResults.pop_back();
+ double score = random.nextDouble();
+ if (g.second->getCapacity() != 1) {
+ // Capacity shouldn't possibly be 0.
+ // Verified in Group::setCapacity()
+ score = std::pow(score, 1.0 / g.second->getCapacity().getValue());
}
+ tmpResults.emplace_back(g.second, score);
}
- while (tmpResults.back()._group == nullptr) {
- tmpResults.pop_back();
+ std::sort(tmpResults.begin(), tmpResults.end());
+ if (tmpResults.size() > redundancy) {
+ tmpResults.resize(redundancy);
}
for (uint32_t i=0, n=tmpResults.size(); i<n; ++i) {
ScoredGroup& group(tmpResults[i]);
- // This should never happen. Config should verify that each group
- // has enough groups beneath them.
+ // This should never happen. Config should verify that each group
+ // has enough groups beneath them.
assert(group._group != nullptr);
getIdealGroups(bucket, clusterState, *group._group,
redundancyArray[i], results);