summaryrefslogtreecommitdiffstats
path: root/vdslib
diff options
context:
space:
mode:
authorTor Brede Vekterli <vekterli@verizonmedia.com>2020-06-25 15:12:39 +0000
committerTor Brede Vekterli <vekterli@verizonmedia.com>2020-06-29 11:14:25 +0000
commitbfd64c5fd361d8d6a8bc6c217073f7786a6e045f (patch)
tree191c14a5c2745e44fb3421c82b27678817a264cb /vdslib
parente8eb7c3df52e9e3c8068d5f2932b6d255ba90868 (diff)
Only sort once during ideal group calculations
Avoids invoking `std::sort` O(n) times in favor of just once. Benchmark for 150 groups of 1 node each: * Before: 0.0004381478 seconds per invocation * After: 0.0000377917 seconds per invocation
Diffstat (limited to 'vdslib')
-rw-r--r--vdslib/src/tests/distribution/distributiontest.cpp106
-rw-r--r--vdslib/src/vespa/vdslib/distribution/distribution.cpp45
2 files changed, 127 insertions, 24 deletions
diff --git a/vdslib/src/tests/distribution/distributiontest.cpp b/vdslib/src/tests/distribution/distributiontest.cpp
index f0b48faebef..5a337433bdb 100644
--- a/vdslib/src/tests/distribution/distributiontest.cpp
+++ b/vdslib/src/tests/distribution/distributiontest.cpp
@@ -13,10 +13,14 @@
#include <vespa/vespalib/io/fileutil.h>
#include <vespa/vespalib/stllike/lexical_cast.h>
#include <vespa/vespalib/text/stringtokenizer.h>
+#include <vespa/vespalib/util/benchmark_timer.h>
+#include <gmock/gmock.h>
#include <chrono>
#include <thread>
#include <fstream>
+using namespace ::testing;
+
namespace storage::lib {
template <typename T>
@@ -1143,4 +1147,106 @@ TEST(DistributionTest, test_hierarchical_distribute_less_than_redundancy)
}
}
+TEST(DistributionTest, wildcard_top_level_distribution_gives_expected_node_results) {
+ std::string raw_config = R"(redundancy 2
+initial_redundancy 2
+ensure_primary_persisted true
+ready_copies 2
+active_per_leaf_group false
+distributor_auto_ownership_transfer_on_whole_group_down true
+group[0].index "invalid"
+group[0].name "invalid"
+group[0].capacity 5
+group[0].partitions "*"
+group[1].index "0"
+group[1].name "switch0"
+group[1].capacity 3
+group[1].partitions ""
+group[1].nodes[0].index 0
+group[1].nodes[0].retired false
+group[1].nodes[1].index 1
+group[1].nodes[1].retired false
+group[1].nodes[2].index 2
+group[1].nodes[2].retired false
+group[2].index "1"
+group[2].name "switch1"
+group[2].capacity 2
+group[2].partitions ""
+group[2].nodes[0].index 3
+group[2].nodes[0].retired false
+group[2].nodes[1].index 4
+group[2].nodes[1].retired false
+disk_distribution "MODULO_BID"
+)";
+ Distribution distr(raw_config);
+ ClusterState state("version:1 distributor:5 storage:5");
+
+ auto nodes_of = [&](uint32_t bucket){
+ std::vector<uint16_t> actual;
+ distr.getIdealNodes(NodeType::STORAGE, state, document::BucketId(16, bucket), actual);
+ return actual;
+ };
+
+ EXPECT_THAT(nodes_of(1), ElementsAre(0, 2));
+ EXPECT_THAT(nodes_of(2), ElementsAre(2, 0));
+ EXPECT_THAT(nodes_of(3), ElementsAre(4, 3));
+ EXPECT_THAT(nodes_of(4), ElementsAre(3, 4));
+ EXPECT_THAT(nodes_of(5), ElementsAre(4, 3));
+ EXPECT_THAT(nodes_of(6), ElementsAre(1, 0));
+ EXPECT_THAT(nodes_of(7), ElementsAre(2, 0));
+}
+
+namespace {
+
+std::string generate_config_with_n_1node_groups(int n_groups) {
+ std::ostringstream config_os;
+ std::ostringstream partition_os;
+ for (int i = 0; i < n_groups - 1; ++i) {
+ partition_os << "1|";
+ }
+ partition_os << '*';
+ config_os << "redundancy " << n_groups << "\n"
+ << "initial_redundancy " << n_groups << "\n"
+ << "ensure_primary_persisted true\n"
+ << "ready_copies " << n_groups << "\n"
+ << "active_per_leaf_group true\n"
+ << "distributor_auto_ownership_transfer_on_whole_group_down true\n"
+ << "group[0].index \"invalid\"\n"
+ << "group[0].name \"invalid\"\n"
+ << "group[0].capacity " << n_groups << "\n"
+ << "group[0].partitions \"" << partition_os.str() << "\"\n";
+
+ for (int i = 0; i < n_groups; ++i) {
+ int g = i + 1;
+ config_os << "group[" << g << "].index \"" << i << "\"\n"
+ << "group[" << g << "].name \"group" << g << "\"\n"
+ << "group[" << g << "].capacity 1\n"
+ << "group[" << g << "].partitions \"\"\n"
+ << "group[" << g << "].nodes[0].index \"" << i << "\"\n"
+ << "group[" << g << "].nodes[0].retired false\n";
+ }
+ return config_os.str();
+}
+
+std::string generate_state_with_n_nodes_up(int n_nodes) {
+ std::ostringstream state_os;
+ state_os << "version:1 bits:8 distributor:" << n_nodes << " storage:" << n_nodes;
+ return state_os.str();
+}
+
+}
+
+TEST(DistributionTest, DISABLED_benchmark_ideal_state_for_many_groups) {
+ const int n_groups = 150;
+ Distribution distr(generate_config_with_n_1node_groups(n_groups));
+ ClusterState state(generate_state_with_n_nodes_up(n_groups));
+
+ std::vector<uint16_t> actual;
+ uint32_t bucket = 0;
+ auto min_time = vespalib::BenchmarkTimer::benchmark([&]{
+ distr.getIdealNodes(NodeType::STORAGE, state, document::BucketId(16, (bucket++ & 0xffffU)), actual);
+ }, 5.0);
+ fprintf(stderr, "%.10f seconds\n", min_time);
+}
+
}
diff --git a/vdslib/src/vespa/vdslib/distribution/distribution.cpp b/vdslib/src/vespa/vdslib/distribution/distribution.cpp
index 0abb99e17d2..4c74923c58b 100644
--- a/vdslib/src/vespa/vdslib/distribution/distribution.cpp
+++ b/vdslib/src/vespa/vdslib/distribution/distribution.cpp
@@ -351,6 +351,7 @@ namespace {
const Group* _group;
double _score;
+ ScoredGroup() : _group(nullptr), _score(0) {}
ScoredGroup(const Group* group, double score)
: _group(group), _score(score) {}
@@ -430,40 +431,36 @@ Distribution::getIdealGroups(const document::BucketId& bucket,
std::vector<ResultGroup>& results) const
{
if (parent.isLeafGroup()) {
- results.push_back(ResultGroup(parent, redundancy));
+ results.emplace_back(parent, redundancy);
return;
}
- const Group::Distribution& redundancyArray(
- parent.getDistribution(redundancy));
- std::vector<ScoredGroup> tmpResults(redundancyArray.size(),
- ScoredGroup(0, 0));
- uint32_t seed(getGroupSeed(bucket, clusterState, parent));
+ const Group::Distribution& redundancyArray = parent.getDistribution(redundancy);
+ uint32_t seed = getGroupSeed(bucket, clusterState, parent);
RandomGen random(seed);
uint32_t currentIndex = 0;
- const std::map<uint16_t, Group*>& subGroups(parent.getSubGroups());
- for (std::map<uint16_t, Group*>::const_iterator it = subGroups.begin();
- it != subGroups.end(); ++it)
- {
- while (it->first < currentIndex++) random.nextDouble();
- double score = random.nextDouble();
- if (it->second->getCapacity() != 1) {
- // Capacity shouldn't possibly be 0.
- // Verified in Group::setCapacity()
- score = std::pow(score, 1.0 / it->second->getCapacity().getValue());
+ const auto& subGroups = parent.getSubGroups();
+ std::vector<ScoredGroup> tmpResults;
+ tmpResults.reserve(subGroups.size());
+ for (const auto& g : subGroups) {
+ while (g.first < currentIndex++) {
+ random.nextDouble();
}
- if (score > tmpResults.back()._score) {
- tmpResults.push_back(ScoredGroup(it->second, score));
- std::sort(tmpResults.begin(), tmpResults.end());
- tmpResults.pop_back();
+ double score = random.nextDouble();
+ if (g.second->getCapacity() != 1) {
+ // Capacity shouldn't possibly be 0.
+ // Verified in Group::setCapacity()
+ score = std::pow(score, 1.0 / g.second->getCapacity().getValue());
}
+ tmpResults.emplace_back(g.second, score);
}
- while (tmpResults.back()._group == nullptr) {
- tmpResults.pop_back();
+ std::sort(tmpResults.begin(), tmpResults.end());
+ if (tmpResults.size() > redundancyArray.size()) {
+ tmpResults.resize(redundancyArray.size());
}
for (uint32_t i=0, n=tmpResults.size(); i<n; ++i) {
ScoredGroup& group(tmpResults[i]);
- // This should never happen. Config should verify that each group
- // has enough groups beneath them.
+ // This should never happen. Config should verify that each group
+ // has enough groups beneath them.
assert(group._group != nullptr);
getIdealGroups(bucket, clusterState, *group._group,
redundancyArray[i], results);