diff options
Diffstat (limited to 'vdslib')
7 files changed, 183 insertions, 91 deletions
diff --git a/vdslib/src/tests/distribution/distributiontest.cpp b/vdslib/src/tests/distribution/distributiontest.cpp index f0b48faebef..5a337433bdb 100644 --- a/vdslib/src/tests/distribution/distributiontest.cpp +++ b/vdslib/src/tests/distribution/distributiontest.cpp @@ -13,10 +13,14 @@ #include <vespa/vespalib/io/fileutil.h> #include <vespa/vespalib/stllike/lexical_cast.h> #include <vespa/vespalib/text/stringtokenizer.h> +#include <vespa/vespalib/util/benchmark_timer.h> +#include <gmock/gmock.h> #include <chrono> #include <thread> #include <fstream> +using namespace ::testing; + namespace storage::lib { template <typename T> @@ -1143,4 +1147,106 @@ TEST(DistributionTest, test_hierarchical_distribute_less_than_redundancy) } } +TEST(DistributionTest, wildcard_top_level_distribution_gives_expected_node_results) { + std::string raw_config = R"(redundancy 2 +initial_redundancy 2 +ensure_primary_persisted true +ready_copies 2 +active_per_leaf_group false +distributor_auto_ownership_transfer_on_whole_group_down true +group[0].index "invalid" +group[0].name "invalid" +group[0].capacity 5 +group[0].partitions "*" +group[1].index "0" +group[1].name "switch0" +group[1].capacity 3 +group[1].partitions "" +group[1].nodes[0].index 0 +group[1].nodes[0].retired false +group[1].nodes[1].index 1 +group[1].nodes[1].retired false +group[1].nodes[2].index 2 +group[1].nodes[2].retired false +group[2].index "1" +group[2].name "switch1" +group[2].capacity 2 +group[2].partitions "" +group[2].nodes[0].index 3 +group[2].nodes[0].retired false +group[2].nodes[1].index 4 +group[2].nodes[1].retired false +disk_distribution "MODULO_BID" +)"; + Distribution distr(raw_config); + ClusterState state("version:1 distributor:5 storage:5"); + + auto nodes_of = [&](uint32_t bucket){ + std::vector<uint16_t> actual; + distr.getIdealNodes(NodeType::STORAGE, state, document::BucketId(16, bucket), actual); + return actual; + }; + + EXPECT_THAT(nodes_of(1), ElementsAre(0, 2)); + EXPECT_THAT(nodes_of(2), ElementsAre(2, 0)); + EXPECT_THAT(nodes_of(3), ElementsAre(4, 3)); + EXPECT_THAT(nodes_of(4), ElementsAre(3, 4)); + EXPECT_THAT(nodes_of(5), ElementsAre(4, 3)); + EXPECT_THAT(nodes_of(6), ElementsAre(1, 0)); + EXPECT_THAT(nodes_of(7), ElementsAre(2, 0)); +} + +namespace { + +std::string generate_config_with_n_1node_groups(int n_groups) { + std::ostringstream config_os; + std::ostringstream partition_os; + for (int i = 0; i < n_groups - 1; ++i) { + partition_os << "1|"; + } + partition_os << '*'; + config_os << "redundancy " << n_groups << "\n" + << "initial_redundancy " << n_groups << "\n" + << "ensure_primary_persisted true\n" + << "ready_copies " << n_groups << "\n" + << "active_per_leaf_group true\n" + << "distributor_auto_ownership_transfer_on_whole_group_down true\n" + << "group[0].index \"invalid\"\n" + << "group[0].name \"invalid\"\n" + << "group[0].capacity " << n_groups << "\n" + << "group[0].partitions \"" << partition_os.str() << "\"\n"; + + for (int i = 0; i < n_groups; ++i) { + int g = i + 1; + config_os << "group[" << g << "].index \"" << i << "\"\n" + << "group[" << g << "].name \"group" << g << "\"\n" + << "group[" << g << "].capacity 1\n" + << "group[" << g << "].partitions \"\"\n" + << "group[" << g << "].nodes[0].index \"" << i << "\"\n" + << "group[" << g << "].nodes[0].retired false\n"; + } + return config_os.str(); +} + +std::string generate_state_with_n_nodes_up(int n_nodes) { + std::ostringstream state_os; + state_os << "version:1 bits:8 distributor:" << n_nodes << " storage:" << n_nodes; + return state_os.str(); +} + +} + +TEST(DistributionTest, DISABLED_benchmark_ideal_state_for_many_groups) { + const int n_groups = 150; + Distribution distr(generate_config_with_n_1node_groups(n_groups)); + ClusterState state(generate_state_with_n_nodes_up(n_groups)); + + std::vector<uint16_t> actual; + uint32_t bucket = 0; + auto min_time = vespalib::BenchmarkTimer::benchmark([&]{ + distr.getIdealNodes(NodeType::STORAGE, state, document::BucketId(16, (bucket++ & 0xffffU)), actual); + }, 5.0); + fprintf(stderr, "%.10f seconds\n", min_time); +} + } diff --git a/vdslib/src/vespa/vdslib/distribution/distribution.cpp b/vdslib/src/vespa/vdslib/distribution/distribution.cpp index 52d523071e6..4c74923c58b 100644 --- a/vdslib/src/vespa/vdslib/distribution/distribution.cpp +++ b/vdslib/src/vespa/vdslib/distribution/distribution.cpp @@ -39,6 +39,7 @@ VESPA_IMPLEMENT_EXCEPTION(TooFewBucketBitsInUseException, vespalib::Exception); Distribution::Distribution() : _distributionBitMasks(getDistributionBitMasks()), _nodeGraph(), + _node2Group(), _redundancy(), _initialRedundancy(0), _ensurePrimaryPersisted(true), @@ -55,6 +56,7 @@ Distribution::Distribution() Distribution::Distribution(const Distribution& d) : _distributionBitMasks(getDistributionBitMasks()), _nodeGraph(), + _node2Group(), _redundancy(), _initialRedundancy(0), _ensurePrimaryPersisted(true), @@ -69,7 +71,7 @@ Distribution::Distribution(const Distribution& d) Distribution::ConfigWrapper::ConfigWrapper(std::unique_ptr<DistributionConfig> cfg) : _cfg(std::move(cfg)) { } -Distribution::ConfigWrapper::~ConfigWrapper() { } +Distribution::ConfigWrapper::~ConfigWrapper() = default; Distribution::Distribution(const ConfigWrapper & config) : Distribution(config.get()) @@ -78,6 +80,7 @@ Distribution::Distribution(const ConfigWrapper & config) : Distribution::Distribution(const vespa::config::content::StorDistributionConfig & config) : _distributionBitMasks(getDistributionBitMasks()), _nodeGraph(), + _node2Group(), _redundancy(), _initialRedundancy(0), _ensurePrimaryPersisted(true), @@ -93,6 +96,7 @@ Distribution::Distribution(const vespa::config::content::StorDistributionConfig Distribution::Distribution(const vespalib::string& serialized) : _distributionBitMasks(getDistributionBitMasks()), _nodeGraph(), + _node2Group(), _redundancy(), _initialRedundancy(0), _ensurePrimaryPersisted(true), @@ -113,7 +117,7 @@ Distribution::operator=(const Distribution& d) return *this; } -Distribution::~Distribution() { } +Distribution::~Distribution() = default; namespace { using ConfigDiskDistribution = vespa::config::content::StorDistributionConfig::DiskDistribution; @@ -142,34 +146,35 @@ Distribution::configure(const vespa::config::content::StorDistributionConfig& co { typedef vespa::config::content::StorDistributionConfig::Group ConfigGroup; std::unique_ptr<Group> nodeGraph; + std::vector<const Group *> node2Group; for (uint32_t i=0, n=config.group.size(); i<n; ++i) { const ConfigGroup& cg(config.group[i]); std::vector<uint16_t> path; - if (nodeGraph.get() != nullptr) { + if (nodeGraph) { path = DistributionConfigUtil::getGroupPath(cg.index); } bool isLeafGroup = (cg.nodes.size() > 0); - std::unique_ptr<Group> group; uint16_t index = (path.empty() ? 0 : path.back()); - if (isLeafGroup) { - group.reset(new Group(index, cg.name)); - } else { - group.reset(new Group( - index, cg.name, - Group::Distribution(cg.partitions), config.redundancy)); - } + std::unique_ptr<Group> group = (isLeafGroup) + ? std::make_unique<Group>(index, cg.name) + : std::make_unique<Group>(index, cg.name, Group::Distribution(cg.partitions), config.redundancy); group->setCapacity(cg.capacity); if (isLeafGroup) { std::vector<uint16_t> nodes(cg.nodes.size()); for (uint32_t j=0, m=nodes.size(); j<m; ++j) { - nodes[j] = cg.nodes[j].index; + uint16_t nodeIndex = cg.nodes[j].index; + nodes[j] = nodeIndex; + if (node2Group.size() <= nodeIndex) { + node2Group.resize(nodeIndex + 1); + } + node2Group[nodeIndex] = group.get(); } group->setNodes(nodes); } if (path.empty()) { nodeGraph = std::move(group); } else { - assert(nodeGraph.get() != nullptr); + assert(nodeGraph); Group* parent = nodeGraph.get(); for (uint32_t j=0; j<path.size() - 1; ++j) { parent = parent->getSubGroups()[path[j]]; @@ -177,7 +182,7 @@ Distribution::configure(const vespa::config::content::StorDistributionConfig& co parent->addSubGroup(std::move(group)); } } - if (nodeGraph.get() == nullptr) { + if ( ! nodeGraph) { throw vespalib::IllegalStateException( "Got config that didn't seem to specify even a root group. Must " "have a root group at minimum:\n" @@ -185,6 +190,7 @@ Distribution::configure(const vespa::config::content::StorDistributionConfig& co } nodeGraph->calculateDistributionHashValues(); _nodeGraph = std::move(nodeGraph); + _node2Group = std::move(node2Group); _redundancy = config.redundancy; _initialRedundancy = config.initialRedundancy; _ensurePrimaryPersisted = config.ensurePrimaryPersisted; @@ -345,6 +351,7 @@ namespace { const Group* _group; double _score; + ScoredGroup() : _group(nullptr), _score(0) {} ScoredGroup(const Group* group, double score) : _group(group), _score(score) {} @@ -424,40 +431,36 @@ Distribution::getIdealGroups(const document::BucketId& bucket, std::vector<ResultGroup>& results) const { if (parent.isLeafGroup()) { - results.push_back(ResultGroup(parent, redundancy)); + results.emplace_back(parent, redundancy); return; } - const Group::Distribution& redundancyArray( - parent.getDistribution(redundancy)); - std::vector<ScoredGroup> tmpResults(redundancyArray.size(), - ScoredGroup(0, 0)); - uint32_t seed(getGroupSeed(bucket, clusterState, parent)); + const Group::Distribution& redundancyArray = parent.getDistribution(redundancy); + uint32_t seed = getGroupSeed(bucket, clusterState, parent); RandomGen random(seed); uint32_t currentIndex = 0; - const std::map<uint16_t, Group*>& subGroups(parent.getSubGroups()); - for (std::map<uint16_t, Group*>::const_iterator it = subGroups.begin(); - it != subGroups.end(); ++it) - { - while (it->first < currentIndex++) random.nextDouble(); - double score = random.nextDouble(); - if (it->second->getCapacity() != 1) { - // Capacity shouldn't possibly be 0. - // Verified in Group::setCapacity() - score = std::pow(score, 1.0 / it->second->getCapacity().getValue()); + const auto& subGroups = parent.getSubGroups(); + std::vector<ScoredGroup> tmpResults; + tmpResults.reserve(subGroups.size()); + for (const auto& g : subGroups) { + while (g.first < currentIndex++) { + random.nextDouble(); } - if (score > tmpResults.back()._score) { - tmpResults.push_back(ScoredGroup(it->second, score)); - std::sort(tmpResults.begin(), tmpResults.end()); - tmpResults.pop_back(); + double score = random.nextDouble(); + if (g.second->getCapacity() != 1) { + // Capacity shouldn't possibly be 0. + // Verified in Group::setCapacity() + score = std::pow(score, 1.0 / g.second->getCapacity().getValue()); } + tmpResults.emplace_back(g.second, score); } - while (tmpResults.back()._group == nullptr) { - tmpResults.pop_back(); + std::sort(tmpResults.begin(), tmpResults.end()); + if (tmpResults.size() > redundancyArray.size()) { + tmpResults.resize(redundancyArray.size()); } for (uint32_t i=0, n=tmpResults.size(); i<n; ++i) { ScoredGroup& group(tmpResults[i]); - // This should never happen. Config should verify that each group - // has enough groups beneath them. + // This should never happen. Config should verify that each group + // has enough groups beneath them. assert(group._group != nullptr); getIdealGroups(bucket, clusterState, *group._group, redundancyArray[i], results); @@ -669,20 +672,19 @@ Distribution::splitNodesIntoLeafGroups(IndexList nodeList) const { std::vector<IndexList> result; std::map<uint16_t, IndexList> nodes; - for (uint32_t i=0, n=nodeList.size(); i<n; ++i) { - const Group* group(_nodeGraph->getGroupForNode(nodeList[i])); + for (auto node : nodeList) { + const Group* group(_node2Group[node]); if (group == nullptr) { LOGBP(warning, "Node %u is not assigned to a group. " - "Should not happen?", nodeList[i]); + "Should not happen?", node); } else { assert(group->isLeafGroup()); - nodes[group->getIndex()].push_back(nodeList[i]); + nodes[group->getIndex()].push_back(node); } } - for (std::map<uint16_t, IndexList>::const_iterator it(nodes.begin()); - it != nodes.end(); ++it) - { - result.push_back(it->second); + result.reserve(nodes.size()); + for (auto & node : nodes) { + result.emplace_back(std::move(node.second)); } return result; } diff --git a/vdslib/src/vespa/vdslib/distribution/distribution.h b/vdslib/src/vespa/vdslib/distribution/distribution.h index 6da60e084bb..9ee505be7cd 100644 --- a/vdslib/src/vespa/vdslib/distribution/distribution.h +++ b/vdslib/src/vespa/vdslib/distribution/distribution.h @@ -33,8 +33,9 @@ public: enum DiskDistribution { MODULO, MODULO_INDEX, MODULO_KNUTH, MODULO_BID }; private: - std::vector<uint32_t> _distributionBitMasks; - std::unique_ptr<Group> _nodeGraph; + std::vector<uint32_t> _distributionBitMasks; + std::unique_ptr<Group> _nodeGraph; + std::vector<const Group *> _node2Group; uint16_t _redundancy; uint16_t _initialRedundancy; uint16_t _readyCopies; diff --git a/vdslib/src/vespa/vdslib/distribution/group.cpp b/vdslib/src/vespa/vdslib/distribution/group.cpp index 91e27715911..45f8f0f8aea 100644 --- a/vdslib/src/vespa/vdslib/distribution/group.cpp +++ b/vdslib/src/vespa/vdslib/distribution/group.cpp @@ -39,11 +39,9 @@ Group::Group(uint16_t index, vespalib::stringref name, Group::~Group() { - for (std::map<uint16_t, Group*>::iterator it = _subGroups.begin(); - it != _subGroups.end(); ++it) - { - delete it->second; - it->second = 0; + for (auto & subGroup : _subGroups) { + delete subGroup.second; + subGroup.second = nullptr; } } @@ -75,8 +73,8 @@ Group::print(std::ostream& out, bool verbose, } if (_distributionSpec.size() == 0) { out << ", nodes( "; - for (uint32_t i = 0; i < _nodes.size(); i++) { - out << _nodes[i] << " "; + for (auto node : _nodes) { + out << node << " "; } out << ")"; } @@ -88,10 +86,9 @@ Group::print(std::ostream& out, bool verbose, out << ") {"; if (_subGroups.size()>0) { - for (std::map<uint16_t, Group*>::const_iterator it = _subGroups.begin(); - it != _subGroups.end(); ++it) { + for (const auto & subGroup : _subGroups) { out << "\n" << indent << " "; - it->second->print(out, verbose, indent + " "); + subGroup.second->print(out, verbose, indent + " "); } } @@ -106,12 +103,11 @@ Group::addSubGroup(Group::UP group) "Cannot add sub groups to a group without a valid distribution", VESPA_STRLOC); } - if (!group.get()) { + if (!group) { throw vespalib::IllegalArgumentException( "Cannot add null group.", VESPA_STRLOC); } - std::map<uint16_t, Group*>::const_iterator it( - _subGroups.find(group->getIndex())); + auto it =_subGroups.find(group->getIndex()); if (it != _subGroups.end()) { throw vespalib::IllegalArgumentException( "Another subgroup with same index is already added.", @@ -148,32 +144,28 @@ Group::setNodes(const std::vector<uint16_t>& nodes) const Group* Group::getGroupForNode(uint16_t nodeIdx) const { - for (uint32_t i = 0; i < _nodes.size(); ++i) { - if (_nodes[i] == nodeIdx) { + for (auto node : _nodes) { + if (node == nodeIdx) { return this; } } - for (std::map<uint16_t, Group*>::const_iterator iter = _subGroups.begin(); - iter != _subGroups.end(); - ++iter) { - const Group* g = iter->second->getGroupForNode(nodeIdx); - if (g != NULL) { + for (const auto & subGroup : _subGroups) { + const Group* g = subGroup.second->getGroupForNode(nodeIdx); + if (g != nullptr) { return g; } } - return NULL; + return nullptr; } void Group::calculateDistributionHashValues(uint32_t parentHash) { _distributionHash = parentHash ^ (1664525L * _index + 1013904223L); - for (std::map<uint16_t, Group*>::iterator it = _subGroups.begin(); - it != _subGroups.end(); ++it) - { - it->second->calculateDistributionHashValues(_distributionHash); + for (const auto & subGroup : _subGroups) { + subGroup.second->calculateDistributionHashValues(_distributionHash); } } diff --git a/vdslib/src/vespa/vdslib/distribution/group.h b/vdslib/src/vespa/vdslib/distribution/group.h index 238a56d0295..d6af005130d 100644 --- a/vdslib/src/vespa/vdslib/distribution/group.h +++ b/vdslib/src/vespa/vdslib/distribution/group.h @@ -19,8 +19,7 @@ namespace vespalib { class asciistream; } -namespace storage { -namespace lib { +namespace storage::lib { class IdealGroup; class SystemState; @@ -101,6 +100,4 @@ public: vespalib::string getDistributionConfigHash() const; }; -} // lib -} // storage - +} diff --git a/vdslib/src/vespa/vdslib/distribution/redundancygroupdistribution.cpp b/vdslib/src/vespa/vdslib/distribution/redundancygroupdistribution.cpp index 99974225e9e..e52ad78c2ba 100644 --- a/vdslib/src/vespa/vdslib/distribution/redundancygroupdistribution.cpp +++ b/vdslib/src/vespa/vdslib/distribution/redundancygroupdistribution.cpp @@ -1,14 +1,12 @@ // Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -#include <vespa/vdslib/distribution/redundancygroupdistribution.h> - -#include <algorithm> -#include <boost/lexical_cast.hpp> +#include "redundancygroupdistribution.h" #include <vespa/vespalib/util/exceptions.h> #include <vespa/vespalib/text/stringtokenizer.h> +#include <boost/lexical_cast.hpp> +#include <algorithm> -namespace storage { -namespace lib { +namespace storage::lib { namespace { void verifyLegal(vespalib::StringTokenizer& st, @@ -144,6 +142,4 @@ RedundancyGroupDistribution::divideSpecifiedCopies( return redundancy; } -} // lib -} // storage - +} diff --git a/vdslib/src/vespa/vdslib/distribution/redundancygroupdistribution.h b/vdslib/src/vespa/vdslib/distribution/redundancygroupdistribution.h index 33f895cadf0..73013b6ce81 100644 --- a/vdslib/src/vespa/vdslib/distribution/redundancygroupdistribution.h +++ b/vdslib/src/vespa/vdslib/distribution/redundancygroupdistribution.h @@ -11,8 +11,7 @@ #include <vector> #include <vespa/vespalib/stllike/string.h> -namespace storage { -namespace lib { +namespace storage::lib { class RedundancyGroupDistribution : public document::Printable { std::vector<uint16_t> _values; @@ -47,5 +46,4 @@ private: uint16_t redundancy, const std::vector<uint16_t>& maxValues); }; -} // lib -} // storage +} |