diff options
author | Tor Brede Vekterli <vekterli@yahoo-inc.com> | 2017-05-29 10:46:26 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2017-05-29 10:46:26 +0200 |
commit | 61e613620a5934273cd069b4d46635757feba8bc (patch) | |
tree | a6536b623bdfab23ab8a449b217888225a3b24b4 /storage | |
parent | a6ebf71d87365ce49077934d2f3e0c7675a5cf8e (diff) | |
parent | 8b270a58ec72feffe5f151ce69957e88fad327dd (diff) |
Merge pull request #2478 from yahoo/vekterli/remove-special-handling-of-trusted-replicas-for-merge-source-only-labeling-rebased-fixed
Remove special handling of trusted replicas for merge source only labeling (rebased/fixed)
Diffstat (limited to 'storage')
7 files changed, 203 insertions, 151 deletions
diff --git a/storage/src/tests/distributor/mergelimitertest.cpp b/storage/src/tests/distributor/mergelimitertest.cpp index 57b3690dc24..04dfa1b4e7b 100644 --- a/storage/src/tests/distributor/mergelimitertest.cpp +++ b/storage/src/tests/distributor/mergelimitertest.cpp @@ -3,8 +3,7 @@ #include <vespa/storage/distributor/operations/idealstate/mergelimiter.h> #include <vespa/vdstestlib/cppunit/macros.h> -namespace storage { -namespace distributor { +namespace storage::distributor { struct MergeLimiterTest : public CppUnit::TestFixture { @@ -14,6 +13,10 @@ struct MergeLimiterTest : public CppUnit::TestFixture void testAllUntrustedLessThanMaxVariants(); void testAllUntrustedMoreThanMaxVariants(); void testSourceOnlyLast(); + void limited_set_cannot_be_just_source_only(); + void non_source_only_replica_chosen_from_in_sync_group(); + void non_source_only_replicas_preferred_when_replicas_not_in_sync(); + void at_least_one_non_source_only_replica_chosen_when_all_trusted(); CPPUNIT_TEST_SUITE(MergeLimiterTest); CPPUNIT_TEST(testKeepsAllBelowLimit); @@ -22,6 +25,10 @@ struct MergeLimiterTest : public CppUnit::TestFixture CPPUNIT_TEST(testAllUntrustedLessThanMaxVariants); CPPUNIT_TEST(testAllUntrustedMoreThanMaxVariants); CPPUNIT_TEST(testSourceOnlyLast); + CPPUNIT_TEST(limited_set_cannot_be_just_source_only); + CPPUNIT_TEST(non_source_only_replica_chosen_from_in_sync_group); + CPPUNIT_TEST(non_source_only_replicas_preferred_when_replicas_not_in_sync); + CPPUNIT_TEST(at_least_one_non_source_only_replica_chosen_when_all_trusted); CPPUNIT_TEST_SUITE_END(); }; @@ -56,12 +63,13 @@ namespace { #define ASSERT_LIMIT(maxNodes, nodes, result) \ { \ MergeLimiter limiter(maxNodes); \ - limiter.limitMergeToMaxNodes(nodes); \ + auto nodesCopy = nodes; \ + limiter.limitMergeToMaxNodes(nodesCopy); \ std::ostringstream actual; \ - for (uint32_t i=0; i<nodes.size(); ++i) { \ + for (uint32_t i = 0; i < nodesCopy.size(); ++i) { \ if (i != 0) actual << ","; \ - actual << nodes[i]._nodeIndex; \ - if (nodes[i]._sourceOnly) actual << 's'; \ + actual << nodesCopy[i]._nodeIndex; \ + if (nodesCopy[i]._sourceOnly) actual << 's'; \ } \ CPPUNIT_ASSERT_EQUAL(std::string(result), actual.str()); \ } @@ -153,8 +161,51 @@ MergeLimiterTest::testSourceOnlyLast() .add(13, 0x9) .add(1, 0x7) .add(4, 0x5)); - ASSERT_LIMIT(4, nodes, "13,1,2s,5s"); + ASSERT_LIMIT(4, nodes, "9,3,5s,2s"); } -} // distributor -} // storage +void MergeLimiterTest::limited_set_cannot_be_just_source_only() { + MergeLimiter::NodeArray nodes(NodeFactory() + .addTrusted(9, 0x6) + .addTrusted(2, 0x6) + .addTrusted(13, 0x6).setSourceOnly() + .add(1, 0x7).setSourceOnly()); + ASSERT_LIMIT(2, nodes, "2,13s"); + ASSERT_LIMIT(3, nodes, "2,13s,1s"); +} + +void MergeLimiterTest::non_source_only_replica_chosen_from_in_sync_group() { + // nodes 9, 2, 13 are all in sync. Merge limiter will currently by default + // pop the _last_ node of an in-sync replica "group" when outputting a limited + // set. Unless we special-case source-only replicas here, we'd end up with an + // output set of "13s,1s", i.e. all source-only. + MergeLimiter::NodeArray nodes(NodeFactory() + .add(9, 0x6) + .add(2, 0x6) + .add(13, 0x6).setSourceOnly() + .add(1, 0x7).setSourceOnly()); + ASSERT_LIMIT(2, nodes, "2,13s"); + ASSERT_LIMIT(3, nodes, "2,13s,1s"); +} + +void MergeLimiterTest::non_source_only_replicas_preferred_when_replicas_not_in_sync() { + MergeLimiter::NodeArray nodes(NodeFactory() + .add(9, 0x4) + .add(2, 0x5) + .add(13, 0x6).setSourceOnly() + .add(1, 0x7).setSourceOnly()); + ASSERT_LIMIT(2, nodes, "9,2"); + ASSERT_LIMIT(3, nodes, "9,2,13s"); +} + +void MergeLimiterTest::at_least_one_non_source_only_replica_chosen_when_all_trusted() { + MergeLimiter::NodeArray nodes(NodeFactory() + .addTrusted(9, 0x6) + .addTrusted(2, 0x6) + .addTrusted(13, 0x6).setSourceOnly() + .addTrusted(1, 0x6).setSourceOnly()); + ASSERT_LIMIT(2, nodes, "2,13s"); + ASSERT_LIMIT(3, nodes, "2,13s,1s"); +} + +} // storage::distributor diff --git a/storage/src/tests/distributor/mergeoperationtest.cpp b/storage/src/tests/distributor/mergeoperationtest.cpp index 1e317e20b74..79c6acd5f4b 100644 --- a/storage/src/tests/distributor/mergeoperationtest.cpp +++ b/storage/src/tests/distributor/mergeoperationtest.cpp @@ -26,6 +26,7 @@ class MergeOperationTest : public CppUnit::TestFixture, CPPUNIT_TEST(testDoNotRemoveActiveSourceOnlyCopies); CPPUNIT_TEST(testMarkRedundantTrustedCopiesAsSourceOnly); CPPUNIT_TEST(onlyMarkRedundantRetiredReplicasAsSourceOnly); + CPPUNIT_TEST(mark_post_merge_redundant_replicas_source_only); CPPUNIT_TEST_SUITE_END(); std::unique_ptr<PendingMessageTracker> _pendingTracker; @@ -38,6 +39,7 @@ protected: void testDoNotRemoveActiveSourceOnlyCopies(); void testMarkRedundantTrustedCopiesAsSourceOnly(); void onlyMarkRedundantRetiredReplicasAsSourceOnly(); + void mark_post_merge_redundant_replicas_source_only(); public: void setUp() override { @@ -269,13 +271,13 @@ MergeOperationTest::testGenerateNodeList() std::string("3,5,7,6,8,0,9,2,1,4"), getNodeList("storage:10", 10, "9,8,7,6,5,4,3,2,1,0")); - // Trusted copies should not be source only. + // Trusted copies can be source-only if they are in the non-ideal node set. CPPUNIT_ASSERT_EQUAL( - std::string("3,5,7,6,8,0,9,1,2,4s"), + std::string("3,5,7,6,8,0,9,1s,2s,4s"), getNodeList("storage:10", 7, "0,1t,2t,3,4,5,6,7,8,9")); CPPUNIT_ASSERT_EQUAL( - std::string("3,5,7,6,8,0,9,2,1s,4s"), + std::string("3,5,7,6,8,0,9,1s,2s,4s"), getNodeList("storage:10", 7, "0,1,2t,3,4,5,6,7,8,9")); // Retired nodes are not in ideal state @@ -376,35 +378,31 @@ MergeOperationTest::testMarkRedundantTrustedCopiesAsSourceOnly() std::string("3,5,7,6s,8s"), getNodeList("storage:10", 3, "3t,5t,7t,6,8")); - // 3 redundancy, 4 trusted -> 1 source only trusted. - // We allow marking a trusted, non-ideal copy as source even when we don't - // have #redundancy trusted _ideal_ copies, as long as we're left with >= - // #redundancy trusted copies in total. + // Trusted-ness should not be taken into account when marking nodes as source-only. + // 2 out of 3 ideal replicas trusted. CPPUNIT_ASSERT_EQUAL( - std::string("3,5,7,6,8s"), + std::string("3,5,7,6s,8s"), getNodeList("storage:10", 3, "3t,5t,7,6t,8t")); - // Not sufficient number of trusted copies to mark any as source only. + // 1 out of 3 ideal replicas trusted. CPPUNIT_ASSERT_EQUAL( - std::string("3,5,7,6,8"), + std::string("3,5,7,6s,8s"), getNodeList("storage:10", 3, "3t,5,7,6t,8t")); - // Same as above, with all trusted copies being non-ideal. + // 0 out of 3 ideal replicas trusted. CPPUNIT_ASSERT_EQUAL( - std::string("3,5,7,6,8"), + std::string("3,5,7,6s,8s"), getNodeList("storage:10", 3, "3,5,7,6t,8t")); - // #redundancy of trusted, but none are ideal. Non-ideal trusted should - // not be marked as source only (though we can mark non-trusted non-ideal - // node as source only). - // Note the node reordering since trusted are added before the rest. + // #redundancy of trusted, but none are ideal. Non-ideal trusted may be + // marked as source only. CPPUNIT_ASSERT_EQUAL( - std::string("3,5,7,8,0,9,6s"), + std::string("3,5,7,6s,8s,0s,9s"), getNodeList("storage:10", 3, "3,5,7,6,8t,0t,9t")); - // But allow for removing excess trusted, non-ideal copies. + // Allow for removing excess trusted, non-ideal copies. CPPUNIT_ASSERT_EQUAL( - std::string("3,5,7,6,8,0,9s"), + std::string("3,5,7,6s,8s,0s,9s"), getNodeList("storage:10", 3, "3,5,7,6t,8t,0t,9t")); } @@ -417,8 +415,41 @@ MergeOperationTest::onlyMarkRedundantRetiredReplicasAsSourceOnly() // source-only nodes will have their replica removed after a successful // merge, which we cannot allow to happen here. CPPUNIT_ASSERT_EQUAL( - std::string("0,1,2s"), - getNodeList("storage:3 .0.s.:r .1.s:r .2.s:r", 2, "1,0,2")); + std::string("1,0,2s"), + getNodeList("storage:3 .0.s:r .1.s:r .2.s:r", 2, "1,0,2")); +} + +void MergeOperationTest::mark_post_merge_redundant_replicas_source_only() { + // Ideal state sequence is [3, 5, 7, 6, 8, 0, 9, 2, 1, 4] + + // Retired node 7 is not part of the #redundancy ideal state and should be moved + // to node 6. Once the merge is done we'll end up with too many replicas unless + // we allow marking the to-be-moved replica as source only. + CPPUNIT_ASSERT_EQUAL( + std::string("3,5,6,7s"), + getNodeList("storage:10 .7.s:r", 3, "3t,5t,7t,6")); + + // Should be allowed to mark as source only even if retired replica is the + // only trusted replica at the time the merge starts. + CPPUNIT_ASSERT_EQUAL( + std::string("3,5,6,7s"), + getNodeList("storage:10 .7.s:r", 3, "3,5,7t,6")); + + // This extends to multiple retired nodes. + CPPUNIT_ASSERT_EQUAL( + std::string("3,6,8,5s,7s"), + getNodeList("storage:10 .5.s:r .7.s:r", 3, "3t,5t,7t,6,8")); + + // If number of post-merge ideal nodes is lower than desired redundancy, don't + // mark any as source only. + CPPUNIT_ASSERT_EQUAL( + std::string("3,5,7,6"), + getNodeList("storage:10", 5, "3,5,7,6")); + + // Same applies to when post-merge ideal nodes is _equal_ to desired redundancy. + CPPUNIT_ASSERT_EQUAL( + std::string("3,5,7,6"), + getNodeList("storage:10", 4, "3,5,7,6")); } } // distributor diff --git a/storage/src/vespa/storage/distributor/operations/idealstate/mergelimiter.cpp b/storage/src/vespa/storage/distributor/operations/idealstate/mergelimiter.cpp index 618ba9ba884..d708b902454 100644 --- a/storage/src/vespa/storage/distributor/operations/idealstate/mergelimiter.cpp +++ b/storage/src/vespa/storage/distributor/operations/idealstate/mergelimiter.cpp @@ -1,20 +1,32 @@ // Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. #include <vespa/storage/distributor/operations/idealstate/mergelimiter.h> +#include <cassert> #include <vespa/log/log.h> - LOG_SETUP(".distributor.operations.merge.limiter"); -namespace storage { -namespace distributor { +namespace storage::distributor { MergeLimiter::MergeLimiter(uint16_t maxNodes) : _maxNodes(maxNodes) { + assert(maxNodes > 1); LOG(spam, "Limiter initialized with %u nodes.", uint32_t(maxNodes)); } +// TODO replace this overly complicated set of heuristics with something simpler. +// Suggestion: +// 1. Find non-source only replica with highest meta entry count. Emit it and remove from set. +// This tries to maintain a "seed" replica that can hopefully let the remaining replicas +// converge to the complete document entry set as quickly as possible. +// 2. Create mapping from checksum -> replica set. +// 3. Circularly loop through mapping and emit+remove the first replica in each mapping's set. +// Distributing the merge across replica checksum groups is a heuristic to fetch as many +// distinct document entries in one merge operation as possible, as these are all known to +// be pairwise divergent from each other. +// 3.1 Once merge limit is reached, break +// 4. Do a stable sort on the emitted list such that source only replicas are last in the sequence. namespace { class EqualCopies { uint32_t _checksum; @@ -28,15 +40,24 @@ namespace { { } - bool hasTrusted() const { return (_trustedCopies > 0); } - uint32_t trustedCount() const { return _trustedCopies; } - uint32_t size() const { return _copies.size(); } - bool operator==(const MergeMetaData& mmd) const { + bool hasTrusted() const noexcept { return (_trustedCopies > 0); } + uint32_t trustedCount() const noexcept { return _trustedCopies; } + uint32_t size() const noexcept { return static_cast<uint32_t>(_copies.size()); } + bool operator==(const MergeMetaData& mmd) const noexcept { return (_checksum == mmd.checksum()); } void add(const MergeMetaData& mmd) { - if (_copies.empty()) _checksum = mmd.checksum(); - if (mmd.trusted()) ++_trustedCopies; + if (_copies.empty()) { + _checksum = mmd.checksum(); + } + // Don't treat source only replicas as trusted from the perspective of + // picking replica groups. "Trusted" in the context of the merge limiter + // logic _in practice_ means "may be output as the sole non-source only node + // in the resulting node set", which obviously doesn't work if it is in fact + // source only to begin with... + if (mmd.trusted() && !mmd.source_only()) { + ++_trustedCopies; + } _copies.push_back(mmd); } MergeMetaData extractNext() { @@ -56,36 +77,22 @@ namespace { : _trustedCopies(0) { _groups.reserve(a.size()); - for (uint32_t i=0, n=a.size(); i<n; ++i) { + for (uint32_t i = 0, n = static_cast<uint32_t>(a.size()); i < n; ++i) { add(a[i]); - if (a[i].trusted()) { + if (a[i].trusted() && !a[i].source_only()) { ++_trustedCopies; } } } - EqualCopies& getMajority() { - EqualCopies* candidate = 0; - uint32_t size = 0; - for (uint32_t i=0, n=_groups.size(); i<n; ++i) { - if (_groups[i].size() > size) { - candidate = &_groups[i]; - size = candidate->size(); - } - } - assert(candidate != 0); - return *candidate; - } - - bool hasTrusted() const { return (_trustedCopies > 0); } - uint32_t trustedCount() const { return _trustedCopies; } + bool hasTrusted() const noexcept { return (_trustedCopies > 0); } Statistics extractGroupsWithTrustedCopies() { std::vector<EqualCopies> remaining; Statistics trusted; remaining.reserve(_groups.size()); trusted._groups.reserve(_groups.size()); - for (uint32_t i=0, n=_groups.size(); i<n; ++i) { + for (uint32_t i = 0, n = static_cast<uint32_t>(_groups.size()); i < n; ++i) { if (_groups[i].hasTrusted()) { trusted._groups.push_back(_groups[i]); trusted._trustedCopies += _groups[i].trustedCount(); @@ -110,7 +117,7 @@ namespace { void removeGroup(uint32_t groupIndex) { std::vector<EqualCopies> remaining; remaining.reserve(_groups.size()-1); - for (uint32_t i=0, n=_groups.size(); i<n; ++i) { + for (uint32_t i = 0, n = static_cast<uint32_t>(_groups.size()); i < n; ++i) { if (i != groupIndex) { remaining.push_back(_groups[i]); } @@ -120,10 +127,16 @@ namespace { private: void add(const MergeMetaData& mmd) { - for (uint32_t i=0; i<_groups.size(); ++i) { - if (_groups[i] == mmd) { - _groups[i].add(mmd); - return; + // Treat source only replicas as their own distinct "groups" with regards + // to picking replicas for being part of the merge. This way, we avoid + // accidentally picking a trusted source only replica as our one trusted + // replica that will be part of the merge. + if (!mmd.source_only()) { + for (uint32_t i = 0; i < _groups.size(); ++i) { + if (_groups[i] == mmd) { + _groups[i].add(mmd); + return; + } } } _groups.push_back(EqualCopies()); @@ -131,13 +144,15 @@ namespace { } }; - // Add up to max nodes, where different variants exist, prefer having - // some of each. + // Add up to max nodes, where different variants exist, prefer having + // some of each. void addNodes(uint32_t max, Statistics& stats, MergeLimiter::NodeArray& result) { + // FIXME redesign! `last` will unsigned over/underflow in extractNext, which + // is not a very pretty solution, to say the least. uint32_t last = -1; - for (uint32_t i=0; i<max; ++i) { + for (uint32_t i = 0; i < max; ++i) { MergeMetaData data; if (!stats.extractNext(data, last)) return; result.push_back(data); @@ -152,16 +167,22 @@ namespace { }; } +// FIXME the only reason why this code doesn't end up accidentally picking +// just source-only replicas as the output node set today is due to an implicit +// guarantee that the input to this function always has source-only replicas +// listed _last_ in the sequence. void MergeLimiter::limitMergeToMaxNodes(NodeArray& nodes) { - // If not above max anyhow, we need not do anything - if (nodes.size() <= _maxNodes) return; - // Gather some statistics to base decision on what we are going to do on + // If not above max anyhow, we need not do anything + if (nodes.size() <= _maxNodes) { + return; + } + // Gather some statistics to base decision on what we are going to do on Statistics stats(nodes); NodeArray result; - // If we have trusted copies, these should be complete. Pick one of them - // and merge with as many untrusted copies as possible + // If we have trusted copies, these are likely to be complete. Pick one of them + // and merge with as many untrusted copies as possible if (stats.hasTrusted()) { Statistics trusted(stats.extractGroupsWithTrustedCopies()); addNodes(_maxNodes - 1, stats, result); @@ -173,5 +194,4 @@ MergeLimiter::limitMergeToMaxNodes(NodeArray& nodes) result.swap(nodes); } -} // distributor -} // storage +} // storage::distributor diff --git a/storage/src/vespa/storage/distributor/operations/idealstate/mergelimiter.h b/storage/src/vespa/storage/distributor/operations/idealstate/mergelimiter.h index acde58e6061..ef86fc07810 100644 --- a/storage/src/vespa/storage/distributor/operations/idealstate/mergelimiter.h +++ b/storage/src/vespa/storage/distributor/operations/idealstate/mergelimiter.h @@ -5,8 +5,7 @@ #include <vespa/storage/distributor/operations/idealstate/mergemetadata.h> #include <vector> -namespace storage { -namespace distributor { +namespace storage::distributor { class MergeLimiter { uint16_t _maxNodes; @@ -19,5 +18,4 @@ public: void limitMergeToMaxNodes(NodeArray&); }; -} // distributor -} // storage +} // storage::distributor diff --git a/storage/src/vespa/storage/distributor/operations/idealstate/mergemetadata.h b/storage/src/vespa/storage/distributor/operations/idealstate/mergemetadata.h index 8c1cb02bcda..4c4225587bc 100644 --- a/storage/src/vespa/storage/distributor/operations/idealstate/mergemetadata.h +++ b/storage/src/vespa/storage/distributor/operations/idealstate/mergemetadata.h @@ -25,6 +25,7 @@ struct MergeMetaData { assert(_copy != 0); return _copy->getChecksum(); } + bool source_only() const noexcept { return _sourceOnly; } }; vespalib::asciistream& operator<<(vespalib::asciistream& out, const MergeMetaData& e); diff --git a/storage/src/vespa/storage/distributor/operations/idealstate/mergeoperation.cpp b/storage/src/vespa/storage/distributor/operations/idealstate/mergeoperation.cpp index 75c58ca4f87..0821408560b 100644 --- a/storage/src/vespa/storage/distributor/operations/idealstate/mergeoperation.cpp +++ b/storage/src/vespa/storage/distributor/operations/idealstate/mergeoperation.cpp @@ -20,18 +20,13 @@ MergeOperation::getStatus() const void MergeOperation::addIdealNodes( - const lib::Distribution& distribution, - const lib::ClusterState& state, - const document::BucketId& bucketId, + const std::vector<uint16_t>& idealNodes, const std::vector<MergeMetaData>& nodes, std::vector<MergeMetaData>& result) { - std::vector<uint16_t> idealNodes( - distribution.getIdealStorageNodes(state, bucketId, "ui")); - - // Add all ideal nodes first + // Add all ideal nodes first. These are never marked source-only. for (uint32_t i = 0; i < idealNodes.size(); i++) { - const MergeMetaData* entry = 0; + const MergeMetaData* entry = nullptr; for (uint32_t j = 0; j < nodes.size(); j++) { if (idealNodes[i] == nodes[j]._nodeIndex) { entry = &nodes[j]; @@ -39,52 +34,13 @@ MergeOperation::addIdealNodes( } } - if (entry != 0) { + if (entry != nullptr) { result.push_back(*entry); result.back()._sourceOnly = false; } } } -uint16_t -MergeOperation::countTrusted(const std::vector<MergeMetaData>& nodes) -{ - uint16_t trusted = 0; - for (const auto& n : nodes) { - if (n.trusted()) { - ++trusted; - } - } - return trusted; -} - -void -MergeOperation::addTrustedNodesNotAlreadyAdded( - uint16_t redundancy, - const std::vector<MergeMetaData>& nodes, - std::vector<MergeMetaData>& result) -{ - uint16_t alreadyTrusted = countTrusted(result); - for (uint32_t i = 0; i < nodes.size(); i++) { - if (!nodes[i].trusted()) { - continue; - } - - bool found = false; - for (uint32_t j = 0; j < result.size(); j++) { - if (result[j]._nodeIndex == nodes[i]._nodeIndex) { - found = true; - } - } - - if (!found) { - result.push_back(nodes[i]); - result.back()._sourceOnly = (alreadyTrusted >= redundancy); - ++alreadyTrusted; - } - } -} - void MergeOperation::addCopiesNotAlreadyAdded( uint16_t redundancy, @@ -114,11 +70,19 @@ MergeOperation::generateSortedNodeList( MergeLimiter& limiter, std::vector<MergeMetaData>& nodes) { + std::vector<uint16_t> idealNodes( + distribution.getIdealStorageNodes(state, bucketId, "ui")); + std::vector<MergeMetaData> result; const uint16_t redundancy = distribution.getRedundancy(); - addIdealNodes(distribution, state, bucketId, nodes, result); - addTrustedNodesNotAlreadyAdded(redundancy, nodes, result); + addIdealNodes(idealNodes, nodes, result); addCopiesNotAlreadyAdded(redundancy, nodes, result); + // TODO optimization: when merge case is obviously a replica move (all existing N replicas + // are in sync and new replicas are empty), could prune away N-1 lowest indexed replicas + // from the node list. This would minimize the number of nodes involved in the merge without + // sacrificing the end result. Avoiding the lower indexed nodes would take pressure off the + // merge throttling "locks" and could potentially greatly speed up node retirement in the common + // case. Existing replica could also be marked as source-only if it's not in the ideal state. limiter.limitMergeToMaxNodes(result); result.swap(nodes); } @@ -154,7 +118,7 @@ MergeOperation::onStart(DistributorMessageSender& sender) for (uint32_t i = 0; i < getNodes().size(); ++i) { const BucketCopy* copy = entry->getNode(getNodes()[i]); if (copy == 0) { // New copies? - newCopies.push_back(std::unique_ptr<BucketCopy>(new BucketCopy(0, getNodes()[i], api::BucketInfo()))); + newCopies.push_back(std::make_unique<BucketCopy>(0, getNodes()[i], api::BucketInfo())); copy = newCopies.back().get(); } nodes.push_back(MergeMetaData(getNodes()[i], *copy)); @@ -172,12 +136,11 @@ MergeOperation::onStart(DistributorMessageSender& sender) } if (_mnodes.size() > 1) { - std::shared_ptr<api::MergeBucketCommand> msg( - new api::MergeBucketCommand( - getBucketId(), - _mnodes, - _manager->getDistributorComponent().getUniqueTimestamp(), - clusterState.getVersion())); + auto msg = std::make_shared<api::MergeBucketCommand>( + getBucketId(), + _mnodes, + _manager->getDistributorComponent().getUniqueTimestamp(), + clusterState.getVersion()); // Due to merge forwarding/chaining semantics, we must always send // the merge command to the lowest indexed storage node involved in @@ -192,10 +155,7 @@ MergeOperation::onStart(DistributorMessageSender& sender) msg->setTimeout(60 * 60 * 1000); setCommandMeta(*msg); - sender.sendToNode( - lib::NodeType::STORAGE, - _mnodes[0].index, - msg); + sender.sendToNode(lib::NodeType::STORAGE, _mnodes[0].index, msg); _sentMessageTime = _manager->getDistributorComponent().getClock().getTimeInSeconds(); } else { @@ -213,13 +173,11 @@ MergeOperation::sourceOnlyCopyChangedDuringMerge( { assert(currentState.valid()); for (size_t i = 0; i < _mnodes.size(); ++i) { - const BucketCopy* copyBefore( - _infoBefore.getNode(_mnodes[i].index)); + const BucketCopy* copyBefore(_infoBefore.getNode(_mnodes[i].index)); if (!copyBefore) { continue; } - const BucketCopy* copyAfter( - currentState->getNode(_mnodes[i].index)); + const BucketCopy* copyAfter(currentState->getNode(_mnodes[i].index)); if (!copyAfter) { LOG(debug, "Copy of %s on node %u removed during merge. Was %s", getBucketId().toString().c_str(), @@ -295,6 +253,8 @@ MergeOperation::deleteSourceOnlyNodes( done(); } } + // FIXME what about the else-case here...? done() is not invoked by caller in this branch. + // Not calling done() doesn't leak anything, but causes metric updates to be missed. } void diff --git a/storage/src/vespa/storage/distributor/operations/idealstate/mergeoperation.h b/storage/src/vespa/storage/distributor/operations/idealstate/mergeoperation.h index 8fdf22c88f5..f50226dbd3c 100644 --- a/storage/src/vespa/storage/distributor/operations/idealstate/mergeoperation.h +++ b/storage/src/vespa/storage/distributor/operations/idealstate/mergeoperation.h @@ -49,14 +49,7 @@ public: bool shouldBlockThisOperation(uint32_t messageType, uint8_t pri) const override ; private: static void addIdealNodes( - const lib::Distribution&, - const lib::ClusterState&, - const document::BucketId&, - const std::vector<MergeMetaData>& nodes, - std::vector<MergeMetaData>& result); - - static void addTrustedNodesNotAlreadyAdded( - uint16_t redundancy, + const std::vector<uint16_t>& idealNodes, const std::vector<MergeMetaData>& nodes, std::vector<MergeMetaData>& result); @@ -65,8 +58,6 @@ private: const std::vector<MergeMetaData>& nodes, std::vector<MergeMetaData>& result); - static uint16_t countTrusted(const std::vector<MergeMetaData>& nodes); - void deleteSourceOnlyNodes(const BucketDatabase::Entry& currentState, DistributorMessageSender& sender); }; |