diff options
author | Tor Brede Vekterli <vekterli@yahoo-inc.com> | 2017-05-16 12:49:40 +0000 |
---|---|---|
committer | Tor Brede Vekterli <vekterli@yahoo-inc.com> | 2017-05-16 12:52:00 +0000 |
commit | 1f6295e97ba29ffb5fd288771a7f4c5309198825 (patch) | |
tree | 2aeab56b14346d3891c2f88a884be5af461cbd17 /storage | |
parent | a3a6febc5d5e6382ec44e38a761c06a3f7a1d995 (diff) |
Ensure merge node limiter handles trusted source-only replicas
Could previously risk having a resulting node set be comprised solely of
source-only replicas, which luckily gets rejected by the merge throttling
component.
Diffstat (limited to 'storage')
4 files changed, 126 insertions, 55 deletions
diff --git a/storage/src/tests/distributor/mergelimitertest.cpp b/storage/src/tests/distributor/mergelimitertest.cpp index 57b3690dc24..23d0734d484 100644 --- a/storage/src/tests/distributor/mergelimitertest.cpp +++ b/storage/src/tests/distributor/mergelimitertest.cpp @@ -3,8 +3,7 @@ #include <vespa/storage/distributor/operations/idealstate/mergelimiter.h> #include <vespa/vdstestlib/cppunit/macros.h> -namespace storage { -namespace distributor { +namespace storage::distributor { struct MergeLimiterTest : public CppUnit::TestFixture { @@ -14,6 +13,10 @@ struct MergeLimiterTest : public CppUnit::TestFixture void testAllUntrustedLessThanMaxVariants(); void testAllUntrustedMoreThanMaxVariants(); void testSourceOnlyLast(); + void limited_set_cannot_be_just_source_only(); + void non_source_only_replica_chosen_from_in_sync_group(); + void non_source_only_replicas_preferred_when_replicas_not_in_sync(); + void at_least_one_non_source_only_replica_chosen_when_all_trusted(); CPPUNIT_TEST_SUITE(MergeLimiterTest); CPPUNIT_TEST(testKeepsAllBelowLimit); @@ -22,6 +25,10 @@ struct MergeLimiterTest : public CppUnit::TestFixture CPPUNIT_TEST(testAllUntrustedLessThanMaxVariants); CPPUNIT_TEST(testAllUntrustedMoreThanMaxVariants); CPPUNIT_TEST(testSourceOnlyLast); + CPPUNIT_TEST(limited_set_cannot_be_just_source_only); + CPPUNIT_TEST(non_source_only_replica_chosen_from_in_sync_group); + CPPUNIT_TEST(non_source_only_replicas_preferred_when_replicas_not_in_sync); + CPPUNIT_TEST(at_least_one_non_source_only_replica_chosen_when_all_trusted); CPPUNIT_TEST_SUITE_END(); }; @@ -56,12 +63,13 @@ namespace { #define ASSERT_LIMIT(maxNodes, nodes, result) \ { \ MergeLimiter limiter(maxNodes); \ - limiter.limitMergeToMaxNodes(nodes); \ + auto nodesCopy = nodes; \ + limiter.limitMergeToMaxNodes(nodesCopy); \ std::ostringstream actual; \ - for (uint32_t i=0; i<nodes.size(); ++i) { \ + for (uint32_t i = 0; i < nodesCopy.size(); ++i) { \ if (i != 0) actual << ","; \ - actual << nodes[i]._nodeIndex; \ - if (nodes[i]._sourceOnly) actual << 's'; \ + actual << nodesCopy[i]._nodeIndex; \ + if (nodesCopy[i]._sourceOnly) actual << 's'; \ } \ CPPUNIT_ASSERT_EQUAL(std::string(result), actual.str()); \ } @@ -153,8 +161,52 @@ MergeLimiterTest::testSourceOnlyLast() .add(13, 0x9) .add(1, 0x7) .add(4, 0x5)); - ASSERT_LIMIT(4, nodes, "13,1,2s,5s"); + ASSERT_LIMIT(4, nodes, "9,3,5s,2s"); } -} // distributor -} // storage +void MergeLimiterTest::limited_set_cannot_be_just_source_only() { + MergeLimiter::NodeArray nodes(NodeFactory() + .addTrusted(9, 0x6) + .addTrusted(2, 0x6) + .addTrusted(13, 0x6).setSourceOnly() + .add(1, 0x7).setSourceOnly()); + ASSERT_LIMIT(2, nodes, "2,13s"); + ASSERT_LIMIT(3, nodes, "2,13s,1s"); +} + +void MergeLimiterTest::non_source_only_replica_chosen_from_in_sync_group() { + // nodes 9, 2, 13 are all in sync. Merge limiter will currently by default + // pop the _last_ node of an in-sync replica "group" when outputting a limited + // set. Unless we special-case source-only replicas here, we'd end up with an + // output set of "13s,1s", i.e. all source-only. + MergeLimiter::NodeArray nodes(NodeFactory() + .add(9, 0x6) + .add(2, 0x6) + .add(13, 0x6).setSourceOnly() + .add(1, 0x7).setSourceOnly()); + ASSERT_LIMIT(2, nodes, "2,13s"); + ASSERT_LIMIT(3, nodes, "2,13s,1s"); +} + +void MergeLimiterTest::non_source_only_replicas_preferred_when_replicas_not_in_sync() { + MergeLimiter::NodeArray nodes(NodeFactory() + .add(9, 0x4) + .add(2, 0x5) + .add(13, 0x6).setSourceOnly() + .add(1, 0x7).setSourceOnly()); + ASSERT_LIMIT(2, nodes, "9,2"); + ASSERT_LIMIT(3, nodes, "9,2,13s"); +} + +void MergeLimiterTest::at_least_one_non_source_only_replica_chosen_when_all_trusted() { + std::cerr << "\n\n\n"; + MergeLimiter::NodeArray nodes(NodeFactory() + .addTrusted(9, 0x6) + .addTrusted(2, 0x6) + .addTrusted(13, 0x6).setSourceOnly() + .addTrusted(1, 0x6).setSourceOnly()); + ASSERT_LIMIT(2, nodes, "2,13s"); + ASSERT_LIMIT(3, nodes, "2,13s,1s"); +} + +} // storage::distributor diff --git a/storage/src/vespa/storage/distributor/operations/idealstate/mergelimiter.cpp b/storage/src/vespa/storage/distributor/operations/idealstate/mergelimiter.cpp index 618ba9ba884..d708b902454 100644 --- a/storage/src/vespa/storage/distributor/operations/idealstate/mergelimiter.cpp +++ b/storage/src/vespa/storage/distributor/operations/idealstate/mergelimiter.cpp @@ -1,20 +1,32 @@ // Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. #include <vespa/storage/distributor/operations/idealstate/mergelimiter.h> +#include <cassert> #include <vespa/log/log.h> - LOG_SETUP(".distributor.operations.merge.limiter"); -namespace storage { -namespace distributor { +namespace storage::distributor { MergeLimiter::MergeLimiter(uint16_t maxNodes) : _maxNodes(maxNodes) { + assert(maxNodes > 1); LOG(spam, "Limiter initialized with %u nodes.", uint32_t(maxNodes)); } +// TODO replace this overly complicated set of heuristics with something simpler. +// Suggestion: +// 1. Find non-source only replica with highest meta entry count. Emit it and remove from set. +// This tries to maintain a "seed" replica that can hopefully let the remaining replicas +// converge to the complete document entry set as quickly as possible. +// 2. Create mapping from checksum -> replica set. +// 3. Circularly loop through mapping and emit+remove the first replica in each mapping's set. +// Distributing the merge across replica checksum groups is a heuristic to fetch as many +// distinct document entries in one merge operation as possible, as these are all known to +// be pairwise divergent from each other. +// 3.1 Once merge limit is reached, break +// 4. Do a stable sort on the emitted list such that source only replicas are last in the sequence. namespace { class EqualCopies { uint32_t _checksum; @@ -28,15 +40,24 @@ namespace { { } - bool hasTrusted() const { return (_trustedCopies > 0); } - uint32_t trustedCount() const { return _trustedCopies; } - uint32_t size() const { return _copies.size(); } - bool operator==(const MergeMetaData& mmd) const { + bool hasTrusted() const noexcept { return (_trustedCopies > 0); } + uint32_t trustedCount() const noexcept { return _trustedCopies; } + uint32_t size() const noexcept { return static_cast<uint32_t>(_copies.size()); } + bool operator==(const MergeMetaData& mmd) const noexcept { return (_checksum == mmd.checksum()); } void add(const MergeMetaData& mmd) { - if (_copies.empty()) _checksum = mmd.checksum(); - if (mmd.trusted()) ++_trustedCopies; + if (_copies.empty()) { + _checksum = mmd.checksum(); + } + // Don't treat source only replicas as trusted from the perspective of + // picking replica groups. "Trusted" in the context of the merge limiter + // logic _in practice_ means "may be output as the sole non-source only node + // in the resulting node set", which obviously doesn't work if it is in fact + // source only to begin with... + if (mmd.trusted() && !mmd.source_only()) { + ++_trustedCopies; + } _copies.push_back(mmd); } MergeMetaData extractNext() { @@ -56,36 +77,22 @@ namespace { : _trustedCopies(0) { _groups.reserve(a.size()); - for (uint32_t i=0, n=a.size(); i<n; ++i) { + for (uint32_t i = 0, n = static_cast<uint32_t>(a.size()); i < n; ++i) { add(a[i]); - if (a[i].trusted()) { + if (a[i].trusted() && !a[i].source_only()) { ++_trustedCopies; } } } - EqualCopies& getMajority() { - EqualCopies* candidate = 0; - uint32_t size = 0; - for (uint32_t i=0, n=_groups.size(); i<n; ++i) { - if (_groups[i].size() > size) { - candidate = &_groups[i]; - size = candidate->size(); - } - } - assert(candidate != 0); - return *candidate; - } - - bool hasTrusted() const { return (_trustedCopies > 0); } - uint32_t trustedCount() const { return _trustedCopies; } + bool hasTrusted() const noexcept { return (_trustedCopies > 0); } Statistics extractGroupsWithTrustedCopies() { std::vector<EqualCopies> remaining; Statistics trusted; remaining.reserve(_groups.size()); trusted._groups.reserve(_groups.size()); - for (uint32_t i=0, n=_groups.size(); i<n; ++i) { + for (uint32_t i = 0, n = static_cast<uint32_t>(_groups.size()); i < n; ++i) { if (_groups[i].hasTrusted()) { trusted._groups.push_back(_groups[i]); trusted._trustedCopies += _groups[i].trustedCount(); @@ -110,7 +117,7 @@ namespace { void removeGroup(uint32_t groupIndex) { std::vector<EqualCopies> remaining; remaining.reserve(_groups.size()-1); - for (uint32_t i=0, n=_groups.size(); i<n; ++i) { + for (uint32_t i = 0, n = static_cast<uint32_t>(_groups.size()); i < n; ++i) { if (i != groupIndex) { remaining.push_back(_groups[i]); } @@ -120,10 +127,16 @@ namespace { private: void add(const MergeMetaData& mmd) { - for (uint32_t i=0; i<_groups.size(); ++i) { - if (_groups[i] == mmd) { - _groups[i].add(mmd); - return; + // Treat source only replicas as their own distinct "groups" with regards + // to picking replicas for being part of the merge. This way, we avoid + // accidentally picking a trusted source only replica as our one trusted + // replica that will be part of the merge. + if (!mmd.source_only()) { + for (uint32_t i = 0; i < _groups.size(); ++i) { + if (_groups[i] == mmd) { + _groups[i].add(mmd); + return; + } } } _groups.push_back(EqualCopies()); @@ -131,13 +144,15 @@ namespace { } }; - // Add up to max nodes, where different variants exist, prefer having - // some of each. + // Add up to max nodes, where different variants exist, prefer having + // some of each. void addNodes(uint32_t max, Statistics& stats, MergeLimiter::NodeArray& result) { + // FIXME redesign! `last` will unsigned over/underflow in extractNext, which + // is not a very pretty solution, to say the least. uint32_t last = -1; - for (uint32_t i=0; i<max; ++i) { + for (uint32_t i = 0; i < max; ++i) { MergeMetaData data; if (!stats.extractNext(data, last)) return; result.push_back(data); @@ -152,16 +167,22 @@ namespace { }; } +// FIXME the only reason why this code doesn't end up accidentally picking +// just source-only replicas as the output node set today is due to an implicit +// guarantee that the input to this function always has source-only replicas +// listed _last_ in the sequence. void MergeLimiter::limitMergeToMaxNodes(NodeArray& nodes) { - // If not above max anyhow, we need not do anything - if (nodes.size() <= _maxNodes) return; - // Gather some statistics to base decision on what we are going to do on + // If not above max anyhow, we need not do anything + if (nodes.size() <= _maxNodes) { + return; + } + // Gather some statistics to base decision on what we are going to do on Statistics stats(nodes); NodeArray result; - // If we have trusted copies, these should be complete. Pick one of them - // and merge with as many untrusted copies as possible + // If we have trusted copies, these are likely to be complete. Pick one of them + // and merge with as many untrusted copies as possible if (stats.hasTrusted()) { Statistics trusted(stats.extractGroupsWithTrustedCopies()); addNodes(_maxNodes - 1, stats, result); @@ -173,5 +194,4 @@ MergeLimiter::limitMergeToMaxNodes(NodeArray& nodes) result.swap(nodes); } -} // distributor -} // storage +} // storage::distributor diff --git a/storage/src/vespa/storage/distributor/operations/idealstate/mergelimiter.h b/storage/src/vespa/storage/distributor/operations/idealstate/mergelimiter.h index acde58e6061..ef86fc07810 100644 --- a/storage/src/vespa/storage/distributor/operations/idealstate/mergelimiter.h +++ b/storage/src/vespa/storage/distributor/operations/idealstate/mergelimiter.h @@ -5,8 +5,7 @@ #include <vespa/storage/distributor/operations/idealstate/mergemetadata.h> #include <vector> -namespace storage { -namespace distributor { +namespace storage::distributor { class MergeLimiter { uint16_t _maxNodes; @@ -19,5 +18,4 @@ public: void limitMergeToMaxNodes(NodeArray&); }; -} // distributor -} // storage +} // storage::distributor diff --git a/storage/src/vespa/storage/distributor/operations/idealstate/mergemetadata.h b/storage/src/vespa/storage/distributor/operations/idealstate/mergemetadata.h index 8c1cb02bcda..4c4225587bc 100644 --- a/storage/src/vespa/storage/distributor/operations/idealstate/mergemetadata.h +++ b/storage/src/vespa/storage/distributor/operations/idealstate/mergemetadata.h @@ -25,6 +25,7 @@ struct MergeMetaData { assert(_copy != 0); return _copy->getChecksum(); } + bool source_only() const noexcept { return _sourceOnly; } }; vespalib::asciistream& operator<<(vespalib::asciistream& out, const MergeMetaData& e); |