aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--storage/src/tests/distributor/mergelimitertest.cpp70
-rw-r--r--storage/src/vespa/storage/distributor/operations/idealstate/mergelimiter.cpp104
-rw-r--r--storage/src/vespa/storage/distributor/operations/idealstate/mergelimiter.h6
-rw-r--r--storage/src/vespa/storage/distributor/operations/idealstate/mergemetadata.h1
4 files changed, 126 insertions, 55 deletions
diff --git a/storage/src/tests/distributor/mergelimitertest.cpp b/storage/src/tests/distributor/mergelimitertest.cpp
index 57b3690dc24..23d0734d484 100644
--- a/storage/src/tests/distributor/mergelimitertest.cpp
+++ b/storage/src/tests/distributor/mergelimitertest.cpp
@@ -3,8 +3,7 @@
#include <vespa/storage/distributor/operations/idealstate/mergelimiter.h>
#include <vespa/vdstestlib/cppunit/macros.h>
-namespace storage {
-namespace distributor {
+namespace storage::distributor {
struct MergeLimiterTest : public CppUnit::TestFixture
{
@@ -14,6 +13,10 @@ struct MergeLimiterTest : public CppUnit::TestFixture
void testAllUntrustedLessThanMaxVariants();
void testAllUntrustedMoreThanMaxVariants();
void testSourceOnlyLast();
+ void limited_set_cannot_be_just_source_only();
+ void non_source_only_replica_chosen_from_in_sync_group();
+ void non_source_only_replicas_preferred_when_replicas_not_in_sync();
+ void at_least_one_non_source_only_replica_chosen_when_all_trusted();
CPPUNIT_TEST_SUITE(MergeLimiterTest);
CPPUNIT_TEST(testKeepsAllBelowLimit);
@@ -22,6 +25,10 @@ struct MergeLimiterTest : public CppUnit::TestFixture
CPPUNIT_TEST(testAllUntrustedLessThanMaxVariants);
CPPUNIT_TEST(testAllUntrustedMoreThanMaxVariants);
CPPUNIT_TEST(testSourceOnlyLast);
+ CPPUNIT_TEST(limited_set_cannot_be_just_source_only);
+ CPPUNIT_TEST(non_source_only_replica_chosen_from_in_sync_group);
+ CPPUNIT_TEST(non_source_only_replicas_preferred_when_replicas_not_in_sync);
+ CPPUNIT_TEST(at_least_one_non_source_only_replica_chosen_when_all_trusted);
CPPUNIT_TEST_SUITE_END();
};
@@ -56,12 +63,13 @@ namespace {
#define ASSERT_LIMIT(maxNodes, nodes, result) \
{ \
MergeLimiter limiter(maxNodes); \
- limiter.limitMergeToMaxNodes(nodes); \
+ auto nodesCopy = nodes; \
+ limiter.limitMergeToMaxNodes(nodesCopy); \
std::ostringstream actual; \
- for (uint32_t i=0; i<nodes.size(); ++i) { \
+ for (uint32_t i = 0; i < nodesCopy.size(); ++i) { \
if (i != 0) actual << ","; \
- actual << nodes[i]._nodeIndex; \
- if (nodes[i]._sourceOnly) actual << 's'; \
+ actual << nodesCopy[i]._nodeIndex; \
+ if (nodesCopy[i]._sourceOnly) actual << 's'; \
} \
CPPUNIT_ASSERT_EQUAL(std::string(result), actual.str()); \
}
@@ -153,8 +161,52 @@ MergeLimiterTest::testSourceOnlyLast()
.add(13, 0x9)
.add(1, 0x7)
.add(4, 0x5));
- ASSERT_LIMIT(4, nodes, "13,1,2s,5s");
+ ASSERT_LIMIT(4, nodes, "9,3,5s,2s");
}
-} // distributor
-} // storage
+void MergeLimiterTest::limited_set_cannot_be_just_source_only() {
+ MergeLimiter::NodeArray nodes(NodeFactory()
+ .addTrusted(9, 0x6)
+ .addTrusted(2, 0x6)
+ .addTrusted(13, 0x6).setSourceOnly()
+ .add(1, 0x7).setSourceOnly());
+ ASSERT_LIMIT(2, nodes, "2,13s");
+ ASSERT_LIMIT(3, nodes, "2,13s,1s");
+}
+
+void MergeLimiterTest::non_source_only_replica_chosen_from_in_sync_group() {
+ // nodes 9, 2, 13 are all in sync. Merge limiter will currently by default
+ // pop the _last_ node of an in-sync replica "group" when outputting a limited
+ // set. Unless we special-case source-only replicas here, we'd end up with an
+ // output set of "13s,1s", i.e. all source-only.
+ MergeLimiter::NodeArray nodes(NodeFactory()
+ .add(9, 0x6)
+ .add(2, 0x6)
+ .add(13, 0x6).setSourceOnly()
+ .add(1, 0x7).setSourceOnly());
+ ASSERT_LIMIT(2, nodes, "2,13s");
+ ASSERT_LIMIT(3, nodes, "2,13s,1s");
+}
+
+void MergeLimiterTest::non_source_only_replicas_preferred_when_replicas_not_in_sync() {
+ MergeLimiter::NodeArray nodes(NodeFactory()
+ .add(9, 0x4)
+ .add(2, 0x5)
+ .add(13, 0x6).setSourceOnly()
+ .add(1, 0x7).setSourceOnly());
+ ASSERT_LIMIT(2, nodes, "9,2");
+ ASSERT_LIMIT(3, nodes, "9,2,13s");
+}
+
+void MergeLimiterTest::at_least_one_non_source_only_replica_chosen_when_all_trusted() {
+ std::cerr << "\n\n\n";
+ MergeLimiter::NodeArray nodes(NodeFactory()
+ .addTrusted(9, 0x6)
+ .addTrusted(2, 0x6)
+ .addTrusted(13, 0x6).setSourceOnly()
+ .addTrusted(1, 0x6).setSourceOnly());
+ ASSERT_LIMIT(2, nodes, "2,13s");
+ ASSERT_LIMIT(3, nodes, "2,13s,1s");
+}
+
+} // storage::distributor
diff --git a/storage/src/vespa/storage/distributor/operations/idealstate/mergelimiter.cpp b/storage/src/vespa/storage/distributor/operations/idealstate/mergelimiter.cpp
index 618ba9ba884..d708b902454 100644
--- a/storage/src/vespa/storage/distributor/operations/idealstate/mergelimiter.cpp
+++ b/storage/src/vespa/storage/distributor/operations/idealstate/mergelimiter.cpp
@@ -1,20 +1,32 @@
// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
#include <vespa/storage/distributor/operations/idealstate/mergelimiter.h>
+#include <cassert>
#include <vespa/log/log.h>
-
LOG_SETUP(".distributor.operations.merge.limiter");
-namespace storage {
-namespace distributor {
+namespace storage::distributor {
MergeLimiter::MergeLimiter(uint16_t maxNodes)
: _maxNodes(maxNodes)
{
+ assert(maxNodes > 1);
LOG(spam, "Limiter initialized with %u nodes.", uint32_t(maxNodes));
}
+// TODO replace this overly complicated set of heuristics with something simpler.
+// Suggestion:
+// 1. Find non-source only replica with highest meta entry count. Emit it and remove from set.
+// This tries to maintain a "seed" replica that can hopefully let the remaining replicas
+// converge to the complete document entry set as quickly as possible.
+// 2. Create mapping from checksum -> replica set.
+// 3. Circularly loop through mapping and emit+remove the first replica in each mapping's set.
+// Distributing the merge across replica checksum groups is a heuristic to fetch as many
+// distinct document entries in one merge operation as possible, as these are all known to
+// be pairwise divergent from each other.
+// 3.1 Once merge limit is reached, break
+// 4. Do a stable sort on the emitted list such that source only replicas are last in the sequence.
namespace {
class EqualCopies {
uint32_t _checksum;
@@ -28,15 +40,24 @@ namespace {
{
}
- bool hasTrusted() const { return (_trustedCopies > 0); }
- uint32_t trustedCount() const { return _trustedCopies; }
- uint32_t size() const { return _copies.size(); }
- bool operator==(const MergeMetaData& mmd) const {
+ bool hasTrusted() const noexcept { return (_trustedCopies > 0); }
+ uint32_t trustedCount() const noexcept { return _trustedCopies; }
+ uint32_t size() const noexcept { return static_cast<uint32_t>(_copies.size()); }
+ bool operator==(const MergeMetaData& mmd) const noexcept {
return (_checksum == mmd.checksum());
}
void add(const MergeMetaData& mmd) {
- if (_copies.empty()) _checksum = mmd.checksum();
- if (mmd.trusted()) ++_trustedCopies;
+ if (_copies.empty()) {
+ _checksum = mmd.checksum();
+ }
+ // Don't treat source only replicas as trusted from the perspective of
+ // picking replica groups. "Trusted" in the context of the merge limiter
+ // logic _in practice_ means "may be output as the sole non-source only node
+ // in the resulting node set", which obviously doesn't work if it is in fact
+ // source only to begin with...
+ if (mmd.trusted() && !mmd.source_only()) {
+ ++_trustedCopies;
+ }
_copies.push_back(mmd);
}
MergeMetaData extractNext() {
@@ -56,36 +77,22 @@ namespace {
: _trustedCopies(0)
{
_groups.reserve(a.size());
- for (uint32_t i=0, n=a.size(); i<n; ++i) {
+ for (uint32_t i = 0, n = static_cast<uint32_t>(a.size()); i < n; ++i) {
add(a[i]);
- if (a[i].trusted()) {
+ if (a[i].trusted() && !a[i].source_only()) {
++_trustedCopies;
}
}
}
- EqualCopies& getMajority() {
- EqualCopies* candidate = 0;
- uint32_t size = 0;
- for (uint32_t i=0, n=_groups.size(); i<n; ++i) {
- if (_groups[i].size() > size) {
- candidate = &_groups[i];
- size = candidate->size();
- }
- }
- assert(candidate != 0);
- return *candidate;
- }
-
- bool hasTrusted() const { return (_trustedCopies > 0); }
- uint32_t trustedCount() const { return _trustedCopies; }
+ bool hasTrusted() const noexcept { return (_trustedCopies > 0); }
Statistics extractGroupsWithTrustedCopies() {
std::vector<EqualCopies> remaining;
Statistics trusted;
remaining.reserve(_groups.size());
trusted._groups.reserve(_groups.size());
- for (uint32_t i=0, n=_groups.size(); i<n; ++i) {
+ for (uint32_t i = 0, n = static_cast<uint32_t>(_groups.size()); i < n; ++i) {
if (_groups[i].hasTrusted()) {
trusted._groups.push_back(_groups[i]);
trusted._trustedCopies += _groups[i].trustedCount();
@@ -110,7 +117,7 @@ namespace {
void removeGroup(uint32_t groupIndex) {
std::vector<EqualCopies> remaining;
remaining.reserve(_groups.size()-1);
- for (uint32_t i=0, n=_groups.size(); i<n; ++i) {
+ for (uint32_t i = 0, n = static_cast<uint32_t>(_groups.size()); i < n; ++i) {
if (i != groupIndex) {
remaining.push_back(_groups[i]);
}
@@ -120,10 +127,16 @@ namespace {
private:
void add(const MergeMetaData& mmd) {
- for (uint32_t i=0; i<_groups.size(); ++i) {
- if (_groups[i] == mmd) {
- _groups[i].add(mmd);
- return;
+ // Treat source only replicas as their own distinct "groups" with regards
+ // to picking replicas for being part of the merge. This way, we avoid
+ // accidentally picking a trusted source only replica as our one trusted
+ // replica that will be part of the merge.
+ if (!mmd.source_only()) {
+ for (uint32_t i = 0; i < _groups.size(); ++i) {
+ if (_groups[i] == mmd) {
+ _groups[i].add(mmd);
+ return;
+ }
}
}
_groups.push_back(EqualCopies());
@@ -131,13 +144,15 @@ namespace {
}
};
- // Add up to max nodes, where different variants exist, prefer having
- // some of each.
+ // Add up to max nodes, where different variants exist, prefer having
+ // some of each.
void addNodes(uint32_t max, Statistics& stats,
MergeLimiter::NodeArray& result)
{
+ // FIXME redesign! `last` will unsigned over/underflow in extractNext, which
+ // is not a very pretty solution, to say the least.
uint32_t last = -1;
- for (uint32_t i=0; i<max; ++i) {
+ for (uint32_t i = 0; i < max; ++i) {
MergeMetaData data;
if (!stats.extractNext(data, last)) return;
result.push_back(data);
@@ -152,16 +167,22 @@ namespace {
};
}
+// FIXME the only reason why this code doesn't end up accidentally picking
+// just source-only replicas as the output node set today is due to an implicit
+// guarantee that the input to this function always has source-only replicas
+// listed _last_ in the sequence.
void
MergeLimiter::limitMergeToMaxNodes(NodeArray& nodes)
{
- // If not above max anyhow, we need not do anything
- if (nodes.size() <= _maxNodes) return;
- // Gather some statistics to base decision on what we are going to do on
+ // If not above max anyhow, we need not do anything
+ if (nodes.size() <= _maxNodes) {
+ return;
+ }
+ // Gather some statistics to base decision on what we are going to do on
Statistics stats(nodes);
NodeArray result;
- // If we have trusted copies, these should be complete. Pick one of them
- // and merge with as many untrusted copies as possible
+ // If we have trusted copies, these are likely to be complete. Pick one of them
+ // and merge with as many untrusted copies as possible
if (stats.hasTrusted()) {
Statistics trusted(stats.extractGroupsWithTrustedCopies());
addNodes(_maxNodes - 1, stats, result);
@@ -173,5 +194,4 @@ MergeLimiter::limitMergeToMaxNodes(NodeArray& nodes)
result.swap(nodes);
}
-} // distributor
-} // storage
+} // storage::distributor
diff --git a/storage/src/vespa/storage/distributor/operations/idealstate/mergelimiter.h b/storage/src/vespa/storage/distributor/operations/idealstate/mergelimiter.h
index acde58e6061..ef86fc07810 100644
--- a/storage/src/vespa/storage/distributor/operations/idealstate/mergelimiter.h
+++ b/storage/src/vespa/storage/distributor/operations/idealstate/mergelimiter.h
@@ -5,8 +5,7 @@
#include <vespa/storage/distributor/operations/idealstate/mergemetadata.h>
#include <vector>
-namespace storage {
-namespace distributor {
+namespace storage::distributor {
class MergeLimiter {
uint16_t _maxNodes;
@@ -19,5 +18,4 @@ public:
void limitMergeToMaxNodes(NodeArray&);
};
-} // distributor
-} // storage
+} // storage::distributor
diff --git a/storage/src/vespa/storage/distributor/operations/idealstate/mergemetadata.h b/storage/src/vespa/storage/distributor/operations/idealstate/mergemetadata.h
index 8c1cb02bcda..4c4225587bc 100644
--- a/storage/src/vespa/storage/distributor/operations/idealstate/mergemetadata.h
+++ b/storage/src/vespa/storage/distributor/operations/idealstate/mergemetadata.h
@@ -25,6 +25,7 @@ struct MergeMetaData {
assert(_copy != 0);
return _copy->getChecksum();
}
+ bool source_only() const noexcept { return _sourceOnly; }
};
vespalib::asciistream& operator<<(vespalib::asciistream& out, const MergeMetaData& e);