aboutsummaryrefslogtreecommitdiffstats
path: root/storage/src/tests/bucketdb/bucketmanagertest.cpp
diff options
context:
space:
mode:
author Tor Brede Vekterli <vekterli@verizonmedia.com> 2019-02-21 15:10:07 +0000
committer Tor Brede Vekterli <vekterli@verizonmedia.com> 2019-02-21 16:42:34 +0000
commit 33ac2ac4e5975db8a3132d2b10950eaf0a4cc877 (patch)
tree 6acb813d22b6dfcd6f794fccd18b6c7ec8fcc691 /storage/src/tests/bucketdb/bucketmanagertest.cpp
parent 67d81887caf35a6c71fae85e7a3e14446c0d8278 (diff)
Add workarounds for legacy global distribution hash handling
This addresses a regression introduced as part of #8479, which in turn was intended to serve as a fix for issue #8475. This regression would stall cluster state convergence when a subset of nodes contained the fix and another subset did not. With the workarounds present, nodes gracefully handle the case where different distribution hashes are expected for the global bucket space. `BucketManager` will now fall back to comparing the new incoming hash to that of the legacy derived distribution config if it mismatches. `PendingClusterState` will try to send a subset of bucket info requests with legacy hash format for the global bucket space iff there has been at least 1 rejected request. All these workarounds will be removed on Vespa 8.
Diffstat (limited to 'storage/src/tests/bucketdb/bucketmanagertest.cpp')
-rw-r--r-- storage/src/tests/bucketdb/bucketmanagertest.cpp 61
1 files changed, 61 insertions, 0 deletions
diff --git a/storage/src/tests/bucketdb/bucketmanagertest.cpp b/storage/src/tests/bucketdb/bucketmanagertest.cpp
index 54b3bf4b8d0..09fe310e97e 100644
--- a/storage/src/tests/bucketdb/bucketmanagertest.cpp
+++ b/storage/src/tests/bucketdb/bucketmanagertest.cpp
@@ -8,6 +8,7 @@
#include <vespa/document/update/documentupdate.h>
#include <vespa/document/repo/documenttyperepo.h>
#include <vespa/storage/bucketdb/bucketmanager.h>
+#include <vespa/storage/common/global_bucket_space_distribution_converter.h>
#include <vespa/storage/persistence/filestorage/filestormanager.h>
#include <vespa/storageapi/message/persistence.h>
#include <vespa/storageapi/message/state.h>
@@ -84,6 +85,7 @@ public:
CPPUNIT_TEST(testConflictSetOnlyClearedAfterAllBucketRequestsDone);
CPPUNIT_TEST(testRejectRequestWithMismatchingDistributionHash);
CPPUNIT_TEST(testDbNotIteratedWhenAllRequestsRejected);
+ CPPUNIT_TEST(fall_back_to_legacy_global_distribution_hash_on_mismatch);
// FIXME(vekterli): test is not deterministic and enjoys failing
// sporadically when running under Valgrind. See bug 5932891.
@@ -154,6 +156,7 @@ public:
void testConflictSetOnlyClearedAfterAllBucketRequestsDone();
void testRejectRequestWithMismatchingDistributionHash();
void testDbNotIteratedWhenAllRequestsRejected();
+ void fall_back_to_legacy_global_distribution_hash_on_mismatch();
public:
static constexpr uint32_t DIR_SPREAD = 3;
@@ -785,6 +788,10 @@ public:
return std::make_shared<api::RequestBucketInfoCommand>(makeBucketSpace(), 0, _state, hash);
}
+ auto createFullFetchCommandWithHash(document::BucketSpace space, vespalib::stringref hash) const { // Builds a shared RequestBucketInfoCommand for an explicitly chosen bucket space, carrying the caller-supplied hash string.
+ return std::make_shared<api::RequestBucketInfoCommand>(space, 0, _state, hash); // Reuses the fixture's _state member. NOTE(review): the literal 0 is presumably the distributor index - confirm against the RequestBucketInfoCommand constructor.
+ }
+
auto acquireBucketLockAndSendInfoRequest(const document::BucketId& bucket) {
auto guard = acquireBucketLock(bucket);
// Send down processing command which will block.
@@ -850,6 +857,45 @@ public:
_self._top->getRepliesOnce();
}
+ // TODO remove on Vespa 8 - this is a workaround for https://github.com/vespa-engine/vespa/issues/8475
+ std::unique_ptr<lib::Distribution> default_grouped_distribution() { // Returns a freshly built Distribution parsed from the fixed, grouped config text embedded below.
+ return std::make_unique<lib::Distribution>(
+ GlobalBucketSpaceDistributionConverter::string_to_config(vespalib::string( // The config text declares redundancy 2 and three groups: "invalid" (partitions 1|*, no nodes), rack0 (node indices 0-2) and rack1 (node indices 3-5).
+R"(redundancy 2
+group[3]
+group[0].name "invalid"
+group[0].index "invalid"
+group[0].partitions 1|*
+group[0].nodes[0]
+group[1].name rack0
+group[1].index 0
+group[1].nodes[3]
+group[1].nodes[0].index 0
+group[1].nodes[1].index 1
+group[1].nodes[2].index 2
+group[2].name rack1
+group[2].index 1
+group[2].nodes[3]
+group[2].nodes[0].index 3
+group[2].nodes[1].index 4
+group[2].nodes[2].index 5
+)"))); // Closes the raw string, the vespalib::string wrapper, string_to_config() and make_unique().
+ }
+
+ std::shared_ptr<lib::Distribution> derived_global_grouped_distribution(bool use_legacy) { // Returns the global-space distribution derived from default_grouped_distribution().
+ auto default_distr = default_grouped_distribution(); // A fresh copy of the grouped default-space config is built on every call.
+ return GlobalBucketSpaceDistributionConverter::convert_to_global(*default_distr, use_legacy); // use_legacy is forwarded unchanged. NOTE(review): presumably selects the legacy (pre-#8479) derivation and hence a different config hash - confirm in the converter.
+ }
+
+ void set_grouped_distribution_configs() { // Installs the grouped test distributions on both the default and the global bucket space of the test node.
+ auto default_distr = default_grouped_distribution(); // unique_ptr; ownership is handed to setDistribution() below.
+ _self._node->getComponentRegister().getBucketSpaceRepo()
+ .get(document::FixedBucketSpaces::default_space()).setDistribution(std::move(default_distr)); // Default space receives the grouped config as-is.
+ auto global_distr = derived_global_grouped_distribution(false); // Derived with use_legacy == false, i.e. not the variant the test later requests with use_legacy == true.
+ _self._node->getComponentRegister().getBucketSpaceRepo()
+ .get(document::FixedBucketSpaces::global_space()).setDistribution(std::move(global_distr)); // Global space receives the derived config.
+ }
+
private:
BucketManagerTest& _self;
lib::ClusterState _state;
@@ -1358,4 +1404,19 @@ BucketManagerTest::testDbNotIteratedWhenAllRequestsRejected()
auto replies = fixture.awaitAndGetReplies(1);
}
+// TODO remove on Vespa 8 - this is a workaround for https://github.com/vespa-engine/vespa/issues/8475
+void BucketManagerTest::fall_back_to_legacy_global_distribution_hash_on_mismatch() { // Checks that a global-space bucket info request carrying the legacy-derived distribution hash is answered with OK.
+ ConcurrentOperationFixture f(*this);
+
+ f.set_grouped_distribution_configs(); // The node is configured with the global distribution derived using use_legacy == false.
+
+ auto legacy_hash = f.derived_global_grouped_distribution(true)->getNodeGraph().getDistributionConfigHash(); // Hash of the use_legacy == true derivation; the test expects this to differ from the installed config's hash (not asserted here).
+
+ auto infoCmd = f.createFullFetchCommandWithHash(document::FixedBucketSpaces::global_space(), legacy_hash); // Request targets the global bucket space and carries the legacy hash.
+ _top->sendDown(infoCmd); // Injects the command at the top of the test's storage link chain.
+ auto replies = f.awaitAndGetReplies(1); // Waits for exactly one reply.
+ auto& reply = dynamic_cast<api::RequestBucketInfoReply&>(*replies[0]); // A reference dynamic_cast throws std::bad_cast if the reply has another type, which fails the test.
+ CPPUNIT_ASSERT_EQUAL(api::ReturnCode::OK, reply.getResult().getResult()); // _not_ REJECTED
+}
+
} // storage