summaryrefslogtreecommitdiffstats
path: root/storage/src/tests/distributor
diff options
context:
space:
mode:
authorTor Brede Vekterli <vekterli@verizonmedia.com>2019-02-21 15:10:07 +0000
committerTor Brede Vekterli <vekterli@verizonmedia.com>2019-02-21 16:42:34 +0000
commit33ac2ac4e5975db8a3132d2b10950eaf0a4cc877 (patch)
tree6acb813d22b6dfcd6f794fccd18b6c7ec8fcc691 /storage/src/tests/distributor
parent67d81887caf35a6c71fae85e7a3e14446c0d8278 (diff)
Add workarounds for legacy global distribution hash handling
This addresses a regression introduced as part of #8479, which in turn was intended to serve as a fix for issue #8475. This regression would stall cluster state convergence when a subset of nodes contained the fix and another subset did not. With the workarounds present, nodes gracefully handle the case where different distribution hashes are expected for the global bucket space. `BucketManager` will now fall back to comparing the new incoming hash to that of the legacy derived distribution config if it mismatches. `PendingClusterState` will try to send a subset of bucket info requests with legacy hash format for the global bucket space iff there has been at least 1 rejected request. All these workarounds will be removed on Vespa 8.
Diffstat (limited to 'storage/src/tests/distributor')
-rw-r--r--storage/src/tests/distributor/bucketdbupdatertest.cpp56
1 files changed, 53 insertions, 3 deletions
diff --git a/storage/src/tests/distributor/bucketdbupdatertest.cpp b/storage/src/tests/distributor/bucketdbupdatertest.cpp
index 53f80854bef..b2d554c1e42 100644
--- a/storage/src/tests/distributor/bucketdbupdatertest.cpp
+++ b/storage/src/tests/distributor/bucketdbupdatertest.cpp
@@ -111,6 +111,7 @@ class BucketDBUpdaterTest : public CppUnit::TestFixture,
CPPUNIT_TEST(identity_update_of_diverging_untrusted_replicas_does_not_mark_any_as_trusted);
CPPUNIT_TEST(adding_diverging_replica_to_existing_trusted_does_not_remove_trusted);
CPPUNIT_TEST(batch_update_from_distributor_change_does_not_mark_diverging_replicas_as_trusted);
+ CPPUNIT_TEST(global_distribution_hash_falls_back_to_legacy_format_upon_request_rejection);
CPPUNIT_TEST_SUITE_END();
public:
@@ -175,6 +176,7 @@ protected:
void identity_update_of_diverging_untrusted_replicas_does_not_mark_any_as_trusted();
void adding_diverging_replica_to_existing_trusted_does_not_remove_trusted();
void batch_update_from_distributor_change_does_not_mark_diverging_replicas_as_trusted();
+ void global_distribution_hash_falls_back_to_legacy_format_upon_request_rejection();
auto &defaultDistributorBucketSpace() { return getBucketSpaceRepo().get(makeBucketSpace()); }
@@ -505,7 +507,7 @@ public:
std::make_shared<lib::Distribution>(distConfig));
}
- std::string getDistConfig6Nodes3Groups() const {
+ std::string getDistConfig6Nodes2Groups() const {
return ("redundancy 2\n"
"group[3]\n"
"group[0].name \"invalid\"\n"
@@ -692,7 +694,7 @@ BucketDBUpdaterTest::testDistributorChange()
void
BucketDBUpdaterTest::testDistributorChangeWithGrouping()
{
- std::string distConfig(getDistConfig6Nodes3Groups());
+ std::string distConfig(getDistConfig6Nodes2Groups());
setDistribution(distConfig);
int numBuckets = 100;
@@ -2073,7 +2075,7 @@ BucketDBUpdaterTest::testClusterStateAlwaysSendsFullFetchWhenDistributionChangeP
setAndEnableClusterState(stateBefore, expectedMsgs, dummyBucketsToReturn);
}
_sender.clear();
- std::string distConfig(getDistConfig6Nodes3Groups());
+ std::string distConfig(getDistConfig6Nodes2Groups());
setDistribution(distConfig);
sortSentMessagesByIndex(_sender);
CPPUNIT_ASSERT_EQUAL(messageCount(6), _sender.commands.size());
@@ -2549,4 +2551,52 @@ void BucketDBUpdaterTest::batch_update_from_distributor_change_does_not_mark_div
"0:5/1/2/3|1:5/7/8/9", true));
}
+// TODO remove on Vespa 8 - this is a workaround for https://github.com/vespa-engine/vespa/issues/8475
+void BucketDBUpdaterTest::global_distribution_hash_falls_back_to_legacy_format_upon_request_rejection() {
+ std::string distConfig(getDistConfig6Nodes2Groups());
+ setDistribution(distConfig);
+
+ const vespalib::string current_hash = "(0d*|*(0;0;1;2)(1;3;4;5))";
+ const vespalib::string legacy_hash = "(0d3|3|*(0;0;1;2)(1;3;4;5))";
+
+ setSystemState(lib::ClusterState("distributor:6 storage:6"));
+ CPPUNIT_ASSERT_EQUAL(messageCount(6), _sender.commands.size());
+
+ api::RequestBucketInfoCommand* global_req = nullptr;
+ for (auto& cmd : _sender.commands) {
+ auto& req_cmd = dynamic_cast<api::RequestBucketInfoCommand&>(*cmd);
+ if (req_cmd.getBucketSpace() == document::FixedBucketSpaces::global_space()) {
+ global_req = &req_cmd;
+ break;
+ }
+ }
+ CPPUNIT_ASSERT(global_req != nullptr);
+ CPPUNIT_ASSERT_EQUAL(current_hash, global_req->getDistributionHash());
+
+ auto reply = std::make_shared<api::RequestBucketInfoReply>(*global_req);
+ reply->setResult(api::ReturnCode::REJECTED);
+ getBucketDBUpdater().onRequestBucketInfoReply(reply);
+
+ getClock().addSecondsToTime(10);
+ getBucketDBUpdater().resendDelayedMessages();
+
+ // Should now be a resent request with legacy distribution hash
+ CPPUNIT_ASSERT_EQUAL(messageCount(6) + 1, _sender.commands.size());
+ auto& legacy_req = dynamic_cast<api::RequestBucketInfoCommand&>(*_sender.commands.back());
+ CPPUNIT_ASSERT_EQUAL(legacy_hash, legacy_req.getDistributionHash());
+
+ // Now if we reject it _again_ we should cycle back to the current hash
+ // in case it wasn't a hash-based rejection after all. And the circle of life continues.
+ reply = std::make_shared<api::RequestBucketInfoReply>(legacy_req);
+ reply->setResult(api::ReturnCode::REJECTED);
+ getBucketDBUpdater().onRequestBucketInfoReply(reply);
+
+ getClock().addSecondsToTime(10);
+ getBucketDBUpdater().resendDelayedMessages();
+
+ CPPUNIT_ASSERT_EQUAL(messageCount(6) + 2, _sender.commands.size());
+ auto& new_current_req = dynamic_cast<api::RequestBucketInfoCommand&>(*_sender.commands.back());
+ CPPUNIT_ASSERT_EQUAL(current_hash, new_current_req.getDistributionHash());
+}
+
}