diff options
author | Henning Baldersheim <balder@yahoo-inc.com> | 2021-12-08 05:56:51 +0100 |
---|---|---|
committer | GitHub <noreply@github.com> | 2021-12-08 05:56:51 +0100 |
commit | 83d883b3a3ef519c3c1ee125ceb9f2df2c850958 (patch) | |
tree | d78c0f010648a60a90da354631a85daeeba4fe95 | |
parent | 09f1c86f7bb76c197ff1628da030ba974d9459eb (diff) | |
parent | 85ced6e73b0b220dedebdb7f4b54bdceeb7f3fa2 (diff) |
Merge pull request #20405 from vespa-engine/vekterli/prevent-orphaned-buckets-caused-by-indeterminate-createbucket
Prevent orphaned bucket replicas caused by indeterminate CreateBucket replies [run-systemtest]
-rw-r--r-- | storage/src/tests/distributor/putoperationtest.cpp | 32 | ||||
-rw-r--r-- | storage/src/vespa/storage/distributor/persistencemessagetracker.cpp | 7 |
2 files changed, 36 insertions, 3 deletions
diff --git a/storage/src/tests/distributor/putoperationtest.cpp b/storage/src/tests/distributor/putoperationtest.cpp index a047fb7d79c..b02395717e0 100644 --- a/storage/src/tests/distributor/putoperationtest.cpp +++ b/storage/src/tests/distributor/putoperationtest.cpp @@ -51,9 +51,8 @@ public: document::BucketId createAndSendSampleDocument(vespalib::duration timeout); void sendReply(int idx = -1, - api::ReturnCode::Result result - = api::ReturnCode::OK, - api::BucketInfo info = api::BucketInfo(1,2,3,4,5)) + api::ReturnCode::Result result = api::ReturnCode::OK, + api::BucketInfo info = api::BucketInfo(1,2,3,4,5)) { ASSERT_FALSE(_sender.commands().empty()); if (idx == -1) { @@ -152,6 +151,33 @@ TEST_F(PutOperationTest, bucket_database_gets_special_entry_when_CreateBucket_se ASSERT_EQ("Create bucket => 0,Put => 0", _sender.getCommands(true)); } +TEST_F(PutOperationTest, failed_CreateBucket_removes_replica_from_db_and_sends_RequestBucketInfo) { + setup_stripe(2, 2, "distributor:1 storage:2"); + + auto doc = createDummyDocument("test", "test"); + sendPut(createPut(doc)); + + ASSERT_EQ("Create bucket => 1,Create bucket => 0,Put => 1,Put => 0", _sender.getCommands(true)); + + // Simulate timeouts on node 1. Replica existence is in a Schrödinger's cat state until we send + // a RequestBucketInfo to the node and open the box to find out for sure. + sendReply(0, api::ReturnCode::TIMEOUT, api::BucketInfo()); // CreateBucket + sendReply(2, api::ReturnCode::TIMEOUT, api::BucketInfo()); // Put + // Pretend everything went fine on node 0 + sendReply(1); // CreateBucket + sendReply(3); // Put + + ASSERT_EQ("BucketId(0x4000000000008f09) : " + "node(idx=0,crc=0x1,docs=2/4,bytes=3/5,trusted=true,active=false,ready=false)", + dumpBucket(operation_context().make_split_bit_constrained_bucket_id(doc->getId()))); + + // TODO remove revert concept; does not make sense with Proton (since it's not a multi-version store and + // therefore does not have anything to revert back to) and is config-disabled by default for this provider. + ASSERT_EQ("RequestBucketInfoCommand(1 buckets, super bucket BucketId(0x4000000000008f09). ) => 1," + "Revert(BucketId(0x4000000000008f09)) => 0", + _sender.getCommands(true, true, 4)); +} + TEST_F(PutOperationTest, send_inline_split_before_put_if_bucket_too_large) { setup_stripe(1, 1, "storage:1 distributor:1"); auto cfg = make_config(); diff --git a/storage/src/vespa/storage/distributor/persistencemessagetracker.cpp b/storage/src/vespa/storage/distributor/persistencemessagetracker.cpp index 8cacbb0bf5a..45129f7be04 100644 --- a/storage/src/vespa/storage/distributor/persistencemessagetracker.cpp +++ b/storage/src/vespa/storage/distributor/persistencemessagetracker.cpp @@ -259,7 +259,14 @@ PersistenceMessageTrackerImpl::handleCreateBucketReply( && reply.getResult().getResult() != api::ReturnCode::EXISTS) { LOG(spam, "Create bucket reply failed, so deleting it from bucket db"); + // We don't know if the bucket exists at this point, so we remove it from the DB. + // If we get subsequent write load the bucket will be implicitly created again + // (which is an idempotent operation) and all is well. But since we don't know _if_ + // we'll get any further write load we send a RequestBucketInfo to bring the bucket + // back into the DB if it _was_ successfully created. We have to do the latter to + // avoid the risk of introducing an orphaned bucket replica on the content node. _op_ctx.remove_node_from_bucket_database(reply.getBucket(), node); + _op_ctx.recheck_bucket_info(node, reply.getBucket()); } } |