summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorHenning Baldersheim <balder@yahoo-inc.com>2021-12-08 05:56:51 +0100
committerGitHub <noreply@github.com>2021-12-08 05:56:51 +0100
commit83d883b3a3ef519c3c1ee125ceb9f2df2c850958 (patch)
treed78c0f010648a60a90da354631a85daeeba4fe95
parent09f1c86f7bb76c197ff1628da030ba974d9459eb (diff)
parent85ced6e73b0b220dedebdb7f4b54bdceeb7f3fa2 (diff)
Merge pull request #20405 from vespa-engine/vekterli/prevent-orphaned-buckets-caused-by-indeterminate-createbucket
Prevent orphaned bucket replicas caused by indeterminate CreateBucket replies [run-systemtest]
-rw-r--r--storage/src/tests/distributor/putoperationtest.cpp32
-rw-r--r--storage/src/vespa/storage/distributor/persistencemessagetracker.cpp7
2 files changed, 36 insertions, 3 deletions
diff --git a/storage/src/tests/distributor/putoperationtest.cpp b/storage/src/tests/distributor/putoperationtest.cpp
index a047fb7d79c..b02395717e0 100644
--- a/storage/src/tests/distributor/putoperationtest.cpp
+++ b/storage/src/tests/distributor/putoperationtest.cpp
@@ -51,9 +51,8 @@ public:
document::BucketId createAndSendSampleDocument(vespalib::duration timeout);
void sendReply(int idx = -1,
- api::ReturnCode::Result result
- = api::ReturnCode::OK,
- api::BucketInfo info = api::BucketInfo(1,2,3,4,5))
+ api::ReturnCode::Result result = api::ReturnCode::OK,
+ api::BucketInfo info = api::BucketInfo(1,2,3,4,5))
{
ASSERT_FALSE(_sender.commands().empty());
if (idx == -1) {
@@ -152,6 +151,33 @@ TEST_F(PutOperationTest, bucket_database_gets_special_entry_when_CreateBucket_se
ASSERT_EQ("Create bucket => 0,Put => 0", _sender.getCommands(true));
}
+TEST_F(PutOperationTest, failed_CreateBucket_removes_replica_from_db_and_sends_RequestBucketInfo) {
+ setup_stripe(2, 2, "distributor:1 storage:2");
+
+ auto doc = createDummyDocument("test", "test");
+ sendPut(createPut(doc));
+
+ ASSERT_EQ("Create bucket => 1,Create bucket => 0,Put => 1,Put => 0", _sender.getCommands(true));
+
+ // Simulate timeouts on node 1. Replica existence is in a Schrödinger's cat state until we send
+ // a RequestBucketInfo to the node and open the box to find out for sure.
+ sendReply(0, api::ReturnCode::TIMEOUT, api::BucketInfo()); // CreateBucket
+ sendReply(2, api::ReturnCode::TIMEOUT, api::BucketInfo()); // Put
+ // Pretend everything went fine on node 0
+ sendReply(1); // CreateBucket
+ sendReply(3); // Put
+
+ ASSERT_EQ("BucketId(0x4000000000008f09) : "
+ "node(idx=0,crc=0x1,docs=2/4,bytes=3/5,trusted=true,active=false,ready=false)",
+ dumpBucket(operation_context().make_split_bit_constrained_bucket_id(doc->getId())));
+
+ // TODO remove revert concept; does not make sense with Proton (since it's not a multi-version store and
+ // therefore does not have anything to revert back to) and is config-disabled by default for this provider.
+ ASSERT_EQ("RequestBucketInfoCommand(1 buckets, super bucket BucketId(0x4000000000008f09). ) => 1,"
+ "Revert(BucketId(0x4000000000008f09)) => 0",
+ _sender.getCommands(true, true, 4));
+}
+
TEST_F(PutOperationTest, send_inline_split_before_put_if_bucket_too_large) {
setup_stripe(1, 1, "storage:1 distributor:1");
auto cfg = make_config();
diff --git a/storage/src/vespa/storage/distributor/persistencemessagetracker.cpp b/storage/src/vespa/storage/distributor/persistencemessagetracker.cpp
index 8cacbb0bf5a..45129f7be04 100644
--- a/storage/src/vespa/storage/distributor/persistencemessagetracker.cpp
+++ b/storage/src/vespa/storage/distributor/persistencemessagetracker.cpp
@@ -259,7 +259,14 @@ PersistenceMessageTrackerImpl::handleCreateBucketReply(
&& reply.getResult().getResult() != api::ReturnCode::EXISTS)
{
LOG(spam, "Create bucket reply failed, so deleting it from bucket db");
+ // We don't know if the bucket exists at this point, so we remove it from the DB.
+ // If we get subsequent write load the bucket will be implicitly created again
+ // (which is an idempotent operation) and all is well. But since we don't know _if_
+ // we'll get any further write load we send a RequestBucketInfo to bring the bucket
+ // back into the DB if it _was_ successfully created. We have to do the latter to
+ // avoid the risk of introducing an orphaned bucket replica on the content node.
_op_ctx.remove_node_from_bucket_database(reply.getBucket(), node);
+ _op_ctx.recheck_bucket_info(node, reply.getBucket());
}
}