aboutsummaryrefslogtreecommitdiffstats
path: root/storage
diff options
context:
space:
mode:
authorTor Brede Vekterli <vekterli@yahooinc.com>2021-12-06 14:11:06 +0000
committerTor Brede Vekterli <vekterli@yahooinc.com>2021-12-06 14:57:51 +0000
commit518825c40eaf2be7d47d4175d14b20450bf0f34f (patch)
tree4cb7a0b74790a9c13226a2ec01920a125656adfe /storage
parenta390fafca6ebe8d3381447e516aa7f9a5132062c (diff)
Treat empty replica subset as inconsistent for GetOperation
`GetOperation` document-level consistency checks are used by the multi-phase update logic to see if we can fall back to a fast path even though not all replicas are in sync. Empty replicas are not considered part of the send-set, so only looking at replies from replicas _sent_ to will not detect this case. If we haphazardly treat empty replicas as implicitly being in sync we risk triggering undetectable inconsistencies at the document level. This can happen if we send create-if-missing updates to an empty replica as well as a non-empty replica, and the document exists in the latter replica. The document would then be implicitly created on the empty replica with the same timestamp as that of the non-empty one, even though their contents would almost certainly differ. With this change we initially tag all `GetOperations` with at least one empty replica as having inconsistent replicas. This will trigger the full write- repair code path for document updates.
Diffstat (limited to 'storage')
-rw-r--r--storage/src/tests/distributor/getoperationtest.cpp20
-rw-r--r--storage/src/vespa/storage/distributor/operations/external/getoperation.cpp5
-rw-r--r--storage/src/vespa/storage/distributor/operations/external/updateoperation.cpp1
3 files changed, 26 insertions, 0 deletions
diff --git a/storage/src/tests/distributor/getoperationtest.cpp b/storage/src/tests/distributor/getoperationtest.cpp
index dfe4f09de3f..9fecb005659 100644
--- a/storage/src/tests/distributor/getoperationtest.cpp
+++ b/storage/src/tests/distributor/getoperationtest.cpp
@@ -267,6 +267,26 @@ TEST_F(GetOperationTest, send_to_all_invalid_nodes_when_inconsistent) {
EXPECT_EQ("newauthor", getLastReplyAuthor());
}
+// GetOperation document-level consistency checks are used by the multi-phase update
+// logic to see if we can fall back to a fast path even though not all replicas are in sync.
+// Empty replicas are not considered part of the send-set, so only looking at replies from
+// replicas _sent_ to will not detect this case.
+// If we haphazardly treat an empty replicas as implicitly being in sync we risk triggering
+// undetectable inconsistencies at the document level. This can happen if we send create-if-missing
+// updates to an empty replica as well as a non-empty replica, and the document exists in the
+// latter replica. The document would then be implicitly created on the empty replica with the
+// same timestamp as that of the non-empty one, even though their contents would almost
+// certainly differ.
+TEST_F(GetOperationTest, get_not_sent_to_empty_replicas_but_bucket_tagged_as_inconsistent) {
+ setClusterState("distributor:1 storage:4");
+ addNodesToBucketDB(bucketId, "2=0/0/0,3=1/2/3");
+ sendGet();
+ ASSERT_EQ("Get => 3", _sender.getCommands(true));
+ ASSERT_NO_FATAL_FAILURE(sendReply(0, api::ReturnCode::OK, "newauthor", 2));
+ EXPECT_FALSE(op->any_replicas_failed());
+ EXPECT_FALSE(last_reply_had_consistent_replicas());
+}
+
TEST_F(GetOperationTest, inconsistent_split) {
setClusterState("distributor:1 storage:4");
diff --git a/storage/src/vespa/storage/distributor/operations/external/getoperation.cpp b/storage/src/vespa/storage/distributor/operations/external/getoperation.cpp
index 06872cadde6..868de8d0ae2 100644
--- a/storage/src/vespa/storage/distributor/operations/external/getoperation.cpp
+++ b/storage/src/vespa/storage/distributor/operations/external/getoperation.cpp
@@ -267,6 +267,11 @@ GetOperation::assignTargetNodeGroups(const BucketDatabase::ReadGuard& read_guard
_responses[GroupId(e.getBucketId(), copy.getChecksum(), copy.getNode())].emplace_back(copy);
} else if (!copy.empty()) {
_responses[GroupId(e.getBucketId(), copy.getChecksum(), -1)].emplace_back(copy);
+ } else { // empty replica
+ // We must treat a bucket with empty replicas as inherently inconsistent.
+ // See GetOperationTest::get_not_sent_to_empty_replicas_but_bucket_tagged_as_inconsistent for
+ // rationale as to why this is the case.
+ _has_replica_inconsistency = true;
}
}
}
diff --git a/storage/src/vespa/storage/distributor/operations/external/updateoperation.cpp b/storage/src/vespa/storage/distributor/operations/external/updateoperation.cpp
index a16eef0ab6f..5233e5678fa 100644
--- a/storage/src/vespa/storage/distributor/operations/external/updateoperation.cpp
+++ b/storage/src/vespa/storage/distributor/operations/external/updateoperation.cpp
@@ -90,6 +90,7 @@ UpdateOperation::onStart(DistributorStripeMessageSender& sender)
// An UpdateOperation should only be started iff all replicas are consistent
// with each other, so sampling a single replica should be equal to sampling them all.
+ // FIXME this no longer holds when replicas are consistent at the _document_ level but not at the _bucket_ level.
assert(_entries[0].getBucketInfo().getNodeCount() > 0); // Empty buckets are not allowed
_infoAtSendTime = _entries[0].getBucketInfo().getNodeRef(0).getBucketInfo();