diff options
author | Tor Brede Vekterli <vekterli@yahooinc.com> | 2021-12-06 14:11:06 +0000 |
---|---|---|
committer | Tor Brede Vekterli <vekterli@yahooinc.com> | 2021-12-06 14:57:51 +0000 |
commit | 518825c40eaf2be7d47d4175d14b20450bf0f34f (patch) | |
tree | 4cb7a0b74790a9c13226a2ec01920a125656adfe /storage | |
parent | a390fafca6ebe8d3381447e516aa7f9a5132062c (diff) |
Treat empty replica subset as inconsistent for GetOperation
`GetOperation` document-level consistency checks are used by the multi-phase
update logic to see if we can fall back to a fast path even though not all
replicas are in sync. Empty replicas are not considered part of the send-set,
so only looking at replies from replicas _sent_ to will not detect this case.
If we haphazardly treat empty replicas as implicitly being in sync we risk
triggering undetectable inconsistencies at the document level. This can
happen if we send create-if-missing updates to an empty replica as well as a
non-empty replica, and the document exists in the latter replica.
The document would then be implicitly created on the empty replica with the
same timestamp as that of the non-empty one, even though their contents would
almost certainly differ.
With this change we initially tag all `GetOperations` with at least one empty
replica as having inconsistent replicas. This will trigger the full write-
repair code path for document updates.
Diffstat (limited to 'storage')
3 files changed, 26 insertions, 0 deletions
diff --git a/storage/src/tests/distributor/getoperationtest.cpp b/storage/src/tests/distributor/getoperationtest.cpp index dfe4f09de3f..9fecb005659 100644 --- a/storage/src/tests/distributor/getoperationtest.cpp +++ b/storage/src/tests/distributor/getoperationtest.cpp @@ -267,6 +267,26 @@ TEST_F(GetOperationTest, send_to_all_invalid_nodes_when_inconsistent) { EXPECT_EQ("newauthor", getLastReplyAuthor()); } +// GetOperation document-level consistency checks are used by the multi-phase update +// logic to see if we can fall back to a fast path even though not all replicas are in sync. +// Empty replicas are not considered part of the send-set, so only looking at replies from +// replicas _sent_ to will not detect this case. +// If we haphazardly treat an empty replicas as implicitly being in sync we risk triggering +// undetectable inconsistencies at the document level. This can happen if we send create-if-missing +// updates to an empty replica as well as a non-empty replica, and the document exists in the +// latter replica. The document would then be implicitly created on the empty replica with the +// same timestamp as that of the non-empty one, even though their contents would almost +// certainly differ. +TEST_F(GetOperationTest, get_not_sent_to_empty_replicas_but_bucket_tagged_as_inconsistent) { + setClusterState("distributor:1 storage:4"); + addNodesToBucketDB(bucketId, "2=0/0/0,3=1/2/3"); + sendGet(); + ASSERT_EQ("Get => 3", _sender.getCommands(true)); + ASSERT_NO_FATAL_FAILURE(sendReply(0, api::ReturnCode::OK, "newauthor", 2)); + EXPECT_FALSE(op->any_replicas_failed()); + EXPECT_FALSE(last_reply_had_consistent_replicas()); +} + TEST_F(GetOperationTest, inconsistent_split) { setClusterState("distributor:1 storage:4"); diff --git a/storage/src/vespa/storage/distributor/operations/external/getoperation.cpp b/storage/src/vespa/storage/distributor/operations/external/getoperation.cpp index 06872cadde6..868de8d0ae2 100644 --- a/storage/src/vespa/storage/distributor/operations/external/getoperation.cpp +++ b/storage/src/vespa/storage/distributor/operations/external/getoperation.cpp @@ -267,6 +267,11 @@ GetOperation::assignTargetNodeGroups(const BucketDatabase::ReadGuard& read_guard _responses[GroupId(e.getBucketId(), copy.getChecksum(), copy.getNode())].emplace_back(copy); } else if (!copy.empty()) { _responses[GroupId(e.getBucketId(), copy.getChecksum(), -1)].emplace_back(copy); + } else { // empty replica + // We must treat a bucket with empty replicas as inherently inconsistent. + // See GetOperationTest::get_not_sent_to_empty_replicas_but_bucket_tagged_as_inconsistent for + // rationale as to why this is the case. + _has_replica_inconsistency = true; } } } diff --git a/storage/src/vespa/storage/distributor/operations/external/updateoperation.cpp b/storage/src/vespa/storage/distributor/operations/external/updateoperation.cpp index a16eef0ab6f..5233e5678fa 100644 --- a/storage/src/vespa/storage/distributor/operations/external/updateoperation.cpp +++ b/storage/src/vespa/storage/distributor/operations/external/updateoperation.cpp @@ -90,6 +90,7 @@ UpdateOperation::onStart(DistributorStripeMessageSender& sender) // An UpdateOperation should only be started iff all replicas are consistent // with each other, so sampling a single replica should be equal to sampling them all. + // FIXME this no longer holds when replicas are consistent at the _document_ level but not at the _bucket_ level. assert(_entries[0].getBucketInfo().getNodeCount() > 0); // Empty buckets are not allowed _infoAtSendTime = _entries[0].getBucketInfo().getNodeRef(0).getBucketInfo(); |