diff options
author | Henning Baldersheim <balder@yahoo-inc.com> | 2021-12-08 08:32:36 +0100 |
---|---|---|
committer | GitHub <noreply@github.com> | 2021-12-08 08:32:36 +0100 |
commit | 0d35794eea81a916d06b161d59153fa2ad93cc0f (patch) | |
tree | edc9d98df8ee22835ed5b999682e653acaac58eb /storage | |
parent | 373ccd9082b206b828565b20f2af45a9a886226a (diff) | |
parent | 518825c40eaf2be7d47d4175d14b20450bf0f34f (diff) |
Merge pull request #20382 from vespa-engine/vekterli/treat-empty-replica-subset-as-inconsistent-for-get-operations
Treat empty replica subset as inconsistent for GetOperation [run-systemtest]
Diffstat (limited to 'storage')
3 files changed, 26 insertions, 0 deletions
diff --git a/storage/src/tests/distributor/getoperationtest.cpp b/storage/src/tests/distributor/getoperationtest.cpp index dfe4f09de3f..9fecb005659 100644 --- a/storage/src/tests/distributor/getoperationtest.cpp +++ b/storage/src/tests/distributor/getoperationtest.cpp @@ -267,6 +267,26 @@ TEST_F(GetOperationTest, send_to_all_invalid_nodes_when_inconsistent) { EXPECT_EQ("newauthor", getLastReplyAuthor()); } +// GetOperation document-level consistency checks are used by the multi-phase update +// logic to see if we can fall back to a fast path even though not all replicas are in sync. +// Empty replicas are not considered part of the send-set, so only looking at replies from +// replicas _sent_ to will not detect this case. +// If we haphazardly treat an empty replicas as implicitly being in sync we risk triggering +// undetectable inconsistencies at the document level. This can happen if we send create-if-missing +// updates to an empty replica as well as a non-empty replica, and the document exists in the +// latter replica. The document would then be implicitly created on the empty replica with the +// same timestamp as that of the non-empty one, even though their contents would almost +// certainly differ. +TEST_F(GetOperationTest, get_not_sent_to_empty_replicas_but_bucket_tagged_as_inconsistent) { + setClusterState("distributor:1 storage:4"); + addNodesToBucketDB(bucketId, "2=0/0/0,3=1/2/3"); + sendGet(); + ASSERT_EQ("Get => 3", _sender.getCommands(true)); + ASSERT_NO_FATAL_FAILURE(sendReply(0, api::ReturnCode::OK, "newauthor", 2)); + EXPECT_FALSE(op->any_replicas_failed()); + EXPECT_FALSE(last_reply_had_consistent_replicas()); +} + TEST_F(GetOperationTest, inconsistent_split) { setClusterState("distributor:1 storage:4"); diff --git a/storage/src/vespa/storage/distributor/operations/external/getoperation.cpp b/storage/src/vespa/storage/distributor/operations/external/getoperation.cpp index 06872cadde6..868de8d0ae2 100644 --- a/storage/src/vespa/storage/distributor/operations/external/getoperation.cpp +++ b/storage/src/vespa/storage/distributor/operations/external/getoperation.cpp @@ -267,6 +267,11 @@ GetOperation::assignTargetNodeGroups(const BucketDatabase::ReadGuard& read_guard _responses[GroupId(e.getBucketId(), copy.getChecksum(), copy.getNode())].emplace_back(copy); } else if (!copy.empty()) { _responses[GroupId(e.getBucketId(), copy.getChecksum(), -1)].emplace_back(copy); + } else { // empty replica + // We must treat a bucket with empty replicas as inherently inconsistent. + // See GetOperationTest::get_not_sent_to_empty_replicas_but_bucket_tagged_as_inconsistent for + // rationale as to why this is the case. + _has_replica_inconsistency = true; } } } diff --git a/storage/src/vespa/storage/distributor/operations/external/updateoperation.cpp b/storage/src/vespa/storage/distributor/operations/external/updateoperation.cpp index a16eef0ab6f..5233e5678fa 100644 --- a/storage/src/vespa/storage/distributor/operations/external/updateoperation.cpp +++ b/storage/src/vespa/storage/distributor/operations/external/updateoperation.cpp @@ -90,6 +90,7 @@ UpdateOperation::onStart(DistributorStripeMessageSender& sender) // An UpdateOperation should only be started iff all replicas are consistent // with each other, so sampling a single replica should be equal to sampling them all. + // FIXME this no longer holds when replicas are consistent at the _document_ level but not at the _bucket_ level. assert(_entries[0].getBucketInfo().getNodeCount() > 0); // Empty buckets are not allowed _infoAtSendTime = _entries[0].getBucketInfo().getNodeRef(0).getBucketInfo(); |