aboutsummaryrefslogtreecommitdiffstats
path: root/storage
diff options
context:
space:
mode:
authorHenning Baldersheim <balder@yahoo-inc.com>2021-12-08 08:32:36 +0100
committerGitHub <noreply@github.com>2021-12-08 08:32:36 +0100
commit0d35794eea81a916d06b161d59153fa2ad93cc0f (patch)
treeedc9d98df8ee22835ed5b999682e653acaac58eb /storage
parent373ccd9082b206b828565b20f2af45a9a886226a (diff)
parent518825c40eaf2be7d47d4175d14b20450bf0f34f (diff)
Merge pull request #20382 from vespa-engine/vekterli/treat-empty-replica-subset-as-inconsistent-for-get-operations
Treat empty replica subset as inconsistent for GetOperation [run-systemtest]
Diffstat (limited to 'storage')
-rw-r--r--storage/src/tests/distributor/getoperationtest.cpp20
-rw-r--r--storage/src/vespa/storage/distributor/operations/external/getoperation.cpp5
-rw-r--r--storage/src/vespa/storage/distributor/operations/external/updateoperation.cpp1
3 files changed, 26 insertions, 0 deletions
diff --git a/storage/src/tests/distributor/getoperationtest.cpp b/storage/src/tests/distributor/getoperationtest.cpp
index dfe4f09de3f..9fecb005659 100644
--- a/storage/src/tests/distributor/getoperationtest.cpp
+++ b/storage/src/tests/distributor/getoperationtest.cpp
@@ -267,6 +267,26 @@ TEST_F(GetOperationTest, send_to_all_invalid_nodes_when_inconsistent) {
EXPECT_EQ("newauthor", getLastReplyAuthor());
}
+// GetOperation document-level consistency checks are used by the multi-phase update
+// logic to see if we can fall back to a fast path even though not all replicas are in sync.
+// Empty replicas are not considered part of the send-set, so only looking at replies from
+// replicas _sent_ to will not detect this case.
+// If we haphazardly treat an empty replicas as implicitly being in sync we risk triggering
+// undetectable inconsistencies at the document level. This can happen if we send create-if-missing
+// updates to an empty replica as well as a non-empty replica, and the document exists in the
+// latter replica. The document would then be implicitly created on the empty replica with the
+// same timestamp as that of the non-empty one, even though their contents would almost
+// certainly differ.
+TEST_F(GetOperationTest, get_not_sent_to_empty_replicas_but_bucket_tagged_as_inconsistent) {
+ setClusterState("distributor:1 storage:4");
+ addNodesToBucketDB(bucketId, "2=0/0/0,3=1/2/3");
+ sendGet();
+ ASSERT_EQ("Get => 3", _sender.getCommands(true));
+ ASSERT_NO_FATAL_FAILURE(sendReply(0, api::ReturnCode::OK, "newauthor", 2));
+ EXPECT_FALSE(op->any_replicas_failed());
+ EXPECT_FALSE(last_reply_had_consistent_replicas());
+}
+
TEST_F(GetOperationTest, inconsistent_split) {
setClusterState("distributor:1 storage:4");
diff --git a/storage/src/vespa/storage/distributor/operations/external/getoperation.cpp b/storage/src/vespa/storage/distributor/operations/external/getoperation.cpp
index 06872cadde6..868de8d0ae2 100644
--- a/storage/src/vespa/storage/distributor/operations/external/getoperation.cpp
+++ b/storage/src/vespa/storage/distributor/operations/external/getoperation.cpp
@@ -267,6 +267,11 @@ GetOperation::assignTargetNodeGroups(const BucketDatabase::ReadGuard& read_guard
_responses[GroupId(e.getBucketId(), copy.getChecksum(), copy.getNode())].emplace_back(copy);
} else if (!copy.empty()) {
_responses[GroupId(e.getBucketId(), copy.getChecksum(), -1)].emplace_back(copy);
+ } else { // empty replica
+ // We must treat a bucket with empty replicas as inherently inconsistent.
+ // See GetOperationTest::get_not_sent_to_empty_replicas_but_bucket_tagged_as_inconsistent for
+ // rationale as to why this is the case.
+ _has_replica_inconsistency = true;
}
}
}
diff --git a/storage/src/vespa/storage/distributor/operations/external/updateoperation.cpp b/storage/src/vespa/storage/distributor/operations/external/updateoperation.cpp
index a16eef0ab6f..5233e5678fa 100644
--- a/storage/src/vespa/storage/distributor/operations/external/updateoperation.cpp
+++ b/storage/src/vespa/storage/distributor/operations/external/updateoperation.cpp
@@ -90,6 +90,7 @@ UpdateOperation::onStart(DistributorStripeMessageSender& sender)
// An UpdateOperation should only be started iff all replicas are consistent
// with each other, so sampling a single replica should be equal to sampling them all.
+ // FIXME this no longer holds when replicas are consistent at the _document_ level but not at the _bucket_ level.
assert(_entries[0].getBucketInfo().getNodeCount() > 0); // Empty buckets are not allowed
_infoAtSendTime = _entries[0].getBucketInfo().getNodeRef(0).getBucketInfo();