summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorTor Brede Vekterli <vekterli@yahooinc.com>2023-06-01 13:07:32 +0000
committerTor Brede Vekterli <vekterli@yahooinc.com>2023-06-01 13:39:52 +0000
commit9ec7fc5001f0eff5bda7befda8a34022160949bc (patch)
tree10ca41c87d6d8ad2fb04d918eddc6141f5cfd599
parent4cae2ecf6c9c5ac7f329a112451909d0f7ac3950 (diff)
Explicitly detect changed bucket ownership between read and write phases
It was possible for bucket ownership to change between the condition check and operation write phases in a way that was not implicitly detected by the "has the replica set changed?"-test. This could trigger an assertion failure caused by a daisy-chain of events: 1. Distributor tries to update the bucket DB with a bucket that it no longer owns. This is a no-op, as the DB insertion silently drops updates to non-owned buckets. This is to avoid reinsertion of stale state when replies arrive to operations started on the old side of a state transition edge. 2. Distributor reads back the bucket it previously tried to write, with the code assuming it must have a valid, present state (it just wrote it, after all). 3. The distributor sets a flag in the read state and tries to write it back. 4. An invariant violation is detected when invalid state is attempted written and the process aborts. This commit adds explicit checking of ownership in the current and, if present, pending cluster states iff the cluster state version has changed between phases.
-rw-r--r--storage/src/tests/distributor/check_condition_test.cpp13
-rw-r--r--storage/src/vespa/storage/distributor/operations/external/check_condition.cpp7
-rw-r--r--storage/src/vespa/storage/distributor/operations/external/check_condition.h1
3 files changed, 20 insertions, 1 deletions
diff --git a/storage/src/tests/distributor/check_condition_test.cpp b/storage/src/tests/distributor/check_condition_test.cpp
index ee8c9b888bb..d355194ec0d 100644
--- a/storage/src/tests/distributor/check_condition_test.cpp
+++ b/storage/src/tests/distributor/check_condition_test.cpp
@@ -242,6 +242,19 @@ TEST_F(CheckConditionTest, check_fails_if_replica_set_changed_between_start_and_
});
}
+TEST_F(CheckConditionTest, check_fails_if_bucket_ownership_changed_between_start_and_completion) {
+ test_cond_with_2_gets_sent([&](auto& cond) {
+ cond.handle_reply(_sender, make_matched_reply(0));
+ enable_cluster_state("version:2 storage:1 distributor:1 .0.s:d"); // technically, no distributors own anything
+ cond.handle_reply(_sender, make_matched_reply(1));
+ }, [&](auto& outcome) {
+ EXPECT_FALSE(outcome.matched_condition());
+ EXPECT_FALSE(outcome.not_found());
+ EXPECT_TRUE(outcome.failed());
+ EXPECT_EQ(outcome.error_code().getResult(), api::ReturnCode::BUCKET_NOT_FOUND);
+ });
+}
+
TEST_F(CheckConditionTest, nested_get_traces_are_propagated_to_outcome) {
test_cond_with_2_gets_sent([&](auto& cond) {
cond.handle_reply(_sender, make_trace_reply(0, 100, "hello"));
diff --git a/storage/src/vespa/storage/distributor/operations/external/check_condition.cpp b/storage/src/vespa/storage/distributor/operations/external/check_condition.cpp
index fc619d9eb23..c5f4788ec51 100644
--- a/storage/src/vespa/storage/distributor/operations/external/check_condition.cpp
+++ b/storage/src/vespa/storage/distributor/operations/external/check_condition.cpp
@@ -127,6 +127,10 @@ bool CheckCondition::replica_set_changed_after_get_operation() const {
return (replicas_in_db_now != _cond_get_op->replicas_in_db());
}
+bool CheckCondition::distributor_no_longer_owns_bucket() const {
+ return !_bucket_space.check_ownership_in_pending_and_current_state(_doc_id_bucket.getBucketId()).isOwned();
+}
+
CheckCondition::Outcome::Result
CheckCondition::newest_replica_to_outcome(const std::optional<NewestReplica>& newest) noexcept {
if (!newest) {
@@ -161,7 +165,8 @@ void CheckCondition::handle_internal_get_operation_reply(std::shared_ptr<api::St
}
const auto state_version_now = _bucket_space.getClusterState().getVersion();
if ((state_version_now != _cluster_state_version_at_creation_time)
- && replica_set_changed_after_get_operation())
+ && (replica_set_changed_after_get_operation()
+ || distributor_no_longer_owns_bucket()))
{
// BUCKET_NOT_FOUND is semantically (usually) inaccurate here, but it's what we use for this purpose
// in existing operations. Checking the replica set will implicitly check for ownership changes,
diff --git a/storage/src/vespa/storage/distributor/operations/external/check_condition.h b/storage/src/vespa/storage/distributor/operations/external/check_condition.h
index 382aec6242c..999b79adc3d 100644
--- a/storage/src/vespa/storage/distributor/operations/external/check_condition.h
+++ b/storage/src/vespa/storage/distributor/operations/external/check_condition.h
@@ -139,6 +139,7 @@ public:
uint32_t trace_level);
private:
[[nodiscard]] bool replica_set_changed_after_get_operation() const;
+ [[nodiscard]] bool distributor_no_longer_owns_bucket() const;
void handle_internal_get_operation_reply(std::shared_ptr<api::StorageReply> reply);