diff options
author | Tor Egge <Tor.Egge@oath.com> | 2018-03-19 15:11:33 +0000 |
---|---|---|
committer | Tor Egge <Tor.Egge@oath.com> | 2018-03-19 15:11:33 +0000 |
commit | c5ad58651b68d568325af19a2c77a991a7865467 (patch) | |
tree | 2fcee953c6f777564ab0708e5e4afb3bf8b17741 | |
parent | df5301730333954c8118bd6d90760cff81b7601e (diff) |
Abort queued operations if storage node is down in new cluster state bundle.
3 files changed, 21 insertions, 4 deletions
diff --git a/storage/src/vespa/storage/persistence/filestorage/filestorhandlerimpl.cpp b/storage/src/vespa/storage/persistence/filestorage/filestorhandlerimpl.cpp index c8b5c71ee2e..197bd4d148d 100644 --- a/storage/src/vespa/storage/persistence/filestorage/filestorhandlerimpl.cpp +++ b/storage/src/vespa/storage/persistence/filestorage/filestorhandlerimpl.cpp @@ -332,7 +332,7 @@ FileStorHandlerImpl::abortQueuedCommandsForBuckets( typedef PriorityQueue::iterator iter_t; api::ReturnCode abortedCode(api::ReturnCode::ABORTED, "Sending distributor no longer owns " - "bucket operation was bound to"); + "bucket operation was bound to or storage node went down"); for (iter_t it(t.queue.begin()), e(t.queue.end()); it != e;) { api::StorageMessage& msg(*it->_command); if (messageMayBeAborted(msg) && cmd.shouldAbort(it->_bucket)) { diff --git a/storage/src/vespa/storage/storageserver/changedbucketownershiphandler.cpp b/storage/src/vespa/storage/storageserver/changedbucketownershiphandler.cpp index 8990627f277..57fb7c5c24d 100644 --- a/storage/src/vespa/storage/storageserver/changedbucketownershiphandler.cpp +++ b/storage/src/vespa/storage/storageserver/changedbucketownershiphandler.cpp @@ -151,6 +151,14 @@ ChangedBucketOwnershipHandler::OwnershipState::ownerOf( return FAILED_TO_RESOLVE; } +bool +ChangedBucketOwnershipHandler::OwnershipState::storageNodeUp(document::BucketSpace bucketSpace, uint16_t nodeIndex) const +{ + const auto &derivedState = *_state->getDerivedClusterState(bucketSpace); + lib::Node node(lib::NodeType::STORAGE, nodeIndex); + return derivedState.getNodeState(node).getState().oneOf("uir"); +} + void ChangedBucketOwnershipHandler::logTransition( const lib::ClusterState& currentState, @@ -175,11 +183,16 @@ class StateDiffLazyAbortPredicate // Fast path to avoid trying (and failing) to compute owner in a state // where all distributors are down. bool _allDistributorsHaveGoneDown; + uint16_t _nodeIndex; bool doShouldAbort(const document::Bucket &bucket) const override { if (_allDistributorsHaveGoneDown) { return true; } + bool storageNodeUp = _newState.storageNodeUp(bucket.getBucketSpace(), _nodeIndex); + if (!storageNodeUp) { + return true; + } uint16_t oldOwner(_oldState.ownerOf(bucket)); uint16_t newOwner(_newState.ownerOf(bucket)); if (oldOwner != newOwner) { @@ -192,11 +205,13 @@ class StateDiffLazyAbortPredicate public: StateDiffLazyAbortPredicate( const ChangedBucketOwnershipHandler::OwnershipState& oldState, - const ChangedBucketOwnershipHandler::OwnershipState& newState) + const ChangedBucketOwnershipHandler::OwnershipState& newState, + uint16_t nodeIndex) : _oldState(oldState), _newState(newState), _allDistributorsHaveGoneDown( - allDistributorsDownInState(newState.getBaselineState())) + allDistributorsDownInState(newState.getBaselineState())), + _nodeIndex(nodeIndex) { } }; @@ -209,7 +224,8 @@ ChangedBucketOwnershipHandler::makeLazyAbortPredicate( const OwnershipState::CSP& newOwnership) const { return std::unique_ptr<AbortBucketOperationsCommand::AbortPredicate>( - new StateDiffLazyAbortPredicate(*oldOwnership, *newOwnership)); + new StateDiffLazyAbortPredicate(*oldOwnership, *newOwnership, + _component.getIndex())); } /* diff --git a/storage/src/vespa/storage/storageserver/changedbucketownershiphandler.h b/storage/src/vespa/storage/storageserver/changedbucketownershiphandler.h index 50e711c9484..1e524b6b2fc 100644 --- a/storage/src/vespa/storage/storageserver/changedbucketownershiphandler.h +++ b/storage/src/vespa/storage/storageserver/changedbucketownershiphandler.h @@ -98,6 +98,7 @@ public: const lib::ClusterState& getBaselineState() const; uint16_t ownerOf(const document::Bucket& bucket) const; + bool storageNodeUp(document::BucketSpace bucketSpace, uint16_t nodeIndex) const; }; /** |