diff options
author | Tor Brede Vekterli <vekterli@yahoo-inc.com> | 2017-09-21 13:19:44 +0200 |
---|---|---|
committer | Tor Brede Vekterli <vekterli@yahoo-inc.com> | 2017-09-25 16:05:23 +0200 |
commit | debb34547c76429e8a345299e4765824903f784c (patch) | |
tree | 34ac382d6296f63f2086032bdf37956f8caad1ff | |
parent | d2a0cce57df4d1bcfa41ed4cbfadc705636df142 (diff) |
Immediately complete failed remote tasks
We check both for master status and task failure, as we otherwise place
a potentially dangerous silent dependency on the task always failing itself
if the controller is not a master.
6 files changed, 53 insertions, 8 deletions
diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/FleetController.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/FleetController.java index 120b482e11d..030d792a63b 100644 --- a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/FleetController.java +++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/FleetController.java @@ -665,8 +665,7 @@ public class FleetController implements NodeStateOrHostInfoChangeHandler, NodeAd final RemoteClusterControllerTask task = remoteTasks.poll(); log.finest(() -> String.format("Processing remote task of type '%s'", task.getClass().getName())); task.doRemoteFleetControllerTask(context); - // We cannot introduce a version barrier for tasks when we're not the master (and therefore will not publish new versions). - if (!isMaster() || !task.hasVersionAckDependency()) { + if (taskMayBeCompletedImmediately(task)) { log.finest(() -> String.format("Done processing remote task of type '%s'", task.getClass().getName())); task.notifyCompleted(); } else { @@ -678,6 +677,11 @@ public class FleetController implements NodeStateOrHostInfoChangeHandler, NodeAd return false; } + private boolean taskMayBeCompletedImmediately(RemoteClusterControllerTask task) { + // We cannot introduce a version barrier for tasks when we're not the master (and therefore will not publish new versions). + return (!task.hasVersionAckDependency() || task.isFailed() || !masterElectionHandler.isMaster()); + } + private RemoteClusterControllerTask.Context createRemoteTaskProcessingContext() { final RemoteClusterControllerTask.Context context = new RemoteClusterControllerTask.Context(); context.cluster = cluster; diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/RemoteClusterControllerTask.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/RemoteClusterControllerTask.java index 0fb9e5798c8..e4519a02632 100644 --- a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/RemoteClusterControllerTask.java +++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/RemoteClusterControllerTask.java @@ -32,6 +32,14 @@ public abstract class RemoteClusterControllerTask { public boolean hasVersionAckDependency() { return false; } /** + * If true, signals that a task has failed and can be immediately marked as + * complete without waiting for a version ACK. The task implementation has + * the responsibility of communicating any failure to the caller, and ensuring + * that the lack of version waiting does not violate any invariants. + */ + public boolean isFailed() { return false; } + + /** * If the task response has been deferred due to hasVersionAckDependency(), * handleLeadershipLost() will be invoked on the task if the cluster controller * discovers it has lost leadership in the time between task execution and diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/restapiv2/Request.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/restapiv2/Request.java index 924614df327..27ab43b2b5a 100644 --- a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/restapiv2/Request.java +++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/restapiv2/Request.java @@ -65,6 +65,11 @@ public abstract class Request<Result> extends RemoteClusterControllerTask { failure = new UnknownMasterException("Leadership lost before request could complete"); } + @Override + public boolean isFailed() { + return (failure != null); + } + public abstract Result calculateResult(Context context) throws StateRestApiException, OtherMasterIndexException; } diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/restapiv2/requests/SetNodeStateRequest.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/restapiv2/requests/SetNodeStateRequest.java index 2c094769961..cf41707db75 100644 --- a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/restapiv2/requests/SetNodeStateRequest.java +++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/restapiv2/requests/SetNodeStateRequest.java @@ -67,9 +67,7 @@ public class SetNodeStateRequest extends Request<SetResponse> { @Override public boolean hasVersionAckDependency() { - // FIXME this is a temporary change while edge cases in interactions between controller - // and orchestration are sorted out. - return false; + return (responseWait == SetUnitStateRequest.ResponseWait.WAIT_UNTIL_CLUSTER_ACKED); } static SetResponse setWantedState( diff --git a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/StateChangeTest.java b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/StateChangeTest.java index 6a8ffae8f3f..d9815241920 100644 --- a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/StateChangeTest.java +++ b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/StateChangeTest.java @@ -1275,6 +1275,11 @@ public class StateChangeTest extends FleetControllerTest { } } + private static class FailingMockSynchronousTaskWithSideEffects extends MockSynchronousTaskWithSideEffects { + @Override + public boolean isFailed() { return true; } + } + private static class MockNoOpSynchronousTask extends MockTask { @Override public void doRemoteFleetControllerTask(Context ctx) { @@ -1310,6 +1315,10 @@ public class StateChangeTest extends FleetControllerTest { return scheduleTask(new MockNoOpSynchronousTask()); } + MockTask scheduleFailingVersionDependentTaskWithSideEffects() throws Exception { + return scheduleTask(new FailingMockSynchronousTaskWithSideEffects()); + } + void markStorageNodeDown(int index) throws Exception { communicator.setNodeState(new Node(NodeType.STORAGE, index), State.DOWN, "foo"); // Auto-ACKed ctrl.tick(); @@ -1396,6 +1405,15 @@ public class StateChangeTest extends FleetControllerTest { } @Test + public void failing_task_is_immediately_completed() throws Exception { + RemoteTaskFixture fixture = createDefaultFixture(); + MockTask task = fixture.scheduleFailingVersionDependentTaskWithSideEffects(); + + assertTrue(task.isInvoked()); + assertTrue(task.isCompleted()); + } + + @Test public void no_op_synchronous_remote_task_can_complete_immediately_if_current_state_already_acked() throws Exception { RemoteTaskFixture fixture = createFixtureWith(optionsWithZeroTransitionTime()); fixture.markStorageNodeDown(0); diff --git a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/restapiv2/SetNodeStateTest.java b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/restapiv2/SetNodeStateTest.java index 0f35b96c31d..416c57ce5d7 100644 --- a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/restapiv2/SetNodeStateTest.java +++ b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/restapiv2/SetNodeStateTest.java @@ -374,11 +374,16 @@ public class SetNodeStateTest extends StateRestApiTest { .setNewState("user", "maintenance", "whatever reason.")); } - // FIXME requests should be tagged as version dependent; temporary workaround @Test - public void set_node_state_requests_are_by_default_not_tagged_as_having_version_ack_dependency() { + public void set_node_state_requests_are_by_default_tagged_as_having_version_ack_dependency() { SetNodeStateRequest request = createDummySetNodeStateRequest(); - assertFalse(request.hasVersionAckDependency()); + assertTrue(request.hasVersionAckDependency()); + } + + @Test + public void set_node_state_requests_not_initially_marked_as_failed() { + SetNodeStateRequest request = createDummySetNodeStateRequest(); + assertFalse(request.isFailed()); } @Test @@ -402,4 +407,11 @@ public class SetNodeStateTest extends StateRestApiTest { request.getResult(); } + @Test + public void leadership_loss_marks_request_as_failed_for_early_out_response() { + SetNodeStateRequest request = createDummySetNodeStateRequest(); + request.handleLeadershipLost(); + assertTrue(request.isFailed()); + } + } |