summaryrefslogtreecommitdiffstats
path: root/clustercontroller-core
diff options
context:
space:
mode:
authorTor Brede Vekterli <vekterli@yahoo-inc.com>2017-10-12 17:04:54 +0200
committerGitHub <noreply@github.com>2017-10-12 17:04:54 +0200
commit50b3993733265a0455d78b12989c9f0391527a5d (patch)
tree023027060d9b243c6771b5d69e1f96b15c2ec4fc /clustercontroller-core
parent0f2f3accdc4803781920285fa9656c820d41a20a (diff)
parent8c6befb4a9fb5357d33208631cc15989dab771f7 (diff)
Merge pull request #3525 from vespa-engine/vekterli/re-enable-synchronous-set-node-state
Re-enable synchronous set node state with additional safeguards
Diffstat (limited to 'clustercontroller-core')
-rw-r--r--clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/FleetController.java30
-rw-r--r--clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/FleetControllerOptions.java12
-rw-r--r--clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/RemoteClusterControllerTask.java30
-rw-r--r--clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/VersionDependentTaskCompletion.java9
-rw-r--r--clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/restapiv2/Request.java14
-rw-r--r--clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/restapiv2/requests/SetNodeStateRequest.java4
-rw-r--r--clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/StateChangeTest.java55
-rw-r--r--clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/restapiv2/SetNodeStateTest.java32
8 files changed, 165 insertions, 21 deletions
diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/FleetController.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/FleetController.java
index 76d59622fe7..a5419c64818 100644
--- a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/FleetController.java
+++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/FleetController.java
@@ -400,12 +400,12 @@ public class FleetController implements NodeStateOrHostInfoChangeHandler, NodeAd
private void failAllVersionDependentTasks() {
tasksPendingStateRecompute.forEach(task -> {
- task.handleLeadershipLost();
+ task.handleFailure(RemoteClusterControllerTask.FailureCondition.LEADERSHIP_LOST);
task.notifyCompleted();
});
tasksPendingStateRecompute.clear();
taskCompletionQueue.forEach(task -> {
- task.getTask().handleLeadershipLost();
+ task.getTask().handleFailure(RemoteClusterControllerTask.FailureCondition.LEADERSHIP_LOST);
task.getTask().notifyCompleted();
});
taskCompletionQueue.clear();
@@ -665,8 +665,7 @@ public class FleetController implements NodeStateOrHostInfoChangeHandler, NodeAd
final RemoteClusterControllerTask task = remoteTasks.poll();
log.finest(() -> String.format("Processing remote task of type '%s'", task.getClass().getName()));
task.doRemoteFleetControllerTask(context);
- // We cannot introduce a version barrier for tasks when we're not the master (and therefore will not publish new versions).
- if (!isMaster() || !task.hasVersionAckDependency()) {
+ if (taskMayBeCompletedImmediately(task)) {
log.finest(() -> String.format("Done processing remote task of type '%s'", task.getClass().getName()));
task.notifyCompleted();
} else {
@@ -678,6 +677,11 @@ public class FleetController implements NodeStateOrHostInfoChangeHandler, NodeAd
return false;
}
+ private boolean taskMayBeCompletedImmediately(RemoteClusterControllerTask task) {
+ // We cannot introduce a version barrier for tasks when we're not the master (and therefore will not publish new versions).
+ return (!task.hasVersionAckDependency() || task.isFailed() || !masterElectionHandler.isMaster());
+ }
+
private RemoteClusterControllerTask.Context createRemoteTaskProcessingContext() {
final RemoteClusterControllerTask.Context context = new RemoteClusterControllerTask.Context();
context.cluster = cluster;
@@ -691,13 +695,27 @@ public class FleetController implements NodeStateOrHostInfoChangeHandler, NodeAd
private boolean completeSatisfiedVersionDependentTasks() {
int publishedVersion = systemStateBroadcaster.lastClusterStateVersionInSync();
long queueSizeBefore = taskCompletionQueue.size();
+ // Note: although version monotonicity of tasks in queue always should hold,
+ // deadline monotonicity is not guaranteed to do so due to reconfigs of task
+ // timeout durations. Means that tasks enqueued with shorter deadline duration
+ // might be observed as having at least the same timeout as tasks enqueued during
+ // a previous configuration. Current clock implementation is also susceptible to
+ // skewing.
+ final long now = timer.getCurrentTimeInMillis();
while (!taskCompletionQueue.isEmpty()) {
VersionDependentTaskCompletion taskCompletion = taskCompletionQueue.peek();
+ // TODO expose and use monotonic clock instead of system clock
if (publishedVersion >= taskCompletion.getMinimumVersion()) {
log.fine(() -> String.format("Deferred task of type '%s' has minimum version %d, published is %d; completing",
taskCompletion.getTask().getClass().getName(), taskCompletion.getMinimumVersion(), publishedVersion));
taskCompletion.getTask().notifyCompleted();
taskCompletionQueue.remove();
+ } else if (taskCompletion.getDeadlineTimePointMs() <= now) {
+ log.fine(() -> String.format("Deferred task of type '%s' has exceeded wait deadline; completing with failure",
+ taskCompletion.getTask().getClass().getName()));
+ taskCompletion.getTask().handleFailure(RemoteClusterControllerTask.FailureCondition.DEADLINE_EXCEEDED);
+ taskCompletion.getTask().notifyCompleted();
+ taskCompletionQueue.remove();
} else {
break;
}
@@ -792,10 +810,12 @@ public class FleetController implements NodeStateOrHostInfoChangeHandler, NodeAd
* has been ACKed by all distributors in the system, those tasks will be marked as completed.
*/
private void scheduleVersionDependentTasksForFutureCompletion(int completeAtVersion) {
+ // TODO expose and use monotonic clock instead of system clock
+ final long deadlineTimePointMs = timer.getCurrentTimeInMillis() + options.getMaxDeferredTaskVersionWaitTime().toMillis();
for (RemoteClusterControllerTask task : tasksPendingStateRecompute) {
log.finest(() -> String.format("Adding task of type '%s' to be completed at version %d",
task.getClass().getName(), completeAtVersion));
- taskCompletionQueue.add(new VersionDependentTaskCompletion(completeAtVersion, task));
+ taskCompletionQueue.add(new VersionDependentTaskCompletion(completeAtVersion, task, deadlineTimePointMs));
}
tasksPendingStateRecompute.clear();
}
diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/FleetControllerOptions.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/FleetControllerOptions.java
index a885d432597..d8c853f45cb 100644
--- a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/FleetControllerOptions.java
+++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/FleetControllerOptions.java
@@ -7,6 +7,7 @@ import com.yahoo.vdslib.distribution.Distribution;
import com.yahoo.vdslib.state.NodeType;
import com.yahoo.vespa.clustercontroller.core.status.statuspage.StatusPageServer;
+import java.time.Duration;
import java.util.*;
import java.text.DecimalFormat;
import java.text.DecimalFormatSymbols;
@@ -116,6 +117,8 @@ public class FleetControllerOptions implements Cloneable {
// TODO: Get rid of this by always getting nodes by distribution.getNodes()
public Set<ConfiguredNode> nodes;
+ private Duration maxDeferredTaskVersionWaitTime = Duration.ofSeconds(30);
+
// TODO: Replace usage of this by usage where the nodes are explicitly passed (below)
public FleetControllerOptions(String clusterName) {
this.clusterName = clusterName;
@@ -139,6 +142,14 @@ public class FleetControllerOptions implements Cloneable {
this.nodes = distribution.getNodes();
}
+ public Duration getMaxDeferredTaskVersionWaitTime() {
+ return maxDeferredTaskVersionWaitTime;
+ }
+
+ public void setMaxDeferredTaskVersionWaitTime(Duration maxDeferredTaskVersionWaitTime) {
+ this.maxDeferredTaskVersionWaitTime = maxDeferredTaskVersionWaitTime;
+ }
+
public FleetControllerOptions clone() {
try {
// TODO: This should deep clone
@@ -213,6 +224,7 @@ public class FleetControllerOptions implements Cloneable {
sb.append("<tr><td><nobr>Maximum event log size</nobr></td><td align=\"right\">").append(eventLogMaxSize).append("</td></tr>");
sb.append("<tr><td><nobr>Maximum node event log size</nobr></td><td align=\"right\">").append(eventNodeLogMaxSize).append("</td></tr>");
sb.append("<tr><td><nobr>Wanted distribution bits</nobr></td><td align=\"right\">").append(distributionBits).append("</td></tr>");
+ sb.append("<tr><td><nobr>Max deferred task version wait time</nobr></td><td align=\"right\">").append(maxDeferredTaskVersionWaitTime.toMillis()).append("ms</td></tr>");
sb.append("</table>");
}
diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/RemoteClusterControllerTask.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/RemoteClusterControllerTask.java
index 0fb9e5798c8..d082158edc7 100644
--- a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/RemoteClusterControllerTask.java
+++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/RemoteClusterControllerTask.java
@@ -32,15 +32,37 @@ public abstract class RemoteClusterControllerTask {
public boolean hasVersionAckDependency() { return false; }
/**
- * If the task response has been deferred due to hasVersionAckDependency(),
- * handleLeadershipLost() will be invoked on the task if the cluster controller
+ * If true, signals that a task has failed and can be immediately marked as
+ * complete without waiting for a version ACK. The task implementation has
+ * the responsibility of communicating any failure to the caller, and ensuring
+ * that the lack of version waiting does not violate any invariants.
+ */
+ public boolean isFailed() { return false; }
+
+ public enum FailureCondition {
+ LEADERSHIP_LOST,
+ DEADLINE_EXCEEDED
+ }
+
+ /**
+ * If the task completion has been deferred due to hasVersionAckDependency(),
+ * this method will be invoked if a failure occurs before the version has
+ * been successfully ACKed.
+ *
+ * LEADERSHIP_LOST will be the failure condition if the cluster controller
* discovers it has lost leadership in the time between task execution and
- * deferred response send time.
+ * deferred completion time.
+ *
+ * DEADLINE_EXCEEDED will be the failure condition if the completion has been
+ * deferred for more than a configurable amount of time.
*
* This method will also be invoked if the controller is signalled to shut down
* before the dependent cluster version has been published.
+ *
+ * The task implementation is responsible for communicating the appropriate
+ * error semantics to the caller who initially scheduled the task.
*/
- public void handleLeadershipLost() {}
+ public void handleFailure(FailureCondition condition) {}
public boolean isCompleted() {
synchronized (monitor) {
diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/VersionDependentTaskCompletion.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/VersionDependentTaskCompletion.java
index d7b8f96005a..5d6a4f66467 100644
--- a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/VersionDependentTaskCompletion.java
+++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/VersionDependentTaskCompletion.java
@@ -8,15 +8,18 @@ import java.util.Objects;
* completion depends on side effects by the task becoming visible in
* the cluster before a response can be sent. Each such task is associated
* with a particular cluster state version number representing a lower bound
- * on the published state containing the side effect.
+ * on the published state containing the side effect. Each task is also
+ * associated with a completion deadline.
*/
class VersionDependentTaskCompletion {
private final long minimumVersion;
private final RemoteClusterControllerTask task;
+ private final long deadlineTimePointMs;
- VersionDependentTaskCompletion(long minimumVersion, RemoteClusterControllerTask task) {
+ VersionDependentTaskCompletion(long minimumVersion, RemoteClusterControllerTask task, long deadlineTimePointMs) {
this.minimumVersion = minimumVersion;
this.task = task;
+ this.deadlineTimePointMs = deadlineTimePointMs;
}
long getMinimumVersion() {
@@ -27,6 +30,8 @@ class VersionDependentTaskCompletion {
return task;
}
+ long getDeadlineTimePointMs() { return deadlineTimePointMs; }
+
@Override
public boolean equals(Object o) {
if (this == o) return true;
diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/restapiv2/Request.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/restapiv2/Request.java
index 924614df327..5ac15e75127 100644
--- a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/restapiv2/Request.java
+++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/restapiv2/Request.java
@@ -2,6 +2,7 @@
package com.yahoo.vespa.clustercontroller.core.restapiv2;
import com.yahoo.vespa.clustercontroller.core.RemoteClusterControllerTask;
+import com.yahoo.vespa.clustercontroller.utils.staterestapi.errors.DeadlineExceededException;
import com.yahoo.vespa.clustercontroller.utils.staterestapi.errors.InternalFailure;
import com.yahoo.vespa.clustercontroller.utils.staterestapi.errors.StateRestApiException;
import com.yahoo.vespa.clustercontroller.utils.staterestapi.errors.UnknownMasterException;
@@ -61,8 +62,17 @@ public abstract class Request<Result> extends RemoteClusterControllerTask {
}
@Override
- public void handleLeadershipLost() {
- failure = new UnknownMasterException("Leadership lost before request could complete");
+ public void handleFailure(FailureCondition condition) {
+ if (condition == FailureCondition.LEADERSHIP_LOST) {
+ failure = new UnknownMasterException("Leadership lost before request could complete");
+ } else if (condition == FailureCondition.DEADLINE_EXCEEDED) {
+ failure = new DeadlineExceededException("Task exceeded its version wait deadline");
+ }
+ }
+
+ @Override
+ public boolean isFailed() {
+ return (failure != null);
}
public abstract Result calculateResult(Context context) throws StateRestApiException, OtherMasterIndexException;
diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/restapiv2/requests/SetNodeStateRequest.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/restapiv2/requests/SetNodeStateRequest.java
index 2c094769961..cf41707db75 100644
--- a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/restapiv2/requests/SetNodeStateRequest.java
+++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/restapiv2/requests/SetNodeStateRequest.java
@@ -67,9 +67,7 @@ public class SetNodeStateRequest extends Request<SetResponse> {
@Override
public boolean hasVersionAckDependency() {
- // FIXME this is a temporary change while edge cases in interactions between controller
- // and orchestration are sorted out.
- return false;
+ return (responseWait == SetUnitStateRequest.ResponseWait.WAIT_UNTIL_CLUSTER_ACKED);
}
static SetResponse setWantedState(
diff --git a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/StateChangeTest.java b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/StateChangeTest.java
index 6a8ffae8f3f..b10a8101c37 100644
--- a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/StateChangeTest.java
+++ b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/StateChangeTest.java
@@ -16,6 +16,7 @@ import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertTrue;
+import java.time.Duration;
import java.util.*;
import java.util.logging.Logger;
@@ -1248,16 +1249,25 @@ public class StateChangeTest extends FleetControllerTest {
private static abstract class MockTask extends RemoteClusterControllerTask {
boolean invoked = false;
boolean leadershipLost = false;
+ boolean deadlineExceeded = false;
boolean isInvoked() { return invoked; }
boolean isLeadershipLost() { return leadershipLost; }
+ boolean isDeadlineExceeded() { return deadlineExceeded; }
+
@Override
public boolean hasVersionAckDependency() { return true; }
@Override
- public void handleLeadershipLost() { this.leadershipLost = true; }
+ public void handleFailure(FailureCondition condition) {
+ if (condition == FailureCondition.LEADERSHIP_LOST) {
+ this.leadershipLost = true;
+ } else if (condition == FailureCondition.DEADLINE_EXCEEDED) {
+ this.deadlineExceeded = true;
+ }
+ }
}
// We create an explicit mock task class instead of using mock() simply because of
@@ -1275,6 +1285,11 @@ public class StateChangeTest extends FleetControllerTest {
}
}
+ private static class FailingMockSynchronousTaskWithSideEffects extends MockSynchronousTaskWithSideEffects {
+ @Override
+ public boolean isFailed() { return true; }
+ }
+
private static class MockNoOpSynchronousTask extends MockTask {
@Override
public void doRemoteFleetControllerTask(Context ctx) {
@@ -1310,6 +1325,10 @@ public class StateChangeTest extends FleetControllerTest {
return scheduleTask(new MockNoOpSynchronousTask());
}
+ MockTask scheduleFailingVersionDependentTaskWithSideEffects() throws Exception {
+ return scheduleTask(new FailingMockSynchronousTaskWithSideEffects());
+ }
+
void markStorageNodeDown(int index) throws Exception {
communicator.setNodeState(new Node(NodeType.STORAGE, index), State.DOWN, "foo"); // Auto-ACKed
ctrl.tick();
@@ -1396,6 +1415,15 @@ public class StateChangeTest extends FleetControllerTest {
}
@Test
+ public void failing_task_is_immediately_completed() throws Exception {
+ RemoteTaskFixture fixture = createDefaultFixture();
+ MockTask task = fixture.scheduleFailingVersionDependentTaskWithSideEffects();
+
+ assertTrue(task.isInvoked());
+ assertTrue(task.isCompleted());
+ }
+
+ @Test
public void no_op_synchronous_remote_task_can_complete_immediately_if_current_state_already_acked() throws Exception {
RemoteTaskFixture fixture = createFixtureWith(optionsWithZeroTransitionTime());
fixture.markStorageNodeDown(0);
@@ -1523,4 +1551,29 @@ public class StateChangeTest extends FleetControllerTest {
assertTrue(task.isCompleted());
}
+ @Test
+ public void task_not_completed_within_deadline_is_failed_with_deadline_exceeded_error() throws Exception {
+ FleetControllerOptions options = defaultOptions();
+ options.setMaxDeferredTaskVersionWaitTime(Duration.ofSeconds(60));
+ RemoteTaskFixture fixture = createFixtureWith(options);
+
+ MockTask task = fixture.scheduleVersionDependentTaskWithSideEffects();
+ communicator.setShouldDeferDistributorClusterStateAcks(true);
+ fixture.processScheduledTask();
+
+ assertTrue(task.isInvoked());
+ assertFalse(task.isCompleted());
+ assertFalse(task.isDeadlineExceeded());
+
+ timer.advanceTime(59_000);
+ ctrl.tick();
+ assertFalse(task.isCompleted());
+ assertFalse(task.isDeadlineExceeded());
+
+ timer.advanceTime(1_001);
+ ctrl.tick();
+ assertTrue(task.isCompleted());
+ assertTrue(task.isDeadlineExceeded());
+ }
+
}
diff --git a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/restapiv2/SetNodeStateTest.java b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/restapiv2/SetNodeStateTest.java
index 0f35b96c31d..4fb244666a4 100644
--- a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/restapiv2/SetNodeStateTest.java
+++ b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/restapiv2/SetNodeStateTest.java
@@ -2,7 +2,9 @@
package com.yahoo.vespa.clustercontroller.core.restapiv2;
import com.yahoo.vdslib.state.NodeType;
+import com.yahoo.vespa.clustercontroller.core.RemoteClusterControllerTask;
import com.yahoo.vespa.clustercontroller.core.restapiv2.requests.SetNodeStateRequest;
+import com.yahoo.vespa.clustercontroller.utils.staterestapi.errors.DeadlineExceededException;
import com.yahoo.vespa.clustercontroller.utils.staterestapi.errors.InvalidContentException;
import com.yahoo.vespa.clustercontroller.utils.staterestapi.errors.MissingUnitException;
import com.yahoo.vespa.clustercontroller.utils.staterestapi.errors.OperationNotSupportedForUnitException;
@@ -374,11 +376,16 @@ public class SetNodeStateTest extends StateRestApiTest {
.setNewState("user", "maintenance", "whatever reason."));
}
- // FIXME requests should be tagged as version dependent; temporary workaround
@Test
- public void set_node_state_requests_are_by_default_not_tagged_as_having_version_ack_dependency() {
+ public void set_node_state_requests_are_by_default_tagged_as_having_version_ack_dependency() {
SetNodeStateRequest request = createDummySetNodeStateRequest();
- assertFalse(request.hasVersionAckDependency());
+ assertTrue(request.hasVersionAckDependency());
+ }
+
+ @Test
+ public void set_node_state_requests_not_initially_marked_as_failed() {
+ SetNodeStateRequest request = createDummySetNodeStateRequest();
+ assertFalse(request.isFailed());
}
@Test
@@ -398,7 +405,24 @@ public class SetNodeStateTest extends StateRestApiTest {
expectedException.expect(UnknownMasterException.class);
SetNodeStateRequest request = createDummySetNodeStateRequest();
- request.handleLeadershipLost();
+ request.handleFailure(RemoteClusterControllerTask.FailureCondition.LEADERSHIP_LOST);
+ request.getResult();
+ }
+
+ @Test
+ public void leadership_loss_marks_request_as_failed_for_early_out_response() {
+ SetNodeStateRequest request = createDummySetNodeStateRequest();
+ request.handleFailure(RemoteClusterControllerTask.FailureCondition.LEADERSHIP_LOST);
+ assertTrue(request.isFailed());
+ }
+
+ @Test
+ public void deadline_exceeded_fails_set_node_state_request() throws Exception {
+ expectedException.expectMessage("Task exceeded its version wait deadline");
+ expectedException.expect(DeadlineExceededException.class);
+
+ SetNodeStateRequest request = createDummySetNodeStateRequest();
+ request.handleFailure(RemoteClusterControllerTask.FailureCondition.DEADLINE_EXCEEDED);
request.getResult();
}