summaryrefslogtreecommitdiffstats
path: root/clustercontroller-core
diff options
context:
space:
mode:
authorTor Brede Vekterli <vekterli@yahoo-inc.com>2017-09-21 17:41:07 +0200
committerTor Brede Vekterli <vekterli@yahoo-inc.com>2017-09-25 16:05:23 +0200
commit8c6befb4a9fb5357d33208631cc15989dab771f7 (patch)
tree9501dda9ae920968e0a109a0922a45b3833c16aa /clustercontroller-core
parentdebb34547c76429e8a345299e4765824903f784c (diff)
Add configurable deadline for cluster controller tasks
Prevents an unstable cluster from potentially holding up all container request processing threads indefinitely. Deadline errors are translated into HTTP 504 errors to REST API clients.
Diffstat (limited to 'clustercontroller-core')
-rw-r--r--clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/FleetController.java22
-rw-r--r--clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/FleetControllerOptions.java12
-rw-r--r--clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/RemoteClusterControllerTask.java22
-rw-r--r--clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/VersionDependentTaskCompletion.java9
-rw-r--r--clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/restapiv2/Request.java9
-rw-r--r--clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/StateChangeTest.java37
-rw-r--r--clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/restapiv2/SetNodeStateTest.java16
7 files changed, 113 insertions, 14 deletions
diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/FleetController.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/FleetController.java
index 030d792a63b..457fe024535 100644
--- a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/FleetController.java
+++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/FleetController.java
@@ -400,12 +400,12 @@ public class FleetController implements NodeStateOrHostInfoChangeHandler, NodeAd
private void failAllVersionDependentTasks() {
tasksPendingStateRecompute.forEach(task -> {
- task.handleLeadershipLost();
+ task.handleFailure(RemoteClusterControllerTask.FailureCondition.LEADERSHIP_LOST);
task.notifyCompleted();
});
tasksPendingStateRecompute.clear();
taskCompletionQueue.forEach(task -> {
- task.getTask().handleLeadershipLost();
+ task.getTask().handleFailure(RemoteClusterControllerTask.FailureCondition.LEADERSHIP_LOST);
task.getTask().notifyCompleted();
});
taskCompletionQueue.clear();
@@ -695,13 +695,27 @@ public class FleetController implements NodeStateOrHostInfoChangeHandler, NodeAd
private boolean completeSatisfiedVersionDependentTasks() {
int publishedVersion = systemStateBroadcaster.lastClusterStateVersionInSync();
long queueSizeBefore = taskCompletionQueue.size();
+ // Note: although version monotonicity of tasks in queue always should hold,
+ // deadline monotonicity is not guaranteed to do so due to reconfigs of task
+ // timeout durations. Means that tasks enqueued with shorter deadline duration
+ // might be observed as having at least the same timeout as tasks enqueued during
+ // a previous configuration. Current clock implementation is also susceptible to
+ // skewing.
+ final long now = timer.getCurrentTimeInMillis();
while (!taskCompletionQueue.isEmpty()) {
VersionDependentTaskCompletion taskCompletion = taskCompletionQueue.peek();
+ // TODO expose and use monotonic clock instead of system clock
if (publishedVersion >= taskCompletion.getMinimumVersion()) {
log.fine(() -> String.format("Deferred task of type '%s' has minimum version %d, published is %d; completing",
taskCompletion.getTask().getClass().getName(), taskCompletion.getMinimumVersion(), publishedVersion));
taskCompletion.getTask().notifyCompleted();
taskCompletionQueue.remove();
+ } else if (taskCompletion.getDeadlineTimePointMs() <= now) {
+ log.fine(() -> String.format("Deferred task of type '%s' has exceeded wait deadline; completing with failure",
+ taskCompletion.getTask().getClass().getName()));
+ taskCompletion.getTask().handleFailure(RemoteClusterControllerTask.FailureCondition.DEADLINE_EXCEEDED);
+ taskCompletion.getTask().notifyCompleted();
+ taskCompletionQueue.remove();
} else {
break;
}
@@ -796,10 +810,12 @@ public class FleetController implements NodeStateOrHostInfoChangeHandler, NodeAd
* has been ACKed by all distributors in the system, those tasks will be marked as completed.
*/
private void scheduleVersionDependentTasksForFutureCompletion(int completeAtVersion) {
+ // TODO expose and use monotonic clock instead of system clock
+ final long deadlineTimePointMs = timer.getCurrentTimeInMillis() + options.getMaxDeferredTaskVersionWaitTime().toMillis();
for (RemoteClusterControllerTask task : tasksPendingStateRecompute) {
log.finest(() -> String.format("Adding task of type '%s' to be completed at version %d",
task.getClass().getName(), completeAtVersion));
- taskCompletionQueue.add(new VersionDependentTaskCompletion(completeAtVersion, task));
+ taskCompletionQueue.add(new VersionDependentTaskCompletion(completeAtVersion, task, deadlineTimePointMs));
}
tasksPendingStateRecompute.clear();
}
diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/FleetControllerOptions.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/FleetControllerOptions.java
index a885d432597..d8c853f45cb 100644
--- a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/FleetControllerOptions.java
+++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/FleetControllerOptions.java
@@ -7,6 +7,7 @@ import com.yahoo.vdslib.distribution.Distribution;
import com.yahoo.vdslib.state.NodeType;
import com.yahoo.vespa.clustercontroller.core.status.statuspage.StatusPageServer;
+import java.time.Duration;
import java.util.*;
import java.text.DecimalFormat;
import java.text.DecimalFormatSymbols;
@@ -116,6 +117,8 @@ public class FleetControllerOptions implements Cloneable {
// TODO: Get rid of this by always getting nodes by distribution.getNodes()
public Set<ConfiguredNode> nodes;
+ private Duration maxDeferredTaskVersionWaitTime = Duration.ofSeconds(30);
+
// TODO: Replace usage of this by usage where the nodes are explicitly passed (below)
public FleetControllerOptions(String clusterName) {
this.clusterName = clusterName;
@@ -139,6 +142,14 @@ public class FleetControllerOptions implements Cloneable {
this.nodes = distribution.getNodes();
}
+ public Duration getMaxDeferredTaskVersionWaitTime() {
+ return maxDeferredTaskVersionWaitTime;
+ }
+
+ public void setMaxDeferredTaskVersionWaitTime(Duration maxDeferredTaskVersionWaitTime) {
+ this.maxDeferredTaskVersionWaitTime = maxDeferredTaskVersionWaitTime;
+ }
+
public FleetControllerOptions clone() {
try {
// TODO: This should deep clone
@@ -213,6 +224,7 @@ public class FleetControllerOptions implements Cloneable {
sb.append("<tr><td><nobr>Maximum event log size</nobr></td><td align=\"right\">").append(eventLogMaxSize).append("</td></tr>");
sb.append("<tr><td><nobr>Maximum node event log size</nobr></td><td align=\"right\">").append(eventNodeLogMaxSize).append("</td></tr>");
sb.append("<tr><td><nobr>Wanted distribution bits</nobr></td><td align=\"right\">").append(distributionBits).append("</td></tr>");
+ sb.append("<tr><td><nobr>Max deferred task version wait time</nobr></td><td align=\"right\">").append(maxDeferredTaskVersionWaitTime.toMillis()).append("ms</td></tr>");
sb.append("</table>");
}
diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/RemoteClusterControllerTask.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/RemoteClusterControllerTask.java
index e4519a02632..d082158edc7 100644
--- a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/RemoteClusterControllerTask.java
+++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/RemoteClusterControllerTask.java
@@ -39,16 +39,30 @@ public abstract class RemoteClusterControllerTask {
*/
public boolean isFailed() { return false; }
+ public enum FailureCondition {
+ LEADERSHIP_LOST,
+ DEADLINE_EXCEEDED
+ }
+
/**
- * If the task response has been deferred due to hasVersionAckDependency(),
- * handleLeadershipLost() will be invoked on the task if the cluster controller
+ * If the task completion has been deferred due to hasVersionAckDependency(),
+ * this method will be invoked if a failure occurs before the version has
+ * been successfully ACKed.
+ *
+ * LEADERSHIP_LOST will be the failure condition if the cluster controller
* discovers it has lost leadership in the time between task execution and
- * deferred response send time.
+ * deferred completion time.
+ *
+ * DEADLINE_EXCEEDED will be the failure condition if the completion has been
+ * deferred for more than a configurable amount of time.
*
* This method will also be invoked if the controller is signalled to shut down
* before the dependent cluster version has been published.
+ *
+ * The task implementation is responsible for communicating the appropriate
+ * error semantics to the caller who initially scheduled the task.
*/
- public void handleLeadershipLost() {}
+ public void handleFailure(FailureCondition condition) {}
public boolean isCompleted() {
synchronized (monitor) {
diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/VersionDependentTaskCompletion.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/VersionDependentTaskCompletion.java
index d7b8f96005a..5d6a4f66467 100644
--- a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/VersionDependentTaskCompletion.java
+++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/VersionDependentTaskCompletion.java
@@ -8,15 +8,18 @@ import java.util.Objects;
* completion depends on side effects by the task becoming visible in
* the cluster before a response can be sent. Each such task is associated
* with a particular cluster state version number representing a lower bound
- * on the published state containing the side effect.
+ * on the published state containing the side effect. Each task is also
+ * associated with a completion deadline.
*/
class VersionDependentTaskCompletion {
private final long minimumVersion;
private final RemoteClusterControllerTask task;
+ private final long deadlineTimePointMs;
- VersionDependentTaskCompletion(long minimumVersion, RemoteClusterControllerTask task) {
+ VersionDependentTaskCompletion(long minimumVersion, RemoteClusterControllerTask task, long deadlineTimePointMs) {
this.minimumVersion = minimumVersion;
this.task = task;
+ this.deadlineTimePointMs = deadlineTimePointMs;
}
long getMinimumVersion() {
@@ -27,6 +30,8 @@ class VersionDependentTaskCompletion {
return task;
}
+ long getDeadlineTimePointMs() { return deadlineTimePointMs; }
+
@Override
public boolean equals(Object o) {
if (this == o) return true;
diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/restapiv2/Request.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/restapiv2/Request.java
index 27ab43b2b5a..5ac15e75127 100644
--- a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/restapiv2/Request.java
+++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/restapiv2/Request.java
@@ -2,6 +2,7 @@
package com.yahoo.vespa.clustercontroller.core.restapiv2;
import com.yahoo.vespa.clustercontroller.core.RemoteClusterControllerTask;
+import com.yahoo.vespa.clustercontroller.utils.staterestapi.errors.DeadlineExceededException;
import com.yahoo.vespa.clustercontroller.utils.staterestapi.errors.InternalFailure;
import com.yahoo.vespa.clustercontroller.utils.staterestapi.errors.StateRestApiException;
import com.yahoo.vespa.clustercontroller.utils.staterestapi.errors.UnknownMasterException;
@@ -61,8 +62,12 @@ public abstract class Request<Result> extends RemoteClusterControllerTask {
}
@Override
- public void handleLeadershipLost() {
- failure = new UnknownMasterException("Leadership lost before request could complete");
+ public void handleFailure(FailureCondition condition) {
+ if (condition == FailureCondition.LEADERSHIP_LOST) {
+ failure = new UnknownMasterException("Leadership lost before request could complete");
+ } else if (condition == FailureCondition.DEADLINE_EXCEEDED) {
+ failure = new DeadlineExceededException("Task exceeded its version wait deadline");
+ }
}
@Override
diff --git a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/StateChangeTest.java b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/StateChangeTest.java
index d9815241920..b10a8101c37 100644
--- a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/StateChangeTest.java
+++ b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/StateChangeTest.java
@@ -16,6 +16,7 @@ import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertTrue;
+import java.time.Duration;
import java.util.*;
import java.util.logging.Logger;
@@ -1248,16 +1249,25 @@ public class StateChangeTest extends FleetControllerTest {
private static abstract class MockTask extends RemoteClusterControllerTask {
boolean invoked = false;
boolean leadershipLost = false;
+ boolean deadlineExceeded = false;
boolean isInvoked() { return invoked; }
boolean isLeadershipLost() { return leadershipLost; }
+ boolean isDeadlineExceeded() { return deadlineExceeded; }
+
@Override
public boolean hasVersionAckDependency() { return true; }
@Override
- public void handleLeadershipLost() { this.leadershipLost = true; }
+ public void handleFailure(FailureCondition condition) {
+ if (condition == FailureCondition.LEADERSHIP_LOST) {
+ this.leadershipLost = true;
+ } else if (condition == FailureCondition.DEADLINE_EXCEEDED) {
+ this.deadlineExceeded = true;
+ }
+ }
}
// We create an explicit mock task class instead of using mock() simply because of
@@ -1541,4 +1551,29 @@ public class StateChangeTest extends FleetControllerTest {
assertTrue(task.isCompleted());
}
+ @Test
+ public void task_not_completed_within_deadline_is_failed_with_deadline_exceeded_error() throws Exception {
+ FleetControllerOptions options = defaultOptions();
+ options.setMaxDeferredTaskVersionWaitTime(Duration.ofSeconds(60));
+ RemoteTaskFixture fixture = createFixtureWith(options);
+
+ MockTask task = fixture.scheduleVersionDependentTaskWithSideEffects();
+ communicator.setShouldDeferDistributorClusterStateAcks(true);
+ fixture.processScheduledTask();
+
+ assertTrue(task.isInvoked());
+ assertFalse(task.isCompleted());
+ assertFalse(task.isDeadlineExceeded());
+
+ timer.advanceTime(59_000);
+ ctrl.tick();
+ assertFalse(task.isCompleted());
+ assertFalse(task.isDeadlineExceeded());
+
+ timer.advanceTime(1_001);
+ ctrl.tick();
+ assertTrue(task.isCompleted());
+ assertTrue(task.isDeadlineExceeded());
+ }
+
}
diff --git a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/restapiv2/SetNodeStateTest.java b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/restapiv2/SetNodeStateTest.java
index 416c57ce5d7..4fb244666a4 100644
--- a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/restapiv2/SetNodeStateTest.java
+++ b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/restapiv2/SetNodeStateTest.java
@@ -2,7 +2,9 @@
package com.yahoo.vespa.clustercontroller.core.restapiv2;
import com.yahoo.vdslib.state.NodeType;
+import com.yahoo.vespa.clustercontroller.core.RemoteClusterControllerTask;
import com.yahoo.vespa.clustercontroller.core.restapiv2.requests.SetNodeStateRequest;
+import com.yahoo.vespa.clustercontroller.utils.staterestapi.errors.DeadlineExceededException;
import com.yahoo.vespa.clustercontroller.utils.staterestapi.errors.InvalidContentException;
import com.yahoo.vespa.clustercontroller.utils.staterestapi.errors.MissingUnitException;
import com.yahoo.vespa.clustercontroller.utils.staterestapi.errors.OperationNotSupportedForUnitException;
@@ -403,15 +405,25 @@ public class SetNodeStateTest extends StateRestApiTest {
expectedException.expect(UnknownMasterException.class);
SetNodeStateRequest request = createDummySetNodeStateRequest();
- request.handleLeadershipLost();
+ request.handleFailure(RemoteClusterControllerTask.FailureCondition.LEADERSHIP_LOST);
request.getResult();
}
@Test
public void leadership_loss_marks_request_as_failed_for_early_out_response() {
SetNodeStateRequest request = createDummySetNodeStateRequest();
- request.handleLeadershipLost();
+ request.handleFailure(RemoteClusterControllerTask.FailureCondition.LEADERSHIP_LOST);
assertTrue(request.isFailed());
}
+ @Test
+ public void deadline_exceeded_fails_set_node_state_request() throws Exception {
+ expectedException.expectMessage("Task exceeded its version wait deadline");
+ expectedException.expect(DeadlineExceededException.class);
+
+ SetNodeStateRequest request = createDummySetNodeStateRequest();
+ request.handleFailure(RemoteClusterControllerTask.FailureCondition.DEADLINE_EXCEEDED);
+ request.getResult();
+ }
+
}