summaryrefslogtreecommitdiffstats
path: root/orchestrator
diff options
context:
space:
mode:
authorHåkon Hallingstad <hakon@oath.com>2018-10-23 15:44:56 +0200
committerHåkon Hallingstad <hakon@oath.com>2018-10-23 15:44:56 +0200
commit629cfc4d314f761ca632ae61639905cc9d1e3fa6 (patch)
tree7281f65a07909f1185414e20e6bce4d887496057 /orchestrator
parent123c5a40b52018090a9a7d36ab2699b85bd12328 (diff)
Enforce CC timeouts in Orchestrator 2
Diffstat (limited to 'orchestrator')
-rw-r--r--orchestrator/pom.xml6
-rw-r--r--orchestrator/src/main/java/com/yahoo/vespa/orchestrator/Orchestrator.java9
-rw-r--r--orchestrator/src/main/java/com/yahoo/vespa/orchestrator/OrchestratorContext.java70
-rw-r--r--orchestrator/src/main/java/com/yahoo/vespa/orchestrator/OrchestratorImpl.java37
-rw-r--r--orchestrator/src/main/java/com/yahoo/vespa/orchestrator/controller/ClusterControllerClientImpl.java41
-rw-r--r--orchestrator/src/main/java/com/yahoo/vespa/orchestrator/controller/ClusterControllerClientTimeouts.java125
-rw-r--r--orchestrator/src/main/java/com/yahoo/vespa/orchestrator/controller/RetryingClusterControllerClientFactory.java34
-rw-r--r--orchestrator/src/main/java/com/yahoo/vespa/orchestrator/controller/SingleInstanceClusterControllerClientFactory.java54
-rw-r--r--orchestrator/src/main/java/com/yahoo/vespa/orchestrator/model/NodeGroup.java2
-rw-r--r--orchestrator/src/main/java/com/yahoo/vespa/orchestrator/status/InMemoryStatusService.java7
-rw-r--r--orchestrator/src/main/java/com/yahoo/vespa/orchestrator/status/StatusService.java8
-rw-r--r--orchestrator/src/main/java/com/yahoo/vespa/orchestrator/status/ZookeeperStatusService.java29
-rw-r--r--orchestrator/src/test/java/com/yahoo/vespa/orchestrator/OrchestratorImplTest.java16
-rw-r--r--orchestrator/src/test/java/com/yahoo/vespa/orchestrator/controller/ClusterControllerClientTest.java8
-rw-r--r--orchestrator/src/test/java/com/yahoo/vespa/orchestrator/controller/ClusterControllerClientTimeoutsTest.java155
-rw-r--r--orchestrator/src/test/java/com/yahoo/vespa/orchestrator/controller/RetryingClusterControllerClientFactoryTest.java54
-rw-r--r--orchestrator/src/test/java/com/yahoo/vespa/orchestrator/controller/SingleInstanceClusterControllerClientFactoryTest.java116
-rw-r--r--orchestrator/src/test/java/com/yahoo/vespa/orchestrator/resources/HostResourceTest.java10
-rw-r--r--orchestrator/src/test/java/com/yahoo/vespa/orchestrator/status/ZookeeperStatusServiceTest.java21
19 files changed, 496 insertions, 306 deletions
diff --git a/orchestrator/pom.xml b/orchestrator/pom.xml
index ae05a1908c9..37d308111ae 100644
--- a/orchestrator/pom.xml
+++ b/orchestrator/pom.xml
@@ -117,6 +117,12 @@
<scope>provided</scope>
</dependency>
<dependency>
+ <groupId>com.yahoo.vespa</groupId>
+ <artifactId>testutil</artifactId>
+ <version>${project.version}</version>
+ <scope>test</scope>
+ </dependency>
+ <dependency>
<groupId>org.apache.curator</groupId>
<artifactId>curator-test</artifactId>
<scope>test</scope>
diff --git a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/Orchestrator.java b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/Orchestrator.java
index ab61a51c418..e37f5ad4d70 100644
--- a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/Orchestrator.java
+++ b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/Orchestrator.java
@@ -74,15 +74,6 @@ public interface Orchestrator {
void acquirePermissionToRemove(HostName hostName) throws OrchestrationException;
/**
- * Suspend normal operations for a group of nodes in the same application.
- *
- * @param nodeGroup The group of nodes in an application.
- * @throws HostStateChangeDeniedException if the request cannot be meet due to policy constraints.
- * @throws HostNameNotFoundException if any hostnames in the node group is not recognized
- */
- void suspendGroup(NodeGroup nodeGroup) throws HostStateChangeDeniedException, HostNameNotFoundException;
-
- /**
* Suspend several hosts. On failure, all hosts are resumed before exiting the method with an exception.
*/
void suspendAll(HostName parentHostname, List<HostName> hostNames)
diff --git a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/OrchestratorContext.java b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/OrchestratorContext.java
index 6577b4b96cc..25c6c780130 100644
--- a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/OrchestratorContext.java
+++ b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/OrchestratorContext.java
@@ -1,40 +1,74 @@
// Copyright 2018 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.vespa.orchestrator;
-import com.google.common.util.concurrent.UncheckedTimeoutException;
import com.yahoo.time.TimeBudget;
+import com.yahoo.vespa.orchestrator.controller.ClusterControllerClientTimeouts;
import java.time.Clock;
import java.time.Duration;
+import java.time.Instant;
+import java.util.Optional;
/**
- * Context for the Orchestrator, e.g. timeout management.
+ * Context for an operation (or suboperation) of the Orchestrator that needs to pass through to the backend,
+ * e.g. timeout management and probing.
*
- * @author hakon
+ * @author hakonhall
*/
public class OrchestratorContext {
- private static final Duration DEFAULT_TIMEOUT = Duration.ofSeconds(10);
+ private static final Duration DEFAULT_TIMEOUT_FOR_SINGLE_OP = Duration.ofSeconds(10);
+ private static final Duration DEFAULT_TIMEOUT_FOR_BATCH_OP = Duration.ofSeconds(60);
- private TimeBudget timeBudget;
+ private final Clock clock;
+ private final TimeBudget timeBudget;
+ private boolean probe;
- public OrchestratorContext(Clock clock) {
- this.timeBudget = TimeBudget.fromNow(clock, DEFAULT_TIMEOUT);
+ public static OrchestratorContext createContextForMultiAppOp(Clock clock) {
+ return new OrchestratorContext(clock, TimeBudget.fromNow(clock, DEFAULT_TIMEOUT_FOR_BATCH_OP), true);
}
- /** Get the original timeout in seconds. */
- public long getOriginalTimeoutInSeconds() {
- return timeBudget.originalTimeout().get().getSeconds();
+ public static OrchestratorContext createContextForSingleAppOp(Clock clock) {
+ return new OrchestratorContext(clock, TimeBudget.fromNow(clock, DEFAULT_TIMEOUT_FOR_SINGLE_OP), true);
}
- /**
- * Get timeout for a suboperation that should take up {@code shareOfRemaining} of the
- * remaining time, or throw an {@link UncheckedTimeoutException} if timed out.
- */
- public float getSuboperationTimeoutInSeconds(float shareOfRemaining) {
- if (!(0f <= shareOfRemaining && shareOfRemaining <= 1.0f)) {
- throw new IllegalArgumentException("Share of remaining time must be between 0 and 1: " + shareOfRemaining);
+ private OrchestratorContext(Clock clock, TimeBudget timeBudget, boolean probe) {
+ this.clock = clock;
+ this.timeBudget = timeBudget;
+ this.probe = probe;
+ }
+
+ public Duration getTimeLeft() {
+ return timeBudget.timeLeftOrThrow().get();
+ }
+
+ public ClusterControllerClientTimeouts getClusterControllerTimeouts(String clusterName) {
+ return new ClusterControllerClientTimeouts(clusterName, timeBudget.timeLeftAsTimeBudget());
+ }
+
+
+ /** Mark this operation as a non-committal probe. */
+ public OrchestratorContext markAsProbe() {
+ this.probe = true;
+ return this;
+ }
+
+ /** Whether the operation is a no-op probe to test whether it would have succeeded, if it had been committal. */
+ public boolean isProbe() {
+ return probe;
+ }
+
+ /** Create an OrchestratorContext to use within an application lock. */
+ public OrchestratorContext createSubcontextForApplication() {
+ Instant now = clock.instant();
+ Instant deadline = timeBudget.deadline().get();
+ Instant maxDeadline = now.plus(DEFAULT_TIMEOUT_FOR_SINGLE_OP);
+ if (maxDeadline.compareTo(deadline) < 0) {
+ deadline = maxDeadline;
}
- return shareOfRemaining * timeBudget.timeLeftOrThrow().get().toMillis() / 1000.0f;
+ return new OrchestratorContext(
+ clock,
+ TimeBudget.from(clock, now, Optional.of(Duration.between(now, deadline))),
+ probe);
}
}
diff --git a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/OrchestratorImpl.java b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/OrchestratorImpl.java
index ad8a35312e4..3a2673d9dbc 100644
--- a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/OrchestratorImpl.java
+++ b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/OrchestratorImpl.java
@@ -105,7 +105,9 @@ public class OrchestratorImpl implements Orchestrator {
@Override
public void setNodeStatus(HostName hostName, HostStatus status) throws OrchestrationException {
ApplicationInstanceReference reference = getApplicationInstance(hostName).reference();
- try (MutableStatusRegistry statusRegistry = statusService.lockApplicationInstance_forCurrentThreadOnly(reference)) {
+ OrchestratorContext context = OrchestratorContext.createContextForSingleAppOp(clock);
+ try (MutableStatusRegistry statusRegistry = statusService
+ .lockApplicationInstance_forCurrentThreadOnly(reference, context.getTimeLeft())) {
statusRegistry.setHostState(hostName, status);
}
}
@@ -127,10 +129,10 @@ public class OrchestratorImpl implements Orchestrator {
ApplicationInstance appInstance = getApplicationInstance(hostName);
- OrchestratorContext context = new OrchestratorContext(clock);
+ OrchestratorContext context = OrchestratorContext.createContextForSingleAppOp(clock);
try (MutableStatusRegistry statusRegistry = statusService.lockApplicationInstance_forCurrentThreadOnly(
appInstance.reference(),
- context.getOriginalTimeoutInSeconds())) {
+ context.getTimeLeft())) {
final HostStatus currentHostState = statusRegistry.getHostStatus(hostName);
if (HostStatus.NO_REMARKS == currentHostState) {
@@ -148,7 +150,7 @@ public class OrchestratorImpl implements Orchestrator {
public void suspend(HostName hostName) throws HostStateChangeDeniedException, HostNameNotFoundException {
ApplicationInstance appInstance = getApplicationInstance(hostName);
NodeGroup nodeGroup = new NodeGroup(appInstance, hostName);
- suspendGroup(nodeGroup);
+ suspendGroup(OrchestratorContext.createContextForSingleAppOp(clock), nodeGroup);
}
@Override
@@ -156,10 +158,10 @@ public class OrchestratorImpl implements Orchestrator {
ApplicationInstance appInstance = getApplicationInstance(hostName);
NodeGroup nodeGroup = new NodeGroup(appInstance, hostName);
- OrchestratorContext context = new OrchestratorContext(clock);
+ OrchestratorContext context = OrchestratorContext.createContextForSingleAppOp(clock);
try (MutableStatusRegistry statusRegistry = statusService.lockApplicationInstance_forCurrentThreadOnly(
appInstance.reference(),
- context.getOriginalTimeoutInSeconds())) {
+ context.getTimeLeft())) {
ApplicationApi applicationApi = new ApplicationApiImpl(
nodeGroup,
statusRegistry,
@@ -169,16 +171,19 @@ public class OrchestratorImpl implements Orchestrator {
}
}
- // Public for testing purposes
- @Override
- public void suspendGroup(NodeGroup nodeGroup) throws HostStateChangeDeniedException, HostNameNotFoundException {
+ /**
+ * Suspend normal operations for a group of nodes in the same application.
+ *
+ * @param nodeGroup The group of nodes in an application.
+ * @throws HostStateChangeDeniedException if the request cannot be met due to policy constraints.
+ */
+ void suspendGroup(OrchestratorContext context, NodeGroup nodeGroup) throws HostStateChangeDeniedException {
ApplicationInstanceReference applicationReference = nodeGroup.getApplicationReference();
- OrchestratorContext context = new OrchestratorContext(clock);
try (MutableStatusRegistry hostStatusRegistry =
statusService.lockApplicationInstance_forCurrentThreadOnly(
applicationReference,
- context.getOriginalTimeoutInSeconds())) {
+ context.getTimeLeft())) {
ApplicationInstanceStatus appStatus = statusService.forApplicationInstance(applicationReference).getApplicationInstanceStatus();
if (appStatus == ApplicationInstanceStatus.ALLOWED_TO_BE_DOWN) {
return;
@@ -224,14 +229,12 @@ public class OrchestratorImpl implements Orchestrator {
throw new BatchHostNameNotFoundException(parentHostname, hostNames, e);
}
+ OrchestratorContext context = OrchestratorContext.createContextForMultiAppOp(clock);
for (NodeGroup nodeGroup : nodeGroupsOrderedByApplication) {
try {
- suspendGroup(nodeGroup);
+ suspendGroup(context.createSubcontextForApplication(), nodeGroup);
} catch (HostStateChangeDeniedException e) {
throw new BatchHostStateChangeDeniedException(parentHostname, nodeGroup, e);
- } catch (HostNameNotFoundException e) {
- // Should never get here since since we would have received HostNameNotFoundException earlier.
- throw new BatchHostNameNotFoundException(parentHostname, hostNames, e);
} catch (RuntimeException e) {
throw new BatchInternalErrorException(parentHostname, nodeGroup, e);
}
@@ -301,12 +304,12 @@ public class OrchestratorImpl implements Orchestrator {
private void setApplicationStatus(ApplicationId appId, ApplicationInstanceStatus status)
throws ApplicationStateChangeDeniedException, ApplicationIdNotFoundException{
- OrchestratorContext context = new OrchestratorContext(clock);
+ OrchestratorContext context = OrchestratorContext.createContextForSingleAppOp(clock);
ApplicationInstanceReference appRef = OrchestratorUtil.toApplicationInstanceReference(appId, instanceLookupService);
try (MutableStatusRegistry statusRegistry =
statusService.lockApplicationInstance_forCurrentThreadOnly(
appRef,
- context.getOriginalTimeoutInSeconds())) {
+ context.getTimeLeft())) {
// Short-circuit if already in wanted state
if (status == statusRegistry.getApplicationInstanceStatus()) return;
diff --git a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/controller/ClusterControllerClientImpl.java b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/controller/ClusterControllerClientImpl.java
index 467b534f809..50782ea081f 100644
--- a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/controller/ClusterControllerClientImpl.java
+++ b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/controller/ClusterControllerClientImpl.java
@@ -1,6 +1,7 @@
// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.vespa.orchestrator.controller;
+import com.google.common.util.concurrent.UncheckedTimeoutException;
import com.yahoo.vespa.jaxrs.client.JaxRsStrategy;
import com.yahoo.vespa.orchestrator.OrchestratorContext;
@@ -15,23 +16,6 @@ public class ClusterControllerClientImpl implements ClusterControllerClient{
public static final String REQUEST_REASON = "Orchestrator";
- // On setNodeState calls against the CC ensemble.
- //
- // We'd like to set a timeout for the request to the first CC such that if the first
- // CC is faulty, there's sufficient time to send the request to the second and third CC.
- // The timeouts would be:
- // timeout(1. request) = SHARE_REMAINING_TIME * T
- // timeout(2. request) = SHARE_REMAINING_TIME * T * (1 - SHARE_REMAINING_TIME)
- // timeout(3. request) = SHARE_REMAINING_TIME * T * (1 - SHARE_REMAINING_TIME)^2
- //
- // Using a share of 50% gives approximately:
- // timeout(1. request) = T * 0.5
- // timeout(2. request) = T * 0.25
- // timeout(3. request) = T * 0.125
- //
- // which seems fine
- public static final float SHARE_REMAINING_TIME = 0.5f;
-
private final JaxRsStrategy<ClusterControllerJaxRsApi> clusterControllerApi;
private final String clusterName;
@@ -52,15 +36,16 @@ public class ClusterControllerClientImpl implements ClusterControllerClient{
ClusterControllerNodeState wantedState) throws IOException {
ClusterControllerStateRequest.State state = new ClusterControllerStateRequest.State(wantedState, REQUEST_REASON);
ClusterControllerStateRequest stateRequest = new ClusterControllerStateRequest(state, ClusterControllerStateRequest.Condition.SAFE);
+ ClusterControllerClientTimeouts timeouts = context.getClusterControllerTimeouts(clusterName);
try {
return clusterControllerApi.apply(api -> api.setNodeState(
clusterName,
storageNodeIndex,
- context.getSuboperationTimeoutInSeconds(SHARE_REMAINING_TIME),
- stateRequest)
- );
- } catch (IOException e) {
+ timeouts.getServerTimeout().toMillis() / 1000.0f,
+ stateRequest),
+ timeouts);
+ } catch (IOException | UncheckedTimeoutException e) {
String message = String.format(
"Giving up setting %s for storage node with index %d in cluster %s",
stateRequest,
@@ -79,16 +64,18 @@ public class ClusterControllerClientImpl implements ClusterControllerClient{
@Override
public ClusterControllerStateResponse setApplicationState(
OrchestratorContext context,
- final ClusterControllerNodeState wantedState) throws IOException {
- final ClusterControllerStateRequest.State state = new ClusterControllerStateRequest.State(wantedState, REQUEST_REASON);
- final ClusterControllerStateRequest stateRequest = new ClusterControllerStateRequest(state, ClusterControllerStateRequest.Condition.FORCE);
+ ClusterControllerNodeState wantedState) throws IOException {
+ ClusterControllerStateRequest.State state = new ClusterControllerStateRequest.State(wantedState, REQUEST_REASON);
+ ClusterControllerStateRequest stateRequest = new ClusterControllerStateRequest(state, ClusterControllerStateRequest.Condition.FORCE);
+ ClusterControllerClientTimeouts timeouts = context.getClusterControllerTimeouts(clusterName);
try {
return clusterControllerApi.apply(api -> api.setClusterState(
clusterName,
- context.getSuboperationTimeoutInSeconds(SHARE_REMAINING_TIME),
- stateRequest));
- } catch (IOException e) {
+ timeouts.getServerTimeout().toMillis() / 1000.0f,
+ stateRequest),
+ timeouts);
+ } catch (IOException | UncheckedTimeoutException e) {
final String message = String.format(
"Giving up setting %s for cluster %s",
stateRequest,
diff --git a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/controller/ClusterControllerClientTimeouts.java b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/controller/ClusterControllerClientTimeouts.java
new file mode 100644
index 00000000000..6c08ae53e70
--- /dev/null
+++ b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/controller/ClusterControllerClientTimeouts.java
@@ -0,0 +1,125 @@
+package com.yahoo.vespa.orchestrator.controller;
+
+import com.google.common.util.concurrent.UncheckedTimeoutException;
+import com.yahoo.time.TimeBudget;
+import com.yahoo.vespa.jaxrs.client.JaxRsTimeouts;
+
+import java.time.Duration;
+
+/**
+ * Calculates various timeouts associated with a REST call from the Orchestrator to the Cluster Controller.
+ *
+ * <p>Timeout handling of HTTP messaging is fundamentally flawed in various Java implementations.
+ * We would like to specify a max time for the whole operation (connect, send request, and receive response).
+ * Jersey JAX-RS implementation and the Apache HTTP client library provides a way to set the connect timeout C
+ * and read timeout R. So if the operation takes NR reads, and the writes takes TW time,
+ * the theoretical max time is: T = C + R * NR + TW. With both NR and TW unknown, there's no way to
+ * set a proper C and R.</p>
+ *
+ * <p>The various timeouts are set according to the following considerations:</p>
+ *
+ * <ol>
+ * <li>Some time is reserved for the execution in this process, e.g. execution leading to the REST call,
+ * handling of the response, exception handling, etc, such that we can finish processing this request
+ * before the {@link #timeBudget} deadline. This is typically in the order of ms.</li>
+ * <li>A timeout will be passed to the Cluster Controller backend. We'll give a timeout such that if one
+ * CC times out, the next CC will be given exactly the same timeout. This may or may not be a good strategy:
+ * (A) There's typically a 3rd CC. But if the first 2 fails with timeout, the chance the last is OK
+ * is negligible. (B) If picking the CC is random, then giving the full timeout to the first
+ * should be sufficient since a later retry will hit the healthy CC. (C) Because we have been using
+ * DROP in networking rules, clients may time out (host out of app or whatever). This would suggest
+ * allowing more than 1 full request.</li>
+ * <li>The timeout passed to the CC backend should be such that if it honors that, the Orchestrator
+ * should not time out. This means some kernel and network overhead should be subtracted from the timeout
+ * passed to the CC.</li>
+ * <li>We're only able to set the connect and read/write timeouts(!) Since we're communicating within
+ * data center, assume connections are in the order of ms, while a single read may stall close up to the CC
+ * timeout.</li>
+ * </ol>
+ *
+ * @author hakonhall
+ */
+public class ClusterControllerClientTimeouts implements JaxRsTimeouts {
+ // In data center connect timeout
+ static final Duration CONNECT_TIMEOUT = Duration.ofMillis(50);
+ // Per call overhead
+ static final Duration IN_PROCESS_OVERHEAD_PER_CALL = Duration.ofMillis(50);
+ // In data center kernel and network overhead.
+ static final Duration NETWORK_OVERHEAD_PER_CALL = CONNECT_TIMEOUT;
+ // Minimum time reserved for post-RPC processing to finish BEFORE the deadline, including ZK write.
+ static final Duration IN_PROCESS_OVERHEAD = Duration.ofMillis(100);
+ // Number of JAX-RS RPC calls to account for within the time budget.
+ static final int NUM_CALLS = 2;
+ // Minimum server-side timeout
+ static final Duration MIN_SERVER_TIMEOUT = Duration.ofMillis(10);
+
+ private final String clusterName;
+ private final TimeBudget timeBudget;
+ private final Duration maxClientTimeout;
+
+ /**
+ * Creates a timeouts instance.
+ *
+ * The {@link #timeBudget} SHOULD be the time budget for a single logical call to the Cluster Controller.
+ * A logical call to CC may in fact call the CC several times, if the first ones are down and/or not
+ * the master.
+ *
+ * @param clusterName The name of the content cluster this request is for.
+ * @param timeBudget The time budget for a single logical call to the Cluster Controller.
+ */
+ public ClusterControllerClientTimeouts(String clusterName, TimeBudget timeBudget) {
+ this.clusterName = clusterName;
+ this.timeBudget = timeBudget;
+
+ // timeLeft = inProcessOverhead + numCalls * clientTimeout
+ maxClientTimeout = timeBudget.originalTimeout().get().minus(IN_PROCESS_OVERHEAD).dividedBy(NUM_CALLS);
+ }
+
+ @Override
+ public Duration getConnectTimeout() {
+ return CONNECT_TIMEOUT;
+ }
+
+ @Override
+ public Duration getReadTimeout() {
+ Duration timeLeft = timeBudget.timeLeft().get();
+ if (timeLeft.toMillis() <= 0) {
+ throw new UncheckedTimeoutException("Exceeded the timeout " + timeBudget.originalTimeout().get() +
+ " against content cluster '" + clusterName + "' by " + timeLeft.negated());
+ }
+
+ Duration clientTimeout = min(timeLeft, maxClientTimeout);
+ verifyPositive(timeLeft, maxClientTimeout);
+
+ // clientTimeout = overheadPerCall + connectTimeout + readTimeout
+ Duration readTimeout = clientTimeout.minus(IN_PROCESS_OVERHEAD_PER_CALL).minus(CONNECT_TIMEOUT);
+ verifyPositive(timeLeft, readTimeout);
+
+ return readTimeout;
+ }
+
+ public Duration getServerTimeout() {
+ // readTimeout = networkOverhead + serverTimeout
+ Duration serverTimeout = getReadTimeout().minus(NETWORK_OVERHEAD_PER_CALL);
+ if (serverTimeout.toMillis() < MIN_SERVER_TIMEOUT.toMillis()) {
+ throw new UncheckedTimeoutException("Server would be given too little time to complete: " +
+ serverTimeout + ". Original timeout was " + timeBudget.originalTimeout().get());
+ }
+
+ return serverTimeout;
+ }
+
+ private static Duration min(Duration a, Duration b) {
+ return a.compareTo(b) < 0 ? a : b;
+ }
+
+ private void verifyLargerThan(Duration timeLeft, Duration availableDuration) {
+ if (availableDuration.toMillis() <= 0) {
+ throw new UncheckedTimeoutException("Too little time left (" + timeLeft +
+ ") to call content cluster '" + clusterName +
+ "', original timeout was " + timeBudget.originalTimeout().get());
+ }
+ }
+
+ private void verifyPositive(Duration timeLeft, Duration duration) { verifyLargerThan(timeLeft, duration); }
+}
diff --git a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/controller/RetryingClusterControllerClientFactory.java b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/controller/RetryingClusterControllerClientFactory.java
index 5571eedeec6..7a0b96c2231 100644
--- a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/controller/RetryingClusterControllerClientFactory.java
+++ b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/controller/RetryingClusterControllerClientFactory.java
@@ -8,9 +8,8 @@ import com.yahoo.vespa.jaxrs.client.JaxRsStrategy;
import com.yahoo.vespa.jaxrs.client.JaxRsStrategyFactory;
import com.yahoo.vespa.jaxrs.client.JerseyJaxRsClientFactory;
+import java.util.HashSet;
import java.util.List;
-import java.util.Set;
-import java.util.stream.Collectors;
/**
* @author bakksjo
@@ -21,14 +20,12 @@ public class RetryingClusterControllerClientFactory implements ClusterController
public static final int HARDCODED_CLUSTERCONTROLLER_PORT = 19050;
public static final String CLUSTERCONTROLLER_API_PATH = "/";
public static final String CLUSTERCONTROLLER_SCHEME = "http";
- private static final int CLUSTER_CONTROLLER_CONNECT_TIMEOUT_MS = 1000;
- private static final int CLUSTER_CONTROLLER_READ_TIMEOUT_MS = 1000;
private JaxRsClientFactory jaxRsClientFactory;
@Inject
public RetryingClusterControllerClientFactory() {
- this(new JerseyJaxRsClientFactory(CLUSTER_CONTROLLER_CONNECT_TIMEOUT_MS, CLUSTER_CONTROLLER_READ_TIMEOUT_MS));
+ this(new JerseyJaxRsClientFactory());
}
public RetryingClusterControllerClientFactory(JaxRsClientFactory jaxRsClientFactory) {
@@ -36,19 +33,20 @@ public class RetryingClusterControllerClientFactory implements ClusterController
}
@Override
- public ClusterControllerClient createClient(List<HostName> clusterControllers,
- String clusterName) {
- Set<HostName> clusterControllerSet = clusterControllers.stream().collect(Collectors.toSet());
- JaxRsStrategy<ClusterControllerJaxRsApi> jaxRsApi
- = new JaxRsStrategyFactory(clusterControllerSet, HARDCODED_CLUSTERCONTROLLER_PORT, jaxRsClientFactory, CLUSTERCONTROLLER_SCHEME)
- .apiWithRetries(ClusterControllerJaxRsApi.class, CLUSTERCONTROLLER_API_PATH)
- // Use max iteration 1. The JaxRsStrategyFactory will try host 1, 2, then 3:
- // - If host 1 responds, it will redirect to master if necessary. Otherwise
- // - If host 2 responds, it will redirect to master if necessary. Otherwise
- // - If host 3 responds, it may redirect to master if necessary (if they're up
- // after all), but more likely there's no quorum and this will fail too.
- .setMaxIterations(1);
+ public ClusterControllerClient createClient(List<HostName> clusterControllers, String clusterName) {
+ JaxRsStrategy<ClusterControllerJaxRsApi> jaxRsApi =
+ new JaxRsStrategyFactory(
+ new HashSet<>(clusterControllers),
+ HARDCODED_CLUSTERCONTROLLER_PORT,
+ jaxRsClientFactory,
+ CLUSTERCONTROLLER_SCHEME)
+ .apiWithRetries(ClusterControllerJaxRsApi.class, CLUSTERCONTROLLER_API_PATH)
+ // Use max iteration 1. The JaxRsStrategyFactory will try host 1, 2, then 3:
+ // - If host 1 responds, it will redirect to master if necessary. Otherwise
+ // - If host 2 responds, it will redirect to master if necessary. Otherwise
+ // - If host 3 responds, it may redirect to master if necessary (if they're up
+ // after all), but more likely there's no quorum and this will fail too.
+ .setMaxIterations(1);
return new ClusterControllerClientImpl(jaxRsApi, clusterName);
}
-
}
diff --git a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/controller/SingleInstanceClusterControllerClientFactory.java b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/controller/SingleInstanceClusterControllerClientFactory.java
deleted file mode 100644
index 7459f0a6b11..00000000000
--- a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/controller/SingleInstanceClusterControllerClientFactory.java
+++ /dev/null
@@ -1,54 +0,0 @@
-// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
-package com.yahoo.vespa.orchestrator.controller;
-
-import com.yahoo.log.LogLevel;
-import com.yahoo.vespa.applicationmodel.HostName;
-import com.yahoo.vespa.jaxrs.client.JaxRsClientFactory;
-import com.yahoo.vespa.jaxrs.client.JaxRsStrategy;
-import com.yahoo.vespa.jaxrs.client.NoRetryJaxRsStrategy;
-
-import java.util.List;
-import java.util.logging.Logger;
-
-/**
- * @author bakksjo
- */
-public class SingleInstanceClusterControllerClientFactory implements ClusterControllerClientFactory {
-
- public static final int CLUSTERCONTROLLER_HARDCODED_PORT = 19050;
- public static final String CLUSTERCONTROLLER_HARDCODED_SCHEME = "http";
- public static final String CLUSTERCONTROLLER_API_PATH = "/";
-
- private static final Logger log = Logger.getLogger(SingleInstanceClusterControllerClientFactory.class.getName());
-
- private JaxRsClientFactory jaxRsClientFactory;
-
- public SingleInstanceClusterControllerClientFactory(JaxRsClientFactory jaxRsClientFactory) {
- this.jaxRsClientFactory = jaxRsClientFactory;
- }
-
- @Override
- public ClusterControllerClient createClient(List<HostName> clusterControllers,
- String clusterName) {
- if (clusterControllers.isEmpty()) {
- throw new IllegalArgumentException("No cluster controller instances found");
- }
- HostName controllerHostName = clusterControllers.iterator().next();
- int port = CLUSTERCONTROLLER_HARDCODED_PORT; // TODO: Get this from service monitor.
-
- log.log(LogLevel.DEBUG, () ->
- "For cluster '" + clusterName + "' with controllers " + clusterControllers
- + ", creating api client for " + controllerHostName.s() + ":" + port);
-
- JaxRsStrategy<ClusterControllerJaxRsApi> strategy = new NoRetryJaxRsStrategy<>(
- controllerHostName,
- port,
- jaxRsClientFactory,
- ClusterControllerJaxRsApi.class,
- CLUSTERCONTROLLER_API_PATH,
- CLUSTERCONTROLLER_HARDCODED_SCHEME);
-
- return new ClusterControllerClientImpl(strategy, clusterName);
- }
-
-}
diff --git a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/model/NodeGroup.java b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/model/NodeGroup.java
index ed506c82079..74b4b534acc 100644
--- a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/model/NodeGroup.java
+++ b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/model/NodeGroup.java
@@ -43,7 +43,7 @@ public class NodeGroup {
}
public List<HostName> getHostNames() {
- return hostNames.stream().collect(Collectors.toList()).stream().sorted().collect(Collectors.toList());
+ return hostNames.stream().sorted().collect(Collectors.toList());
}
public String toCommaSeparatedString() {
diff --git a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/status/InMemoryStatusService.java b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/status/InMemoryStatusService.java
index c5ae553a98c..bd5eb6f3e29 100644
--- a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/status/InMemoryStatusService.java
+++ b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/status/InMemoryStatusService.java
@@ -4,6 +4,7 @@ package com.yahoo.vespa.orchestrator.status;
import com.yahoo.vespa.applicationmodel.ApplicationInstanceReference;
import com.yahoo.vespa.applicationmodel.HostName;
+import java.time.Duration;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
@@ -42,15 +43,11 @@ public class InMemoryStatusService implements StatusService {
};
}
- @Override
- public MutableStatusRegistry lockApplicationInstance_forCurrentThreadOnly(ApplicationInstanceReference applicationInstanceReference) {
- return lockApplicationInstance_forCurrentThreadOnly(applicationInstanceReference, 10);
- }
@Override
public MutableStatusRegistry lockApplicationInstance_forCurrentThreadOnly(
ApplicationInstanceReference applicationInstanceReference,
- long timeoutSeconds) {
+ Duration timeout) {
Lock lock = instanceLockService.get(applicationInstanceReference);
return new InMemoryMutableStatusRegistry(lock, applicationInstanceReference);
}
diff --git a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/status/StatusService.java b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/status/StatusService.java
index c47be096242..768e5290412 100644
--- a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/status/StatusService.java
+++ b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/status/StatusService.java
@@ -3,6 +3,7 @@ package com.yahoo.vespa.orchestrator.status;
import com.yahoo.vespa.applicationmodel.ApplicationInstanceReference;
+import java.time.Duration;
import java.util.Set;
/**
@@ -24,7 +25,7 @@ public interface StatusService {
* possibly inconsistent snapshot values. It is not recommended that this method is used for anything other
* than monitoring, logging, debugging, etc. It should never be used for multi-step operations (e.g.
* read-then-write) where consistency is required. For those cases, use
- * {@link #lockApplicationInstance_forCurrentThreadOnly(ApplicationInstanceReference)}.
+ * {@link #lockApplicationInstance_forCurrentThreadOnly(ApplicationInstanceReference, Duration)}.
*/
ReadOnlyStatusRegistry forApplicationInstance(ApplicationInstanceReference applicationInstanceReference);
@@ -52,12 +53,9 @@ public interface StatusService {
* this case, subsequent mutating operations will fail, but previous mutating operations are NOT rolled back.
* This may leave the registry in an inconsistent state (as judged by the client code).
*/
- MutableStatusRegistry lockApplicationInstance_forCurrentThreadOnly(ApplicationInstanceReference applicationInstanceReference);
-
- /** Lock application instance with timeout. */
     MutableStatusRegistry lockApplicationInstance_forCurrentThreadOnly(
             ApplicationInstanceReference applicationInstanceReference,
-            long timeoutSeconds);
+            Duration timeout);
/**
* Returns all application instances that are allowed to be down. The intention is to use this
diff --git a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/status/ZookeeperStatusService.java b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/status/ZookeeperStatusService.java
index deece6a4a65..7df29e038c1 100644
--- a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/status/ZookeeperStatusService.java
+++ b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/status/ZookeeperStatusService.java
@@ -56,21 +56,6 @@ public class ZookeeperStatusService implements StatusService {
};
}
- /**
- * 1) locks the status service for an application instance.
- * 2) fails all operations in this thread when the session is lost,
- * since session loss might cause the lock to be lost.
- * Since it only fails operations in this thread,
- * all operations depending on a lock, including the locking itself, must be done in this thread.
- * Note that since it is the thread that fails, all status operations in this thread will fail
- * even if they're not supposed to be guarded by this lock
- * (i.e. the request is for another applicationInstanceReference)
- */
- @Override
- public MutableStatusRegistry lockApplicationInstance_forCurrentThreadOnly(ApplicationInstanceReference applicationInstanceReference) {
- return lockApplicationInstance_forCurrentThreadOnly(applicationInstanceReference, 10);
- }
-
@Override
public Set<ApplicationInstanceReference> getAllSuspendedApplications() {
try {
@@ -93,13 +78,23 @@ public class ZookeeperStatusService implements StatusService {
}
}
+ /**
+ * 1) locks the status service for an application instance.
+ * 2) fails all operations in this thread when the session is lost,
+ * since session loss might cause the lock to be lost.
+ * Since it only fails operations in this thread,
+ * all operations depending on a lock, including the locking itself, must be done in this thread.
+ * Note that since it is the thread that fails, all status operations in this thread will fail
+ * even if they're not supposed to be guarded by this lock
+ * (i.e. the request is for another applicationInstanceReference)
+ */
@Override
public MutableStatusRegistry lockApplicationInstance_forCurrentThreadOnly(
ApplicationInstanceReference applicationInstanceReference,
- long timeoutSeconds) {
+ Duration timeout) {
String lockPath = applicationInstanceLock2Path(applicationInstanceReference);
Lock lock = new Lock(lockPath, curator);
- lock.acquire(Duration.ofSeconds(timeoutSeconds));
+ lock.acquire(timeout);
try {
return new ZkMutableStatusRegistry(lock, applicationInstanceReference);
diff --git a/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/OrchestratorImplTest.java b/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/OrchestratorImplTest.java
index 76d9398c44e..80174d05a54 100644
--- a/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/OrchestratorImplTest.java
+++ b/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/OrchestratorImplTest.java
@@ -43,6 +43,8 @@ import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertThat;
import static org.junit.Assert.assertTrue;
import static org.junit.Assert.fail;
+import static org.mockito.Matchers.any;
+import static org.mockito.Matchers.eq;
import static org.mockito.Mockito.doThrow;
import static org.mockito.Mockito.inOrder;
import static org.mockito.Mockito.mock;
@@ -245,6 +247,8 @@ public class OrchestratorImplTest {
// A spy is preferential because suspendAll() relies on delegating the hard work to suspend() and resume().
OrchestratorImpl orchestrator = spy(this.orchestrator);
+ OrchestratorContext context = mock(OrchestratorContext.class);
+
orchestrator.suspendAll(
new HostName("parentHostname"),
Arrays.asList(
@@ -257,9 +261,9 @@ public class OrchestratorImplTest {
// TEST6: tenant-id-3:application-instance-3:default
// TEST1: test-tenant-id:application:instance
InOrder order = inOrder(orchestrator);
- order.verify(orchestrator).suspendGroup(DummyInstanceLookupService.TEST3_NODE_GROUP);
- order.verify(orchestrator).suspendGroup(DummyInstanceLookupService.TEST6_NODE_GROUP);
- order.verify(orchestrator).suspendGroup(DummyInstanceLookupService.TEST1_NODE_GROUP);
+ order.verify(orchestrator).suspendGroup(any(), eq(DummyInstanceLookupService.TEST3_NODE_GROUP));
+ order.verify(orchestrator).suspendGroup(any(), eq(DummyInstanceLookupService.TEST6_NODE_GROUP));
+ order.verify(orchestrator).suspendGroup(any(), eq(DummyInstanceLookupService.TEST1_NODE_GROUP));
order.verifyNoMoreInteractions();
}
@@ -272,7 +276,7 @@ public class OrchestratorImplTest {
DummyInstanceLookupService.TEST6_HOST_NAME,
"some-constraint",
"error message");
- doThrow(supensionFailure).when(orchestrator).suspendGroup(DummyInstanceLookupService.TEST6_NODE_GROUP);
+ doThrow(supensionFailure).when(orchestrator).suspendGroup(any(), eq(DummyInstanceLookupService.TEST6_NODE_GROUP));
try {
orchestrator.suspendAll(
@@ -291,8 +295,8 @@ public class OrchestratorImplTest {
}
InOrder order = inOrder(orchestrator);
- order.verify(orchestrator).suspendGroup(DummyInstanceLookupService.TEST3_NODE_GROUP);
- order.verify(orchestrator).suspendGroup(DummyInstanceLookupService.TEST6_NODE_GROUP);
+ order.verify(orchestrator).suspendGroup(any(), eq(DummyInstanceLookupService.TEST3_NODE_GROUP));
+ order.verify(orchestrator).suspendGroup(any(), eq(DummyInstanceLookupService.TEST6_NODE_GROUP));
order.verifyNoMoreInteractions();
}
diff --git a/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/controller/ClusterControllerClientTest.java b/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/controller/ClusterControllerClientTest.java
index 228174a9b3d..91909391fe7 100644
--- a/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/controller/ClusterControllerClientTest.java
+++ b/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/controller/ClusterControllerClientTest.java
@@ -6,7 +6,9 @@ import com.yahoo.vespa.jaxrs.client.LocalPassThroughJaxRsStrategy;
import com.yahoo.vespa.orchestrator.OrchestratorContext;
import org.junit.Test;
-import static org.mockito.Matchers.anyFloat;
+import java.time.Duration;
+
+import static org.mockito.Matchers.any;
import static org.mockito.Matchers.eq;
import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.times;
@@ -28,7 +30,9 @@ public class ClusterControllerClientTest {
final ClusterControllerNodeState wantedState = ClusterControllerNodeState.MAINTENANCE;
OrchestratorContext context = mock(OrchestratorContext.class);
- when(context.getSuboperationTimeoutInSeconds(anyFloat())).thenReturn(1.0f);
+ ClusterControllerClientTimeouts timeouts = mock(ClusterControllerClientTimeouts.class);
+ when(context.getClusterControllerTimeouts(any())).thenReturn(timeouts);
+ when(timeouts.getServerTimeout()).thenReturn(Duration.ofSeconds(1));
clusterControllerClient.setNodeState(context, STORAGE_NODE_INDEX, wantedState);
final ClusterControllerStateRequest expectedNodeStateRequest = new ClusterControllerStateRequest(
diff --git a/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/controller/ClusterControllerClientTimeoutsTest.java b/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/controller/ClusterControllerClientTimeoutsTest.java
new file mode 100644
index 00000000000..36dd4c4a83f
--- /dev/null
+++ b/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/controller/ClusterControllerClientTimeoutsTest.java
@@ -0,0 +1,155 @@
+package com.yahoo.vespa.orchestrator.controller;
+
+import com.google.common.util.concurrent.UncheckedTimeoutException;
+import com.yahoo.test.ManualClock;
+import com.yahoo.time.TimeBudget;
+import org.junit.Before;
+import org.junit.Test;
+
+import java.time.Duration;
+import java.util.Optional;
+
+import static com.yahoo.vespa.orchestrator.controller.ClusterControllerClientTimeouts.CONNECT_TIMEOUT;
+import static com.yahoo.vespa.orchestrator.controller.ClusterControllerClientTimeouts.IN_PROCESS_OVERHEAD;
+import static com.yahoo.vespa.orchestrator.controller.ClusterControllerClientTimeouts.IN_PROCESS_OVERHEAD_PER_CALL;
+import static com.yahoo.vespa.orchestrator.controller.ClusterControllerClientTimeouts.MIN_SERVER_TIMEOUT;
+import static com.yahoo.vespa.orchestrator.controller.ClusterControllerClientTimeouts.NETWORK_OVERHEAD_PER_CALL;
+import static com.yahoo.vespa.orchestrator.controller.ClusterControllerClientTimeouts.NUM_CALLS;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.fail;
+
+public class ClusterControllerClientTimeoutsTest {
+ // The minimum time left for any invocation of prepareForImmediateJaxRsCall().
+ private static final Duration MINIMUM_TIME_LEFT = IN_PROCESS_OVERHEAD_PER_CALL
+ .plus(CONNECT_TIMEOUT)
+ .plus(NETWORK_OVERHEAD_PER_CALL)
+ .plus(MIN_SERVER_TIMEOUT);
+ static {
+ assertEquals(Duration.ofMillis(160), MINIMUM_TIME_LEFT);
+ }
+
+ // The minimum time left (= original time) which is required to allow any requests to the CC.
+ private static final Duration MINIMUM_ORIGINAL_TIMEOUT = MINIMUM_TIME_LEFT
+ .multipliedBy(NUM_CALLS)
+ .plus(IN_PROCESS_OVERHEAD);
+ static {
+ assertEquals(Duration.ofMillis(420), MINIMUM_ORIGINAL_TIMEOUT);
+ }
+
+ private final ManualClock clock = new ManualClock();
+
+ private Duration originalTimeout;
+ private TimeBudget timeBudget;
+ private ClusterControllerClientTimeouts timeouts;
+
+ private void makeTimeouts(Duration originalTimeout) {
+ this.originalTimeout = originalTimeout;
+ this.timeBudget = TimeBudget.from(clock, clock.instant(), Optional.of(originalTimeout));
+ this.timeouts = new ClusterControllerClientTimeouts("clustername", timeBudget);
+ }
+
+ @Before
+ public void setUp() {
+ makeTimeouts(Duration.ofSeconds(3));
+ }
+
+ @Test
+ public void makes2RequestsWithMaxProcessingTime() {
+ assertEquals(Duration.ofMillis(50), timeouts.getConnectTimeout());
+ assertEquals(Duration.ofMillis(1350), timeouts.getReadTimeout());
+ assertEquals(Duration.ofMillis(1300), timeouts.getServerTimeout());
+
+ Duration maxProcessingTime = IN_PROCESS_OVERHEAD_PER_CALL
+ .plus(CONNECT_TIMEOUT)
+ .plus(timeouts.getReadTimeout());
+ assertEquals(1450, maxProcessingTime.toMillis());
+ clock.advance(maxProcessingTime);
+
+ assertEquals(Duration.ofMillis(50), timeouts.getConnectTimeout());
+ assertEquals(Duration.ofMillis(1350), timeouts.getReadTimeout());
+ assertEquals(Duration.ofMillis(1300), timeouts.getServerTimeout());
+
+ clock.advance(maxProcessingTime);
+
+ try {
+ timeouts.getServerTimeout();
+ fail();
+ } catch (UncheckedTimeoutException e) {
+ assertEquals(
+ "Too little time left (PT0.1S) to call content cluster 'clustername', original timeout was PT3S",
+ e.getMessage());
+ }
+ }
+
+ @Test
+ public void makesAtLeast3RequestsWithShortProcessingTime() {
+ assertEquals(Duration.ofMillis(50), timeouts.getConnectTimeout());
+ assertEquals(Duration.ofMillis(1350), timeouts.getReadTimeout());
+ assertEquals(Duration.ofMillis(1300), timeouts.getServerTimeout());
+
+        Duration shortProcessingTime = Duration.ofMillis(200);
+        clock.advance(shortProcessingTime);
+
+        assertEquals(Duration.ofMillis(50), timeouts.getConnectTimeout());
+        assertEquals(Duration.ofMillis(1350), timeouts.getReadTimeout());
+        assertEquals(Duration.ofMillis(1300), timeouts.getServerTimeout());
+
+        clock.advance(shortProcessingTime);
+
+ assertEquals(Duration.ofMillis(50), timeouts.getConnectTimeout());
+ assertEquals(Duration.ofMillis(1350), timeouts.getReadTimeout());
+ assertEquals(Duration.ofMillis(1300), timeouts.getServerTimeout());
+ }
+
+ @Test
+ public void alreadyTimedOut() {
+ clock.advance(Duration.ofSeconds(4));
+
+ try {
+ timeouts.getServerTimeout();
+ fail();
+ } catch (UncheckedTimeoutException e) {
+ assertEquals(
+ "Exceeded the timeout PT3S against content cluster 'clustername' by PT1S",
+ e.getMessage());
+ }
+ }
+
+ @Test
+ public void justTooLittleTime() {
+ clock.advance(originalTimeout.minus(MINIMUM_TIME_LEFT).plus(Duration.ofMillis(1)));
+ try {
+ timeouts.getServerTimeout();
+ fail();
+ } catch (UncheckedTimeoutException e) {
+ assertEquals(
+ "Server would be given too little time to complete: PT0.009S. Original timeout was PT3S",
+ e.getMessage());
+ }
+ }
+
+ @Test
+ public void justEnoughTime() {
+ clock.advance(originalTimeout.minus(MINIMUM_TIME_LEFT));
+ timeouts.getServerTimeout();
+ }
+
+ @Test
+ public void justTooLittleInitialTime() {
+ makeTimeouts(MINIMUM_ORIGINAL_TIMEOUT.minus(Duration.ofMillis(1)));
+ try {
+ timeouts.getServerTimeout();
+ fail();
+ } catch (UncheckedTimeoutException e) {
+ assertEquals(
+ "Server would be given too little time to complete: PT0.0095S. Original timeout was PT0.419S",
+ e.getMessage());
+ }
+ }
+
+ @Test
+ public void justEnoughInitialTime() {
+ makeTimeouts(MINIMUM_ORIGINAL_TIMEOUT);
+ timeouts.getServerTimeout();
+ }
+} \ No newline at end of file
diff --git a/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/controller/RetryingClusterControllerClientFactoryTest.java b/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/controller/RetryingClusterControllerClientFactoryTest.java
new file mode 100644
index 00000000000..1f505991476
--- /dev/null
+++ b/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/controller/RetryingClusterControllerClientFactoryTest.java
@@ -0,0 +1,54 @@
+package com.yahoo.vespa.orchestrator.controller;
+
+import com.yahoo.test.ManualClock;
+import com.yahoo.vespa.applicationmodel.HostName;
+import com.yahoo.vespa.jaxrs.client.JaxRsClientFactory;
+import com.yahoo.vespa.orchestrator.OrchestratorContext;
+import org.junit.Test;
+import org.mockito.ArgumentCaptor;
+
+import java.io.IOException;
+import java.time.Clock;
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+import static org.junit.Assert.assertEquals;
+import static org.mockito.Matchers.any;
+import static org.mockito.Matchers.eq;
+import static org.mockito.Mockito.mock;
+import static org.mockito.Mockito.times;
+import static org.mockito.Mockito.verify;
+import static org.mockito.Mockito.when;
+
+public class RetryingClusterControllerClientFactoryTest {
+ private final Clock clock = new ManualClock();
+
+ @Test
+ public void verifyJerseyCallForSetNodeState() throws IOException {
+ JaxRsClientFactory clientFactory = mock(JaxRsClientFactory.class);
+ ClusterControllerJaxRsApi api = mock(ClusterControllerJaxRsApi.class);
+ when(clientFactory.createClient(any())).thenReturn(api);
+ RetryingClusterControllerClientFactory factory = new RetryingClusterControllerClientFactory(clientFactory);
+ String clusterName = "clustername";
+ HostName host1 = new HostName("host1");
+ HostName host2 = new HostName("host2");
+ HostName host3 = new HostName("host3");
+ List<HostName> clusterControllers = Arrays.asList(host1, host2, host3);
+ ClusterControllerClient client = factory.createClient(clusterControllers, clusterName);
+ OrchestratorContext context = OrchestratorContext.createContextForSingleAppOp(clock);
+ int storageNode = 2;
+ ClusterControllerNodeState wantedState = ClusterControllerNodeState.MAINTENANCE;
+ client.setNodeState(context, storageNode, wantedState);
+
+ ArgumentCaptor<ClusterControllerStateRequest> requestCaptor = ArgumentCaptor.forClass(ClusterControllerStateRequest.class);
+
+ verify(api, times(1)).setNodeState(eq(clusterName), eq(storageNode), eq(4.8f), requestCaptor.capture());
+ ClusterControllerStateRequest request = requestCaptor.getValue();
+ assertEquals(ClusterControllerStateRequest.Condition.SAFE, request.condition);
+ Map<String, Object> expectedState = new HashMap<>();
+ expectedState.put("user", new ClusterControllerStateRequest.State(wantedState, "Orchestrator"));
+ assertEquals(expectedState, request.state);
+ }
+} \ No newline at end of file
diff --git a/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/controller/SingleInstanceClusterControllerClientFactoryTest.java b/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/controller/SingleInstanceClusterControllerClientFactoryTest.java
deleted file mode 100644
index 4dabae14add..00000000000
--- a/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/controller/SingleInstanceClusterControllerClientFactoryTest.java
+++ /dev/null
@@ -1,116 +0,0 @@
-// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
-package com.yahoo.vespa.orchestrator.controller;
-
-import com.yahoo.vespa.applicationmodel.ConfigId;
-import com.yahoo.vespa.applicationmodel.HostName;
-import com.yahoo.vespa.jaxrs.client.JaxRsClientFactory;
-import com.yahoo.vespa.orchestrator.OrchestratorContext;
-import org.junit.Before;
-import org.junit.Test;
-
-import java.util.Arrays;
-import java.util.List;
-
-import static org.hamcrest.CoreMatchers.anyOf;
-import static org.hamcrest.CoreMatchers.equalTo;
-import static org.hamcrest.CoreMatchers.is;
-import static org.junit.Assert.fail;
-import static org.mockito.Matchers.any;
-import static org.mockito.Matchers.anyFloat;
-import static org.mockito.Matchers.anyInt;
-import static org.mockito.Matchers.anyString;
-import static org.mockito.Matchers.argThat;
-import static org.mockito.Matchers.eq;
-import static org.mockito.Mockito.mock;
-import static org.mockito.Mockito.verify;
-import static org.mockito.Mockito.when;
-
-public class SingleInstanceClusterControllerClientFactoryTest {
- private static final int PORT = SingleInstanceClusterControllerClientFactory.CLUSTERCONTROLLER_HARDCODED_PORT;
- private static final String PATH = SingleInstanceClusterControllerClientFactory.CLUSTERCONTROLLER_API_PATH;
-
- private static final HostName HOST_NAME_1 = new HostName("host1");
- private static final HostName HOST_NAME_2 = new HostName("host2");
- private static final HostName HOST_NAME_3 = new HostName("host3");
-
- OrchestratorContext context = mock(OrchestratorContext.class);
- private final ClusterControllerJaxRsApi mockApi = mock(ClusterControllerJaxRsApi.class);
- private final JaxRsClientFactory jaxRsClientFactory = mock(JaxRsClientFactory.class);
- private final ClusterControllerClientFactory clientFactory
- = new SingleInstanceClusterControllerClientFactory(jaxRsClientFactory);
-
- @Before
- public void setup() {
- when(
- jaxRsClientFactory.createClient(
- eq(ClusterControllerJaxRsApi.class),
- any(HostName.class),
- anyInt(),
- anyString(),
- anyString()))
- .thenReturn(mockApi);
- }
-
- @Test
- public void testCreateClientWithNoClusterControllerInstances() throws Exception {
- final List<HostName> clusterControllers = Arrays.asList();
-
- try {
- clientFactory.createClient(clusterControllers, "clusterName");
- fail();
- } catch (IllegalArgumentException e) {
- // As expected.
- }
- }
-
- @Test
- public void testCreateClientWithSingleClusterControllerInstance() throws Exception {
- final List<HostName> clusterControllers = Arrays.asList(HOST_NAME_1);
-
- when(context.getSuboperationTimeoutInSeconds(anyFloat())).thenReturn(1.0f);
- clientFactory.createClient(clusterControllers, "clusterName")
- .setNodeState(context, 0, ClusterControllerNodeState.MAINTENANCE);
-
- verify(jaxRsClientFactory).createClient(
- ClusterControllerJaxRsApi.class,
- HOST_NAME_1,
- PORT,
- PATH,
- "http");
- }
-
- @Test
- public void testCreateClientWithoutClusterControllerInstances() throws Exception {
- final List<HostName> clusterControllers = Arrays.asList();
-
- try {
- clientFactory.createClient(clusterControllers, "clusterName");
- fail();
- } catch (IllegalArgumentException e) {
- // As expected.
- }
- }
-
- @Test
- public void testCreateClientWithThreeClusterControllerInstances() throws Exception {
- final List<HostName> clusterControllers = Arrays.asList(HOST_NAME_1, HOST_NAME_2, HOST_NAME_3);
-
- when(context.getSuboperationTimeoutInSeconds(anyFloat())).thenReturn(1.0f);
- clientFactory.createClient(clusterControllers, "clusterName")
- .setNodeState(context, 0, ClusterControllerNodeState.MAINTENANCE);
-
- verify(jaxRsClientFactory).createClient(
- eq(ClusterControllerJaxRsApi.class),
- argThat(is(anyOf(
- equalTo(HOST_NAME_1),
- equalTo(HOST_NAME_2),
- equalTo(HOST_NAME_3)))),
- eq(PORT),
- eq(PATH),
- eq("http"));
- }
-
- private static ConfigId clusterControllerConfigId(final int index) {
- return new ConfigId("admin/cluster-controllers/" + index);
- }
-}
diff --git a/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/resources/HostResourceTest.java b/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/resources/HostResourceTest.java
index 45ba862c8f1..a9b8127e7fe 100644
--- a/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/resources/HostResourceTest.java
+++ b/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/resources/HostResourceTest.java
@@ -1,7 +1,6 @@
// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.vespa.orchestrator.resources;
-import com.yahoo.jdisc.Timer;
import com.yahoo.vespa.applicationmodel.ApplicationInstance;
import com.yahoo.vespa.applicationmodel.ApplicationInstanceId;
import com.yahoo.vespa.applicationmodel.ApplicationInstanceReference;
@@ -32,6 +31,7 @@ import com.yahoo.vespa.orchestrator.status.ApplicationInstanceStatus;
import com.yahoo.vespa.orchestrator.status.HostStatus;
import com.yahoo.vespa.orchestrator.status.MutableStatusRegistry;
import com.yahoo.vespa.orchestrator.status.StatusService;
+import org.junit.Before;
import org.junit.Test;
import javax.ws.rs.BadRequestException;
@@ -41,6 +41,7 @@ import javax.ws.rs.core.UriBuilder;
import javax.ws.rs.core.UriInfo;
import java.net.URI;
import java.time.Clock;
+import java.time.Instant;
import java.util.Arrays;
import java.util.Collections;
import java.util.Optional;
@@ -74,7 +75,7 @@ public class HostResourceTest {
static {
when(EVERY_HOST_IS_UP_HOST_STATUS_SERVICE.forApplicationInstance(eq(APPLICATION_INSTANCE_REFERENCE)))
.thenReturn(EVERY_HOST_IS_UP_MUTABLE_HOST_STATUS_REGISTRY);
- when(EVERY_HOST_IS_UP_HOST_STATUS_SERVICE.lockApplicationInstance_forCurrentThreadOnly(eq(APPLICATION_INSTANCE_REFERENCE)))
+ when(EVERY_HOST_IS_UP_HOST_STATUS_SERVICE.lockApplicationInstance_forCurrentThreadOnly(eq(APPLICATION_INSTANCE_REFERENCE), any()))
.thenReturn(EVERY_HOST_IS_UP_MUTABLE_HOST_STATUS_REGISTRY);
when(EVERY_HOST_IS_UP_MUTABLE_HOST_STATUS_REGISTRY.getHostStatus(any()))
.thenReturn(HostStatus.NO_REMARKS);
@@ -150,6 +151,11 @@ public class HostResourceTest {
private final UriInfo uriInfo = mock(UriInfo.class);
+ @Before
+ public void setUp() {
+ when(clock.instant()).thenReturn(Instant.now());
+ }
+
@Test
public void returns_200_on_success() {
HostResource hostResource =
diff --git a/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/status/ZookeeperStatusServiceTest.java b/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/status/ZookeeperStatusServiceTest.java
index 2e914718e20..44847666670 100644
--- a/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/status/ZookeeperStatusServiceTest.java
+++ b/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/status/ZookeeperStatusServiceTest.java
@@ -17,6 +17,7 @@ import org.junit.After;
import org.junit.Before;
import org.junit.Test;
+import java.time.Duration;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
@@ -81,7 +82,8 @@ public class ZookeeperStatusServiceTest {
@Test
public void setting_host_state_is_idempotent() {
try (MutableStatusRegistry statusRegistry = zookeeperStatusService.lockApplicationInstance_forCurrentThreadOnly(
- TestIds.APPLICATION_INSTANCE_REFERENCE)) {
+ TestIds.APPLICATION_INSTANCE_REFERENCE,
+ Duration.ofSeconds(10))) {
//shuffling to catch "clean database" failures for all cases.
for (HostStatus hostStatus: shuffledList(HostStatus.values())) {
@@ -105,11 +107,12 @@ public class ZookeeperStatusServiceTest {
final CompletableFuture<Void> lockedSuccessfullyFuture;
try (MutableStatusRegistry statusRegistry = zookeeperStatusService.lockApplicationInstance_forCurrentThreadOnly(
- TestIds.APPLICATION_INSTANCE_REFERENCE)) {
+ TestIds.APPLICATION_INSTANCE_REFERENCE,
+ Duration.ofSeconds(10))) {
lockedSuccessfullyFuture = CompletableFuture.runAsync(() -> {
try (MutableStatusRegistry statusRegistry2 = zookeeperStatusService2
- .lockApplicationInstance_forCurrentThreadOnly(TestIds.APPLICATION_INSTANCE_REFERENCE))
+ .lockApplicationInstance_forCurrentThreadOnly(TestIds.APPLICATION_INSTANCE_REFERENCE, Duration.ofSeconds(10)))
{
}
});
@@ -131,13 +134,13 @@ public class ZookeeperStatusServiceTest {
ZookeeperStatusService zookeeperStatusService2 = new ZookeeperStatusService(curator);
try (MutableStatusRegistry statusRegistry = zookeeperStatusService.lockApplicationInstance_forCurrentThreadOnly(
- TestIds.APPLICATION_INSTANCE_REFERENCE)) {
+ TestIds.APPLICATION_INSTANCE_REFERENCE, Duration.ofSeconds(10))) {
//must run in separate thread, since having 2 locks in the same thread fails
CompletableFuture<Void> resultOfZkOperationAfterLockFailure = CompletableFuture.runAsync(() -> {
try {
zookeeperStatusService2.lockApplicationInstance_forCurrentThreadOnly(
- TestIds.APPLICATION_INSTANCE_REFERENCE,1);
+ TestIds.APPLICATION_INSTANCE_REFERENCE, Duration.ofSeconds(1));
fail("Both zookeeper host status services locked simultaneously for the same application instance");
} catch (RuntimeException e) {
}
@@ -211,7 +214,7 @@ public class ZookeeperStatusServiceTest {
// Suspend
try (MutableStatusRegistry statusRegistry = zookeeperStatusService.lockApplicationInstance_forCurrentThreadOnly(
- TestIds.APPLICATION_INSTANCE_REFERENCE)) {
+ TestIds.APPLICATION_INSTANCE_REFERENCE, Duration.ofSeconds(10))) {
statusRegistry.setApplicationInstanceStatus(ApplicationInstanceStatus.ALLOWED_TO_BE_DOWN);
}
@@ -223,7 +226,7 @@ public class ZookeeperStatusServiceTest {
// Resume
try (MutableStatusRegistry statusRegistry = zookeeperStatusService.lockApplicationInstance_forCurrentThreadOnly(
- TestIds.APPLICATION_INSTANCE_REFERENCE)) {
+ TestIds.APPLICATION_INSTANCE_REFERENCE, Duration.ofSeconds(10))) {
statusRegistry.setApplicationInstanceStatus(ApplicationInstanceStatus.NO_REMARKS);
}
@@ -241,12 +244,12 @@ public class ZookeeperStatusServiceTest {
assertThat(suspendedApps.size(), is(0));
try (MutableStatusRegistry statusRegistry = zookeeperStatusService.lockApplicationInstance_forCurrentThreadOnly(
- TestIds.APPLICATION_INSTANCE_REFERENCE)) {
+ TestIds.APPLICATION_INSTANCE_REFERENCE, Duration.ofSeconds(10))) {
statusRegistry.setApplicationInstanceStatus(ApplicationInstanceStatus.ALLOWED_TO_BE_DOWN);
}
try (MutableStatusRegistry statusRegistry = zookeeperStatusService.lockApplicationInstance_forCurrentThreadOnly(
- TestIds.APPLICATION_INSTANCE_REFERENCE2)) {
+ TestIds.APPLICATION_INSTANCE_REFERENCE2, Duration.ofSeconds(10))) {
statusRegistry.setApplicationInstanceStatus(ApplicationInstanceStatus.ALLOWED_TO_BE_DOWN);
}