From 837011aca508acbaeea8a3b233c19a9257d271b7 Mon Sep 17 00:00:00 2001 From: HÃ¥kon Hallingstad Date: Thu, 1 Nov 2018 11:20:58 +0100 Subject: Revert "Revert "Revert "Enforce CC timeouts in Orchestrator 4""" --- .../com/yahoo/vespa/orchestrator/Orchestrator.java | 9 ++ .../vespa/orchestrator/OrchestratorContext.java | 70 +++------- .../yahoo/vespa/orchestrator/OrchestratorImpl.java | 37 +++-- .../controller/ClusterControllerClientImpl.java | 46 ++++--- .../ClusterControllerClientTimeouts.java | 125 ----------------- .../RetryingClusterControllerClientFactory.java | 35 ++--- ...ngleInstanceClusterControllerClientFactory.java | 54 ++++++++ .../yahoo/vespa/orchestrator/model/NodeGroup.java | 2 +- .../orchestrator/status/InMemoryStatusService.java | 7 +- .../vespa/orchestrator/status/StatusService.java | 8 +- .../status/ZookeeperStatusService.java | 29 ++-- .../vespa/orchestrator/OrchestratorImplTest.java | 16 +-- .../controller/ClusterControllerClientTest.java | 8 +- .../ClusterControllerClientTimeoutsTest.java | 151 --------------------- ...RetryingClusterControllerClientFactoryTest.java | 54 -------- ...InstanceClusterControllerClientFactoryTest.java | 116 ++++++++++++++++ .../orchestrator/resources/HostResourceTest.java | 10 +- .../status/ZookeeperStatusServiceTest.java | 21 ++- 18 files changed, 308 insertions(+), 490 deletions(-) delete mode 100644 orchestrator/src/main/java/com/yahoo/vespa/orchestrator/controller/ClusterControllerClientTimeouts.java create mode 100644 orchestrator/src/main/java/com/yahoo/vespa/orchestrator/controller/SingleInstanceClusterControllerClientFactory.java delete mode 100644 orchestrator/src/test/java/com/yahoo/vespa/orchestrator/controller/ClusterControllerClientTimeoutsTest.java delete mode 100644 orchestrator/src/test/java/com/yahoo/vespa/orchestrator/controller/RetryingClusterControllerClientFactoryTest.java create mode 100644 orchestrator/src/test/java/com/yahoo/vespa/orchestrator/controller/SingleInstanceClusterControllerClientFactoryTest.java (limited to 'orchestrator/src') diff --git a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/Orchestrator.java b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/Orchestrator.java index df124f2f690..b2be4fe52ec 100644 --- a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/Orchestrator.java +++ b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/Orchestrator.java @@ -74,6 +74,15 @@ public interface Orchestrator { */ void acquirePermissionToRemove(HostName hostName) throws OrchestrationException; + /** + * Suspend normal operations for a group of nodes in the same application. + * + * @param nodeGroup The group of nodes in an application. + * @throws HostStateChangeDeniedException if the request cannot be meet due to policy constraints. + * @throws HostNameNotFoundException if any hostnames in the node group is not recognized + */ + void suspendGroup(NodeGroup nodeGroup) throws HostStateChangeDeniedException, HostNameNotFoundException; + /** * Suspend several hosts. On failure, all hosts are resumed before exiting the method with an exception. */ diff --git a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/OrchestratorContext.java b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/OrchestratorContext.java index 25c6c780130..6577b4b96cc 100644 --- a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/OrchestratorContext.java +++ b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/OrchestratorContext.java @@ -1,74 +1,40 @@ // Copyright 2018 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. package com.yahoo.vespa.orchestrator; +import com.google.common.util.concurrent.UncheckedTimeoutException; import com.yahoo.time.TimeBudget; -import com.yahoo.vespa.orchestrator.controller.ClusterControllerClientTimeouts; import java.time.Clock; import java.time.Duration; -import java.time.Instant; -import java.util.Optional; /** - * Context for an operation (or suboperation) of the Orchestrator that needs to pass through to the backend, - * e.g. timeout management and probing. + * Context for the Orchestrator, e.g. timeout management. * - * @author hakonhall + * @author hakon */ public class OrchestratorContext { - private static final Duration DEFAULT_TIMEOUT_FOR_SINGLE_OP = Duration.ofSeconds(10); - private static final Duration DEFAULT_TIMEOUT_FOR_BATCH_OP = Duration.ofSeconds(60); + private static final Duration DEFAULT_TIMEOUT = Duration.ofSeconds(10); - private final Clock clock; - private final TimeBudget timeBudget; - private boolean probe; + private TimeBudget timeBudget; - public static OrchestratorContext createContextForMultiAppOp(Clock clock) { - return new OrchestratorContext(clock, TimeBudget.fromNow(clock, DEFAULT_TIMEOUT_FOR_BATCH_OP), true); + public OrchestratorContext(Clock clock) { + this.timeBudget = TimeBudget.fromNow(clock, DEFAULT_TIMEOUT); } - public static OrchestratorContext createContextForSingleAppOp(Clock clock) { - return new OrchestratorContext(clock, TimeBudget.fromNow(clock, DEFAULT_TIMEOUT_FOR_SINGLE_OP), true); + /** Get the original timeout in seconds. */ + public long getOriginalTimeoutInSeconds() { + return timeBudget.originalTimeout().get().getSeconds(); } - private OrchestratorContext(Clock clock, TimeBudget timeBudget, boolean probe) { - this.clock = clock; - this.timeBudget = timeBudget; - this.probe = probe; - } - - public Duration getTimeLeft() { - return timeBudget.timeLeftOrThrow().get(); - } - - public ClusterControllerClientTimeouts getClusterControllerTimeouts(String clusterName) { - return new ClusterControllerClientTimeouts(clusterName, timeBudget.timeLeftAsTimeBudget()); - } - - - /** Mark this operation as a non-committal probe. */ - public OrchestratorContext markAsProbe() { - this.probe = true; - return this; - } - - /** Whether the operation is a no-op probe to test whether it would have succeeded, if it had been committal. */ - public boolean isProbe() { - return probe; - } - - /** Create an OrchestratorContext to use within an application lock. */ - public OrchestratorContext createSubcontextForApplication() { - Instant now = clock.instant(); - Instant deadline = timeBudget.deadline().get(); - Instant maxDeadline = now.plus(DEFAULT_TIMEOUT_FOR_SINGLE_OP); - if (maxDeadline.compareTo(deadline) < 0) { - deadline = maxDeadline; + /** + * Get timeout for a suboperation that should take up {@code shareOfRemaining} of the + * remaining time, or throw an {@link UncheckedTimeoutException} if timed out. + */ + public float getSuboperationTimeoutInSeconds(float shareOfRemaining) { + if (!(0f <= shareOfRemaining && shareOfRemaining <= 1.0f)) { + throw new IllegalArgumentException("Share of remaining time must be between 0 and 1: " + shareOfRemaining); } - return new OrchestratorContext( - clock, - TimeBudget.from(clock, now, Optional.of(Duration.between(now, deadline))), - probe); + return shareOfRemaining * timeBudget.timeLeftOrThrow().get().toMillis() / 1000.0f; } } diff --git a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/OrchestratorImpl.java b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/OrchestratorImpl.java index 3a2673d9dbc..ad8a35312e4 100644 --- a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/OrchestratorImpl.java +++ b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/OrchestratorImpl.java @@ -105,9 +105,7 @@ public class OrchestratorImpl implements Orchestrator { @Override public void setNodeStatus(HostName hostName, HostStatus status) throws OrchestrationException { ApplicationInstanceReference reference = getApplicationInstance(hostName).reference(); - OrchestratorContext context = OrchestratorContext.createContextForSingleAppOp(clock); - try (MutableStatusRegistry statusRegistry = statusService - .lockApplicationInstance_forCurrentThreadOnly(reference, context.getTimeLeft())) { + try (MutableStatusRegistry statusRegistry = statusService.lockApplicationInstance_forCurrentThreadOnly(reference)) { statusRegistry.setHostState(hostName, status); } } @@ -129,10 +127,10 @@ public class OrchestratorImpl implements Orchestrator { ApplicationInstance appInstance = getApplicationInstance(hostName); - OrchestratorContext context = OrchestratorContext.createContextForSingleAppOp(clock); + OrchestratorContext context = new OrchestratorContext(clock); try (MutableStatusRegistry statusRegistry = statusService.lockApplicationInstance_forCurrentThreadOnly( appInstance.reference(), - context.getTimeLeft())) { + context.getOriginalTimeoutInSeconds())) { final HostStatus currentHostState = statusRegistry.getHostStatus(hostName); if (HostStatus.NO_REMARKS == currentHostState) { @@ -150,7 +148,7 @@ public class OrchestratorImpl implements Orchestrator { public void suspend(HostName hostName) throws HostStateChangeDeniedException, HostNameNotFoundException { ApplicationInstance appInstance = getApplicationInstance(hostName); NodeGroup nodeGroup = new NodeGroup(appInstance, hostName); - suspendGroup(OrchestratorContext.createContextForSingleAppOp(clock), nodeGroup); + suspendGroup(nodeGroup); } @Override @@ -158,10 +156,10 @@ public class OrchestratorImpl implements Orchestrator { ApplicationInstance appInstance = getApplicationInstance(hostName); NodeGroup nodeGroup = new NodeGroup(appInstance, hostName); - OrchestratorContext context = OrchestratorContext.createContextForSingleAppOp(clock); + OrchestratorContext context = new OrchestratorContext(clock); try (MutableStatusRegistry statusRegistry = statusService.lockApplicationInstance_forCurrentThreadOnly( appInstance.reference(), - context.getTimeLeft())) { + context.getOriginalTimeoutInSeconds())) { ApplicationApi applicationApi = new ApplicationApiImpl( nodeGroup, statusRegistry, @@ -171,19 +169,16 @@ public class OrchestratorImpl implements Orchestrator { } } - /** - * Suspend normal operations for a group of nodes in the same application. - * - * @param nodeGroup The group of nodes in an application. - * @throws HostStateChangeDeniedException if the request cannot be meet due to policy constraints. - */ - void suspendGroup(OrchestratorContext context, NodeGroup nodeGroup) throws HostStateChangeDeniedException { + // Public for testing purposes + @Override + public void suspendGroup(NodeGroup nodeGroup) throws HostStateChangeDeniedException, HostNameNotFoundException { ApplicationInstanceReference applicationReference = nodeGroup.getApplicationReference(); + OrchestratorContext context = new OrchestratorContext(clock); try (MutableStatusRegistry hostStatusRegistry = statusService.lockApplicationInstance_forCurrentThreadOnly( applicationReference, - context.getTimeLeft())) { + context.getOriginalTimeoutInSeconds())) { ApplicationInstanceStatus appStatus = statusService.forApplicationInstance(applicationReference).getApplicationInstanceStatus(); if (appStatus == ApplicationInstanceStatus.ALLOWED_TO_BE_DOWN) { return; @@ -229,12 +224,14 @@ public class OrchestratorImpl implements Orchestrator { throw new BatchHostNameNotFoundException(parentHostname, hostNames, e); } - OrchestratorContext context = OrchestratorContext.createContextForMultiAppOp(clock); for (NodeGroup nodeGroup : nodeGroupsOrderedByApplication) { try { - suspendGroup(context.createSubcontextForApplication(), nodeGroup); + suspendGroup(nodeGroup); } catch (HostStateChangeDeniedException e) { throw new BatchHostStateChangeDeniedException(parentHostname, nodeGroup, e); + } catch (HostNameNotFoundException e) { + // Should never get here since since we would have received HostNameNotFoundException earlier. + throw new BatchHostNameNotFoundException(parentHostname, hostNames, e); } catch (RuntimeException e) { throw new BatchInternalErrorException(parentHostname, nodeGroup, e); } @@ -304,12 +301,12 @@ public class OrchestratorImpl implements Orchestrator { private void setApplicationStatus(ApplicationId appId, ApplicationInstanceStatus status) throws ApplicationStateChangeDeniedException, ApplicationIdNotFoundException{ - OrchestratorContext context = OrchestratorContext.createContextForSingleAppOp(clock); + OrchestratorContext context = new OrchestratorContext(clock); ApplicationInstanceReference appRef = OrchestratorUtil.toApplicationInstanceReference(appId, instanceLookupService); try (MutableStatusRegistry statusRegistry = statusService.lockApplicationInstance_forCurrentThreadOnly( appRef, - context.getTimeLeft())) { + context.getOriginalTimeoutInSeconds())) { // Short-circuit if already in wanted state if (status == statusRegistry.getApplicationInstanceStatus()) return; diff --git a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/controller/ClusterControllerClientImpl.java b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/controller/ClusterControllerClientImpl.java index 46440682fa0..467b534f809 100644 --- a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/controller/ClusterControllerClientImpl.java +++ b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/controller/ClusterControllerClientImpl.java @@ -1,7 +1,6 @@ // Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. package com.yahoo.vespa.orchestrator.controller; -import com.google.common.util.concurrent.UncheckedTimeoutException; import com.yahoo.vespa.jaxrs.client.JaxRsStrategy; import com.yahoo.vespa.orchestrator.OrchestratorContext; @@ -16,6 +15,23 @@ public class ClusterControllerClientImpl implements ClusterControllerClient{ public static final String REQUEST_REASON = "Orchestrator"; + // On setNodeState calls against the CC ensemble. + // + // We'd like to set a timeout for the request to the first CC such that if the first + // CC is faulty, there's sufficient time to send the request to the second and third CC. + // The timeouts would be: + // timeout(1. request) = SHARE_REMAINING_TIME * T + // timeout(2. request) = SHARE_REMAINING_TIME * T * (1 - SHARE_REMAINING_TIME) + // timeout(3. request) = SHARE_REMAINING_TIME * T * (1 - SHARE_REMAINING_TIME)^2 + // + // Using a share of 50% gives approximately: + // timeout(1. request) = T * 0.5 + // timeout(2. request) = T * 0.25 + // timeout(3. request) = T * 0.125 + // + // which seems fine + public static final float SHARE_REMAINING_TIME = 0.5f; + private final JaxRsStrategy clusterControllerApi; private final String clusterName; @@ -36,22 +52,20 @@ public class ClusterControllerClientImpl implements ClusterControllerClient{ ClusterControllerNodeState wantedState) throws IOException { ClusterControllerStateRequest.State state = new ClusterControllerStateRequest.State(wantedState, REQUEST_REASON); ClusterControllerStateRequest stateRequest = new ClusterControllerStateRequest(state, ClusterControllerStateRequest.Condition.SAFE); - ClusterControllerClientTimeouts timeouts = context.getClusterControllerTimeouts(clusterName); try { return clusterControllerApi.apply(api -> api.setNodeState( clusterName, storageNodeIndex, - timeouts.getServerTimeoutOrThrow().toMillis() / 1000.0f, - stateRequest), - timeouts); - } catch (IOException | UncheckedTimeoutException e) { + context.getSuboperationTimeoutInSeconds(SHARE_REMAINING_TIME), + stateRequest) + ); + } catch (IOException e) { String message = String.format( - "Giving up setting %s for storage node with index %d in cluster %s: %s", + "Giving up setting %s for storage node with index %d in cluster %s", stateRequest, storageNodeIndex, - clusterName, - e.getMessage()); + clusterName); throw new IOException(message, e); } @@ -65,18 +79,16 @@ public class ClusterControllerClientImpl implements ClusterControllerClient{ @Override public ClusterControllerStateResponse setApplicationState( OrchestratorContext context, - ClusterControllerNodeState wantedState) throws IOException { - ClusterControllerStateRequest.State state = new ClusterControllerStateRequest.State(wantedState, REQUEST_REASON); - ClusterControllerStateRequest stateRequest = new ClusterControllerStateRequest(state, ClusterControllerStateRequest.Condition.FORCE); - ClusterControllerClientTimeouts timeouts = context.getClusterControllerTimeouts(clusterName); + final ClusterControllerNodeState wantedState) throws IOException { + final ClusterControllerStateRequest.State state = new ClusterControllerStateRequest.State(wantedState, REQUEST_REASON); + final ClusterControllerStateRequest stateRequest = new ClusterControllerStateRequest(state, ClusterControllerStateRequest.Condition.FORCE); try { return clusterControllerApi.apply(api -> api.setClusterState( clusterName, - timeouts.getServerTimeoutOrThrow().toMillis() / 1000.0f, - stateRequest), - timeouts); - } catch (IOException | UncheckedTimeoutException e) { + context.getSuboperationTimeoutInSeconds(SHARE_REMAINING_TIME), + stateRequest)); + } catch (IOException e) { final String message = String.format( "Giving up setting %s for cluster %s", stateRequest, diff --git a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/controller/ClusterControllerClientTimeouts.java b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/controller/ClusterControllerClientTimeouts.java deleted file mode 100644 index 44ecc7ac167..00000000000 --- a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/controller/ClusterControllerClientTimeouts.java +++ /dev/null @@ -1,125 +0,0 @@ -package com.yahoo.vespa.orchestrator.controller; - -import com.google.common.util.concurrent.UncheckedTimeoutException; -import com.yahoo.time.TimeBudget; -import com.yahoo.vespa.jaxrs.client.JaxRsTimeouts; - -import java.time.Duration; - -/** - * Calculates various timeouts associated with a REST call from the Orchestrator to the Cluster Controller. - * - *

Timeout handling of HTTP messaging is fundamentally flawed in various Java implementations. - * We would like to specify a max time for the whole operation (connect, send request, and receive response). - * Jersey JAX-RS implementation and the Apache HTTP client library provides a way to set the connect timeout C - * and read timeout R. So if the operation takes NR reads, and the writes takes TW time, - * the theoretical max time is: T = C + R * NR + TW. With both NR and TW unknown, there's no way to - * set a proper C and R.

- * - *

The various timeouts is set according to the following considerations:

- * - *
    - *
  1. Some time is reserved for the execution in this process, e.g. execution leading to the REST call, - * handling of the response, exception handling, etc, such that we can finish processing this request - * before the {@link #timeBudget} deadline. This is typically in the order of ms.
  2. - *
  3. A timeout will be passed to the Cluster Controller backend. We'll give a timeout such that if one - * CC times out, the next CC will be given exactly the same timeout. This may or may not be a good strategy: - * (A) There's typically a 3rd CC. But if the first 2 fails with timeout, the chance the last is OK - * is negligible. (B) If picking the CC is random, then giving the full timeout to the first - * should be sufficient since a later retry will hit the healthy CC. (C) Because we have been using - * DROP in networking rules, clients may time out (host out of app or whatever). This would suggest - * allowing more than 1 full request.
  4. - *
  5. The timeout passed to the CC backend should be such that if it honors that, the Orchestrator - * should not time out. This means some kernel and network overhead should be subtracted from the timeout - * passed to the CC.
  6. - *
  7. We're only able to set the connect and read/write timeouts(!) Since we're communicating within - * data center, assume connections are in the order of ms, while a single read may stall close up to the CC - * timeout.
  8. - *
- * - * @author hakonhall - */ -public class ClusterControllerClientTimeouts implements JaxRsTimeouts { - // In data center connect timeout - static final Duration CONNECT_TIMEOUT = Duration.ofMillis(100); - // Per call overhead - static final Duration IN_PROCESS_OVERHEAD_PER_CALL = Duration.ofMillis(100); - // In-process kernel overhead, network overhead, server kernel overhead, and server in-process overhead. - static final Duration DOWNSTREAM_OVERHEAD_PER_CALL = CONNECT_TIMEOUT.plus(Duration.ofMillis(100)); - // Minimum time reserved for post-RPC processing to finish BEFORE the deadline, including ZK write. - static final Duration IN_PROCESS_OVERHEAD = Duration.ofMillis(100); - // Number of JAX-RS RPC calls to account for within the time budget. - static final int NUM_CALLS = 2; - // Minimum server-side timeout - static final Duration MIN_SERVER_TIMEOUT = Duration.ofMillis(10); - - private final String clusterName; - private final TimeBudget timeBudget; - private final Duration maxClientTimeout; - - /** - * Creates a timeouts instance. - * - * The {@link #timeBudget} SHOULD be the time budget for a single logical call to the Cluster Controller. - * A logical call to CC may in fact call the CC several times, if the first onces are down and/or not - * the master. - * - * @param clusterName The name of the content cluster this request is for. - * @param timeBudget The time budget for a single logical call to the the Cluster Controller. - */ - public ClusterControllerClientTimeouts(String clusterName, TimeBudget timeBudget) { - this.clusterName = clusterName; - this.timeBudget = timeBudget; - - // timeLeft = inProcessOverhead + numCalls * clientTimeout - maxClientTimeout = timeBudget.originalTimeout().get().minus(IN_PROCESS_OVERHEAD).dividedBy(NUM_CALLS); - } - - @Override - public Duration getConnectTimeoutOrThrow() { - return CONNECT_TIMEOUT; - } - - @Override - public Duration getReadTimeoutOrThrow() { - Duration timeLeft = timeBudget.timeLeft().get(); - if (timeLeft.toMillis() <= 0) { - throw new UncheckedTimeoutException("Exceeded the timeout " + timeBudget.originalTimeout().get() + - " against content cluster '" + clusterName + "' by " + timeLeft.negated()); - } - - Duration clientTimeout = min(timeLeft, maxClientTimeout); - verifyPositive(timeLeft, clientTimeout); - - // clientTimeout = overheadPerCall + connectTimeout + readTimeout - Duration readTimeout = clientTimeout.minus(IN_PROCESS_OVERHEAD_PER_CALL).minus(CONNECT_TIMEOUT); - verifyPositive(timeLeft, readTimeout); - - return readTimeout; - } - - public Duration getServerTimeoutOrThrow() { - // readTimeout = networkOverhead + serverTimeout - Duration serverTimeout = getReadTimeoutOrThrow().minus(DOWNSTREAM_OVERHEAD_PER_CALL); - if (serverTimeout.toMillis() < MIN_SERVER_TIMEOUT.toMillis()) { - throw new UncheckedTimeoutException("Server would be given too little time to complete: " + - serverTimeout + ". Original timeout was " + timeBudget.originalTimeout().get()); - } - - return serverTimeout; - } - - private static Duration min(Duration a, Duration b) { - return a.compareTo(b) < 0 ? a : b; - } - - private void verifyLargerThan(Duration timeLeft, Duration availableDuration) { - if (availableDuration.toMillis() <= 0) { - throw new UncheckedTimeoutException("Too little time left (" + timeLeft + - ") to call content cluster '" + clusterName + - "', original timeout was " + timeBudget.originalTimeout().get()); - } - } - - private void verifyPositive(Duration timeLeft, Duration duration) { verifyLargerThan(timeLeft, duration); } -} diff --git a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/controller/RetryingClusterControllerClientFactory.java b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/controller/RetryingClusterControllerClientFactory.java index 33e74235862..5571eedeec6 100644 --- a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/controller/RetryingClusterControllerClientFactory.java +++ b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/controller/RetryingClusterControllerClientFactory.java @@ -8,8 +8,9 @@ import com.yahoo.vespa.jaxrs.client.JaxRsStrategy; import com.yahoo.vespa.jaxrs.client.JaxRsStrategyFactory; import com.yahoo.vespa.jaxrs.client.JerseyJaxRsClientFactory; -import java.util.HashSet; import java.util.List; +import java.util.Set; +import java.util.stream.Collectors; /** * @author bakksjo @@ -20,12 +21,14 @@ public class RetryingClusterControllerClientFactory implements ClusterController public static final int HARDCODED_CLUSTERCONTROLLER_PORT = 19050; public static final String CLUSTERCONTROLLER_API_PATH = "/"; public static final String CLUSTERCONTROLLER_SCHEME = "http"; + private static final int CLUSTER_CONTROLLER_CONNECT_TIMEOUT_MS = 1000; + private static final int CLUSTER_CONTROLLER_READ_TIMEOUT_MS = 1000; private JaxRsClientFactory jaxRsClientFactory; @Inject public RetryingClusterControllerClientFactory() { - this(new JerseyJaxRsClientFactory()); + this(new JerseyJaxRsClientFactory(CLUSTER_CONTROLLER_CONNECT_TIMEOUT_MS, CLUSTER_CONTROLLER_READ_TIMEOUT_MS)); } public RetryingClusterControllerClientFactory(JaxRsClientFactory jaxRsClientFactory) { @@ -33,21 +36,19 @@ public class RetryingClusterControllerClientFactory implements ClusterController } @Override - public ClusterControllerClient createClient(List clusterControllers, String clusterName) { - JaxRsStrategy jaxRsApi = - new JaxRsStrategyFactory( - new HashSet<>(clusterControllers), - HARDCODED_CLUSTERCONTROLLER_PORT, - jaxRsClientFactory, - CLUSTERCONTROLLER_SCHEME) - .apiWithRetries(ClusterControllerJaxRsApi.class, CLUSTERCONTROLLER_API_PATH) - // Use max iteration 1: The JaxRsStrategyFactory will try host 1, 2, then 3: - // - If host 1 responds, it will redirect to master if necessary. Otherwise - // - If host 2 responds, it will redirect to master if necessary. Otherwise - // - If host 3 responds, it may redirect to master if necessary (if they're up - // after all), but more likely there's no quorum and this will fail too. - // If there's only 1 CC, we'll try that one twice. - .setMaxIterations(clusterControllers.size() > 1 ? 1 : 2); + public ClusterControllerClient createClient(List clusterControllers, + String clusterName) { + Set clusterControllerSet = clusterControllers.stream().collect(Collectors.toSet()); + JaxRsStrategy jaxRsApi + = new JaxRsStrategyFactory(clusterControllerSet, HARDCODED_CLUSTERCONTROLLER_PORT, jaxRsClientFactory, CLUSTERCONTROLLER_SCHEME) + .apiWithRetries(ClusterControllerJaxRsApi.class, CLUSTERCONTROLLER_API_PATH) + // Use max iteration 1. The JaxRsStrategyFactory will try host 1, 2, then 3: + // - If host 1 responds, it will redirect to master if necessary. Otherwise + // - If host 2 responds, it will redirect to master if necessary. Otherwise + // - If host 3 responds, it may redirect to master if necessary (if they're up + // after all), but more likely there's no quorum and this will fail too. + .setMaxIterations(1); return new ClusterControllerClientImpl(jaxRsApi, clusterName); } + } diff --git a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/controller/SingleInstanceClusterControllerClientFactory.java b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/controller/SingleInstanceClusterControllerClientFactory.java new file mode 100644 index 00000000000..7459f0a6b11 --- /dev/null +++ b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/controller/SingleInstanceClusterControllerClientFactory.java @@ -0,0 +1,54 @@ +// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.vespa.orchestrator.controller; + +import com.yahoo.log.LogLevel; +import com.yahoo.vespa.applicationmodel.HostName; +import com.yahoo.vespa.jaxrs.client.JaxRsClientFactory; +import com.yahoo.vespa.jaxrs.client.JaxRsStrategy; +import com.yahoo.vespa.jaxrs.client.NoRetryJaxRsStrategy; + +import java.util.List; +import java.util.logging.Logger; + +/** + * @author bakksjo + */ +public class SingleInstanceClusterControllerClientFactory implements ClusterControllerClientFactory { + + public static final int CLUSTERCONTROLLER_HARDCODED_PORT = 19050; + public static final String CLUSTERCONTROLLER_HARDCODED_SCHEME = "http"; + public static final String CLUSTERCONTROLLER_API_PATH = "/"; + + private static final Logger log = Logger.getLogger(SingleInstanceClusterControllerClientFactory.class.getName()); + + private JaxRsClientFactory jaxRsClientFactory; + + public SingleInstanceClusterControllerClientFactory(JaxRsClientFactory jaxRsClientFactory) { + this.jaxRsClientFactory = jaxRsClientFactory; + } + + @Override + public ClusterControllerClient createClient(List clusterControllers, + String clusterName) { + if (clusterControllers.isEmpty()) { + throw new IllegalArgumentException("No cluster controller instances found"); + } + HostName controllerHostName = clusterControllers.iterator().next(); + int port = CLUSTERCONTROLLER_HARDCODED_PORT; // TODO: Get this from service monitor. + + log.log(LogLevel.DEBUG, () -> + "For cluster '" + clusterName + "' with controllers " + clusterControllers + + ", creating api client for " + controllerHostName.s() + ":" + port); + + JaxRsStrategy strategy = new NoRetryJaxRsStrategy<>( + controllerHostName, + port, + jaxRsClientFactory, + ClusterControllerJaxRsApi.class, + CLUSTERCONTROLLER_API_PATH, + CLUSTERCONTROLLER_HARDCODED_SCHEME); + + return new ClusterControllerClientImpl(strategy, clusterName); + } + +} diff --git a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/model/NodeGroup.java b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/model/NodeGroup.java index 74b4b534acc..ed506c82079 100644 --- a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/model/NodeGroup.java +++ b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/model/NodeGroup.java @@ -43,7 +43,7 @@ public class NodeGroup { } public List getHostNames() { - return hostNames.stream().sorted().collect(Collectors.toList()); + return hostNames.stream().collect(Collectors.toList()).stream().sorted().collect(Collectors.toList()); } public String toCommaSeparatedString() { diff --git a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/status/InMemoryStatusService.java b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/status/InMemoryStatusService.java index bd5eb6f3e29..c5ae553a98c 100644 --- a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/status/InMemoryStatusService.java +++ b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/status/InMemoryStatusService.java @@ -4,7 +4,6 @@ package com.yahoo.vespa.orchestrator.status; import com.yahoo.vespa.applicationmodel.ApplicationInstanceReference; import com.yahoo.vespa.applicationmodel.HostName; -import java.time.Duration; import java.util.HashMap; import java.util.HashSet; import java.util.Map; @@ -43,11 +42,15 @@ public class InMemoryStatusService implements StatusService { }; } + @Override + public MutableStatusRegistry lockApplicationInstance_forCurrentThreadOnly(ApplicationInstanceReference applicationInstanceReference) { + return lockApplicationInstance_forCurrentThreadOnly(applicationInstanceReference, 10); + } @Override public MutableStatusRegistry lockApplicationInstance_forCurrentThreadOnly( ApplicationInstanceReference applicationInstanceReference, - Duration timeout) { + long timeoutSeconds) { Lock lock = instanceLockService.get(applicationInstanceReference); return new InMemoryMutableStatusRegistry(lock, applicationInstanceReference); } diff --git a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/status/StatusService.java b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/status/StatusService.java index 76adef72b2b..c47be096242 100644 --- a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/status/StatusService.java +++ b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/status/StatusService.java @@ -3,7 +3,6 @@ package com.yahoo.vespa.orchestrator.status; import com.yahoo.vespa.applicationmodel.ApplicationInstanceReference; -import java.time.Duration; import java.util.Set; /** @@ -25,7 +24,7 @@ public interface StatusService { * possibly inconsistent snapshot values. It is not recommended that this method is used for anything other * than monitoring, logging, debugging, etc. It should never be used for multi-step operations (e.g. * read-then-write) where consistency is required. For those cases, use - * {@link #lockApplicationInstance_forCurrentThreadOnly(ApplicationInstanceReference, Duration)}. + * {@link #lockApplicationInstance_forCurrentThreadOnly(ApplicationInstanceReference)}. */ ReadOnlyStatusRegistry forApplicationInstance(ApplicationInstanceReference applicationInstanceReference); @@ -53,9 +52,12 @@ public interface StatusService { * this case, subsequent mutating operations will fail, but previous mutating operations are NOT rolled back. * This may leave the registry in an inconsistent state (as judged by the client code). */ + MutableStatusRegistry lockApplicationInstance_forCurrentThreadOnly(ApplicationInstanceReference applicationInstanceReference); + + /** Lock application instance with timeout. */ MutableStatusRegistry lockApplicationInstance_forCurrentThreadOnly( ApplicationInstanceReference applicationInstanceReference, - Duration timeout); + long timeoutSeconds); /** * Returns all application instances that are allowed to be down. The intention is to use this diff --git a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/status/ZookeeperStatusService.java b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/status/ZookeeperStatusService.java index 7df29e038c1..deece6a4a65 100644 --- a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/status/ZookeeperStatusService.java +++ b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/status/ZookeeperStatusService.java @@ -56,6 +56,21 @@ public class ZookeeperStatusService implements StatusService { }; } + /** + * 1) locks the status service for an application instance. + * 2) fails all operations in this thread when the session is lost, + * since session loss might cause the lock to be lost. + * Since it only fails operations in this thread, + * all operations depending on a lock, including the locking itself, must be done in this thread. + * Note that since it is the thread that fails, all status operations in this thread will fail + * even if they're not supposed to be guarded by this lock + * (i.e. the request is for another applicationInstanceReference) + */ + @Override + public MutableStatusRegistry lockApplicationInstance_forCurrentThreadOnly(ApplicationInstanceReference applicationInstanceReference) { + return lockApplicationInstance_forCurrentThreadOnly(applicationInstanceReference, 10); + } + @Override public Set getAllSuspendedApplications() { try { @@ -78,23 +93,13 @@ public class ZookeeperStatusService implements StatusService { } } - /** - * 1) locks the status service for an application instance. - * 2) fails all operations in this thread when the session is lost, - * since session loss might cause the lock to be lost. - * Since it only fails operations in this thread, - * all operations depending on a lock, including the locking itself, must be done in this thread. - * Note that since it is the thread that fails, all status operations in this thread will fail - * even if they're not supposed to be guarded by this lock - * (i.e. the request is for another applicationInstanceReference) - */ @Override public MutableStatusRegistry lockApplicationInstance_forCurrentThreadOnly( ApplicationInstanceReference applicationInstanceReference, - Duration timeout) { + long timeoutSeconds) { String lockPath = applicationInstanceLock2Path(applicationInstanceReference); Lock lock = new Lock(lockPath, curator); - lock.acquire(timeout); + lock.acquire(Duration.ofSeconds(timeoutSeconds)); try { return new ZkMutableStatusRegistry(lock, applicationInstanceReference); diff --git a/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/OrchestratorImplTest.java b/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/OrchestratorImplTest.java index 80174d05a54..76d9398c44e 100644 --- a/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/OrchestratorImplTest.java +++ b/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/OrchestratorImplTest.java @@ -43,8 +43,6 @@ import static org.junit.Assert.assertFalse; import static org.junit.Assert.assertThat; import static org.junit.Assert.assertTrue; import static org.junit.Assert.fail; -import static org.mockito.Matchers.any; -import static org.mockito.Matchers.eq; import static org.mockito.Mockito.doThrow; import static org.mockito.Mockito.inOrder; import static org.mockito.Mockito.mock; @@ -247,8 +245,6 @@ public class OrchestratorImplTest { // A spy is preferential because suspendAll() relies on delegating the hard work to suspend() and resume(). OrchestratorImpl orchestrator = spy(this.orchestrator); - OrchestratorContext context = mock(OrchestratorContext.class); - orchestrator.suspendAll( new HostName("parentHostname"), Arrays.asList( @@ -261,9 +257,9 @@ public class OrchestratorImplTest { // TEST6: tenant-id-3:application-instance-3:default // TEST1: test-tenant-id:application:instance InOrder order = inOrder(orchestrator); - order.verify(orchestrator).suspendGroup(any(), eq(DummyInstanceLookupService.TEST3_NODE_GROUP)); - order.verify(orchestrator).suspendGroup(any(), eq(DummyInstanceLookupService.TEST6_NODE_GROUP)); - order.verify(orchestrator).suspendGroup(any(), eq(DummyInstanceLookupService.TEST1_NODE_GROUP)); + order.verify(orchestrator).suspendGroup(DummyInstanceLookupService.TEST3_NODE_GROUP); + order.verify(orchestrator).suspendGroup(DummyInstanceLookupService.TEST6_NODE_GROUP); + order.verify(orchestrator).suspendGroup(DummyInstanceLookupService.TEST1_NODE_GROUP); order.verifyNoMoreInteractions(); } @@ -276,7 +272,7 @@ public class OrchestratorImplTest { DummyInstanceLookupService.TEST6_HOST_NAME, "some-constraint", "error message"); - doThrow(supensionFailure).when(orchestrator).suspendGroup(any(), eq(DummyInstanceLookupService.TEST6_NODE_GROUP)); + doThrow(supensionFailure).when(orchestrator).suspendGroup(DummyInstanceLookupService.TEST6_NODE_GROUP); try { orchestrator.suspendAll( @@ -295,8 +291,8 @@ public class OrchestratorImplTest { } InOrder order = inOrder(orchestrator); - order.verify(orchestrator).suspendGroup(any(), eq(DummyInstanceLookupService.TEST3_NODE_GROUP)); - order.verify(orchestrator).suspendGroup(any(), eq(DummyInstanceLookupService.TEST6_NODE_GROUP)); + order.verify(orchestrator).suspendGroup(DummyInstanceLookupService.TEST3_NODE_GROUP); + order.verify(orchestrator).suspendGroup(DummyInstanceLookupService.TEST6_NODE_GROUP); order.verifyNoMoreInteractions(); } diff --git a/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/controller/ClusterControllerClientTest.java b/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/controller/ClusterControllerClientTest.java index b12cd5aa7be..228174a9b3d 100644 --- a/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/controller/ClusterControllerClientTest.java +++ b/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/controller/ClusterControllerClientTest.java @@ -6,9 +6,7 @@ import com.yahoo.vespa.jaxrs.client.LocalPassThroughJaxRsStrategy; import com.yahoo.vespa.orchestrator.OrchestratorContext; import org.junit.Test; -import java.time.Duration; - -import static org.mockito.Matchers.any; +import static org.mockito.Matchers.anyFloat; import static org.mockito.Matchers.eq; import static org.mockito.Mockito.mock; import static org.mockito.Mockito.times; @@ -30,9 +28,7 @@ public class ClusterControllerClientTest { final ClusterControllerNodeState wantedState = ClusterControllerNodeState.MAINTENANCE; OrchestratorContext context = mock(OrchestratorContext.class); - ClusterControllerClientTimeouts timeouts = mock(ClusterControllerClientTimeouts.class); - when(context.getClusterControllerTimeouts(any())).thenReturn(timeouts); - when(timeouts.getServerTimeoutOrThrow()).thenReturn(Duration.ofSeconds(1)); + when(context.getSuboperationTimeoutInSeconds(anyFloat())).thenReturn(1.0f); clusterControllerClient.setNodeState(context, STORAGE_NODE_INDEX, wantedState); final ClusterControllerStateRequest expectedNodeStateRequest = new ClusterControllerStateRequest( diff --git a/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/controller/ClusterControllerClientTimeoutsTest.java b/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/controller/ClusterControllerClientTimeoutsTest.java deleted file mode 100644 index ee81a89d76c..00000000000 --- a/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/controller/ClusterControllerClientTimeoutsTest.java +++ /dev/null @@ -1,151 +0,0 @@ -package com.yahoo.vespa.orchestrator.controller; - -import com.google.common.util.concurrent.UncheckedTimeoutException; -import com.yahoo.test.ManualClock; -import com.yahoo.time.TimeBudget; -import org.junit.Before; -import org.junit.Test; - -import java.time.Duration; -import java.util.Optional; - -import static com.yahoo.vespa.orchestrator.controller.ClusterControllerClientTimeouts.CONNECT_TIMEOUT; -import static com.yahoo.vespa.orchestrator.controller.ClusterControllerClientTimeouts.IN_PROCESS_OVERHEAD; -import static com.yahoo.vespa.orchestrator.controller.ClusterControllerClientTimeouts.IN_PROCESS_OVERHEAD_PER_CALL; -import static com.yahoo.vespa.orchestrator.controller.ClusterControllerClientTimeouts.MIN_SERVER_TIMEOUT; -import static com.yahoo.vespa.orchestrator.controller.ClusterControllerClientTimeouts.DOWNSTREAM_OVERHEAD_PER_CALL; -import static com.yahoo.vespa.orchestrator.controller.ClusterControllerClientTimeouts.NUM_CALLS; -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.fail; - -public class ClusterControllerClientTimeoutsTest { - // The minimum time left that allows for a single RPC to CC. - private static final Duration MINIMUM_TIME_LEFT = IN_PROCESS_OVERHEAD_PER_CALL - .plus(CONNECT_TIMEOUT) - .plus(DOWNSTREAM_OVERHEAD_PER_CALL) - .plus(MIN_SERVER_TIMEOUT); - static { - assertEquals(Duration.ofMillis(410), MINIMUM_TIME_LEFT); - } - - // The minimum time left (= original time) that allows for NUM_CALLS RPCs to CC. - private static final Duration MINIMUM_ORIGINAL_TIMEOUT = MINIMUM_TIME_LEFT - .multipliedBy(NUM_CALLS) - .plus(IN_PROCESS_OVERHEAD); - static { - assertEquals(Duration.ofMillis(920), MINIMUM_ORIGINAL_TIMEOUT); - } - - private final ManualClock clock = new ManualClock(); - - private Duration originalTimeout; - private TimeBudget timeBudget; - private ClusterControllerClientTimeouts timeouts; - - private void makeTimeouts(Duration originalTimeout) { - this.originalTimeout = originalTimeout; - this.timeBudget = TimeBudget.from(clock, clock.instant(), Optional.of(originalTimeout)); - this.timeouts = new ClusterControllerClientTimeouts("clustername", timeBudget); - } - - @Before - public void setUp() { - makeTimeouts(Duration.ofSeconds(3)); - } - - @Test - public void makes2RequestsWithMaxProcessingTime() { - assertStandardTimeouts(); - - Duration maxProcessingTime = IN_PROCESS_OVERHEAD_PER_CALL - .plus(CONNECT_TIMEOUT) - .plus(timeouts.getReadTimeoutOrThrow()); - assertEquals(1450, maxProcessingTime.toMillis()); - clock.advance(maxProcessingTime); - - assertStandardTimeouts(); - - clock.advance(maxProcessingTime); - - try { - timeouts.getServerTimeoutOrThrow(); - fail(); - } catch (UncheckedTimeoutException e) { - assertEquals( - "Too little time left (PT0.1S) to call content cluster 'clustername', original timeout was PT3S", - e.getMessage()); - } - } - - @Test - public void makesAtLeast3RequestsWithShortProcessingTime() { - assertStandardTimeouts(); - - Duration shortProcessingTime = Duration.ofMillis(200); - clock.advance(shortProcessingTime); - - assertStandardTimeouts(); - - clock.advance(shortProcessingTime); - - assertStandardTimeouts(); - } - - private void assertStandardTimeouts() { - assertEquals(Duration.ofMillis(100), timeouts.getConnectTimeoutOrThrow()); - assertEquals(Duration.ofMillis(1250), timeouts.getReadTimeoutOrThrow()); - assertEquals(Duration.ofMillis(1050), timeouts.getServerTimeoutOrThrow()); - } - - @Test - public void alreadyTimedOut() { - clock.advance(Duration.ofSeconds(4)); - - try { - timeouts.getServerTimeoutOrThrow(); - fail(); - } catch (UncheckedTimeoutException e) { - assertEquals( - "Exceeded the timeout PT3S against content cluster 'clustername' by PT1S", - e.getMessage()); - } - } - - @Test - public void justTooLittleTime() { - clock.advance(originalTimeout.minus(MINIMUM_TIME_LEFT).plus(Duration.ofMillis(1))); - try { - timeouts.getServerTimeoutOrThrow(); - fail(); - } catch (UncheckedTimeoutException e) { - assertEquals( - "Server would be given too little time to complete: PT0.009S. Original timeout was PT3S", - e.getMessage()); - } - } - - @Test - public void justEnoughTime() { - clock.advance(originalTimeout.minus(MINIMUM_TIME_LEFT)); - timeouts.getServerTimeoutOrThrow(); - } - - @Test - public void justTooLittleInitialTime() { - makeTimeouts(MINIMUM_ORIGINAL_TIMEOUT.minus(Duration.ofMillis(1))); - try { - timeouts.getServerTimeoutOrThrow(); - fail(); - } catch (UncheckedTimeoutException e) { - assertEquals( - "Server would be given too little time to complete: PT0.0095S. Original timeout was PT0.919S", - e.getMessage()); - } - } - - @Test - public void justEnoughInitialTime() { - makeTimeouts(MINIMUM_ORIGINAL_TIMEOUT); - timeouts.getServerTimeoutOrThrow(); - } -} \ No newline at end of file diff --git a/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/controller/RetryingClusterControllerClientFactoryTest.java b/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/controller/RetryingClusterControllerClientFactoryTest.java deleted file mode 100644 index 3b0b1a43085..00000000000 --- a/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/controller/RetryingClusterControllerClientFactoryTest.java +++ /dev/null @@ -1,54 +0,0 @@ -package com.yahoo.vespa.orchestrator.controller; - -import com.yahoo.test.ManualClock; -import com.yahoo.vespa.applicationmodel.HostName; -import com.yahoo.vespa.jaxrs.client.JaxRsClientFactory; -import com.yahoo.vespa.orchestrator.OrchestratorContext; -import org.junit.Test; -import org.mockito.ArgumentCaptor; - -import java.io.IOException; -import java.time.Clock; -import java.util.Arrays; -import java.util.HashMap; -import java.util.List; -import java.util.Map; - -import static org.junit.Assert.assertEquals; -import static org.mockito.Matchers.any; -import static org.mockito.Matchers.eq; -import static org.mockito.Mockito.mock; -import static org.mockito.Mockito.times; -import static org.mockito.Mockito.verify; -import static org.mockito.Mockito.when; - -public class RetryingClusterControllerClientFactoryTest { - private final Clock clock = new ManualClock(); - - @Test - public void verifyJerseyCallForSetNodeState() throws IOException { - JaxRsClientFactory clientFactory = mock(JaxRsClientFactory.class); - ClusterControllerJaxRsApi api = mock(ClusterControllerJaxRsApi.class); - when(clientFactory.createClient(any())).thenReturn(api); - RetryingClusterControllerClientFactory factory = new RetryingClusterControllerClientFactory(clientFactory); - String clusterName = "clustername"; - HostName host1 = new HostName("host1"); - HostName host2 = new HostName("host2"); - HostName host3 = new HostName("host3"); - List clusterControllers = Arrays.asList(host1, host2, host3); - ClusterControllerClient client = factory.createClient(clusterControllers, clusterName); - OrchestratorContext context = OrchestratorContext.createContextForSingleAppOp(clock); - int storageNode = 2; - ClusterControllerNodeState wantedState = ClusterControllerNodeState.MAINTENANCE; - client.setNodeState(context, storageNode, wantedState); - - ArgumentCaptor requestCaptor = ArgumentCaptor.forClass(ClusterControllerStateRequest.class); - - verify(api, times(1)).setNodeState(eq(clusterName), eq(storageNode), eq(4.55f), requestCaptor.capture()); - ClusterControllerStateRequest request = requestCaptor.getValue(); - assertEquals(ClusterControllerStateRequest.Condition.SAFE, request.condition); - Map expectedState = new HashMap<>(); - expectedState.put("user", new ClusterControllerStateRequest.State(wantedState, "Orchestrator")); - assertEquals(expectedState, request.state); - } -} \ No newline at end of file diff --git a/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/controller/SingleInstanceClusterControllerClientFactoryTest.java b/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/controller/SingleInstanceClusterControllerClientFactoryTest.java new file mode 100644 index 00000000000..4dabae14add --- /dev/null +++ b/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/controller/SingleInstanceClusterControllerClientFactoryTest.java @@ -0,0 +1,116 @@ +// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.vespa.orchestrator.controller; + +import com.yahoo.vespa.applicationmodel.ConfigId; +import com.yahoo.vespa.applicationmodel.HostName; +import com.yahoo.vespa.jaxrs.client.JaxRsClientFactory; +import com.yahoo.vespa.orchestrator.OrchestratorContext; +import org.junit.Before; +import org.junit.Test; + +import java.util.Arrays; +import java.util.List; + +import static org.hamcrest.CoreMatchers.anyOf; +import static org.hamcrest.CoreMatchers.equalTo; +import static org.hamcrest.CoreMatchers.is; +import static org.junit.Assert.fail; +import static org.mockito.Matchers.any; +import static org.mockito.Matchers.anyFloat; +import static org.mockito.Matchers.anyInt; +import static org.mockito.Matchers.anyString; +import static org.mockito.Matchers.argThat; +import static org.mockito.Matchers.eq; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.verify; +import static org.mockito.Mockito.when; + +public class SingleInstanceClusterControllerClientFactoryTest { + private static final int PORT = SingleInstanceClusterControllerClientFactory.CLUSTERCONTROLLER_HARDCODED_PORT; + private static final String PATH = SingleInstanceClusterControllerClientFactory.CLUSTERCONTROLLER_API_PATH; + + private static final HostName HOST_NAME_1 = new HostName("host1"); + private static final HostName HOST_NAME_2 = new HostName("host2"); + private static final HostName HOST_NAME_3 = new HostName("host3"); + + OrchestratorContext context = mock(OrchestratorContext.class); + private final ClusterControllerJaxRsApi mockApi = mock(ClusterControllerJaxRsApi.class); + private final JaxRsClientFactory jaxRsClientFactory = mock(JaxRsClientFactory.class); + private final ClusterControllerClientFactory clientFactory + = new SingleInstanceClusterControllerClientFactory(jaxRsClientFactory); + + @Before + public void setup() { + when( + jaxRsClientFactory.createClient( + eq(ClusterControllerJaxRsApi.class), + any(HostName.class), + anyInt(), + anyString(), + anyString())) + .thenReturn(mockApi); + } + + @Test + public void testCreateClientWithNoClusterControllerInstances() throws Exception { + final List clusterControllers = Arrays.asList(); + + try { + clientFactory.createClient(clusterControllers, "clusterName"); + fail(); + } catch (IllegalArgumentException e) { + // As expected. + } + } + + @Test + public void testCreateClientWithSingleClusterControllerInstance() throws Exception { + final List clusterControllers = Arrays.asList(HOST_NAME_1); + + when(context.getSuboperationTimeoutInSeconds(anyFloat())).thenReturn(1.0f); + clientFactory.createClient(clusterControllers, "clusterName") + .setNodeState(context, 0, ClusterControllerNodeState.MAINTENANCE); + + verify(jaxRsClientFactory).createClient( + ClusterControllerJaxRsApi.class, + HOST_NAME_1, + PORT, + PATH, + "http"); + } + + @Test + public void testCreateClientWithoutClusterControllerInstances() throws Exception { + final List clusterControllers = Arrays.asList(); + + try { + clientFactory.createClient(clusterControllers, "clusterName"); + fail(); + } catch (IllegalArgumentException e) { + // As expected. + } + } + + @Test + public void testCreateClientWithThreeClusterControllerInstances() throws Exception { + final List clusterControllers = Arrays.asList(HOST_NAME_1, HOST_NAME_2, HOST_NAME_3); + + when(context.getSuboperationTimeoutInSeconds(anyFloat())).thenReturn(1.0f); + clientFactory.createClient(clusterControllers, "clusterName") + .setNodeState(context, 0, ClusterControllerNodeState.MAINTENANCE); + + verify(jaxRsClientFactory).createClient( + eq(ClusterControllerJaxRsApi.class), + argThat(is(anyOf( + equalTo(HOST_NAME_1), + equalTo(HOST_NAME_2), + equalTo(HOST_NAME_3)))), + eq(PORT), + eq(PATH), + eq("http")); + } + + private static ConfigId clusterControllerConfigId(final int index) { + return new ConfigId("admin/cluster-controllers/" + index); + } +} diff --git a/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/resources/HostResourceTest.java b/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/resources/HostResourceTest.java index a9b8127e7fe..45ba862c8f1 100644 --- a/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/resources/HostResourceTest.java +++ b/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/resources/HostResourceTest.java @@ -1,6 +1,7 @@ // Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. package com.yahoo.vespa.orchestrator.resources; +import com.yahoo.jdisc.Timer; import com.yahoo.vespa.applicationmodel.ApplicationInstance; import com.yahoo.vespa.applicationmodel.ApplicationInstanceId; import com.yahoo.vespa.applicationmodel.ApplicationInstanceReference; @@ -31,7 +32,6 @@ import com.yahoo.vespa.orchestrator.status.ApplicationInstanceStatus; import com.yahoo.vespa.orchestrator.status.HostStatus; import com.yahoo.vespa.orchestrator.status.MutableStatusRegistry; import com.yahoo.vespa.orchestrator.status.StatusService; -import org.junit.Before; import org.junit.Test; import javax.ws.rs.BadRequestException; @@ -41,7 +41,6 @@ import javax.ws.rs.core.UriBuilder; import javax.ws.rs.core.UriInfo; import java.net.URI; import java.time.Clock; -import java.time.Instant; import java.util.Arrays; import java.util.Collections; import java.util.Optional; @@ -75,7 +74,7 @@ public class HostResourceTest { static { when(EVERY_HOST_IS_UP_HOST_STATUS_SERVICE.forApplicationInstance(eq(APPLICATION_INSTANCE_REFERENCE))) .thenReturn(EVERY_HOST_IS_UP_MUTABLE_HOST_STATUS_REGISTRY); - when(EVERY_HOST_IS_UP_HOST_STATUS_SERVICE.lockApplicationInstance_forCurrentThreadOnly(eq(APPLICATION_INSTANCE_REFERENCE), any())) + when(EVERY_HOST_IS_UP_HOST_STATUS_SERVICE.lockApplicationInstance_forCurrentThreadOnly(eq(APPLICATION_INSTANCE_REFERENCE))) .thenReturn(EVERY_HOST_IS_UP_MUTABLE_HOST_STATUS_REGISTRY); when(EVERY_HOST_IS_UP_MUTABLE_HOST_STATUS_REGISTRY.getHostStatus(any())) .thenReturn(HostStatus.NO_REMARKS); @@ -151,11 +150,6 @@ public class HostResourceTest { private final UriInfo uriInfo = mock(UriInfo.class); - @Before - public void setUp() { - when(clock.instant()).thenReturn(Instant.now()); - } - @Test public void returns_200_on_success() { HostResource hostResource = diff --git a/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/status/ZookeeperStatusServiceTest.java b/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/status/ZookeeperStatusServiceTest.java index 44847666670..2e914718e20 100644 --- a/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/status/ZookeeperStatusServiceTest.java +++ b/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/status/ZookeeperStatusServiceTest.java @@ -17,7 +17,6 @@ import org.junit.After; import org.junit.Before; import org.junit.Test; -import java.time.Duration; import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; @@ -82,8 +81,7 @@ public class ZookeeperStatusServiceTest { @Test public void setting_host_state_is_idempotent() { try (MutableStatusRegistry statusRegistry = zookeeperStatusService.lockApplicationInstance_forCurrentThreadOnly( - TestIds.APPLICATION_INSTANCE_REFERENCE, - Duration.ofSeconds(10))) { + TestIds.APPLICATION_INSTANCE_REFERENCE)) { //shuffling to catch "clean database" failures for all cases. for (HostStatus hostStatus: shuffledList(HostStatus.values())) { @@ -107,12 +105,11 @@ public class ZookeeperStatusServiceTest { final CompletableFuture lockedSuccessfullyFuture; try (MutableStatusRegistry statusRegistry = zookeeperStatusService.lockApplicationInstance_forCurrentThreadOnly( - TestIds.APPLICATION_INSTANCE_REFERENCE, - Duration.ofSeconds(10))) { + TestIds.APPLICATION_INSTANCE_REFERENCE)) { lockedSuccessfullyFuture = CompletableFuture.runAsync(() -> { try (MutableStatusRegistry statusRegistry2 = zookeeperStatusService2 - .lockApplicationInstance_forCurrentThreadOnly(TestIds.APPLICATION_INSTANCE_REFERENCE, Duration.ofSeconds(10))) + .lockApplicationInstance_forCurrentThreadOnly(TestIds.APPLICATION_INSTANCE_REFERENCE)) { } }); @@ -134,13 +131,13 @@ public class ZookeeperStatusServiceTest { ZookeeperStatusService zookeeperStatusService2 = new ZookeeperStatusService(curator); try (MutableStatusRegistry statusRegistry = zookeeperStatusService.lockApplicationInstance_forCurrentThreadOnly( - TestIds.APPLICATION_INSTANCE_REFERENCE, Duration.ofSeconds(10))) { + TestIds.APPLICATION_INSTANCE_REFERENCE)) { //must run in separate thread, since having 2 locks in the same thread fails CompletableFuture resultOfZkOperationAfterLockFailure = CompletableFuture.runAsync(() -> { try { zookeeperStatusService2.lockApplicationInstance_forCurrentThreadOnly( - TestIds.APPLICATION_INSTANCE_REFERENCE, Duration.ofSeconds(1)); + TestIds.APPLICATION_INSTANCE_REFERENCE,1); fail("Both zookeeper host status services locked simultaneously for the same application instance"); } catch (RuntimeException e) { } @@ -214,7 +211,7 @@ public class ZookeeperStatusServiceTest { // Suspend try (MutableStatusRegistry statusRegistry = zookeeperStatusService.lockApplicationInstance_forCurrentThreadOnly( - TestIds.APPLICATION_INSTANCE_REFERENCE, Duration.ofSeconds(10))) { + TestIds.APPLICATION_INSTANCE_REFERENCE)) { statusRegistry.setApplicationInstanceStatus(ApplicationInstanceStatus.ALLOWED_TO_BE_DOWN); } @@ -226,7 +223,7 @@ public class ZookeeperStatusServiceTest { // Resume try (MutableStatusRegistry statusRegistry = zookeeperStatusService.lockApplicationInstance_forCurrentThreadOnly( - TestIds.APPLICATION_INSTANCE_REFERENCE, Duration.ofSeconds(10))) { + TestIds.APPLICATION_INSTANCE_REFERENCE)) { statusRegistry.setApplicationInstanceStatus(ApplicationInstanceStatus.NO_REMARKS); } @@ -244,12 +241,12 @@ public class ZookeeperStatusServiceTest { assertThat(suspendedApps.size(), is(0)); try (MutableStatusRegistry statusRegistry = zookeeperStatusService.lockApplicationInstance_forCurrentThreadOnly( - TestIds.APPLICATION_INSTANCE_REFERENCE, Duration.ofSeconds(10))) { + TestIds.APPLICATION_INSTANCE_REFERENCE)) { statusRegistry.setApplicationInstanceStatus(ApplicationInstanceStatus.ALLOWED_TO_BE_DOWN); } try (MutableStatusRegistry statusRegistry = zookeeperStatusService.lockApplicationInstance_forCurrentThreadOnly( - TestIds.APPLICATION_INSTANCE_REFERENCE2, Duration.ofSeconds(10))) { + TestIds.APPLICATION_INSTANCE_REFERENCE2)) { statusRegistry.setApplicationInstanceStatus(ApplicationInstanceStatus.ALLOWED_TO_BE_DOWN); } -- cgit v1.2.3