diff options
author | HÃ¥kon Hallingstad <hakon@oath.com> | 2018-10-29 10:50:07 +0100 |
---|---|---|
committer | GitHub <noreply@github.com> | 2018-10-29 10:50:07 +0100 |
commit | d7e75c63c3c25474e825488c0daaa328e27472d8 (patch) | |
tree | 30f813e6cd54e73180e90affb29dc4a279104211 /orchestrator/src/main | |
parent | 06056362f9190cf74634e4cce5b4ac0da5a6855c (diff) |
Revert "Revert "Enforce CC timeouts in Orchestrator 2""
Diffstat (limited to 'orchestrator/src/main')
11 files changed, 245 insertions, 171 deletions
diff --git a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/Orchestrator.java b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/Orchestrator.java index b2be4fe52ec..df124f2f690 100644 --- a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/Orchestrator.java +++ b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/Orchestrator.java @@ -75,15 +75,6 @@ public interface Orchestrator { void acquirePermissionToRemove(HostName hostName) throws OrchestrationException; /** - * Suspend normal operations for a group of nodes in the same application. - * - * @param nodeGroup The group of nodes in an application. - * @throws HostStateChangeDeniedException if the request cannot be meet due to policy constraints. - * @throws HostNameNotFoundException if any hostnames in the node group is not recognized - */ - void suspendGroup(NodeGroup nodeGroup) throws HostStateChangeDeniedException, HostNameNotFoundException; - - /** * Suspend several hosts. On failure, all hosts are resumed before exiting the method with an exception. */ void suspendAll(HostName parentHostname, List<HostName> hostNames) diff --git a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/OrchestratorContext.java b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/OrchestratorContext.java index 6577b4b96cc..25c6c780130 100644 --- a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/OrchestratorContext.java +++ b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/OrchestratorContext.java @@ -1,40 +1,74 @@ // Copyright 2018 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. package com.yahoo.vespa.orchestrator; -import com.google.common.util.concurrent.UncheckedTimeoutException; import com.yahoo.time.TimeBudget; +import com.yahoo.vespa.orchestrator.controller.ClusterControllerClientTimeouts; import java.time.Clock; import java.time.Duration; +import java.time.Instant; +import java.util.Optional; /** - * Context for the Orchestrator, e.g. timeout management. + * Context for an operation (or suboperation) of the Orchestrator that needs to pass through to the backend, + * e.g. timeout management and probing. * - * @author hakon + * @author hakonhall */ public class OrchestratorContext { - private static final Duration DEFAULT_TIMEOUT = Duration.ofSeconds(10); + private static final Duration DEFAULT_TIMEOUT_FOR_SINGLE_OP = Duration.ofSeconds(10); + private static final Duration DEFAULT_TIMEOUT_FOR_BATCH_OP = Duration.ofSeconds(60); - private TimeBudget timeBudget; + private final Clock clock; + private final TimeBudget timeBudget; + private boolean probe; - public OrchestratorContext(Clock clock) { - this.timeBudget = TimeBudget.fromNow(clock, DEFAULT_TIMEOUT); + public static OrchestratorContext createContextForMultiAppOp(Clock clock) { + return new OrchestratorContext(clock, TimeBudget.fromNow(clock, DEFAULT_TIMEOUT_FOR_BATCH_OP), true); } - /** Get the original timeout in seconds. */ - public long getOriginalTimeoutInSeconds() { - return timeBudget.originalTimeout().get().getSeconds(); + public static OrchestratorContext createContextForSingleAppOp(Clock clock) { + return new OrchestratorContext(clock, TimeBudget.fromNow(clock, DEFAULT_TIMEOUT_FOR_SINGLE_OP), true); } - /** - * Get timeout for a suboperation that should take up {@code shareOfRemaining} of the - * remaining time, or throw an {@link UncheckedTimeoutException} if timed out. - */ - public float getSuboperationTimeoutInSeconds(float shareOfRemaining) { - if (!(0f <= shareOfRemaining && shareOfRemaining <= 1.0f)) { - throw new IllegalArgumentException("Share of remaining time must be between 0 and 1: " + shareOfRemaining); + private OrchestratorContext(Clock clock, TimeBudget timeBudget, boolean probe) { + this.clock = clock; + this.timeBudget = timeBudget; + this.probe = probe; + } + + public Duration getTimeLeft() { + return timeBudget.timeLeftOrThrow().get(); + } + + public ClusterControllerClientTimeouts getClusterControllerTimeouts(String clusterName) { + return new ClusterControllerClientTimeouts(clusterName, timeBudget.timeLeftAsTimeBudget()); + } + + + /** Mark this operation as a non-committal probe. */ + public OrchestratorContext markAsProbe() { + this.probe = true; + return this; + } + + /** Whether the operation is a no-op probe to test whether it would have succeeded, if it had been committal. */ + public boolean isProbe() { + return probe; + } + + /** Create an OrchestratorContext to use within an application lock. */ + public OrchestratorContext createSubcontextForApplication() { + Instant now = clock.instant(); + Instant deadline = timeBudget.deadline().get(); + Instant maxDeadline = now.plus(DEFAULT_TIMEOUT_FOR_SINGLE_OP); + if (maxDeadline.compareTo(deadline) < 0) { + deadline = maxDeadline; } - return shareOfRemaining * timeBudget.timeLeftOrThrow().get().toMillis() / 1000.0f; + return new OrchestratorContext( + clock, + TimeBudget.from(clock, now, Optional.of(Duration.between(now, deadline))), + probe); } } diff --git a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/OrchestratorImpl.java b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/OrchestratorImpl.java index ad8a35312e4..3a2673d9dbc 100644 --- a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/OrchestratorImpl.java +++ b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/OrchestratorImpl.java @@ -105,7 +105,9 @@ public class OrchestratorImpl implements Orchestrator { @Override public void setNodeStatus(HostName hostName, HostStatus status) throws OrchestrationException { ApplicationInstanceReference reference = getApplicationInstance(hostName).reference(); - try (MutableStatusRegistry statusRegistry = statusService.lockApplicationInstance_forCurrentThreadOnly(reference)) { + OrchestratorContext context = OrchestratorContext.createContextForSingleAppOp(clock); + try (MutableStatusRegistry statusRegistry = statusService + .lockApplicationInstance_forCurrentThreadOnly(reference, context.getTimeLeft())) { statusRegistry.setHostState(hostName, status); } } @@ -127,10 +129,10 @@ public class OrchestratorImpl implements Orchestrator { ApplicationInstance appInstance = getApplicationInstance(hostName); - OrchestratorContext context = new OrchestratorContext(clock); + OrchestratorContext context = OrchestratorContext.createContextForSingleAppOp(clock); try (MutableStatusRegistry statusRegistry = statusService.lockApplicationInstance_forCurrentThreadOnly( appInstance.reference(), - context.getOriginalTimeoutInSeconds())) { + context.getTimeLeft())) { final HostStatus currentHostState = statusRegistry.getHostStatus(hostName); if (HostStatus.NO_REMARKS == currentHostState) { @@ -148,7 +150,7 @@ public class OrchestratorImpl implements Orchestrator { public void suspend(HostName hostName) throws HostStateChangeDeniedException, HostNameNotFoundException { ApplicationInstance appInstance = getApplicationInstance(hostName); NodeGroup nodeGroup = new NodeGroup(appInstance, hostName); - suspendGroup(nodeGroup); + suspendGroup(OrchestratorContext.createContextForSingleAppOp(clock), nodeGroup); } @Override @@ -156,10 +158,10 @@ public class OrchestratorImpl implements Orchestrator { ApplicationInstance appInstance = getApplicationInstance(hostName); NodeGroup nodeGroup = new NodeGroup(appInstance, hostName); - OrchestratorContext context = new OrchestratorContext(clock); + OrchestratorContext context = OrchestratorContext.createContextForSingleAppOp(clock); try (MutableStatusRegistry statusRegistry = statusService.lockApplicationInstance_forCurrentThreadOnly( appInstance.reference(), - context.getOriginalTimeoutInSeconds())) { + context.getTimeLeft())) { ApplicationApi applicationApi = new ApplicationApiImpl( nodeGroup, statusRegistry, @@ -169,16 +171,19 @@ public class OrchestratorImpl implements Orchestrator { } } - // Public for testing purposes - @Override - public void suspendGroup(NodeGroup nodeGroup) throws HostStateChangeDeniedException, HostNameNotFoundException { + /** + * Suspend normal operations for a group of nodes in the same application. + * + * @param nodeGroup The group of nodes in an application. + * @throws HostStateChangeDeniedException if the request cannot be meet due to policy constraints. + */ + void suspendGroup(OrchestratorContext context, NodeGroup nodeGroup) throws HostStateChangeDeniedException { ApplicationInstanceReference applicationReference = nodeGroup.getApplicationReference(); - OrchestratorContext context = new OrchestratorContext(clock); try (MutableStatusRegistry hostStatusRegistry = statusService.lockApplicationInstance_forCurrentThreadOnly( applicationReference, - context.getOriginalTimeoutInSeconds())) { + context.getTimeLeft())) { ApplicationInstanceStatus appStatus = statusService.forApplicationInstance(applicationReference).getApplicationInstanceStatus(); if (appStatus == ApplicationInstanceStatus.ALLOWED_TO_BE_DOWN) { return; @@ -224,14 +229,12 @@ public class OrchestratorImpl implements Orchestrator { throw new BatchHostNameNotFoundException(parentHostname, hostNames, e); } + OrchestratorContext context = OrchestratorContext.createContextForMultiAppOp(clock); for (NodeGroup nodeGroup : nodeGroupsOrderedByApplication) { try { - suspendGroup(nodeGroup); + suspendGroup(context.createSubcontextForApplication(), nodeGroup); } catch (HostStateChangeDeniedException e) { throw new BatchHostStateChangeDeniedException(parentHostname, nodeGroup, e); - } catch (HostNameNotFoundException e) { - // Should never get here since since we would have received HostNameNotFoundException earlier. - throw new BatchHostNameNotFoundException(parentHostname, hostNames, e); } catch (RuntimeException e) { throw new BatchInternalErrorException(parentHostname, nodeGroup, e); } @@ -301,12 +304,12 @@ public class OrchestratorImpl implements Orchestrator { private void setApplicationStatus(ApplicationId appId, ApplicationInstanceStatus status) throws ApplicationStateChangeDeniedException, ApplicationIdNotFoundException{ - OrchestratorContext context = new OrchestratorContext(clock); + OrchestratorContext context = OrchestratorContext.createContextForSingleAppOp(clock); ApplicationInstanceReference appRef = OrchestratorUtil.toApplicationInstanceReference(appId, instanceLookupService); try (MutableStatusRegistry statusRegistry = statusService.lockApplicationInstance_forCurrentThreadOnly( appRef, - context.getOriginalTimeoutInSeconds())) { + context.getTimeLeft())) { // Short-circuit if already in wanted state if (status == statusRegistry.getApplicationInstanceStatus()) return; diff --git a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/controller/ClusterControllerClientImpl.java b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/controller/ClusterControllerClientImpl.java index 467b534f809..d7e63ccfc76 100644 --- a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/controller/ClusterControllerClientImpl.java +++ b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/controller/ClusterControllerClientImpl.java @@ -1,6 +1,7 @@ // Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. package com.yahoo.vespa.orchestrator.controller; +import com.google.common.util.concurrent.UncheckedTimeoutException; import com.yahoo.vespa.jaxrs.client.JaxRsStrategy; import com.yahoo.vespa.orchestrator.OrchestratorContext; @@ -15,23 +16,6 @@ public class ClusterControllerClientImpl implements ClusterControllerClient{ public static final String REQUEST_REASON = "Orchestrator"; - // On setNodeState calls against the CC ensemble. - // - // We'd like to set a timeout for the request to the first CC such that if the first - // CC is faulty, there's sufficient time to send the request to the second and third CC. - // The timeouts would be: - // timeout(1. request) = SHARE_REMAINING_TIME * T - // timeout(2. request) = SHARE_REMAINING_TIME * T * (1 - SHARE_REMAINING_TIME) - // timeout(3. request) = SHARE_REMAINING_TIME * T * (1 - SHARE_REMAINING_TIME)^2 - // - // Using a share of 50% gives approximately: - // timeout(1. request) = T * 0.5 - // timeout(2. request) = T * 0.25 - // timeout(3. request) = T * 0.125 - // - // which seems fine - public static final float SHARE_REMAINING_TIME = 0.5f; - private final JaxRsStrategy<ClusterControllerJaxRsApi> clusterControllerApi; private final String clusterName; @@ -52,15 +36,16 @@ public class ClusterControllerClientImpl implements ClusterControllerClient{ ClusterControllerNodeState wantedState) throws IOException { ClusterControllerStateRequest.State state = new ClusterControllerStateRequest.State(wantedState, REQUEST_REASON); ClusterControllerStateRequest stateRequest = new ClusterControllerStateRequest(state, ClusterControllerStateRequest.Condition.SAFE); + ClusterControllerClientTimeouts timeouts = context.getClusterControllerTimeouts(clusterName); try { return clusterControllerApi.apply(api -> api.setNodeState( clusterName, storageNodeIndex, - context.getSuboperationTimeoutInSeconds(SHARE_REMAINING_TIME), - stateRequest) - ); - } catch (IOException e) { + timeouts.getServerTimeoutOrThrow().toMillis() / 1000.0f, + stateRequest), + timeouts); + } catch (IOException | UncheckedTimeoutException e) { String message = String.format( "Giving up setting %s for storage node with index %d in cluster %s", stateRequest, @@ -79,16 +64,18 @@ public class ClusterControllerClientImpl implements ClusterControllerClient{ @Override public ClusterControllerStateResponse setApplicationState( OrchestratorContext context, - final ClusterControllerNodeState wantedState) throws IOException { - final ClusterControllerStateRequest.State state = new ClusterControllerStateRequest.State(wantedState, REQUEST_REASON); - final ClusterControllerStateRequest stateRequest = new ClusterControllerStateRequest(state, ClusterControllerStateRequest.Condition.FORCE); + ClusterControllerNodeState wantedState) throws IOException { + ClusterControllerStateRequest.State state = new ClusterControllerStateRequest.State(wantedState, REQUEST_REASON); + ClusterControllerStateRequest stateRequest = new ClusterControllerStateRequest(state, ClusterControllerStateRequest.Condition.FORCE); + ClusterControllerClientTimeouts timeouts = context.getClusterControllerTimeouts(clusterName); try { return clusterControllerApi.apply(api -> api.setClusterState( clusterName, - context.getSuboperationTimeoutInSeconds(SHARE_REMAINING_TIME), - stateRequest)); - } catch (IOException e) { + timeouts.getServerTimeoutOrThrow().toMillis() / 1000.0f, + stateRequest), + timeouts); + } catch (IOException | UncheckedTimeoutException e) { final String message = String.format( "Giving up setting %s for cluster %s", stateRequest, diff --git a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/controller/ClusterControllerClientTimeouts.java b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/controller/ClusterControllerClientTimeouts.java new file mode 100644 index 00000000000..5b0685e88a0 --- /dev/null +++ b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/controller/ClusterControllerClientTimeouts.java @@ -0,0 +1,125 @@ +package com.yahoo.vespa.orchestrator.controller; + +import com.google.common.util.concurrent.UncheckedTimeoutException; +import com.yahoo.time.TimeBudget; +import com.yahoo.vespa.jaxrs.client.JaxRsTimeouts; + +import java.time.Duration; + +/** + * Calculates various timeouts associated with a REST call from the Orchestrator to the Cluster Controller. + * + * <p>Timeout handling of HTTP messaging is fundamentally flawed in various Java implementations. + * We would like to specify a max time for the whole operation (connect, send request, and receive response). + * Jersey JAX-RS implementation and the Apache HTTP client library provides a way to set the connect timeout C + * and read timeout R. So if the operation takes NR reads, and the writes takes TW time, + * the theoretical max time is: T = C + R * NR + TW. With both NR and TW unknown, there's no way to + * set a proper C and R.</p> + * + * <p>The various timeouts is set according to the following considerations:</p> + * + * <ol> + * <li>Some time is reserved for the execution in this process, e.g. execution leading to the REST call, + * handling of the response, exception handling, etc, such that we can finish processing this request + * before the {@link #timeBudget} deadline. This is typically in the order of ms.</li> + * <li>A timeout will be passed to the Cluster Controller backend. We'll give a timeout such that if one + * CC times out, the next CC will be given exactly the same timeout. This may or may not be a good strategy: + * (A) There's typically a 3rd CC. But if the first 2 fails with timeout, the chance the last is OK + * is negligible. (B) If picking the CC is random, then giving the full timeout to the first + * should be sufficient since a later retry will hit the healthy CC. (C) Because we have been using + * DROP in networking rules, clients may time out (host out of app or whatever). This would suggest + * allowing more than 1 full request.</li> + * <li>The timeout passed to the CC backend should be such that if it honors that, the Orchestrator + * should not time out. This means some kernel and network overhead should be subtracted from the timeout + * passed to the CC.</li> + * <li>We're only able to set the connect and read/write timeouts(!) Since we're communicating within + * data center, assume connections are in the order of ms, while a single read may stall close up to the CC + * timeout.</li> + * </ol> + * + * @author hakonhall + */ +public class ClusterControllerClientTimeouts implements JaxRsTimeouts { + // In data center connect timeout + static final Duration CONNECT_TIMEOUT = Duration.ofMillis(50); + // Per call overhead + static final Duration IN_PROCESS_OVERHEAD_PER_CALL = Duration.ofMillis(50); + // In data center kernel and network overhead. + static final Duration NETWORK_OVERHEAD_PER_CALL = CONNECT_TIMEOUT; + // Minimum time reserved for post-RPC processing to finish BEFORE the deadline, including ZK write. + static final Duration IN_PROCESS_OVERHEAD = Duration.ofMillis(100); + // Number of JAX-RS RPC calls to account for within the time budget. + static final int NUM_CALLS = 2; + // Minimum server-side timeout + static final Duration MIN_SERVER_TIMEOUT = Duration.ofMillis(10); + + private final String clusterName; + private final TimeBudget timeBudget; + private final Duration maxClientTimeout; + + /** + * Creates a timeouts instance. + * + * The {@link #timeBudget} SHOULD be the time budget for a single logical call to the Cluster Controller. + * A logical call to CC may in fact call the CC several times, if the first onces are down and/or not + * the master. + * + * @param clusterName The name of the content cluster this request is for. + * @param timeBudget The time budget for a single logical call to the the Cluster Controller. + */ + public ClusterControllerClientTimeouts(String clusterName, TimeBudget timeBudget) { + this.clusterName = clusterName; + this.timeBudget = timeBudget; + + // timeLeft = inProcessOverhead + numCalls * clientTimeout + maxClientTimeout = timeBudget.originalTimeout().get().minus(IN_PROCESS_OVERHEAD).dividedBy(NUM_CALLS); + } + + @Override + public Duration getConnectTimeoutOrThrow() { + return CONNECT_TIMEOUT; + } + + @Override + public Duration getReadTimeoutOrThrow() { + Duration timeLeft = timeBudget.timeLeft().get(); + if (timeLeft.toMillis() <= 0) { + throw new UncheckedTimeoutException("Exceeded the timeout " + timeBudget.originalTimeout().get() + + " against content cluster '" + clusterName + "' by " + timeLeft.negated()); + } + + Duration clientTimeout = min(timeLeft, maxClientTimeout); + verifyPositive(timeLeft, clientTimeout); + + // clientTimeout = overheadPerCall + connectTimeout + readTimeout + Duration readTimeout = clientTimeout.minus(IN_PROCESS_OVERHEAD_PER_CALL).minus(CONNECT_TIMEOUT); + verifyPositive(timeLeft, readTimeout); + + return readTimeout; + } + + public Duration getServerTimeoutOrThrow() { + // readTimeout = networkOverhead + serverTimeout + Duration serverTimeout = getReadTimeoutOrThrow().minus(NETWORK_OVERHEAD_PER_CALL); + if (serverTimeout.toMillis() < MIN_SERVER_TIMEOUT.toMillis()) { + throw new UncheckedTimeoutException("Server would be given too little time to complete: " + + serverTimeout + ". Original timeout was " + timeBudget.originalTimeout().get()); + } + + return serverTimeout; + } + + private static Duration min(Duration a, Duration b) { + return a.compareTo(b) < 0 ? a : b; + } + + private void verifyLargerThan(Duration timeLeft, Duration availableDuration) { + if (availableDuration.toMillis() <= 0) { + throw new UncheckedTimeoutException("Too little time left (" + timeLeft + + ") to call content cluster '" + clusterName + + "', original timeout was " + timeBudget.originalTimeout().get()); + } + } + + private void verifyPositive(Duration timeLeft, Duration duration) { verifyLargerThan(timeLeft, duration); } +} diff --git a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/controller/RetryingClusterControllerClientFactory.java b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/controller/RetryingClusterControllerClientFactory.java index 5571eedeec6..7a0b96c2231 100644 --- a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/controller/RetryingClusterControllerClientFactory.java +++ b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/controller/RetryingClusterControllerClientFactory.java @@ -8,9 +8,8 @@ import com.yahoo.vespa.jaxrs.client.JaxRsStrategy; import com.yahoo.vespa.jaxrs.client.JaxRsStrategyFactory; import com.yahoo.vespa.jaxrs.client.JerseyJaxRsClientFactory; +import java.util.HashSet; import java.util.List; -import java.util.Set; -import java.util.stream.Collectors; /** * @author bakksjo @@ -21,14 +20,12 @@ public class RetryingClusterControllerClientFactory implements ClusterController public static final int HARDCODED_CLUSTERCONTROLLER_PORT = 19050; public static final String CLUSTERCONTROLLER_API_PATH = "/"; public static final String CLUSTERCONTROLLER_SCHEME = "http"; - private static final int CLUSTER_CONTROLLER_CONNECT_TIMEOUT_MS = 1000; - private static final int CLUSTER_CONTROLLER_READ_TIMEOUT_MS = 1000; private JaxRsClientFactory jaxRsClientFactory; @Inject public RetryingClusterControllerClientFactory() { - this(new JerseyJaxRsClientFactory(CLUSTER_CONTROLLER_CONNECT_TIMEOUT_MS, CLUSTER_CONTROLLER_READ_TIMEOUT_MS)); + this(new JerseyJaxRsClientFactory()); } public RetryingClusterControllerClientFactory(JaxRsClientFactory jaxRsClientFactory) { @@ -36,19 +33,20 @@ public class RetryingClusterControllerClientFactory implements ClusterController } @Override - public ClusterControllerClient createClient(List<HostName> clusterControllers, - String clusterName) { - Set<HostName> clusterControllerSet = clusterControllers.stream().collect(Collectors.toSet()); - JaxRsStrategy<ClusterControllerJaxRsApi> jaxRsApi - = new JaxRsStrategyFactory(clusterControllerSet, HARDCODED_CLUSTERCONTROLLER_PORT, jaxRsClientFactory, CLUSTERCONTROLLER_SCHEME) - .apiWithRetries(ClusterControllerJaxRsApi.class, CLUSTERCONTROLLER_API_PATH) - // Use max iteration 1. The JaxRsStrategyFactory will try host 1, 2, then 3: - // - If host 1 responds, it will redirect to master if necessary. Otherwise - // - If host 2 responds, it will redirect to master if necessary. Otherwise - // - If host 3 responds, it may redirect to master if necessary (if they're up - // after all), but more likely there's no quorum and this will fail too. - .setMaxIterations(1); + public ClusterControllerClient createClient(List<HostName> clusterControllers, String clusterName) { + JaxRsStrategy<ClusterControllerJaxRsApi> jaxRsApi = + new JaxRsStrategyFactory( + new HashSet<>(clusterControllers), + HARDCODED_CLUSTERCONTROLLER_PORT, + jaxRsClientFactory, + CLUSTERCONTROLLER_SCHEME) + .apiWithRetries(ClusterControllerJaxRsApi.class, CLUSTERCONTROLLER_API_PATH) + // Use max iteration 1. The JaxRsStrategyFactory will try host 1, 2, then 3: + // - If host 1 responds, it will redirect to master if necessary. Otherwise + // - If host 2 responds, it will redirect to master if necessary. Otherwise + // - If host 3 responds, it may redirect to master if necessary (if they're up + // after all), but more likely there's no quorum and this will fail too. + .setMaxIterations(1); return new ClusterControllerClientImpl(jaxRsApi, clusterName); } - } diff --git a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/controller/SingleInstanceClusterControllerClientFactory.java b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/controller/SingleInstanceClusterControllerClientFactory.java deleted file mode 100644 index 7459f0a6b11..00000000000 --- a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/controller/SingleInstanceClusterControllerClientFactory.java +++ /dev/null @@ -1,54 +0,0 @@ -// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -package com.yahoo.vespa.orchestrator.controller; - -import com.yahoo.log.LogLevel; -import com.yahoo.vespa.applicationmodel.HostName; -import com.yahoo.vespa.jaxrs.client.JaxRsClientFactory; -import com.yahoo.vespa.jaxrs.client.JaxRsStrategy; -import com.yahoo.vespa.jaxrs.client.NoRetryJaxRsStrategy; - -import java.util.List; -import java.util.logging.Logger; - -/** - * @author bakksjo - */ -public class SingleInstanceClusterControllerClientFactory implements ClusterControllerClientFactory { - - public static final int CLUSTERCONTROLLER_HARDCODED_PORT = 19050; - public static final String CLUSTERCONTROLLER_HARDCODED_SCHEME = "http"; - public static final String CLUSTERCONTROLLER_API_PATH = "/"; - - private static final Logger log = Logger.getLogger(SingleInstanceClusterControllerClientFactory.class.getName()); - - private JaxRsClientFactory jaxRsClientFactory; - - public SingleInstanceClusterControllerClientFactory(JaxRsClientFactory jaxRsClientFactory) { - this.jaxRsClientFactory = jaxRsClientFactory; - } - - @Override - public ClusterControllerClient createClient(List<HostName> clusterControllers, - String clusterName) { - if (clusterControllers.isEmpty()) { - throw new IllegalArgumentException("No cluster controller instances found"); - } - HostName controllerHostName = clusterControllers.iterator().next(); - int port = CLUSTERCONTROLLER_HARDCODED_PORT; // TODO: Get this from service monitor. - - log.log(LogLevel.DEBUG, () -> - "For cluster '" + clusterName + "' with controllers " + clusterControllers - + ", creating api client for " + controllerHostName.s() + ":" + port); - - JaxRsStrategy<ClusterControllerJaxRsApi> strategy = new NoRetryJaxRsStrategy<>( - controllerHostName, - port, - jaxRsClientFactory, - ClusterControllerJaxRsApi.class, - CLUSTERCONTROLLER_API_PATH, - CLUSTERCONTROLLER_HARDCODED_SCHEME); - - return new ClusterControllerClientImpl(strategy, clusterName); - } - -} diff --git a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/model/NodeGroup.java b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/model/NodeGroup.java index ed506c82079..74b4b534acc 100644 --- a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/model/NodeGroup.java +++ b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/model/NodeGroup.java @@ -43,7 +43,7 @@ public class NodeGroup { } public List<HostName> getHostNames() { - return hostNames.stream().collect(Collectors.toList()).stream().sorted().collect(Collectors.toList()); + return hostNames.stream().sorted().collect(Collectors.toList()); } public String toCommaSeparatedString() { diff --git a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/status/InMemoryStatusService.java b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/status/InMemoryStatusService.java index c5ae553a98c..bd5eb6f3e29 100644 --- a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/status/InMemoryStatusService.java +++ b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/status/InMemoryStatusService.java @@ -4,6 +4,7 @@ package com.yahoo.vespa.orchestrator.status; import com.yahoo.vespa.applicationmodel.ApplicationInstanceReference; import com.yahoo.vespa.applicationmodel.HostName; +import java.time.Duration; import java.util.HashMap; import java.util.HashSet; import java.util.Map; @@ -42,15 +43,11 @@ public class InMemoryStatusService implements StatusService { }; } - @Override - public MutableStatusRegistry lockApplicationInstance_forCurrentThreadOnly(ApplicationInstanceReference applicationInstanceReference) { - return lockApplicationInstance_forCurrentThreadOnly(applicationInstanceReference, 10); - } @Override public MutableStatusRegistry lockApplicationInstance_forCurrentThreadOnly( ApplicationInstanceReference applicationInstanceReference, - long timeoutSeconds) { + Duration timeout) { Lock lock = instanceLockService.get(applicationInstanceReference); return new InMemoryMutableStatusRegistry(lock, applicationInstanceReference); } diff --git a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/status/StatusService.java b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/status/StatusService.java index c47be096242..76adef72b2b 100644 --- a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/status/StatusService.java +++ b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/status/StatusService.java @@ -3,6 +3,7 @@ package com.yahoo.vespa.orchestrator.status; import com.yahoo.vespa.applicationmodel.ApplicationInstanceReference; +import java.time.Duration; import java.util.Set; /** @@ -24,7 +25,7 @@ public interface StatusService { * possibly inconsistent snapshot values. It is not recommended that this method is used for anything other * than monitoring, logging, debugging, etc. It should never be used for multi-step operations (e.g. * read-then-write) where consistency is required. For those cases, use - * {@link #lockApplicationInstance_forCurrentThreadOnly(ApplicationInstanceReference)}. + * {@link #lockApplicationInstance_forCurrentThreadOnly(ApplicationInstanceReference, Duration)}. */ ReadOnlyStatusRegistry forApplicationInstance(ApplicationInstanceReference applicationInstanceReference); @@ -52,12 +53,9 @@ public interface StatusService { * this case, subsequent mutating operations will fail, but previous mutating operations are NOT rolled back. * This may leave the registry in an inconsistent state (as judged by the client code). */ - MutableStatusRegistry lockApplicationInstance_forCurrentThreadOnly(ApplicationInstanceReference applicationInstanceReference); - - /** Lock application instance with timeout. */ MutableStatusRegistry lockApplicationInstance_forCurrentThreadOnly( ApplicationInstanceReference applicationInstanceReference, - long timeoutSeconds); + Duration timeout); /** * Returns all application instances that are allowed to be down. The intention is to use this diff --git a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/status/ZookeeperStatusService.java b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/status/ZookeeperStatusService.java index deece6a4a65..7df29e038c1 100644 --- a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/status/ZookeeperStatusService.java +++ b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/status/ZookeeperStatusService.java @@ -56,21 +56,6 @@ public class ZookeeperStatusService implements StatusService { }; } - /** - * 1) locks the status service for an application instance. - * 2) fails all operations in this thread when the session is lost, - * since session loss might cause the lock to be lost. - * Since it only fails operations in this thread, - * all operations depending on a lock, including the locking itself, must be done in this thread. - * Note that since it is the thread that fails, all status operations in this thread will fail - * even if they're not supposed to be guarded by this lock - * (i.e. the request is for another applicationInstanceReference) - */ - @Override - public MutableStatusRegistry lockApplicationInstance_forCurrentThreadOnly(ApplicationInstanceReference applicationInstanceReference) { - return lockApplicationInstance_forCurrentThreadOnly(applicationInstanceReference, 10); - } - @Override public Set<ApplicationInstanceReference> getAllSuspendedApplications() { try { @@ -93,13 +78,23 @@ public class ZookeeperStatusService implements StatusService { } } + /** + * 1) locks the status service for an application instance. + * 2) fails all operations in this thread when the session is lost, + * since session loss might cause the lock to be lost. + * Since it only fails operations in this thread, + * all operations depending on a lock, including the locking itself, must be done in this thread. + * Note that since it is the thread that fails, all status operations in this thread will fail + * even if they're not supposed to be guarded by this lock + * (i.e. the request is for another applicationInstanceReference) + */ @Override public MutableStatusRegistry lockApplicationInstance_forCurrentThreadOnly( ApplicationInstanceReference applicationInstanceReference, - long timeoutSeconds) { + Duration timeout) { String lockPath = applicationInstanceLock2Path(applicationInstanceReference); Lock lock = new Lock(lockPath, curator); - lock.acquire(Duration.ofSeconds(timeoutSeconds)); + lock.acquire(timeout); try { return new ZkMutableStatusRegistry(lock, applicationInstanceReference); |