diff options
author | Jon Bratseth <bratseth@yahoo-inc.com> | 2016-06-15 23:09:44 +0200 |
---|---|---|
committer | Jon Bratseth <bratseth@yahoo-inc.com> | 2016-06-15 23:09:44 +0200 |
commit | 72231250ed81e10d66bfe70701e64fa5fe50f712 (patch) | |
tree | 2728bba1131a6f6e5bdf95afec7d7ff9358dac50 /orchestrator |
Publish
Diffstat (limited to 'orchestrator')
60 files changed, 5495 insertions, 0 deletions
diff --git a/orchestrator/OWNERS b/orchestrator/OWNERS new file mode 100644 index 00000000000..3d08c49311b --- /dev/null +++ b/orchestrator/OWNERS @@ -0,0 +1 @@ +hakon diff --git a/orchestrator/README b/orchestrator/README new file mode 100644 index 00000000000..e30b29fa236 --- /dev/null +++ b/orchestrator/README @@ -0,0 +1 @@ +Determines whether it is safe for a node to halt services for upgrade/restart diff --git a/orchestrator/README.md b/orchestrator/README.md new file mode 100644 index 00000000000..455bd265fcc --- /dev/null +++ b/orchestrator/README.md @@ -0,0 +1,10 @@ +# Orchestrator, a.k.a. Maestro +A service to facilitate safe and staggered restart and upgrades of services in a Vespa instance. +It uses consolidated information from Slobrok and the application model to decide if a hosts +should be allowed to stop its services. + +## TODO: +* Constraint on requests on start-up. +* Constraint on requests after permitting host to go down (it should last at least as long as Slobrok heartbeat cycle). +* Implement caching of host-down decisions. +* Instance resource, exposing the orchestrators current knowledge
\ No newline at end of file diff --git a/orchestrator/pom.xml b/orchestrator/pom.xml new file mode 100644 index 00000000000..7ad5393aa31 --- /dev/null +++ b/orchestrator/pom.xml @@ -0,0 +1,155 @@ +<?xml version="1.0"?> +<!-- Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. --> +<project xmlns="http://maven.apache.org/POM/4.0.0" + xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" + xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 + http://maven.apache.org/xsd/maven-4.0.0.xsd"> + <modelVersion>4.0.0</modelVersion> + <parent> + <groupId>com.yahoo.vespa</groupId> + <artifactId>parent</artifactId> + <version>6-SNAPSHOT</version> + <relativePath>../parent/pom.xml</relativePath> + </parent> + <artifactId>orchestrator</artifactId> + <version>6-SNAPSHOT</version> + <packaging>container-plugin</packaging> + <name>${project.artifactId}</name> + <dependencies> + <dependency> + <groupId>com.yahoo.vespa</groupId> + <artifactId>container-dev</artifactId> + <version>${project.version}</version> + <scope>provided</scope> + </dependency> + <dependency> + <groupId>com.yahoo.vespa</groupId> + <artifactId>service-monitor</artifactId> + <version>${project.version}</version> + <scope>compile</scope> <!-- TODO: Unbundle, but fix JSON serialization of model objects first. --> + </dependency> + <dependency> + <groupId>com.yahoo.vespa</groupId> + <artifactId>orchestrator-restapi</artifactId> + <version>${project.version}</version> + <scope>compile</scope> + </dependency> + <dependency> + <groupId>com.google.auto.value</groupId> + <artifactId>auto-value</artifactId> + <version>1.0-rc1</version> + <scope>provided</scope> + </dependency> + <dependency> + <groupId>com.yahoo.vespa</groupId> + <artifactId>jaxrs_client_utils</artifactId> + <version>${project.version}</version> + <scope>compile</scope> + </dependency> + <dependency> + <groupId>com.yahoo.vespa</groupId> + <artifactId>application</artifactId> + <version>${project.version}</version> + <scope>test</scope> + </dependency> + <dependency> + <groupId>junit</groupId> + <artifactId>junit</artifactId> + <scope>test</scope> + </dependency> + <dependency> + <groupId>org.mockito</groupId> + <artifactId>mockito-core</artifactId> + <scope>test</scope> + </dependency> + <dependency> + <groupId>org.easytesting</groupId> + <artifactId>fest-assert</artifactId> + <version>1.4</version> + <scope>test</scope> + </dependency> + <dependency> + <groupId>org.scala-lang</groupId> + <artifactId>scala-library</artifactId> + <scope>compile</scope> + </dependency> + <dependency> + <groupId>commons-lang</groupId> + <artifactId>commons-lang</artifactId> + </dependency> + <dependency> + <groupId>com.yahoo.vespa</groupId> + <artifactId>config-provisioning</artifactId> + <version>${project.version}</version> + <scope>provided</scope> + </dependency> + <dependency> + <groupId>org.glassfish.jersey.ext</groupId> + <artifactId>jersey-proxy-client</artifactId> + <exclusions> + <exclusion> + <groupId>org.glassfish.jersey.core</groupId> + <artifactId>jersey-common</artifactId> + </exclusion> + <exclusion> + <groupId>javax.ws.rs</groupId> + <artifactId>javax.ws.rs-api</artifactId> + </exclusion> + </exclusions> + </dependency> + <dependency> + <groupId>com.fasterxml.jackson.module</groupId> + <artifactId>jackson-module-scala_${scala.major-version}</artifactId> + <version>${jackson2.version}</version> + <exclusions> + <exclusion> + <groupId>com.fasterxml.jackson.core</groupId> + <artifactId>jackson-core</artifactId> + </exclusion> + <exclusion> + <groupId>com.fasterxml.jackson.core</groupId> + <artifactId>jackson-annotations</artifactId> + </exclusion> + <exclusion> + <groupId>com.fasterxml.jackson.core</groupId> + <artifactId>jackson-databind</artifactId> + </exclusion> + <exclusion> + <groupId>com.google.code.findbugs</groupId> + <artifactId>findbugs</artifactId> + </exclusion> + </exclusions> + </dependency> + <dependency> + <groupId>com.fasterxml.jackson.core</groupId> + <artifactId>jackson-annotations</artifactId> + <version>${jackson2.version}</version> + <scope>provided</scope> + </dependency> + <dependency> + <groupId>com.yahoo.vespa</groupId> + <artifactId>zkfacade</artifactId> + <version>${project.version}</version> + <scope>provided</scope> + </dependency> + <dependency> + <groupId>org.apache.curator</groupId> + <artifactId>curator-test</artifactId> + <scope>test</scope> + </dependency> + <dependency> + <groupId>org.hamcrest</groupId> + <artifactId>hamcrest-all</artifactId> + <scope>test</scope> + </dependency> + </dependencies> + <build> + <plugins> + <plugin> + <groupId>com.yahoo.vespa</groupId> + <artifactId>bundle-plugin</artifactId> + <extensions>true</extensions> + </plugin> + </plugins> + </build> +</project> diff --git a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/ApplicationIdNotFoundException.java b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/ApplicationIdNotFoundException.java new file mode 100644 index 00000000000..0e4535a3a51 --- /dev/null +++ b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/ApplicationIdNotFoundException.java @@ -0,0 +1,10 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.vespa.orchestrator; + +/** + * Thrown when applicationId is invalid or not found. + * + * @author <a href="mailto:smorgrav@yahoo-inc.com">Toby</a> + */ +public class ApplicationIdNotFoundException extends Exception { +} diff --git a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/ApplicationStateChangeDeniedException.java b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/ApplicationStateChangeDeniedException.java new file mode 100644 index 00000000000..28f92b3dca2 --- /dev/null +++ b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/ApplicationStateChangeDeniedException.java @@ -0,0 +1,22 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.vespa.orchestrator; + +/** + * Exception covering all cases where the state change could not + * be executed. + * + * @author <a href="mailto:smorgrav@yahoo-inc.com">Toby</a> + */ +public class ApplicationStateChangeDeniedException extends Exception { + + final String reason; + + public ApplicationStateChangeDeniedException() { + this("Unknown"); + } + + public ApplicationStateChangeDeniedException(String reason) { + super(); + this.reason = reason; + } +} diff --git a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/BatchHostNameNotFoundException.java b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/BatchHostNameNotFoundException.java new file mode 100644 index 00000000000..6da27678b21 --- /dev/null +++ b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/BatchHostNameNotFoundException.java @@ -0,0 +1,15 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.vespa.orchestrator; + +import com.yahoo.vespa.applicationmodel.HostName; + +import java.util.List; + +public class BatchHostNameNotFoundException extends OrchestrationException { + public BatchHostNameNotFoundException(HostName parentHostname, + List<HostName> hostNames, + HostNameNotFoundException e) { + super("Failed to suspend " + hostNames + " with parent host " + + parentHostname + ": " + e.getMessage(), e); + } +} diff --git a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/BatchInternalErrorException.java b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/BatchInternalErrorException.java new file mode 100644 index 00000000000..7bd0237d37b --- /dev/null +++ b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/BatchInternalErrorException.java @@ -0,0 +1,15 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.vespa.orchestrator; + +import com.yahoo.vespa.applicationmodel.HostName; + +import java.util.List; + +public class BatchInternalErrorException extends OrchestrationException { + public BatchInternalErrorException(HostName parentHostname, + List<HostName> orderedHostNames, + RuntimeException e) { + super("Failed to suspend " + orderedHostNames + " with parent host " + + parentHostname + ": " + e.getMessage(), e); + } +} diff --git a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/HostNameNotFoundException.java b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/HostNameNotFoundException.java new file mode 100644 index 00000000000..5919f974234 --- /dev/null +++ b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/HostNameNotFoundException.java @@ -0,0 +1,15 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.vespa.orchestrator; + +import com.yahoo.vespa.applicationmodel.HostName; + +/** + * Exception thrown if hostname is not found in the system (i.e node repo) + * + * @author <a href="mailto:smorgrav@yahoo-inc.com">Toby</a> + */ +public class HostNameNotFoundException extends OrchestrationException { + public HostNameNotFoundException(HostName hostName) { + super("Hostname " + hostName + " not found in any instances"); + } +} diff --git a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/InstanceLookupService.java b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/InstanceLookupService.java new file mode 100644 index 00000000000..11221e3a464 --- /dev/null +++ b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/InstanceLookupService.java @@ -0,0 +1,19 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.vespa.orchestrator; + +import com.yahoo.vespa.applicationmodel.ApplicationInstanceReference; +import com.yahoo.vespa.applicationmodel.ApplicationInstance; +import com.yahoo.vespa.applicationmodel.HostName; +import com.yahoo.vespa.service.monitor.ServiceMonitorStatus; + +import java.util.Optional; +import java.util.Set; + +/** + * @author oyving + */ +public interface InstanceLookupService { + Optional<ApplicationInstance<ServiceMonitorStatus>> findInstanceById(ApplicationInstanceReference applicationInstanceReference); + Optional<ApplicationInstance<ServiceMonitorStatus>> findInstanceByHost(HostName hostName); + Set<ApplicationInstanceReference> knownInstances(); +} diff --git a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/OrchestrationException.java b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/OrchestrationException.java new file mode 100644 index 00000000000..600fe8bfe8b --- /dev/null +++ b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/OrchestrationException.java @@ -0,0 +1,29 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.vespa.orchestrator; + +import java.util.Arrays; + +public class OrchestrationException extends Exception { + public OrchestrationException(Throwable cause) { + super(cause); + } + + public OrchestrationException(String message) { + super(message); + } + + public OrchestrationException(String message, Throwable cause) { + super(message, cause); + } + + // Overrides getMessage() to include suppressed Throwables, which is useful to see which + // "resumes" of a failed suspend succeeded. + @Override + public String getMessage() { + StringBuilder builder = new StringBuilder(); + builder.append(super.getMessage()); + Throwable[] suppressedThrowables = getSuppressed(); + Arrays.stream(suppressedThrowables).forEach(t -> builder.append("; With suppressed throwable " + t)); + return builder.toString(); + } +} diff --git a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/Orchestrator.java b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/Orchestrator.java new file mode 100644 index 00000000000..a7db8e42e46 --- /dev/null +++ b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/Orchestrator.java @@ -0,0 +1,98 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.vespa.orchestrator; + +import com.yahoo.config.provision.ApplicationId; +import com.yahoo.vespa.applicationmodel.HostName; +import com.yahoo.vespa.orchestrator.policy.BatchHostStateChangeDeniedException; +import com.yahoo.vespa.orchestrator.policy.HostStateChangeDeniedException; +import com.yahoo.vespa.orchestrator.status.ApplicationInstanceStatus; +import com.yahoo.vespa.orchestrator.status.HostStatus; + +import java.util.List; +import java.util.Set; + +/** + * The orchestrator is used to coordinate the need of vespa services to restart or + * disconnect from normal operations for debugging or maintenance. We are not coordinating on the service + * level but rather on the granularity of host and application instance. + * + * (A host will have multiple services and an application will have multiple hots) + * + * A policy decides how many hosts can go down at the same time based on which services that runs + * on the hosts, the redundancy in the system, coverage requirements and potentially more + * (see policies for details). + * + * An application level suspend - enables all services to go down at the same time and bypasses the + * host level state and the host level policies. + * This is used for parallel upgrade and larger maintenance tasks. + * + * @author <a href="mailto:smorgrav@yahoo-inc.com">Toby</a> + */ +public interface Orchestrator { + + /** + * Get the status of a given node. If no state is recorded + * then this will return the status 'No Remarks' + * + * @param hostName The FQDN which are used in the noderepo. + * @return The enum describing the current state. + * @throws HostNameNotFoundException if hostName is unrecognized (in node repo) + */ + HostStatus getNodeStatus(final HostName hostName) throws HostNameNotFoundException; + + /** + * Resume normal operation for this host. + * + * @param hostName The FQDN + * @throws HostStateChangeDeniedException if the request cannot be meet due to policy constraints. + * @throws HostNameNotFoundException if the hostName is not recognized in the system (node repo) + */ + void resume(final HostName hostName) throws HostStateChangeDeniedException, HostNameNotFoundException; + + /** + * Suspend normal operations for this host. + * + * @param hostName The FQDN + * @throws HostStateChangeDeniedException if the request cannot be meet due to policy constraints. + * @throws HostNameNotFoundException if the hostName is not recognized in the system (node repo) + */ + void suspend(final HostName hostName) throws HostStateChangeDeniedException, HostNameNotFoundException; + + /** + * Get the orchestrator status of the application instance. + * + * @param appId Identifier of the application to check + * @return The enum describing the current state. + */ + ApplicationInstanceStatus getApplicationInstanceStatus(final ApplicationId appId) throws ApplicationIdNotFoundException; + + /** + * Returns all application instances that are suspended. The intention is to use this + * for visualization, informational and debugging purposes. + * + * @return A Map between the application instance and its status. + */ + Set<ApplicationId> getAllSuspendedApplications(); + + /** + * Resume normal orchestration for hosts belonging to this application. + * + * @param appId Identifier of the application to resume + */ + void resume(final ApplicationId appId) throws ApplicationStateChangeDeniedException, ApplicationIdNotFoundException; + + + /** + * Suspend orchestration for hosts belonging to this application. + * I.e all suspend requests for its hosts will succeed. + * + * @param appId Identifier of the application to resume + */ + void suspend(final ApplicationId appId) throws ApplicationStateChangeDeniedException, ApplicationIdNotFoundException; + + /** + * Suspend all hosts. On failure, all hosts are resumed before exiting the method with an exception. + */ + void suspendAll(HostName parentHostname, List<HostName> hostNames) + throws BatchInternalErrorException, BatchHostStateChangeDeniedException, BatchHostNameNotFoundException; +} diff --git a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/OrchestratorImpl.java b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/OrchestratorImpl.java new file mode 100644 index 00000000000..ad72a43ff72 --- /dev/null +++ b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/OrchestratorImpl.java @@ -0,0 +1,344 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.vespa.orchestrator; + +import com.google.inject.Inject; +import com.yahoo.config.provision.ApplicationId; +import com.yahoo.log.LogLevel; +import com.yahoo.vespa.applicationmodel.ApplicationInstance; +import com.yahoo.vespa.applicationmodel.ApplicationInstanceReference; +import com.yahoo.vespa.applicationmodel.ClusterId; +import com.yahoo.vespa.applicationmodel.HostName; +import com.yahoo.vespa.applicationmodel.ServiceCluster; +import com.yahoo.vespa.orchestrator.controller.ClusterControllerClient; +import com.yahoo.vespa.orchestrator.controller.ClusterControllerClientFactory; +import com.yahoo.vespa.orchestrator.controller.ClusterControllerState; +import com.yahoo.vespa.orchestrator.controller.ClusterControllerStateResponse; +import com.yahoo.vespa.orchestrator.policy.BatchHostStateChangeDeniedException; +import com.yahoo.vespa.orchestrator.policy.HostStateChangeDeniedException; +import com.yahoo.vespa.orchestrator.policy.HostedVespaPolicy; +import com.yahoo.vespa.orchestrator.policy.Policy; +import com.yahoo.vespa.orchestrator.status.ApplicationInstanceStatus; +import com.yahoo.vespa.orchestrator.status.HostStatus; +import com.yahoo.vespa.orchestrator.status.MutableStatusRegistry; +import com.yahoo.vespa.orchestrator.status.StatusService; +import com.yahoo.vespa.service.monitor.ServiceMonitorStatus; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Collections; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.concurrent.TimeUnit; +import java.util.logging.Logger; +import java.util.stream.Collectors; + +/** + * @author oyving + * @author <a href="mailto:smorgrav@yahoo-inc.com">Toby</a> + */ +public class OrchestratorImpl implements Orchestrator { + + + private static final Logger log = Logger.getLogger(OrchestratorImpl.class.getName()); + + private final Policy policy; + private final StatusService statusService; + private final InstanceLookupService instanceLookupService; + private final int serviceMonitorConvergenceLatencySeconds; + private final ClusterControllerClientFactory clusterControllerClientFactory; + + @Inject + public OrchestratorImpl( + final ClusterControllerClientFactory clusterControllerClientFactory, + final StatusService statusService, + final OrchestratorConfig orchestratorConfig, + final InstanceLookupService instanceLookupService) + { + this(new HostedVespaPolicy(clusterControllerClientFactory), + clusterControllerClientFactory, + statusService, + instanceLookupService, + orchestratorConfig.serviceMonitorConvergenceLatencySeconds()); + } + + public OrchestratorImpl( + final Policy policy, + final ClusterControllerClientFactory clusterControllerClientFactory, + final StatusService statusService, + final InstanceLookupService instanceLookupService, + final int serviceMonitorConvergenceLatencySeconds) + { + this.policy = policy; + this.clusterControllerClientFactory = clusterControllerClientFactory; + this.statusService = statusService; + this.serviceMonitorConvergenceLatencySeconds = serviceMonitorConvergenceLatencySeconds; + this.instanceLookupService = instanceLookupService; + + } + + @Override + public HostStatus getNodeStatus(HostName hostName) throws HostNameNotFoundException { + return getNodeStatus(getApplicationInstance(hostName).reference(),hostName); + } + + @Override + public void resume(HostName hostName) throws HostStateChangeDeniedException, HostNameNotFoundException { + /** + * When making a state transition to this state, we have to consider that if the host has been in + * ALLOWED_TO_BE_DOWN state, services on the host may recently have been stopped (and, presumably, started). + * Service monitoring may not have had enough time to detect that services were stopped, + * and may therefore mistakenly report services as up, even if they still haven't initialized and + * are not yet ready for serving. Erroneously reporting both host and services as up causes a race + * where services on other hosts may be stopped prematurely. A delay here ensures that service + * monitoring will have had time to catch up. Since we don't want do the delay with the lock held, + * and the host status service's locking functionality does not support something like condition + * variables or Object.wait(), we break out here, releasing the lock before delaying. + */ + sleep(serviceMonitorConvergenceLatencySeconds, TimeUnit.SECONDS); + + ApplicationInstance<ServiceMonitorStatus> appInstance = getApplicationInstance(hostName); + + try (MutableStatusRegistry statusRegistry = statusService.lockApplicationInstance_forCurrentThreadOnly(appInstance.reference())) { + final HostStatus currentHostState = statusRegistry.getHostStatus(hostName); + + if (HostStatus.NO_REMARKS == currentHostState) { + return; + } + + ApplicationInstanceStatus appStatus = statusService.forApplicationInstance(appInstance.reference()).getApplicationInstanceStatus(); + if (appStatus == ApplicationInstanceStatus.NO_REMARKS) { + policy.releaseSuspensionGrant(appInstance, hostName, statusRegistry); + } + } + } + + @Override + public void suspend(HostName hostName) throws HostStateChangeDeniedException, HostNameNotFoundException { + ApplicationInstance<ServiceMonitorStatus> appInstance = getApplicationInstance(hostName); + + + try (MutableStatusRegistry hostStatusRegistry = statusService.lockApplicationInstance_forCurrentThreadOnly(appInstance.reference())) { + final HostStatus currentHostState = hostStatusRegistry.getHostStatus(hostName); + + if (HostStatus.ALLOWED_TO_BE_DOWN == currentHostState) { + return; + } + + ApplicationInstanceStatus appStatus = statusService.forApplicationInstance(appInstance.reference()).getApplicationInstanceStatus(); + if (appStatus == ApplicationInstanceStatus.NO_REMARKS) { + policy.grantSuspensionRequest(appInstance, hostName, hostStatusRegistry); + } + } + } + + @Override + public ApplicationInstanceStatus getApplicationInstanceStatus( + final ApplicationId appId) throws ApplicationIdNotFoundException { + ApplicationInstanceReference appRef = OrchestratorUtil.toApplicationInstanceReference(appId); + return statusService.forApplicationInstance(appRef).getApplicationInstanceStatus(); + } + + @Override + public Set<ApplicationId> getAllSuspendedApplications() { + Set<ApplicationInstanceReference> refSet = statusService.getAllSuspendedApplications(); + return refSet.stream().map(OrchestratorUtil::toApplicationId).collect(Collectors.toSet()); + } + + @Override + public void resume(ApplicationId appId) throws ApplicationIdNotFoundException, ApplicationStateChangeDeniedException { + setApplicationStatus(appId, ApplicationInstanceStatus.NO_REMARKS); + } + + @Override + public void suspend(ApplicationId appId) throws ApplicationIdNotFoundException, ApplicationStateChangeDeniedException { + setApplicationStatus(appId, ApplicationInstanceStatus.ALLOWED_TO_BE_DOWN); + } + + @Override + public void suspendAll(HostName parentHostname, List<HostName> hostNames) + throws BatchHostStateChangeDeniedException, BatchHostNameNotFoundException, BatchInternalErrorException { + try { + hostNames = sortHostNamesForSuspend(hostNames); + } catch (HostNameNotFoundException e) { + throw new BatchHostNameNotFoundException(parentHostname, hostNames, e); + } + + for (HostName hostName : hostNames) { + try { + suspend(hostName); + } catch (HostStateChangeDeniedException e) { + BatchHostStateChangeDeniedException exception = + new BatchHostStateChangeDeniedException(parentHostname, hostNames, e); + rollbackSuspendAll(hostNames, exception); + throw exception; + } catch (HostNameNotFoundException e) { + // Should never get here since since we would have received HostNameNotFoundException earlier. + BatchHostNameNotFoundException exception = + new BatchHostNameNotFoundException(parentHostname, hostNames, e); + rollbackSuspendAll(hostNames, exception); + throw exception; + } catch (RuntimeException e) { + BatchInternalErrorException exception = + new BatchInternalErrorException(parentHostname, hostNames, e); + rollbackSuspendAll(hostNames, exception); + throw exception; + } + } + } + + private void rollbackSuspendAll(List<HostName> orderedHostNames, Exception exception) { + List<HostName> reverseOrderedHostNames = new ArrayList<>(orderedHostNames); + Collections.reverse(reverseOrderedHostNames); + for (HostName hostName : reverseOrderedHostNames) { + try { + resume(hostName); + } catch (HostStateChangeDeniedException | HostNameNotFoundException | RuntimeException e) { + // We're forced to ignore these since we're already rolling back a suspension. + exception.addSuppressed(e); + } + } + } + + /** + * PROBLEM + * Take the example of 2 Docker hosts: + * - Docker host 1 has two nodes A1 and B1, belonging to the application with + * a globally unique ID A and B, respectively. + * - Similarly, Docker host 2 has two nodes running content nodes A2 and B2, + * and we assume both A1 and A2 (and B1 and B2) have services within the same service cluster. + * + * Suppose both Docker hosts wanting to reboot, and + * - Docker host 1 asks to suspend A1 and B1, while + * - Docker host 2 asks to suspend B2 and A2. + * + * The Orchestrator may allow suspend of A1 and B2, before requesting the suspension of B1 and A2. + * None of these can be suspended (assuming max 1 suspended content node per content cluster), + * and so both requests for suspension will fail. + * + * Note that it's not a deadlock - both client will fail immediately and resume both A1 and B2 before + * responding to the client, and if host 1 asks later w/o host 2 asking at the same time, + * it will be given permission to suspend. However if both hosts were to request in lock-step, + * there would be starvation. And in general, it would fail requests for suspension more + * than necessary. + * + * SOLUTION + * The solution we're using is to order the hostnames by the globally unique application instance ID, + * e.g. hosted-vespa:routing:dev:ci-corp-us-east-1:default. In the example above, it would guarantee + * Docker host 2 would ensure ask to suspend B2 before A2. We take care of that ordering here. + */ + public List<HostName> sortHostNamesForSuspend(List<HostName> hostNames) throws HostNameNotFoundException { + Map<HostName, ApplicationInstanceReference> applicationReferences = new HashMap<>(hostNames.size()); + for (HostName hostName : hostNames) { + ApplicationInstance<?> appInstance = getApplicationInstance(hostName); + applicationReferences.put(hostName, appInstance.reference()); + } + + List<HostName> orderedHostNames = hostNames.stream() + .sorted((leftHostname, rightHostname) -> compareHostNamesForSuspend(leftHostname, rightHostname, applicationReferences)) + .collect(Collectors.toList()); + + return orderedHostNames; + } + + private int compareHostNamesForSuspend( + HostName leftHostname, + HostName rightHostname, + Map<HostName, ApplicationInstanceReference> applicationReferences) { + ApplicationInstanceReference leftApplicationReference = applicationReferences.get(leftHostname); + assert leftApplicationReference != null; + + ApplicationInstanceReference rightApplicationReference = applicationReferences.get(rightHostname); + assert rightApplicationReference != null; + + // ApplicationInstanceReference.toString() is e.g. "hosted-vespa:routing:dev:ci-corp-us-east-1:default" + int diff = leftApplicationReference.toString().compareTo(rightApplicationReference.toString()); + if (diff != 0) { + return diff; + } + + return leftHostname.toString().compareTo(rightHostname.toString()); + } + + private HostStatus getNodeStatus( + final ApplicationInstanceReference applicationRef, + final HostName hostName) { + return statusService.forApplicationInstance(applicationRef).getHostStatus(hostName); + } + + private void setApplicationStatus( + final ApplicationId appId, + final ApplicationInstanceStatus status) throws ApplicationStateChangeDeniedException, ApplicationIdNotFoundException{ + + ApplicationInstanceReference appRef = OrchestratorUtil.toApplicationInstanceReference(appId); + try (MutableStatusRegistry statusRegistry = + statusService.lockApplicationInstance_forCurrentThreadOnly(appRef)) { + + // Short-circuit if already in wanted state + if (status == statusRegistry.getApplicationInstanceStatus()) return; + + // Set content clusters for this application in maintenance on suspend + if (status == ApplicationInstanceStatus.ALLOWED_TO_BE_DOWN) { + ApplicationInstance<ServiceMonitorStatus> application = getApplicationInstance(appRef); + + // Mark it allowed to be down before we manipulate the clustercontroller + OrchestratorUtil.getHostsUsedByApplicationInstance(application).stream() + .forEach(h -> statusRegistry.setHostState(h, HostStatus.ALLOWED_TO_BE_DOWN)); + + // If the clustercontroller throws an error the nodes will be marked as allowed to be down + // and be set back up on next resume invocation. + setClusterStateInController(application, ClusterControllerState.MAINTENANCE); + } + + statusRegistry.setApplicationInstanceStatus(status); + } + } + + private void setClusterStateInController( + final ApplicationInstance<ServiceMonitorStatus> application, + final ClusterControllerState state) throws ApplicationStateChangeDeniedException, ApplicationIdNotFoundException { + + // Get all content clusters for this application + Set<ClusterId> contentClusterIds = application.serviceClusters().stream() + .filter(VespaModelUtil::isContent) + .map(ServiceCluster::clusterId) + .collect(Collectors.toSet()); + + // For all content clusters set in maintenance + log.log(LogLevel.INFO, String.format("Setting content clusters %s for application %s to %s", + contentClusterIds,application.applicationInstanceId(),state)); + for (ClusterId clusterId : contentClusterIds) { + final ClusterControllerClient client = clusterControllerClientFactory.createClient( + VespaModelUtil.getClusterControllerInstances(application, clusterId), + clusterId.s()); + try { + ClusterControllerStateResponse response = client.setApplicationState(state); + if (!response.wasModified) { + String msg = String.format("Fail to set application %s, cluster name %s to cluster state %s due to: %s", + application.applicationInstanceId(), clusterId, state, response.reason); + throw new ApplicationStateChangeDeniedException(msg); + } + } catch (IOException e) { + throw new ApplicationStateChangeDeniedException(e.getMessage()); + } + } + } + + private ApplicationInstance<ServiceMonitorStatus> getApplicationInstance(HostName hostName) throws HostNameNotFoundException{ + return instanceLookupService.findInstanceByHost(hostName).orElseThrow( + () -> new HostNameNotFoundException(hostName)); + } + + private ApplicationInstance<ServiceMonitorStatus> getApplicationInstance(ApplicationInstanceReference appRef) throws ApplicationIdNotFoundException { + return instanceLookupService.findInstanceById(appRef).orElseThrow(ApplicationIdNotFoundException::new); + } + + private static void sleep(final long time, final TimeUnit timeUnit) { + try { + Thread.sleep(timeUnit.toMillis(time)); + } catch (InterruptedException e) { + throw new RuntimeException("Unexpectedly interrupted", e); + } + } +} diff --git a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/OrchestratorUtil.java b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/OrchestratorUtil.java new file mode 100644 index 00000000000..ca7bd058169 --- /dev/null +++ b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/OrchestratorUtil.java @@ -0,0 +1,144 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.vespa.orchestrator; + +import com.yahoo.config.provision.ApplicationId; +import com.yahoo.config.provision.ApplicationName; +import com.yahoo.config.provision.InstanceName; +import com.yahoo.config.provision.TenantName; +import com.yahoo.vespa.orchestrator.status.HostStatus; +import com.yahoo.vespa.orchestrator.status.ReadOnlyStatusRegistry; +import com.yahoo.vespa.applicationmodel.ApplicationInstance; +import com.yahoo.vespa.applicationmodel.ApplicationInstanceId; +import com.yahoo.vespa.applicationmodel.ApplicationInstanceReference; +import com.yahoo.vespa.applicationmodel.HostName; +import com.yahoo.vespa.applicationmodel.ServiceCluster; +import com.yahoo.vespa.applicationmodel.ServiceInstance; +import com.yahoo.vespa.applicationmodel.TenantId; + +import java.util.Collection; +import java.util.Map; +import java.util.Set; +import java.util.function.Function; +import java.util.regex.Matcher; +import java.util.regex.Pattern; +import java.util.stream.Collectors; +import java.util.stream.Stream; + +import static java.util.stream.Collectors.toMap; +import static java.util.stream.Collectors.toSet; + +/** + * Utility methods for working with service monitor model entity objects. + * + * @author <a href="mailto:bakksjo@yahoo-inc.com">Oyvind Bakksjo</a> + */ +public class OrchestratorUtil { + // Utility class, not to be instantiated. + private OrchestratorUtil() {} + + public static Set<HostName> getHostsUsedByApplicationInstance(final ApplicationInstance<?> applicationInstance) { + return applicationInstance.serviceClusters().stream() + .flatMap(serviceCluster -> getHostsUsedByServiceCluster(serviceCluster).stream()) + .collect(toSet()); + } + + public static Set<HostName> getHostsUsedByServiceCluster(final ServiceCluster<?> serviceCluster) { + return serviceCluster.serviceInstances().stream() + .map(ServiceInstance::hostName) + .collect(toSet()); + } + + public static <T> Set<ServiceCluster<T>> getServiceClustersUsingHost( + final Collection<ServiceCluster<T>> serviceClusters, + final HostName hostName) { + return serviceClusters.stream() + .filter(serviceCluster -> hasServiceInstanceOnHost(serviceCluster, hostName)) + .collect(toSet()); + } + + public static Map<HostName, HostStatus> getHostStatusMap( + final Collection<HostName> hosts, + final ReadOnlyStatusRegistry hostStatusService) { + return hosts.stream() + .collect(Collectors.toMap( + hostName -> hostName, + hostName -> hostStatusService.getHostStatus(hostName))); + } + + private static boolean hasServiceInstanceOnHost( + final ServiceCluster<?> serviceCluster, + final HostName hostName) { + return serviceInstancesOnHost(serviceCluster, hostName).count() > 0; + } + + public static <T> Stream<ServiceInstance<T>> serviceInstancesOnHost( + final ServiceCluster<T> serviceCluster, + final HostName hostName) { + return serviceCluster.serviceInstances().stream() + .filter(instance -> instance.hostName().equals(hostName)); + } + + public static <K, V1, V2> Map<K, V2> mapValues( + final Map<K, V1> map, + final Function<V1, V2> valueConverter) { + return map.entrySet().stream() + .collect(toMap(Map.Entry::getKey, entry -> valueConverter.apply(entry.getValue()))); + } + + private static final Pattern APPLICATION_INSTANCE_REFERENCE_REST_FORMAT_PATTERN = Pattern.compile("^([^:]+):(.+)$"); + + /** Returns an ApplicationInstanceReference constructed from the serialized format used in the REST API. */ + public static ApplicationInstanceReference parseAppInstanceReference(final String restFormat) { + if (restFormat == null) { + throw new IllegalArgumentException("Could not construct instance id from null string"); + } + + final Matcher matcher = APPLICATION_INSTANCE_REFERENCE_REST_FORMAT_PATTERN.matcher(restFormat); + if (!matcher.matches()) { + throw new IllegalArgumentException("Could not construct instance id from string \"" + restFormat +"\""); + } + + final TenantId tenantId = new TenantId(matcher.group(1)); + final ApplicationInstanceId applicationInstanceId = new ApplicationInstanceId(matcher.group(2)); + return new ApplicationInstanceReference(tenantId, applicationInstanceId); + } + + public static String toRestApiFormat(final ApplicationInstanceReference applicationInstanceReference) { + return applicationInstanceReference.tenantId() + ":" + applicationInstanceReference.applicationInstanceId(); + } + + + public static ApplicationInstanceReference toApplicationInstanceReference(ApplicationId appId) { + TenantId tenantId = new TenantId(appId.tenant().toString()); + + String appName = appId.application().toString(); + String instanceName = appId.instance().toString(); + ApplicationInstanceId appInstanceId = new ApplicationInstanceId(appName + ":" + instanceName); + + return new ApplicationInstanceReference(tenantId,appInstanceId); + } + + public static ApplicationId toApplicationId(ApplicationInstanceReference appRef) { + TenantName tenantName = TenantName.from(appRef.tenantId().toString()); + + // Now for the application/instance pair we need to split this + String appNameStr = appRef.applicationInstanceId().toString(); + String[] appNameParts = appNameStr.split(":"); + + // We assume a valid application reference has at lest two parts appname:instancename + // TODO is this assumption valid? + if (appNameParts.length < 2) { + // TODO Since this is used internally we should perhapes use another exception type? + throw new IllegalArgumentException("Application reference not valid: " + appRef); + } + + // Last part of string is the instance name + InstanceName instanceName = InstanceName.from(appNameParts[appNameParts.length-1]); + + // The rest is application + int whereAppNameEnds = appNameStr.lastIndexOf(":"); + ApplicationName appName = ApplicationName.from(appNameStr.substring(0, whereAppNameEnds)); + + return ApplicationId.from(tenantName, appName, instanceName); + } +} diff --git a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/ServiceMonitorInstanceLookupService.java b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/ServiceMonitorInstanceLookupService.java new file mode 100644 index 00000000000..a421db2732c --- /dev/null +++ b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/ServiceMonitorInstanceLookupService.java @@ -0,0 +1,71 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.vespa.orchestrator; + +import com.google.inject.Inject; +import com.yahoo.vespa.applicationmodel.ApplicationInstanceReference; +import com.yahoo.vespa.applicationmodel.ApplicationInstance; +import com.yahoo.vespa.applicationmodel.HostName; +import com.yahoo.vespa.service.monitor.ServiceMonitorStatus; +import com.yahoo.vespa.service.monitor.SlobrokAndConfigIntersector; + +import java.util.List; +import java.util.Map; +import java.util.Optional; +import java.util.Set; +import java.util.stream.Collectors; + +/** + * Uses slobrok data (a.k.a. heartbeat) to implement {@link InstanceLookupService}. + * + * @author <a href="mailto:bakksjo@yahoo-inc.com">Oyvind Bakksjo</a> + */ +public class ServiceMonitorInstanceLookupService implements InstanceLookupService { + private final SlobrokAndConfigIntersector slobrokAndConfigIntersector; + + @Inject + public ServiceMonitorInstanceLookupService(final SlobrokAndConfigIntersector slobrokAndConfigIntersector) { + this.slobrokAndConfigIntersector = slobrokAndConfigIntersector; + } + + @Override + public Optional<ApplicationInstance<ServiceMonitorStatus>> findInstanceById( + final ApplicationInstanceReference applicationInstanceReference) { + final Map<ApplicationInstanceReference, ApplicationInstance<ServiceMonitorStatus>> instanceMap + = slobrokAndConfigIntersector.queryStatusOfAllApplicationInstances(); + return Optional.ofNullable(instanceMap.get(applicationInstanceReference)); + } + + @Override + public Optional<ApplicationInstance<ServiceMonitorStatus>> findInstanceByHost(HostName hostName) { + final Map<ApplicationInstanceReference, ApplicationInstance<ServiceMonitorStatus>> instanceMap + = slobrokAndConfigIntersector.queryStatusOfAllApplicationInstances(); + final List<ApplicationInstance<ServiceMonitorStatus>> applicationInstancesUsingHost = instanceMap.entrySet().stream() + .filter(entry -> applicationInstanceUsesHost(entry.getValue(), hostName)) + .map(Map.Entry::getValue) + .collect(Collectors.toList()); + if (applicationInstancesUsingHost.isEmpty()) { + return Optional.empty(); + } + if (applicationInstancesUsingHost.size() > 1) { + throw new AssertionError( + "Major assumption broken: Multiple application instances contain host " + hostName.s() + + ": " + applicationInstancesUsingHost); + } + return Optional.of(applicationInstancesUsingHost.get(0)); + } + + @Override + public Set<ApplicationInstanceReference> knownInstances() { + return slobrokAndConfigIntersector.queryStatusOfAllApplicationInstances().keySet(); + } + + private static boolean applicationInstanceUsesHost( + final ApplicationInstance<ServiceMonitorStatus> applicationInstance, + final HostName hostName) { + return applicationInstance.serviceClusters().stream() + .anyMatch(serviceCluster -> + serviceCluster.serviceInstances().stream() + .anyMatch(serviceInstance -> + serviceInstance.hostName().equals(hostName))); + } +} diff --git a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/VespaModelUtil.java b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/VespaModelUtil.java new file mode 100644 index 00000000000..148c9463f62 --- /dev/null +++ b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/VespaModelUtil.java @@ -0,0 +1,227 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.vespa.orchestrator; + +import com.yahoo.vespa.applicationmodel.ApplicationInstance; +import com.yahoo.vespa.applicationmodel.ClusterId; +import com.yahoo.vespa.applicationmodel.ConfigId; +import com.yahoo.vespa.applicationmodel.HostName; +import com.yahoo.vespa.applicationmodel.ServiceCluster; +import com.yahoo.vespa.applicationmodel.ServiceInstance; +import com.yahoo.vespa.applicationmodel.ServiceType; + +import java.util.Collection; +import java.util.Comparator; +import java.util.Objects; +import java.util.Optional; +import java.util.Set; +import java.util.logging.Logger; +import java.util.regex.Matcher; +import java.util.regex.Pattern; +import java.util.stream.Collectors; + +import static com.yahoo.collections.CollectionUtil.first; + +/** + * Utility methods for working with Vespa-specific model entities (see OrchestratorUtil + * for more generic model utilities). + * + * @author hakon + */ +public class VespaModelUtil { + private static final Logger log = Logger.getLogger(VespaModelUtil.class.getName()); + + public static final ClusterId ADMIN_CLUSTER_ID = new ClusterId("admin"); + + public static final ServiceType SLOBROK_SERVICE_TYPE = new ServiceType("slobrok"); + public static final ServiceType CLUSTER_CONTROLLER_SERVICE_TYPE = new ServiceType("container-clustercontroller"); + public static final ServiceType DISTRIBUTOR_SERVICE_TYPE = new ServiceType("distributor"); + public static final ServiceType SEARCHNODE_SERVICE_TYPE = new ServiceType("searchnode"); + public static final ServiceType STORAGENODE_SERVICE_TYPE = new ServiceType("storagenode"); + + // @return true iff the service cluster refers to a cluster controller service cluster. + public static boolean isClusterController(final ServiceCluster<?> cluster) { + return CLUSTER_CONTROLLER_SERVICE_TYPE.equals(cluster.serviceType()); + } + + /** + * Note that a search node service cluster (service type searchnode aka proton) is + * always accompanied by a storage node service cluster, but not vice versa. + * + * @return true iff the service cluster consists of storage nodes (proton or vds). + */ + public static boolean isStorage(ServiceCluster<?> cluster) { + return STORAGENODE_SERVICE_TYPE.equals(cluster.serviceType()); + } + + /** + * @return true iff the service cluster is a content service cluster. + */ + public static boolean isContent(final ServiceCluster<?> cluster) { + return DISTRIBUTOR_SERVICE_TYPE.equals(cluster.serviceType()) || + SEARCHNODE_SERVICE_TYPE.equals(cluster.serviceType()) || + STORAGENODE_SERVICE_TYPE.equals(cluster.serviceType()); + } + + /** + * @return The set of all Cluster Controller service instances for the application. + */ + public static <T> Set<ServiceInstance<T>> getClusterControllerInstances( + ApplicationInstance<T> application, + ClusterId contentClusterId) + { + final Set<ServiceCluster<T>> controllerClusters = getClusterControllerServiceClusters(application); + + Collection<ServiceCluster<T>> controllerClustersForContentCluster = filter(controllerClusters, contentClusterId); + + if (controllerClustersForContentCluster.size() == 1) { + return first(controllerClustersForContentCluster).serviceInstances(); + } else if (controllerClusters.size() == 1) { + ServiceCluster<T> cluster = first(controllerClusters); + log.warning("No cluster controller cluster for content cluster " + contentClusterId + + ", using the only cluster controller cluster available: " + cluster.clusterId()); + + return cluster.serviceInstances(); + } else { + throw new RuntimeException("Failed getting cluster controller for content cluster " + contentClusterId + + ". Available clusters = " + controllerClusters + + ", matching clusters = " + controllerClustersForContentCluster); + } + } + + private static <T> Collection<ServiceCluster<T>> filter( + Set<ServiceCluster<T>> controllerClusters, + ClusterId contentClusterId) { + ClusterId clusterControllerClusterId = new ClusterId(contentClusterId.s() + "-controllers"); + + return controllerClusters.stream(). + filter(cluster -> cluster.clusterId().equals(clusterControllerClusterId)). + collect(Collectors.toList()); + } + + public static <T> Set<ServiceCluster<T>> getClusterControllerServiceClusters( + final ApplicationInstance<T> application) { + return application.serviceClusters().stream() + .filter(VespaModelUtil::isClusterController) + .collect(Collectors.toSet()); + } + + /** + * @return Host name for a Cluster Controller that is likely to be the master, is !isPresent() if + * no cluster controller was found. + * @throws java.lang.IllegalArgumentException if there are no cluster controller instances. + */ + public static HostName getControllerHostName(ApplicationInstance<?> application, ClusterId contentClusterId) { + // It happens that the master Cluster Controller is the one with the lowest index, if up. + final ServiceInstance<?> serviceInstance = getClusterControllerInstances(application, contentClusterId) + .stream() + .min(Comparator.comparing(instance -> getClusterControllerIndex(instance.configId()))) + .orElseThrow(() -> + new IllegalArgumentException("No cluster controllers found in application " + application)); + return serviceInstance.hostName(); + } + + /** + * A content cluster consists of many content-related service clusters, like distributor and storagenode. + * All of the service clusters within a content cluster have the same service cluster ID, + * which is also called the content ID (specified in services.xml) and also cluster name + * (terminology used in Cluster Controller). The cluster name is used when referring to + * content cluster resources through the HTTP REST on the Cluster Controller. + * + * There may be many content clusters within an application. But only one content cluster may be + * present on any single host, + * + * @return The cluster name managed by a Cluster Controller. + * @throws IllegalArgumentException if there is not exactly one content cluster name. + */ + public static ClusterId getContentClusterName(ApplicationInstance<?> application, HostName hostName) { + Set<ClusterId> contentClusterIdsOnHost = application.serviceClusters().stream() + .filter(VespaModelUtil::isContent) + .filter(cluster -> clusterHasInstanceOnHost(cluster, hostName)) + .map(ServiceCluster::clusterId) + .collect(Collectors.toSet()); + + if (contentClusterIdsOnHost.size() != 1) { + throw new IllegalArgumentException("Expected exactly one content cluster within application " + + application.applicationInstanceId() + " and host " + hostName + ", but found " + + contentClusterIdsOnHost.size() + ": " + contentClusterIdsOnHost + ", application: " + + application); + } + + return contentClusterIdsOnHost.iterator().next(); + } + + private static boolean clusterHasInstanceOnHost(ServiceCluster<?> cluster, HostName hostName) { + return cluster.serviceInstances().stream().anyMatch(service -> Objects.equals(hostName, service.hostName())); + } + + /** + * @return The node index of the storage node running on the host. + * @throws java.lang.IllegalArgumentException if there is not exactly one storage node running on the host, + * or if the index of that storage node could not be found. + */ + public static <T> int getStorageNodeIndex(ApplicationInstance<T> application, HostName hostName) { + Optional<ServiceInstance<T>> storageNode = getStorageNodeAtHost(application, hostName); + if (!storageNode.isPresent()) { + throw new IllegalArgumentException("Failed to find a storage node for application " + + application.applicationInstanceId() + " at host " + hostName); + } + + return getStorageNodeIndex(storageNode.get().configId()); + } + + public static <T> Optional<ServiceInstance<T>> getStorageNodeAtHost( + ApplicationInstance<T> application, HostName hostName) { + Set<ServiceInstance<T>> storageNodesOnHost = application.serviceClusters().stream() + .filter(VespaModelUtil::isStorage) + .flatMap(cluster -> cluster.serviceInstances().stream()) + .filter(service -> service.hostName().equals(hostName)) + .collect(Collectors.toSet()); + + if (storageNodesOnHost.isEmpty()) { + return Optional.empty(); + } + + if (storageNodesOnHost.size() > 1) { + throw new RuntimeException("Expected application " + application.applicationInstanceId() + + " to have exactly one storage node service on host " + hostName + " but got " + + storageNodesOnHost.size() + ": " + storageNodesOnHost); + } + + return storageNodesOnHost.stream().findAny(); + } + + // See getClusterControllerIndex() + private static final Pattern CONTROLLER_INDEX_PATTERN = + Pattern.compile("admin/cluster-controllers/(\\d+)"); + + /** + * @param configId Must be of the form admin/cluster-controllers/2 + * @return the Cluster Controller index given its config ID. + * @throws java.lang.IllegalArgumentException if the config ID is not of the proper format. + */ + public static int getClusterControllerIndex(ConfigId configId) { + Matcher matcher = CONTROLLER_INDEX_PATTERN.matcher(configId.s()); + if (!matcher.matches()) { + throw new IllegalArgumentException("Unable to extract cluster controller index from config ID " + configId); + } + + return Integer.valueOf(matcher.group(1)); + } + + // See getStorageNodeIndex() + private static final Pattern STORAGE_NODE_INDEX_PATTERN = Pattern.compile(".*/(\\d+)"); + + /** + * @param configId Config ID is of the form "storage/storage/3", where 3 is the storage node index. + * @return The storage node index. + * @throws java.lang.IllegalArgumentException If configId does not have the required form. + */ + public static int getStorageNodeIndex(ConfigId configId) { + Matcher matcher = STORAGE_NODE_INDEX_PATTERN.matcher(configId.s()); + if (!matcher.matches()) { + throw new IllegalArgumentException("Unable to extract node index from config ID " + configId); + } + + return Integer.valueOf(matcher.group(1)); + } +} diff --git a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/controller/ClusterControllerClient.java b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/controller/ClusterControllerClient.java new file mode 100644 index 00000000000..5352c81a7a1 --- /dev/null +++ b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/controller/ClusterControllerClient.java @@ -0,0 +1,24 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.vespa.orchestrator.controller; + +import java.io.IOException; + +/** + * @author <a href="mailto:bakksjo@yahoo-inc.com">Oyvind Bakksjo</a> + */ +public interface ClusterControllerClient { + + /** + * Requests that a cluster controller sets the requested node to the requested state. + * + * @throws IOException if there was a problem communicating with the cluster controller + */ + ClusterControllerStateResponse setNodeState(final int storageNodeIndex, final ClusterControllerState wantedState) throws IOException; + + /** + * Requests that a cluster controller sets the requested node to the requested state. + * + * @throws IOException if there was a problem communicating with the cluster controller + */ + ClusterControllerStateResponse setApplicationState(final ClusterControllerState wantedState) throws IOException; +} diff --git a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/controller/ClusterControllerClientFactory.java b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/controller/ClusterControllerClientFactory.java new file mode 100644 index 00000000000..b30a3daab31 --- /dev/null +++ b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/controller/ClusterControllerClientFactory.java @@ -0,0 +1,13 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.vespa.orchestrator.controller; + +import com.yahoo.vespa.applicationmodel.ServiceInstance; + +import java.util.Collection; + +/** + * @author <a href="mailto:bakksjo@yahoo-inc.com">Oyvind Bakksjo</a> + */ +public interface ClusterControllerClientFactory { + ClusterControllerClient createClient(Collection<? extends ServiceInstance<?>> clusterControllers, String clusterName); +} diff --git a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/controller/ClusterControllerClientImpl.java b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/controller/ClusterControllerClientImpl.java new file mode 100644 index 00000000000..79f91ff4255 --- /dev/null +++ b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/controller/ClusterControllerClientImpl.java @@ -0,0 +1,70 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.vespa.orchestrator.controller; + +import com.yahoo.vespa.jaxrs.client.JaxRsStrategy; + +import java.io.IOException; + +/** + * Default implementation of the ClusterControllerClient. + * + * @author <a href="mailto:smorgrav@yahoo-inc.com">Toby</a> + */ +public class ClusterControllerClientImpl implements ClusterControllerClient{ + public static final String REQUEST_REASON = "Orchestrator"; + + private final JaxRsStrategy<ClusterControllerJaxRsApi> clusterControllerApi; + private final String clusterName; + + public ClusterControllerClientImpl( + final JaxRsStrategy<ClusterControllerJaxRsApi> clusterControllerApi, + final String clusterName) { + this.clusterName = clusterName; + this.clusterControllerApi = clusterControllerApi; + } + + /** + * Requests that a cluster controller sets the requested node to the requested state. + * + * @throws IOException if there was a problem communicating with the cluster controller + */ + @Override + public ClusterControllerStateResponse setNodeState(final int storageNodeIndex, final ClusterControllerState wantedState) throws IOException { + final ClusterControllerStateRequest.State state = new ClusterControllerStateRequest.State(wantedState, REQUEST_REASON); + final ClusterControllerStateRequest stateRequest = new ClusterControllerStateRequest(state, ClusterControllerStateRequest.Condition.SAFE); + + try { + return clusterControllerApi.apply(api -> + api.setNodeState(clusterName, storageNodeIndex, stateRequest)); + } catch (IOException e) { + final String message = String.format( + "Giving up setting %s for cluster %s", + stateRequest, + clusterName); + + throw new IOException(message, e); + } + } + + /** + * Requests that a cluster controller sets the requested node to the requested state. + * + * @throws IOException if there was a problem communicating with the cluster controller + */ + @Override + public ClusterControllerStateResponse setApplicationState(final ClusterControllerState wantedState) throws IOException { + final ClusterControllerStateRequest.State state = new ClusterControllerStateRequest.State(wantedState, REQUEST_REASON); + final ClusterControllerStateRequest stateRequest = new ClusterControllerStateRequest(state, ClusterControllerStateRequest.Condition.FORCE); + + try { + return clusterControllerApi.apply(api -> api.setClusterState(clusterName,stateRequest)); + } catch (IOException e) { + final String message = String.format( + "Giving up setting %s for cluster %s", + stateRequest, + clusterName); + + throw new IOException(message, e); + } + } +} diff --git a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/controller/ClusterControllerJaxRsApi.java b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/controller/ClusterControllerJaxRsApi.java new file mode 100644 index 00000000000..e49173d84bf --- /dev/null +++ b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/controller/ClusterControllerJaxRsApi.java @@ -0,0 +1,31 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.vespa.orchestrator.controller; + +import javax.ws.rs.Consumes; +import javax.ws.rs.POST; +import javax.ws.rs.Path; +import javax.ws.rs.PathParam; +import javax.ws.rs.Produces; +import javax.ws.rs.core.MediaType; + +/** + * @author hakon + */ +public interface ClusterControllerJaxRsApi { + @POST + @Path("/cluster/v2/{clusterName}/storage/{storageNodeIndex}") + @Consumes(MediaType.APPLICATION_JSON) + @Produces(MediaType.APPLICATION_JSON) + ClusterControllerStateResponse setNodeState( + @PathParam("clusterName") String clusterName, + @PathParam("storageNodeIndex") int storageNodeIndex, + ClusterControllerStateRequest request); + + @POST + @Path("/cluster/v2/{clusterName}/storage") + @Consumes(MediaType.APPLICATION_JSON) + @Produces(MediaType.APPLICATION_JSON) + ClusterControllerStateResponse setClusterState( + @PathParam("clusterName") String clusterName, + ClusterControllerStateRequest request); +} diff --git a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/controller/ClusterControllerState.java b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/controller/ClusterControllerState.java new file mode 100644 index 00000000000..58f1ef32b10 --- /dev/null +++ b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/controller/ClusterControllerState.java @@ -0,0 +1,28 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.vespa.orchestrator.controller; + +import com.fasterxml.jackson.annotation.JsonValue; + +/** + * This denotes the different states passable through the set-node-state API against the ClusterController. + * In the cluster controller, it maps to com.yahoo.vdslib.state.State. Consider using that instead (however + * that class is already fairly complicated, and may perhaps best be screened from JSON annotations - the only + * thing we need is the enum < - > String conversions). + * + * @author hakon + */ +public enum ClusterControllerState { + MAINTENANCE("maintenance"), + UP("up"); + + private final String wireName; + + ClusterControllerState(final String wireName) { + this.wireName = wireName; + } + + @JsonValue + public String getWireName() { + return wireName; + } +} diff --git a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/controller/ClusterControllerStateRequest.java b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/controller/ClusterControllerStateRequest.java new file mode 100644 index 00000000000..870ba79158c --- /dev/null +++ b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/controller/ClusterControllerStateRequest.java @@ -0,0 +1,95 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.vespa.orchestrator.controller; + +import com.fasterxml.jackson.annotation.JsonProperty; + +import java.util.Collections; +import java.util.Map; +import java.util.Objects; + +/** + * @author hakon + */ +public class ClusterControllerStateRequest { + + @JsonProperty("state") + public final Map<String, State> state; + + @JsonProperty("condition") + public final Condition condition; + + public ClusterControllerStateRequest(State currentState, Condition condition) { + Map<String, State> state = Collections.singletonMap("user", currentState); + this.state = Collections.unmodifiableMap(state); + this.condition = condition; + } + + @Override + public boolean equals(Object object) { + if (!(object instanceof ClusterControllerStateRequest)) { + return false; + } + + final ClusterControllerStateRequest that = (ClusterControllerStateRequest) object; + return Objects.equals(this.state, that.state) + && Objects.equals(this.condition, that.condition); + } + + @Override + public int hashCode() { + return Objects.hash(state, condition); + } + + @Override + public String toString() { + return "NodeStateRequest {" + + " condition=" + condition + + " state=" + state + + " }"; + } + + public static class State { + @JsonProperty("state") + public final ClusterControllerState state; + + /** + * The reason the client is making the request to set the node state. + * Useful for logging in the Cluster Controller. + */ + @JsonProperty("reason") + public final String reason; + + public State(ClusterControllerState state, String reason) { + this.state = state; + this.reason = reason; + } + + @Override + public boolean equals(Object object) { + if (!(object instanceof State)) { + return false; + } + + State that = (State) object; + return this.state.equals(that.state) && + this.reason.equals(that.reason); + } + + @Override + public int hashCode() { + int hash = 1; + hash = 17 * hash + state.hashCode(); + hash = 13 * hash + reason.hashCode(); + return hash; + } + + @Override + public String toString() { + return "reason: " + reason.toString() + ", state: " + state.toString(); + } + } + + public enum Condition { + FORCE, SAFE; + } +} diff --git a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/controller/ClusterControllerStateResponse.java b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/controller/ClusterControllerStateResponse.java new file mode 100644 index 00000000000..16d64566b8e --- /dev/null +++ b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/controller/ClusterControllerStateResponse.java @@ -0,0 +1,25 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.vespa.orchestrator.controller; + +import com.fasterxml.jackson.annotation.JsonCreator; +import com.fasterxml.jackson.annotation.JsonProperty; + +/** + * The response returned by the cluster controller's set-node-state APIs. + * + * @author hakon + */ +public class ClusterControllerStateResponse { + @JsonProperty("wasModified") + public final boolean wasModified; + + @JsonProperty("reason") + public final String reason; + + @JsonCreator + public ClusterControllerStateResponse(@JsonProperty("wasModified") boolean wasModified, + @JsonProperty("reason") String reason) { + this.wasModified = wasModified; + this.reason = reason; + } +} diff --git a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/controller/RetryingClusterControllerClientFactory.java b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/controller/RetryingClusterControllerClientFactory.java new file mode 100644 index 00000000000..2c432f00a28 --- /dev/null +++ b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/controller/RetryingClusterControllerClientFactory.java @@ -0,0 +1,52 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.vespa.orchestrator.controller; + +import com.google.inject.Inject; +import com.yahoo.vespa.applicationmodel.HostName; +import com.yahoo.vespa.applicationmodel.ServiceInstance; +import com.yahoo.vespa.jaxrs.client.JaxRsClientFactory; +import com.yahoo.vespa.jaxrs.client.JaxRsStrategy; +import com.yahoo.vespa.jaxrs.client.JaxRsStrategyFactory; +import com.yahoo.vespa.jaxrs.client.JerseyJaxRsClientFactory; + +import java.util.Collection; +import java.util.Set; +import java.util.stream.Collectors; + +/** + * @author <a href="mailto:bakksjo@yahoo-inc.com">Oyvind Bakksjo</a> + */ +public class RetryingClusterControllerClientFactory implements ClusterControllerClientFactory { + // TODO: Figure this port out dynamically. + public static final int HARDCODED_CLUSTERCONTROLLER_PORT = 19050; + public static final String CLUSTERCONTROLLER_API_PATH = "/"; + private static final int CLUSTER_CONTROLLER_CONNECT_TIMEOUT_MS = 1000; + private static final int CLUSTER_CONTROLLER_READ_TIMEOUT_MS = 1000; + + private JaxRsClientFactory jaxRsClientFactory; + + @Inject + public RetryingClusterControllerClientFactory() { + this(new JerseyJaxRsClientFactory( + CLUSTER_CONTROLLER_CONNECT_TIMEOUT_MS, + CLUSTER_CONTROLLER_READ_TIMEOUT_MS)); + } + + public RetryingClusterControllerClientFactory( + final JaxRsClientFactory jaxRsClientFactory) { + this.jaxRsClientFactory = jaxRsClientFactory; + } + + @Override + public ClusterControllerClient createClient( + final Collection<? extends ServiceInstance<?>> clusterControllers, + final String clusterName) { + final Set<HostName> hostNames = clusterControllers.stream() + .map(ServiceInstance::hostName) + .collect(Collectors.toSet()); + final JaxRsStrategy<ClusterControllerJaxRsApi> jaxRsApi + = new JaxRsStrategyFactory(hostNames, HARDCODED_CLUSTERCONTROLLER_PORT, jaxRsClientFactory) + .apiWithRetries(ClusterControllerJaxRsApi.class, CLUSTERCONTROLLER_API_PATH); + return new ClusterControllerClientImpl(jaxRsApi, clusterName); + } +} diff --git a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/controller/SingleInstanceClusterControllerClientFactory.java b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/controller/SingleInstanceClusterControllerClientFactory.java new file mode 100644 index 00000000000..711d819e348 --- /dev/null +++ b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/controller/SingleInstanceClusterControllerClientFactory.java @@ -0,0 +1,60 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.vespa.orchestrator.controller; + +import com.yahoo.log.LogLevel; +import com.yahoo.vespa.jaxrs.client.JaxRsClientFactory; +import com.yahoo.vespa.jaxrs.client.JaxRsStrategy; +import com.yahoo.vespa.jaxrs.client.NoRetryJaxRsStrategy; +import com.yahoo.vespa.applicationmodel.HostName; +import com.yahoo.vespa.applicationmodel.ServiceInstance; + +import java.util.Collection; +import java.util.Comparator; +import java.util.logging.Logger; + +import static com.yahoo.vespa.orchestrator.VespaModelUtil.getClusterControllerIndex; + +/** + * @author <a href="mailto:bakksjo@yahoo-inc.com">Oyvind Bakksjo</a> + */ +public class SingleInstanceClusterControllerClientFactory implements ClusterControllerClientFactory { + public static final int CLUSTERCONTROLLER_HARDCODED_PORT = 19050; + public static final String CLUSTERCONTROLLER_API_PATH = "/"; + + private static final Logger log = Logger.getLogger(SingleInstanceClusterControllerClientFactory.class.getName()); + + private static final Comparator<ServiceInstance<?>> CLUSTER_CONTROLLER_INDEX_COMPARATOR = Comparator.comparing( + serviceInstance -> + getClusterControllerIndex(serviceInstance.configId())); + + private JaxRsClientFactory jaxRsClientFactory; + + public SingleInstanceClusterControllerClientFactory( + final JaxRsClientFactory jaxRsClientFactory) { + this.jaxRsClientFactory = jaxRsClientFactory; + } + + @Override + public ClusterControllerClient createClient( + final Collection<? extends ServiceInstance<?>> clusterControllers, + final String clusterName) { + final ServiceInstance<?> serviceInstance = clusterControllers.stream() + .min(CLUSTER_CONTROLLER_INDEX_COMPARATOR) + .orElseThrow(() -> new IllegalArgumentException("No cluster controller instances found")); + final HostName controllerHostName = serviceInstance.hostName(); + final int port = CLUSTERCONTROLLER_HARDCODED_PORT; // TODO: Get this from service monitor. + + log.log(LogLevel.DEBUG, () -> + "For cluster '" + clusterName + "' with controllers " + clusterControllers + + ", creating api client for " + controllerHostName.s() + ":" + port); + + final JaxRsStrategy<ClusterControllerJaxRsApi> strategy = new NoRetryJaxRsStrategy<>( + controllerHostName, + port, + jaxRsClientFactory, + ClusterControllerJaxRsApi.class, + CLUSTERCONTROLLER_API_PATH); + + return new ClusterControllerClientImpl(strategy, clusterName); + } +} diff --git a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/package-info.java b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/package-info.java new file mode 100644 index 00000000000..f426d9266b1 --- /dev/null +++ b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/package-info.java @@ -0,0 +1,5 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +@ExportPackage +package com.yahoo.vespa.orchestrator; + +import com.yahoo.osgi.annotation.ExportPackage; diff --git a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/policy/BatchHostStateChangeDeniedException.java b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/policy/BatchHostStateChangeDeniedException.java new file mode 100644 index 00000000000..affe9557013 --- /dev/null +++ b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/policy/BatchHostStateChangeDeniedException.java @@ -0,0 +1,17 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.vespa.orchestrator.policy; + +import com.yahoo.vespa.applicationmodel.HostName; +import com.yahoo.vespa.orchestrator.OrchestrationException; + +import java.util.List; + +public class BatchHostStateChangeDeniedException extends OrchestrationException { + public BatchHostStateChangeDeniedException(HostName parentHostname, + List<HostName> orderedHostNames, + HostStateChangeDeniedException e) { + super("Failed to suspend " + orderedHostNames + " with parent host " + + parentHostname + ": " + e.getMessage(), e); + + } +} diff --git a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/policy/HostStateChangeDeniedException.java b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/policy/HostStateChangeDeniedException.java new file mode 100644 index 00000000000..6e12de9bfe7 --- /dev/null +++ b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/policy/HostStateChangeDeniedException.java @@ -0,0 +1,51 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.vespa.orchestrator.policy; + +import com.yahoo.vespa.applicationmodel.HostName; +import com.yahoo.vespa.applicationmodel.ServiceType; +import com.yahoo.vespa.orchestrator.OrchestrationException; + +/** + * @author <a href="mailto:bakksjo@yahoo-inc.com">Oyvind Bakksjo</a> + */ +public class HostStateChangeDeniedException extends OrchestrationException { + private final String constraintName; + private final ServiceType serviceType; + + public HostStateChangeDeniedException( + final HostName hostName, + final String constraintName, + final ServiceType serviceType, + final String message) { + super(createMessage(hostName, constraintName, serviceType, message)); + this.constraintName = constraintName; + this.serviceType = serviceType; + } + + public HostStateChangeDeniedException( + final HostName hostName, + final String constraintName, + final ServiceType serviceType, + final String message, + final Throwable cause) { + super(createMessage(hostName, constraintName, serviceType, message), cause); + this.constraintName = constraintName; + this.serviceType = serviceType; + } + + private static String createMessage(final HostName hostName, + final String constraintName, + final ServiceType serviceType, + final String message) { + return "Changing the state of host " + hostName + " would violate " + constraintName + + " for service type " + serviceType + ": " + message; + } + + public String getConstraintName() { + return constraintName; + } + + public ServiceType getServiceType() { + return serviceType; + } +} diff --git a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/policy/HostedVespaPolicy.java b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/policy/HostedVespaPolicy.java new file mode 100644 index 00000000000..88674301b21 --- /dev/null +++ b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/policy/HostedVespaPolicy.java @@ -0,0 +1,232 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.vespa.orchestrator.policy; + +import com.yahoo.log.LogLevel; +import com.yahoo.vespa.applicationmodel.ApplicationInstance; +import com.yahoo.vespa.applicationmodel.ClusterId; +import com.yahoo.vespa.applicationmodel.HostName; +import com.yahoo.vespa.applicationmodel.ServiceCluster; +import com.yahoo.vespa.applicationmodel.ServiceInstance; +import com.yahoo.vespa.orchestrator.VespaModelUtil; +import com.yahoo.vespa.orchestrator.controller.ClusterControllerClient; +import com.yahoo.vespa.orchestrator.controller.ClusterControllerClientFactory; +import com.yahoo.vespa.orchestrator.controller.ClusterControllerState; +import com.yahoo.vespa.orchestrator.controller.ClusterControllerStateResponse; +import com.yahoo.vespa.orchestrator.status.HostStatus; +import com.yahoo.vespa.orchestrator.status.MutableStatusRegistry; +import com.yahoo.vespa.service.monitor.ServiceMonitorStatus; + +import java.io.IOException; +import java.util.Collections; +import java.util.HashSet; +import java.util.Map; +import java.util.Set; +import java.util.logging.Logger; +import java.util.stream.Collectors; + +import static com.yahoo.vespa.orchestrator.OrchestratorUtil.getHostStatusMap; +import static com.yahoo.vespa.orchestrator.OrchestratorUtil.getHostsUsedByApplicationInstance; +import static com.yahoo.vespa.orchestrator.OrchestratorUtil.getServiceClustersUsingHost; + +/** + * @author oyving + */ + +public class HostedVespaPolicy implements Policy { + + public static final String ENOUGH_SERVICES_UP_CONSTRAINT = "enough-services-up"; + public static final String SET_NODE_STATE_CONSTRAINT = "controller-set-node-state"; + public static final String CLUSTER_CONTROLLER_AVAILABLE_CONSTRAINT = "controller-available"; + + private static final Logger log = Logger.getLogger(HostedVespaPolicy.class.getName()); + + private final ClusterControllerClientFactory clusterControllerClientFactory; + + public HostedVespaPolicy(ClusterControllerClientFactory clusterControllerClientFactory) { + this.clusterControllerClientFactory = clusterControllerClientFactory; + } + + private static long numContentServiceClusters(Set<? extends ServiceCluster<?>> serviceClustersOnHost) { + return serviceClustersOnHost.stream().filter(VespaModelUtil::isContent).count(); + } + + + @Override + public void grantSuspensionRequest( + final ApplicationInstance<ServiceMonitorStatus> applicationInstance, + final HostName hostName, + final MutableStatusRegistry hostStatusService) throws HostStateChangeDeniedException { + + Set<ServiceCluster<ServiceMonitorStatus>> serviceClustersOnHost = + getServiceClustersUsingHost(applicationInstance.serviceClusters(), hostName); + + final Map<HostName, HostStatus> hostStatusMap = getHostStatusMap( + getHostsUsedByApplicationInstance(applicationInstance), + hostStatusService); + + boolean hasUpStorageInstance = false; + for (final ServiceCluster<ServiceMonitorStatus> serviceCluster : serviceClustersOnHost) { + final Set<ServiceInstance<ServiceMonitorStatus>> instancesOnThisHost; + final Set<ServiceInstance<ServiceMonitorStatus>> instancesOnOtherHosts; + { + final Map<Boolean, Set<ServiceInstance<ServiceMonitorStatus>>> serviceInstancesByLocality = + serviceCluster.serviceInstances().stream() + .collect( + Collectors.groupingBy( + instance -> instance.hostName().equals(hostName), + Collectors.toSet())); + instancesOnThisHost = serviceInstancesByLocality.getOrDefault(true, Collections.emptySet()); + instancesOnOtherHosts = serviceInstancesByLocality.getOrDefault(false, Collections.emptySet()); + } + + if (VespaModelUtil.isStorage(serviceCluster)) { + final boolean thisHostHasSomeUpInstances = instancesOnThisHost.stream() + .map(ServiceInstance::serviceStatus) + .anyMatch(status -> status == ServiceMonitorStatus.UP); + if (thisHostHasSomeUpInstances) { + hasUpStorageInstance = true; + } + } + + final boolean thisHostHasOnlyDownInstances = instancesOnThisHost.stream() + .map(ServiceInstance::serviceStatus) + .allMatch(status -> status == ServiceMonitorStatus.DOWN); + if (thisHostHasOnlyDownInstances) { + // Suspending this host will not make a difference for this cluster, so no need to investigate further. + continue; + } + + final Set<ServiceInstance<ServiceMonitorStatus>> possiblyDownInstancesOnOtherHosts = + instancesOnOtherHosts.stream() + .filter(instance -> effectivelyDown(instance, hostStatusMap)) + .collect(Collectors.toSet()); + if (possiblyDownInstancesOnOtherHosts.isEmpty()) { + // This short-circuits the percentage calculation below and ensures that we can always upgrade + // any cluster by allowing one host at the time to be suspended, no matter what percentage of + // the cluster that host amounts to. + continue; + } + + // Now calculate what the service suspension percentage will be if we suspend this host. + final int numServiceInstancesTotal = serviceCluster.serviceInstances().size(); + final int numInstancesThatWillBeSuspended = union(possiblyDownInstancesOnOtherHosts, instancesOnThisHost).size(); + final int percentThatWillBeSuspended = numInstancesThatWillBeSuspended * 100 / numServiceInstancesTotal; + final int suspendPercentageAllowed = ServiceClusterSuspendPolicy.getSuspendPercentageAllowed(serviceCluster); + if (percentThatWillBeSuspended > suspendPercentageAllowed) { + // It may seem like this may in some cases prevent upgrading, especially for small clusters (where the + // percentage of service instances affected by suspending a single host may easily exceed the allowed + // suspension percentage). Note that we always allow progress by allowing a single host to suspend. + // See previous section. + final int currentSuspensionPercentage + = possiblyDownInstancesOnOtherHosts.size() * 100 / numServiceInstancesTotal; + final Set<HostName> otherHostsWithThisServiceCluster = instancesOnOtherHosts.stream() + .map(ServiceInstance::hostName) + .collect(Collectors.toSet()); + final Set<HostName> hostsAllowedToBeDown = hostStatusMap.entrySet().stream() + .filter(entry -> entry.getValue() == HostStatus.ALLOWED_TO_BE_DOWN) + .map(Map.Entry::getKey) + .collect(Collectors.toSet()); + final Set<HostName> otherHostsAllowedToBeDown + = intersection(otherHostsWithThisServiceCluster, hostsAllowedToBeDown); + throw new HostStateChangeDeniedException( + hostName, + ENOUGH_SERVICES_UP_CONSTRAINT, + serviceCluster.serviceType(), + "Suspension percentage would increase from " + currentSuspensionPercentage + + "% to " + percentThatWillBeSuspended + + "%, over the limit of " + suspendPercentageAllowed + "%." + + " These instances may be down: " + possiblyDownInstancesOnOtherHosts + + " and these hosts are allowed to be down: " + otherHostsAllowedToBeDown + ); + } + } + + if (hasUpStorageInstance) { + // If there is an UP storage service on the host, we need to make sure + // there's sufficient redundancy before allowing the suspension. This will + // also avoid redistribution (which is unavoidable if the storage instance + // is already down). + setNodeStateInController(applicationInstance, hostName, ClusterControllerState.MAINTENANCE); + } + + + // We have "go" for suspending the services on the host,store decision. + hostStatusService.setHostState(hostName, HostStatus.ALLOWED_TO_BE_DOWN); + log.log(LogLevel.INFO, hostName + " is now allowed to be down (suspended)"); + } + + private static <T> Set<T> union(final Set<T> setA, Set<T> setB) { + final Set<T> union = new HashSet<>(setA); + union.addAll(setB); + return union; + } + + private static <T> Set<T> intersection(final Set<T> setA, Set<T> setB) { + final Set<T> intersection = new HashSet<>(setA); + intersection.retainAll(setB); + return intersection; + } + + @Override + public void releaseSuspensionGrant( + final ApplicationInstance<ServiceMonitorStatus> applicationInstance, + final HostName hostName, + final MutableStatusRegistry hostStatusService) throws HostStateChangeDeniedException { + Set<ServiceCluster<ServiceMonitorStatus>> serviceClustersOnHost = + getServiceClustersUsingHost(applicationInstance.serviceClusters(), hostName); + + // TODO: Always defer to Cluster Controller whether it's OK to resume host (if content node). + if (numContentServiceClusters(serviceClustersOnHost) > 0) { + setNodeStateInController(applicationInstance, hostName, ClusterControllerState.UP); + } + hostStatusService.setHostState(hostName, HostStatus.NO_REMARKS); + log.log(LogLevel.INFO, hostName + " is no longer allowed to be down (resumed)"); + } + + private static boolean effectivelyDown( + final ServiceInstance<ServiceMonitorStatus> serviceInstance, + final Map<HostName, HostStatus> hostStatusMap) { + final ServiceMonitorStatus instanceStatus = serviceInstance.serviceStatus(); + final HostStatus hostStatus = hostStatusMap.get(serviceInstance.hostName()); + return hostStatus == HostStatus.ALLOWED_TO_BE_DOWN || instanceStatus == ServiceMonitorStatus.DOWN; + } + + private void setNodeStateInController( + ApplicationInstance<?> application, + HostName hostName, + ClusterControllerState nodeState) throws HostStateChangeDeniedException { + ClusterId contentClusterId = VespaModelUtil.getContentClusterName(application, hostName); + Set<? extends ServiceInstance<?>> clusterControllers = VespaModelUtil.getClusterControllerInstances(application, contentClusterId); + final ClusterControllerClient client = clusterControllerClientFactory.createClient( + clusterControllers, + contentClusterId.s()); + int nodeIndex = VespaModelUtil.getStorageNodeIndex(application, hostName); + + log.log(LogLevel.DEBUG, + "application " + application.applicationInstanceId() + + ", host " + hostName + + ", cluster name " + contentClusterId + + ", node index " + nodeIndex + + ", node state " + nodeState); + + final ClusterControllerStateResponse response; + try { + response = client.setNodeState(nodeIndex, nodeState); + } catch (IOException e) { + throw new HostStateChangeDeniedException( + hostName, + CLUSTER_CONTROLLER_AVAILABLE_CONSTRAINT, + VespaModelUtil.CLUSTER_CONTROLLER_SERVICE_TYPE, + "Failed to communicate with cluster controllers " + clusterControllers, + e); + } + + if (!response.wasModified) { + throw new HostStateChangeDeniedException( + hostName, + SET_NODE_STATE_CONSTRAINT, + VespaModelUtil.CLUSTER_CONTROLLER_SERVICE_TYPE, + "Failed to set state to " + nodeState + " in controller: " + response.reason); + } + } +} diff --git a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/policy/Policy.java b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/policy/Policy.java new file mode 100644 index 00000000000..53992436ea5 --- /dev/null +++ b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/policy/Policy.java @@ -0,0 +1,32 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.vespa.orchestrator.policy; + +import com.yahoo.vespa.orchestrator.status.MutableStatusRegistry; +import com.yahoo.vespa.applicationmodel.ApplicationInstance; +import com.yahoo.vespa.applicationmodel.HostName; +import com.yahoo.vespa.service.monitor.ServiceMonitorStatus; + +/** + * @author oyving + */ +public interface Policy { + /** + * Decide whether to grant a request for temporarily suspending the services on a host. + * + * @throws HostStateChangeDeniedException if the grant was not given. + */ + void grantSuspensionRequest( + ApplicationInstance<ServiceMonitorStatus> applicationInstance, + HostName hostName, + MutableStatusRegistry hostStatusService) throws HostStateChangeDeniedException; + + /** + * Release an earlier grant for suspension. + * + * @throws HostStateChangeDeniedException if the release failed. + */ + void releaseSuspensionGrant( + ApplicationInstance<ServiceMonitorStatus> applicationInstance, + HostName hostName, + MutableStatusRegistry hostStatusService) throws HostStateChangeDeniedException; +} diff --git a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/policy/ServiceClusterSuspendPolicy.java b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/policy/ServiceClusterSuspendPolicy.java new file mode 100644 index 00000000000..b2fdd7878d3 --- /dev/null +++ b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/policy/ServiceClusterSuspendPolicy.java @@ -0,0 +1,33 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.vespa.orchestrator.policy; + +import com.yahoo.vespa.orchestrator.VespaModelUtil; +import com.yahoo.vespa.applicationmodel.ServiceCluster; + +/** + * @author hakon + * @author bakksjo + */ +public final class ServiceClusterSuspendPolicy { + private static final int SUSPENSION_ALLOW_MINIMAL = 0; + private static final int SUSPENSION_ALLOW_TEN_PERCENT = 10; + private static final int SUSPENSION_ALLOW_ALL = 100; + + private ServiceClusterSuspendPolicy() {} // Disallow instantiation. + + public static int getSuspendPercentageAllowed(final ServiceCluster<?> serviceCluster) { + if (VespaModelUtil.ADMIN_CLUSTER_ID.equals(serviceCluster.clusterId())) { + if (VespaModelUtil.SLOBROK_SERVICE_TYPE.equals(serviceCluster.serviceType())) { + return SUSPENSION_ALLOW_MINIMAL; + } + + return SUSPENSION_ALLOW_ALL; + } + + if (VespaModelUtil.isStorage(serviceCluster)) { + return SUSPENSION_ALLOW_MINIMAL; + } + + return SUSPENSION_ALLOW_TEN_PERCENT; + } +} diff --git a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/resources/ApplicationSuspensionResource.java b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/resources/ApplicationSuspensionResource.java new file mode 100644 index 00000000000..f3d87db5d20 --- /dev/null +++ b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/resources/ApplicationSuspensionResource.java @@ -0,0 +1,121 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.vespa.orchestrator.resources; + +import com.yahoo.config.provision.ApplicationId; +import com.yahoo.container.jaxrs.annotation.Component; +import com.yahoo.log.LogLevel; +import com.yahoo.vespa.orchestrator.ApplicationIdNotFoundException; +import com.yahoo.vespa.orchestrator.ApplicationStateChangeDeniedException; +import com.yahoo.vespa.orchestrator.OrchestratorImpl; +import com.yahoo.vespa.orchestrator.restapi.ApplicationSuspensionApi; +import com.yahoo.vespa.orchestrator.status.ApplicationInstanceStatus; + +import javax.inject.Inject; +import javax.ws.rs.BadRequestException; +import javax.ws.rs.InternalServerErrorException; +import javax.ws.rs.NotFoundException; +import javax.ws.rs.Path; +import javax.ws.rs.WebApplicationException; +import javax.ws.rs.core.Response; +import java.util.Set; +import java.util.logging.Logger; +import java.util.stream.Collectors; + +/** + * @author <a href="mailto:smorgrav@yahoo-inc.com">Toby</a> + */ +@Path(ApplicationSuspensionApi.PATH_PREFIX) +public class ApplicationSuspensionResource implements ApplicationSuspensionApi { + private static final Logger log = Logger.getLogger(ApplicationSuspensionResource.class.getName()); + + private final OrchestratorImpl orchestrator; + + @Inject + public ApplicationSuspensionResource( + @Component OrchestratorImpl orchestrator) { + this.orchestrator = orchestrator; + } + + @Override + public Set<String> getApplications() { + Set<ApplicationId> refs = orchestrator.getAllSuspendedApplications(); + return refs.stream().map(ApplicationId::serializedForm).collect(Collectors.toSet()); + } + + @Override + public void getApplication(String applicationIdString) { + ApplicationId appId = toApplicationId(applicationIdString); + ApplicationInstanceStatus status; + + try { + status = orchestrator.getApplicationInstanceStatus(appId); + } catch (ApplicationIdNotFoundException e) { + throw new NotFoundException("Application " + applicationIdString + " could not be found"); + } + + if (status.equals(ApplicationInstanceStatus.NO_REMARKS)) { + throw new NotFoundException("Application " + applicationIdString + " is not suspended"); + } + + // Return void as we have nothing to return except 204 No + // Content. Unfortunately, Jersey outputs a warning for this case: + // + // The following warnings have been detected: HINT: A HTTP GET + // method, public void com.yahoo.vespa.orchestrator.resources. + // ApplicationSuspensionResource.getApplication(java.lang.String), + // returns a void type. It can be intentional and perfectly fine, + // but it is a little uncommon that GET method returns always "204 + // No Content" + // + // We have whitelisted the warning for our systemtests. + // + // bakksjo has a pending jersey PR fix that avoids making the hint + // become a warning: + // https://github.com/jersey/jersey/pull/212 + // + // TODO: Remove whitelisting and this comment once jersey has been + // fixed. + } + + @Override + public void suspend(String applicationIdString) { + ApplicationId applicationId = toApplicationId(applicationIdString); + try { + orchestrator.suspend(applicationId); + } catch (ApplicationIdNotFoundException e) { + log.log(LogLevel.INFO, "ApplicationId " + applicationIdString + " not found.", e); + throw new NotFoundException(e); + } catch (ApplicationStateChangeDeniedException e) { + log.log(LogLevel.INFO, "Suspend for " + applicationIdString + " failed.", e); + throw new WebApplicationException(Response.Status.CONFLICT); + } catch (RuntimeException e) { + log.log(LogLevel.INFO, "Suspend for " + applicationIdString + " failed from unknown reasons", e); + throw new InternalServerErrorException(e); + } + } + + @Override + public void resume(String applicationIdString) { + ApplicationId applicationId = toApplicationId(applicationIdString); + try { + orchestrator.resume(applicationId); + } catch (ApplicationIdNotFoundException e) { + log.log(LogLevel.INFO, "ApplicationId " + applicationIdString + " not found.", e); + throw new NotFoundException(e); + } catch (ApplicationStateChangeDeniedException e) { + log.log(LogLevel.INFO, "Suspend for " + applicationIdString + " failed.", e); + throw new WebApplicationException(Response.Status.CONFLICT); + } catch (RuntimeException e) { + log.log(LogLevel.INFO, "Suspend for " + applicationIdString + " failed from unknown reasons", e); + throw new InternalServerErrorException(e); + } + } + + private ApplicationId toApplicationId(String applicationIdString) { + try { + return ApplicationId.fromSerializedForm(null, applicationIdString); + } catch (IllegalArgumentException e) { + throw new BadRequestException(e); + } + } +} diff --git a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/resources/HostResource.java b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/resources/HostResource.java new file mode 100644 index 00000000000..63689037a12 --- /dev/null +++ b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/resources/HostResource.java @@ -0,0 +1,94 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.vespa.orchestrator.resources; + +import com.yahoo.container.jaxrs.annotation.Component; +import com.yahoo.log.LogLevel; +import com.yahoo.vespa.orchestrator.HostNameNotFoundException; +import com.yahoo.vespa.orchestrator.Orchestrator; +import com.yahoo.vespa.orchestrator.OrchestratorImpl; +import com.yahoo.vespa.orchestrator.policy.HostStateChangeDeniedException; +import com.yahoo.vespa.orchestrator.restapi.wire.GetHostResponse; +import com.yahoo.vespa.orchestrator.restapi.HostApi; +import com.yahoo.vespa.orchestrator.restapi.wire.HostStateChangeDenialReason; +import com.yahoo.vespa.orchestrator.restapi.wire.UpdateHostResponse; +import com.yahoo.vespa.orchestrator.status.HostStatus; +import com.yahoo.vespa.applicationmodel.HostName; +import org.apache.commons.lang.exception.ExceptionUtils; + +import java.util.logging.Logger; + +import javax.inject.Inject; +import javax.ws.rs.NotFoundException; +import javax.ws.rs.Path; +import javax.ws.rs.WebApplicationException; +import javax.ws.rs.core.MediaType; +import javax.ws.rs.core.Response; + +/** + * @author oyving + */ +@Path(HostApi.PATH_PREFIX) +public class HostResource implements HostApi { + private static final Logger log = Logger.getLogger(HostResource.class.getName()); + + private final Orchestrator orchestrator; + + @Inject + public HostResource(@Component Orchestrator orchestrator) { + this.orchestrator = orchestrator; + } + + @Override + public GetHostResponse getHost(final String hostNameString) { + final HostName hostName = new HostName(hostNameString); + try { + HostStatus status = orchestrator.getNodeStatus(hostName); + return new GetHostResponse(hostName.s(), status.name()); + } catch (HostNameNotFoundException e) { + throw new NotFoundException(e); + } + } + + @Override + public UpdateHostResponse suspend(final String hostNameString) { + final HostName hostName = new HostName(hostNameString); + try { + orchestrator.suspend(hostName); + } catch (HostNameNotFoundException e) { + throw new NotFoundException(e); + } catch (HostStateChangeDeniedException e) { + log.log(LogLevel.DEBUG, "Suspend for " + hostName + " failed.", e); + throw webExceptionWithDenialReason(hostName, e); + } + return new UpdateHostResponse(hostName.s(), null); + } + + @Override + public UpdateHostResponse resume(final String hostNameString) { + final HostName hostName = new HostName(hostNameString); + try { + orchestrator.resume(hostName); + } catch (HostNameNotFoundException e) { + throw new NotFoundException(e); + } catch (HostStateChangeDeniedException e) { + log.log(LogLevel.DEBUG, "Resume for " + hostName + " failed.", e); + throw webExceptionWithDenialReason(hostName, e); + } + return new UpdateHostResponse(hostName.s(), null); + } + + private static WebApplicationException webExceptionWithDenialReason(HostName hostName, HostStateChangeDeniedException e) { + final HostStateChangeDenialReason hostStateChangeDenialReason = new HostStateChangeDenialReason( + e.getConstraintName(), e.getServiceType().s(), e.getMessage()); + final UpdateHostResponse response = new UpdateHostResponse(hostName.s(), hostStateChangeDenialReason); + return new WebApplicationException( + hostStateChangeDenialReason.toString(), + e, + Response.status(Response.Status.CONFLICT) + .entity(response) + .type(MediaType.APPLICATION_JSON_TYPE) + .build()); + + } +} + diff --git a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/resources/HostSuspensionResource.java b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/resources/HostSuspensionResource.java new file mode 100644 index 00000000000..855217c298a --- /dev/null +++ b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/resources/HostSuspensionResource.java @@ -0,0 +1,77 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.vespa.orchestrator.resources; + +import com.yahoo.container.jaxrs.annotation.Component; +import com.yahoo.log.LogLevel; +import com.yahoo.vespa.applicationmodel.HostName; +import com.yahoo.vespa.orchestrator.BatchHostNameNotFoundException; +import com.yahoo.vespa.orchestrator.BatchInternalErrorException; +import com.yahoo.vespa.orchestrator.Orchestrator; +import com.yahoo.vespa.orchestrator.policy.BatchHostStateChangeDeniedException; +import com.yahoo.vespa.orchestrator.restapi.HostSuspensionApi; +import com.yahoo.vespa.orchestrator.restapi.wire.BatchHostSuspendRequest; +import com.yahoo.vespa.orchestrator.restapi.wire.BatchOperationResult; + +import javax.inject.Inject; +import javax.ws.rs.Path; +import javax.ws.rs.WebApplicationException; +import javax.ws.rs.core.MediaType; +import javax.ws.rs.core.Response; +import java.util.List; +import java.util.logging.Logger; +import java.util.stream.Collectors; + +@Path(HostSuspensionApi.PATH_PREFIX) +public class HostSuspensionResource implements HostSuspensionApi { + private static final Logger log = Logger.getLogger(HostSuspensionResource.class.getName()); + + private final Orchestrator orchestrator; + + @Inject + public HostSuspensionResource(@Component Orchestrator orchestrator) { + this.orchestrator = orchestrator; + } + + @Override + public BatchOperationResult suspendAll(BatchHostSuspendRequest request) throws WebApplicationException { + if (!request.getParentHostname().isPresent()) { + String message = "parentHostname missing in request: " + request; + log.log(LogLevel.DEBUG, message); + throw createWebApplicationException(message, Response.Status.BAD_REQUEST); + } + HostName parentHostname = new HostName(request.getParentHostname().get()); + + if (!request.getHostnames().isPresent()) { + String message = "hostnames missing in request: " + request; + log.log(LogLevel.DEBUG, message); + throw createWebApplicationException(message, Response.Status.BAD_REQUEST); + } + List<HostName> hostNames = request.getHostnames().get().stream().map(HostName::new).collect(Collectors.toList()); + + try { + orchestrator.suspendAll(parentHostname, hostNames); + } catch (BatchHostStateChangeDeniedException e) { + log.log(LogLevel.DEBUG, "Failed to suspend nodes " + hostNames + " with parent host " + parentHostname, e); + throw createWebApplicationException(e.getMessage(), Response.Status.CONFLICT); + } catch (BatchHostNameNotFoundException e) { + log.log(LogLevel.DEBUG, "Failed to suspend nodes " + hostNames + " with parent host " + parentHostname, e); + // Note that we're returning BAD_REQUEST instead of NOT_FOUND because the resource identified + // by the URL path was found. It's one of the hostnames in the request it failed to find. + throw createWebApplicationException(e.getMessage(), Response.Status.BAD_REQUEST); + } catch (BatchInternalErrorException e) { + log.log(LogLevel.DEBUG, "Failed to suspend nodes " + hostNames + " with parent host " + parentHostname, e); + throw createWebApplicationException(e.getMessage(), Response.Status.INTERNAL_SERVER_ERROR); + } + + log.log(LogLevel.DEBUG, "Suspended " + hostNames + " with parent " + parentHostname); + return BatchOperationResult.successResult(); + } + + private WebApplicationException createWebApplicationException(String errorMessage, Response.Status status) { + return new WebApplicationException( + Response.status(status) + .entity(new BatchOperationResult(errorMessage)) + .type(MediaType.APPLICATION_JSON_TYPE) + .build()); + } +} diff --git a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/resources/InstanceResource.java b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/resources/InstanceResource.java new file mode 100644 index 00000000000..8b3862207b9 --- /dev/null +++ b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/resources/InstanceResource.java @@ -0,0 +1,81 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.vespa.orchestrator.resources; + +import com.yahoo.container.jaxrs.annotation.Component; +import com.yahoo.vespa.orchestrator.InstanceLookupService; +import com.yahoo.vespa.orchestrator.OrchestratorUtil; +import com.yahoo.vespa.orchestrator.status.HostStatus; +import com.yahoo.vespa.orchestrator.status.StatusService; +import com.yahoo.vespa.applicationmodel.ApplicationInstance; +import com.yahoo.vespa.applicationmodel.ApplicationInstanceReference; +import com.yahoo.vespa.applicationmodel.HostName; +import com.yahoo.vespa.service.monitor.ServiceMonitorStatus; + +import javax.inject.Inject; +import javax.ws.rs.GET; +import javax.ws.rs.Path; +import javax.ws.rs.PathParam; +import javax.ws.rs.Produces; +import javax.ws.rs.WebApplicationException; +import javax.ws.rs.core.MediaType; +import javax.ws.rs.core.Response; + +import java.util.Map; +import java.util.Set; + +import static com.yahoo.vespa.orchestrator.OrchestratorUtil.getHostStatusMap; +import static com.yahoo.vespa.orchestrator.OrchestratorUtil.getHostsUsedByApplicationInstance; +import static com.yahoo.vespa.orchestrator.OrchestratorUtil.parseAppInstanceReference; + +/** + * Provides a read-only API for looking into the current state as seen by the Orchestrator. + * This API can be unstable and is not meant to be used programmatically. + * + * @author andreer + * @author bakksjo + */ +@Path("/v1/instances") +public class InstanceResource { + + private final StatusService statusService; + private final InstanceLookupService instanceLookupService; + + @Inject + public InstanceResource( + @Component final InstanceLookupService instanceLookupService, + @Component final StatusService statusService) { + this.instanceLookupService = instanceLookupService; + this.statusService = statusService; + } + + @GET + @Produces(MediaType.APPLICATION_JSON) + public Set<ApplicationInstanceReference> getAllInstances() { + return instanceLookupService.knownInstances(); + } + + @GET + @Path("/{instanceId}") + @Produces(MediaType.APPLICATION_JSON) + public InstanceStatusResponse getInstance(@PathParam("instanceId") String instanceIdString) { + final ApplicationInstanceReference instanceId; + try { + instanceId = parseAppInstanceReference(instanceIdString); + } catch (IllegalArgumentException e) { + throw new WebApplicationException(Response.status(Response.Status.BAD_REQUEST).build()); + } + + final ApplicationInstance<ServiceMonitorStatus> applicationInstance + = instanceLookupService.findInstanceById(instanceId) + .orElseThrow(() -> new WebApplicationException(Response.status(Response.Status.NOT_FOUND).build())); + + final Set<HostName> hostsUsedByApplicationInstance = getHostsUsedByApplicationInstance(applicationInstance); + final Map<HostName, HostStatus> hostStatusMap = getHostStatusMap( + hostsUsedByApplicationInstance, + statusService.forApplicationInstance(instanceId)); + final Map<HostName, String> hostStatusStringMap = OrchestratorUtil.mapValues( + hostStatusMap, + HostStatus::name); + return InstanceStatusResponse.create(applicationInstance, hostStatusStringMap); + } +} diff --git a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/resources/InstanceStatusResponse.java b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/resources/InstanceStatusResponse.java new file mode 100644 index 00000000000..c38b8ac0437 --- /dev/null +++ b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/resources/InstanceStatusResponse.java @@ -0,0 +1,31 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.vespa.orchestrator.resources; + +import com.fasterxml.jackson.annotation.JsonProperty; +import com.google.auto.value.AutoValue; + +import com.yahoo.vespa.applicationmodel.ApplicationInstance; +import com.yahoo.vespa.applicationmodel.HostName; +import com.yahoo.vespa.service.monitor.ServiceMonitorStatus; + +import java.util.Map; + +/* + * @author andreer + */ +@AutoValue +public abstract class InstanceStatusResponse { + + @JsonProperty("applicationInstance") + public abstract ApplicationInstance<ServiceMonitorStatus> applicationInstance(); + + @JsonProperty("hostStates") + public abstract Map<HostName, String> hostStates(); + + public static InstanceStatusResponse create( + ApplicationInstance<ServiceMonitorStatus> applicationInstance, + Map<HostName, String> hostStates) { + return new AutoValue_InstanceStatusResponse(applicationInstance, hostStates); + } + +} diff --git a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/status/ApplicationInstanceStatus.java b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/status/ApplicationInstanceStatus.java new file mode 100644 index 00000000000..9c53fb5f5ef --- /dev/null +++ b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/status/ApplicationInstanceStatus.java @@ -0,0 +1,20 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.vespa.orchestrator.status; + +/** + * Enumeration of the orchestrator status on the application level. + * + * The naming and conventions follows the same pattern as with the HostStatus and is: + * + * When the node is suspended - the orchestration state is 'allowed to be down'. The application + * is not necessarily suspended pr. se but it is allowed to start suspending - or is back up from suspension + * but the flag is not revoked yet. + * + * @see HostStatus + * @author andreer + * @author <a href="mailto:smorgrav@yahoo-inc.com">Toby</a> + */ +public enum ApplicationInstanceStatus { + NO_REMARKS, + ALLOWED_TO_BE_DOWN +} diff --git a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/status/HostStatus.java b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/status/HostStatus.java new file mode 100644 index 00000000000..f7dc183ae92 --- /dev/null +++ b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/status/HostStatus.java @@ -0,0 +1,12 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.vespa.orchestrator.status; + +/** + * Enumeration of the different status' a host can have. + * + * @author oyving + */ +public enum HostStatus { + NO_REMARKS, + ALLOWED_TO_BE_DOWN; +} diff --git a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/status/InMemoryStatusService.java b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/status/InMemoryStatusService.java new file mode 100644 index 00000000000..cbe5295799b --- /dev/null +++ b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/status/InMemoryStatusService.java @@ -0,0 +1,120 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.vespa.orchestrator.status; + +import com.yahoo.vespa.applicationmodel.ApplicationInstanceReference; +import com.yahoo.vespa.applicationmodel.HostName; + +import java.util.HashMap; +import java.util.HashSet; +import java.util.Map; +import java.util.Set; +import java.util.concurrent.locks.Lock; +import java.util.concurrent.locks.ReentrantLock; + +/** + * Implementation of the StatusService interface for testing. + * + * @author oyving + */ +public class InMemoryStatusService implements StatusService { + + private final Map<HostName, HostStatus> hostServiceStatus = new HashMap<>(); + private final Set<ApplicationInstanceReference> applicationStatus = new HashSet<>(); + private final LockService<ApplicationInstanceReference> instanceLockService = new LockService<>(); + + private void setHostStatus( + HostName hostName, + HostStatus status) { + + hostServiceStatus.put(hostName, status); + } + + @Override + public ReadOnlyStatusRegistry forApplicationInstance( + final ApplicationInstanceReference applicationInstanceReference) { + return new ReadOnlyStatusRegistry() { + @Override + public HostStatus getHostStatus(final HostName hostName) { + return hostServiceStatus.getOrDefault(hostName, HostStatus.NO_REMARKS); + } + + @Override + public ApplicationInstanceStatus getApplicationInstanceStatus() { + return applicationStatus.contains(applicationInstanceReference) ? + ApplicationInstanceStatus.ALLOWED_TO_BE_DOWN : ApplicationInstanceStatus.NO_REMARKS; + } + }; + } + + @Override + public MutableStatusRegistry lockApplicationInstance_forCurrentThreadOnly( + final ApplicationInstanceReference applicationInstanceReference) { + final Lock lock = instanceLockService.get(applicationInstanceReference); + return new InMemoryMutableStatusRegistry(lock, applicationInstanceReference); + } + + @Override + public Set<ApplicationInstanceReference> getAllSuspendedApplications() { + return applicationStatus; + } + + private class InMemoryMutableStatusRegistry implements MutableStatusRegistry { + private final Lock lockHandle; + private final ApplicationInstanceReference ref; + + public InMemoryMutableStatusRegistry(final Lock lockHandle, + final ApplicationInstanceReference ref) { + this.lockHandle = lockHandle; + this.ref = ref; + } + + @Override + public void setHostState(HostName hostName, HostStatus status) { + setHostStatus(hostName, status); + } + + @Override + public void setApplicationInstanceStatus(ApplicationInstanceStatus applicationInstanceStatus) { + if (applicationInstanceStatus == ApplicationInstanceStatus.NO_REMARKS) { + applicationStatus.remove(ref); + } else { + applicationStatus.add(ref); + } + } + + @Override + public HostStatus getHostStatus(HostName hostName) { + return hostServiceStatus.getOrDefault(hostName, HostStatus.NO_REMARKS); + } + + @Override + public ApplicationInstanceStatus getApplicationInstanceStatus() { + return applicationStatus.contains(ref) ? ApplicationInstanceStatus.ALLOWED_TO_BE_DOWN : + ApplicationInstanceStatus.NO_REMARKS; + } + + @Override + public void close() { + //lockHandle.unlock(); TODO this casues illeal state monitor exception - how to use it properly + } + } + + private static class LockService<T> { + private final Map<T, Lock> locks; + + public LockService() { + this.locks = new HashMap<>(); + } + + public Lock get(T lockId) { + synchronized (this) { + Lock lock = locks.computeIfAbsent( + lockId, + id -> new ReentrantLock() + ); + + return lock; + } + } + } +} diff --git a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/status/MutableStatusRegistry.java b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/status/MutableStatusRegistry.java new file mode 100644 index 00000000000..dc46a352fe7 --- /dev/null +++ b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/status/MutableStatusRegistry.java @@ -0,0 +1,31 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.vespa.orchestrator.status; + +import com.yahoo.vespa.applicationmodel.HostName; + +/** + * Registry of the suspension and host statuses for an application instance. + * + * @author oyving + * @author tonytv + * @author bakksjo + */ +public interface MutableStatusRegistry extends ReadOnlyStatusRegistry, AutoCloseable { + /** + * Sets the state for the given host. + */ + void setHostState(HostName hostName, HostStatus status); + + /** + * Sets the orchestration status for the application instance. + */ + void setApplicationInstanceStatus(ApplicationInstanceStatus applicationInstanceStatus); + + /** + * We don't want {@link AutoCloseable#close()} to throw an exception (what to do about it anyway?), + * so we override it here to strip the exception from the signature. + */ + @Override + @NoThrow + void close(); +} diff --git a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/status/NoThrow.java b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/status/NoThrow.java new file mode 100644 index 00000000000..0b465ea0846 --- /dev/null +++ b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/status/NoThrow.java @@ -0,0 +1,19 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.vespa.orchestrator.status; + +import java.lang.annotation.ElementType; +import java.lang.annotation.Retention; +import java.lang.annotation.RetentionPolicy; +import java.lang.annotation.Target; + +/** + * Used to annotate methods that do not throw Exceptions. + * They are still allowed to throw Errors, such as AssertionError + * + * TODO: move to vespajlib or find a suitable replacement + * @author tonytv + */ +@Target(ElementType.METHOD) +@Retention(RetentionPolicy.SOURCE) +@interface NoThrow {} + diff --git a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/status/ReadOnlyStatusRegistry.java b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/status/ReadOnlyStatusRegistry.java new file mode 100644 index 00000000000..c8f9bbf208b --- /dev/null +++ b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/status/ReadOnlyStatusRegistry.java @@ -0,0 +1,23 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.vespa.orchestrator.status; + +import com.yahoo.vespa.applicationmodel.HostName; + +/** + * Read-only view of statuses for the application instance and its hosts. + * + * @author oyving + * @author tonytv + * @author bakksjo + */ +public interface ReadOnlyStatusRegistry { + /** + * Gets the current state for the given host. + */ + HostStatus getHostStatus(HostName hostName); + + /** + * Gets the current status for the application instance. + */ + ApplicationInstanceStatus getApplicationInstanceStatus(); +} diff --git a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/status/StatusService.java b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/status/StatusService.java new file mode 100644 index 00000000000..927b2f9fb6a --- /dev/null +++ b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/status/StatusService.java @@ -0,0 +1,64 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.vespa.orchestrator.status; + +import com.yahoo.vespa.applicationmodel.ApplicationInstanceReference; + +import java.util.Set; + +/** + * Service that can produce registries for the suspension of an application + * and its hosts. + * + * The registry classes are pr application instance. + * TODO Remove readonly registry class (replace with actual methods) - only adds complexity. + * + * @author oyving + * @author tonytv + * @author smorgrav + */ +public interface StatusService { + /** + * Returns a readable host status registry for the given application instance. No locking is involved, + * so this call will never block. However, since it is possible that mutations are going on simultaneously + * with accessing this registry, the view obtained through the returned registry must be considered to be + * possibly inconsistent snapshot values. It is not recommended that this method is used for anything other + * than monitoring, logging, debugging, etc. It should never be used for multi-step operations (e.g. + * read-then-write) where consistency is required. For those cases, use + * {@link #lockApplicationInstance_forCurrentThreadOnly(ApplicationInstanceReference)}. + */ + ReadOnlyStatusRegistry forApplicationInstance(ApplicationInstanceReference applicationInstanceReference); + + /** + * Returns a mutable host status registry for a locked application instance. All operations performed on + * the returned registry are executed in the context of a lock, including read operations. Hence, multi-step + * operations (e.g. read-then-write) are guaranteed to be consistent. + * + * Some limitations/caveats apply for certain implementations, and since clients of this API must be aware of + * these limitations/caveats when using those implementations, they are expressed here, at interface level + * rather than at implementation level, because the interface represents the lowest common denominator + * of guarantees offered by implementations. Specifically, it is the zookeeper-based implementation's semantics + * that "leak through" in this spec. Now, to the specific caveats: + * + * Locking this application instance only guarantees that the holder is the only one that can mutate host statuses + * for the application instance. + * It is _not_ safe to assume that there is only one entity holding the lock for a given application instance + * reference at any given time. + * + * You cannot have multiple locks in a single thread, even if they are for different application instances, + * (i.e. different HostStatusRegistry instances). (This is due to a limitation in SessionFailRetryLoop.) + * + * While read-then-write-operations are consistent (i.e. the current value doesn't change between the read + * and the write), it is possible that the lock is lost before it is explicitly released by the code. In + * this case, subsequent mutating operations will fail, but previous mutating operations are NOT rolled back. + * This may leave the registry in an inconsistent state (as judged by the client code). + */ + MutableStatusRegistry lockApplicationInstance_forCurrentThreadOnly(ApplicationInstanceReference applicationInstanceReference); + + /** + * Returns all application instances that are allowed to be down. The intention is to use this + * for visualization, informational and debugging purposes. + * + * @return A Map between the application instance and its status. + */ + Set<ApplicationInstanceReference> getAllSuspendedApplications(); +} diff --git a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/status/ZookeeperStatusService.java b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/status/ZookeeperStatusService.java new file mode 100644 index 00000000000..d9df5800ee8 --- /dev/null +++ b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/status/ZookeeperStatusService.java @@ -0,0 +1,373 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.vespa.orchestrator.status; + +import com.yahoo.container.jaxrs.annotation.Component; +import com.yahoo.log.LogLevel; +import com.yahoo.vespa.applicationmodel.ApplicationInstanceReference; +import com.yahoo.vespa.applicationmodel.HostName; +import com.yahoo.vespa.curator.Curator; +import com.yahoo.vespa.orchestrator.OrchestratorUtil; +import org.apache.curator.SessionFailRetryLoop; +import org.apache.curator.SessionFailRetryLoop.Mode; +import org.apache.curator.SessionFailRetryLoop.SessionFailedException; +import org.apache.curator.framework.CuratorFramework; +import org.apache.curator.framework.recipes.locks.InterProcessSemaphoreMutex; +import org.apache.zookeeper.KeeperException.NoNodeException; +import org.apache.zookeeper.KeeperException.NodeExistsException; +import org.apache.zookeeper.data.Stat; + +import javax.annotation.concurrent.GuardedBy; +import javax.inject.Inject; +import java.util.HashMap; +import java.util.HashSet; +import java.util.Map; +import java.util.Set; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.TimeoutException; +import java.util.logging.Logger; + +/** + * Stores instance suspension status and which hosts are allowed to go down in zookeeper. + * + * TODO: expiry of old application instances + * @author tonytv + */ +public class ZookeeperStatusService implements StatusService { + private static final Logger log = Logger.getLogger(ZookeeperStatusService.class.getName()); + + //For debug purposes only: Used to check that operations depending on a lock is done from a single thread, + //and that a threads doing operations actually owns a corresponding lock, + //and that a single thread only owns a single lock (across all ZookeeperStatusServices) + @GuardedBy("threadsHoldingLock") + private static final Map<Thread, ApplicationInstanceReference> threadsHoldingLock = new HashMap<>(); + + final static String HOST_STATUS_BASE_PATH = "/vespa/host-status-service"; + final static String APPLICATION_STATUS_BASE_PATH = "/vespa/application-status-service"; + + private final CuratorFramework curatorFramework; + + @Inject + public ZookeeperStatusService(@Component Curator curator) { + this(curator.framework()); + } + + /** + * Called via public constructor on directly on testing. + */ + ZookeeperStatusService(CuratorFramework curatorFramework) { + this.curatorFramework = curatorFramework; + } + + @Override + public ReadOnlyStatusRegistry forApplicationInstance( + final ApplicationInstanceReference applicationInstanceReference) { + return new ReadOnlyStatusRegistry() { + @Override + public HostStatus getHostStatus(final HostName hostName) { + return getInternalHostStatus(applicationInstanceReference, hostName); + } + + @Override + public ApplicationInstanceStatus getApplicationInstanceStatus() { + return getInternalApplicationInstanceStatus(applicationInstanceReference); + } + }; + } + + /** + * 1) locks the status service for an application instance. + * 2) fails all operations in this thread when the session is lost, + * since session loss might cause the lock to be lost. + * Since it only fails operations in this thread, + * all operations depending on a lock, including the locking itself, must be done in this thread. + * Note that since it is the thread that fails, all status operations in this thread will fail + * even if they're not supposed to be guarded by this lock + * (i.e. the request is for another applicationInstanceReference) + */ + @Override + public MutableStatusRegistry lockApplicationInstance_forCurrentThreadOnly( + final ApplicationInstanceReference applicationInstanceReference) { + return lockApplicationInstance_forCurrentThreadOnly(applicationInstanceReference, 10, TimeUnit.SECONDS); + } + + @Override + public Set<ApplicationInstanceReference> getAllSuspendedApplications() { + try { + Set<ApplicationInstanceReference> resultSet = new HashSet<>(); + + // Return empty set if the base path does not exist + Stat stat = curatorFramework.checkExists().forPath(APPLICATION_STATUS_BASE_PATH); + if (stat == null) return resultSet; + + // The path exist and we may have children + for (String appRefStr : curatorFramework.getChildren().forPath(APPLICATION_STATUS_BASE_PATH)) { + ApplicationInstanceReference appRef = OrchestratorUtil.parseAppInstanceReference(appRefStr); + resultSet.add(appRef); + } + + return resultSet; + } catch (Exception e) { + log.log(LogLevel.DEBUG, "Something went wrong while listing out applications in suspend.", e); + throw new RuntimeException(e); + } + } + + MutableStatusRegistry lockApplicationInstance_forCurrentThreadOnly( + ApplicationInstanceReference applicationInstanceReference, + long timeout, + TimeUnit timeoutTimeUnit) { + + final Thread currentThread = Thread.currentThread(); + + //Due to limitations in SessionFailRetryLoop. + assertThreadDoesNotHoldLock(currentThread, + "Can't lock " + applicationInstanceReference); + + try { + SessionFailRetryLoop sessionFailRetryLoop = + curatorFramework.getZookeeperClient().newSessionFailRetryLoop(Mode.FAIL); + sessionFailRetryLoop.start(); + try { + String lockPath = applicationInstanceLockPath(applicationInstanceReference); + InterProcessSemaphoreMutex mutex = acquireMutexOrThrow(timeout, timeoutTimeUnit, lockPath); + + synchronized (threadsHoldingLock) { + threadsHoldingLock.put(currentThread, applicationInstanceReference); + } + + return new ZkMutableStatusRegistry(mutex, sessionFailRetryLoop, applicationInstanceReference, currentThread); + } catch (Throwable t) { + sessionFailRetryLoop.close(); + throw t; + } + } catch (Exception e) { + //TODO: IOException with explanation + throw new RuntimeException(e); + } + } + + private void assertThreadDoesNotHoldLock(Thread currentThread, String message) { + synchronized (threadsHoldingLock) { + if (threadsHoldingLock.containsKey(currentThread)) { + throw new AssertionError(message + ", already have a lock on " + threadsHoldingLock.get(currentThread)); + } + } + } + + private InterProcessSemaphoreMutex acquireMutexOrThrow(long timeout, TimeUnit timeoutTimeUnit, String lockPath) throws Exception { + InterProcessSemaphoreMutex mutex = new InterProcessSemaphoreMutex(curatorFramework, lockPath); + + log.log(LogLevel.DEBUG, "Waiting for lock on " + lockPath); + boolean acquired = mutex.acquire(timeout, timeoutTimeUnit); + if (!acquired) { + log.log(LogLevel.DEBUG, "Timed out waiting for lock on " + lockPath); + throw new TimeoutException(); + } + log.log(LogLevel.DEBUG, "Successfully acquired lock on " + lockPath); + return mutex; + } + + private void setHostStatus( + ApplicationInstanceReference applicationInstanceReference, + HostName hostName, + HostStatus status) { + assertThreadHoldsLock(applicationInstanceReference); + + String path = hostAllowedDownPath(applicationInstanceReference, hostName); + + try { + switch (status) { + case NO_REMARKS: + deleteNode_ignoreNoNodeException(path, + "Host already has state NO_REMARKS, path = " + path); + break; + case ALLOWED_TO_BE_DOWN: + createNode_ignoreNodeExistsException(path, + "Host already has state ALLOWED_TO_BE_DOWN, path = " + path); + } + } catch (Exception e) { + //TODO: IOException with explanation + throw new RuntimeException(e); + } + } + + private static void assertThreadHoldsLock(ApplicationInstanceReference applicationInstanceReference) { + synchronized (threadsHoldingLock) { + ApplicationInstanceReference lockedApplicationInstanceReference = + threadsHoldingLock.get(Thread.currentThread()); + + if (lockedApplicationInstanceReference == null) { + throw new AssertionError("The current thread does not own any status service locks. " + + "Application Instance = " + applicationInstanceReference); + } + + if (!lockedApplicationInstanceReference.equals(applicationInstanceReference)) { + throw new AssertionError("The current thread does not have a lock on " + + "application instance " + applicationInstanceReference + + ", but instead have a lock on " + lockedApplicationInstanceReference); + } + } + } + + private void deleteNode_ignoreNoNodeException(String path, String debugLogMessageIfNotExists) throws Exception { + try { + curatorFramework.delete().forPath(path); + } catch (NoNodeException e) { + log.log(LogLevel.DEBUG, debugLogMessageIfNotExists, e); + } + } + + private void createNode_ignoreNodeExistsException(String path, String debugLogMessageIfExists) throws Exception { + try { + curatorFramework.create() + .creatingParentsIfNeeded() + .forPath(path); + } catch (NodeExistsException e) { + log.log(LogLevel.DEBUG, debugLogMessageIfExists, e); + } + } + + //TODO: Eliminate repeated calls to getHostStatus, replace with bulk operation. + private HostStatus getInternalHostStatus(ApplicationInstanceReference applicationInstanceReference, HostName hostName) { + try { + Stat statOrNull = curatorFramework.checkExists().forPath( + hostAllowedDownPath(applicationInstanceReference, hostName)); + + return (statOrNull == null) ? HostStatus.NO_REMARKS : HostStatus.ALLOWED_TO_BE_DOWN; + } catch (Exception e) { + //TODO: IOException with explanation - Should we only catch IOExceptions or are they a special case? + throw new RuntimeException(e); + } + } + + /** Common implementation for the two internal classes that sets ApplicationInstanceStatus. */ + private ApplicationInstanceStatus getInternalApplicationInstanceStatus(ApplicationInstanceReference applicationInstanceReference) { + try { + Stat statOrNull = curatorFramework.checkExists().forPath( + applicationInstanceSuspendedPath(applicationInstanceReference)); + + return (statOrNull == null) ? ApplicationInstanceStatus.NO_REMARKS : ApplicationInstanceStatus.ALLOWED_TO_BE_DOWN; + } catch (Exception e) { + throw new RuntimeException(e); + } + } + + private HostStatus getHostStatusWithLock( + final ApplicationInstanceReference applicationInstanceReference, + final HostName hostName) { + assertThreadHoldsLock(applicationInstanceReference); + return getInternalHostStatus(applicationInstanceReference, hostName); + } + + private static String applicationInstancePath(ApplicationInstanceReference applicationInstanceReference) { + return HOST_STATUS_BASE_PATH + '/' + + applicationInstanceReference.tenantId() + ":" + applicationInstanceReference.applicationInstanceId(); + } + + private static String hostsAllowedDownPath(ApplicationInstanceReference applicationInstanceReference) { + return applicationInstancePath(applicationInstanceReference) + "/hosts-allowed-down"; + } + + private static String applicationInstanceLockPath(ApplicationInstanceReference applicationInstanceReference) { + return applicationInstancePath(applicationInstanceReference) + "/lock"; + } + + private String applicationInstanceSuspendedPath(ApplicationInstanceReference applicationInstanceReference) { + return APPLICATION_STATUS_BASE_PATH + "/" + OrchestratorUtil.toRestApiFormat(applicationInstanceReference); + } + + private static String hostAllowedDownPath(ApplicationInstanceReference applicationInstanceReference, HostName hostname) { + return hostsAllowedDownPath(applicationInstanceReference) + '/' + hostname.s(); + } + + private class ZkMutableStatusRegistry implements MutableStatusRegistry { + private final InterProcessSemaphoreMutex mutex; + private final SessionFailRetryLoop sessionFailRetryLoop; + private final ApplicationInstanceReference applicationInstanceReference; + private final Thread lockingThread; + + public ZkMutableStatusRegistry( + InterProcessSemaphoreMutex mutex, + SessionFailRetryLoop sessionFailRetryLoop, + ApplicationInstanceReference applicationInstanceReference, + Thread lockingThread) { + + this.mutex = mutex; + this.sessionFailRetryLoop = sessionFailRetryLoop; + this.applicationInstanceReference = applicationInstanceReference; + this.lockingThread = lockingThread; + } + + @Override + public void setHostState(final HostName hostName, final HostStatus status) { + setHostStatus(applicationInstanceReference, hostName, status); + } + + @Override + public void setApplicationInstanceStatus(ApplicationInstanceStatus applicationInstanceStatus) { + assertThreadHoldsLock(applicationInstanceReference); + + String path = applicationInstanceSuspendedPath(applicationInstanceReference); + + try { + switch (applicationInstanceStatus) { + case NO_REMARKS: + deleteNode_ignoreNoNodeException(path, + "Instance is already in state NO_REMARKS, path = " + path); + break; + case ALLOWED_TO_BE_DOWN: + createNode_ignoreNodeExistsException(path, + "Instance is already in state ALLOWED_TO_BE_DOWN, path = " + path); + break; + } + } catch (Exception e) { + throw new RuntimeException(e); + } + } + + @Override + public HostStatus getHostStatus(final HostName hostName) { + return getHostStatusWithLock(applicationInstanceReference, hostName); + } + + @Override + public ApplicationInstanceStatus getApplicationInstanceStatus() { + return getInternalApplicationInstanceStatus(applicationInstanceReference); + } + + @Override + @NoThrow + public void close() { + synchronized (threadsHoldingLock) { + threadsHoldingLock.remove(lockingThread, applicationInstanceReference); + } + + try { + mutex.release(); + } catch (Exception e) { + if (e.getCause() instanceof SessionFailedException) { + log.log(LogLevel.DEBUG, "Session expired, mutex should be freed automatically", e); + } else { + //Failing to unlock the mutex should not fail the request, + //since the status database has already been updated at this point. + log.log(LogLevel.WARNING, "Failed unlocking application instance " + applicationInstanceReference, e); + } + } + + //Similar precondition checked in sessionFailRetryLoop.close, + //but this has more useful debug output. + if (lockingThread != Thread.currentThread()) { + throw new AssertionError("LockHandle should only be used from a single thread. " + + "Application instance = " + applicationInstanceReference + + " Locking thread = " + lockingThread + + " Current thread = " + Thread.currentThread()); + } + + try { + sessionFailRetryLoop.close(); + } catch (Exception e) { + log.log(LogLevel.ERROR, "Failed closing SessionRetryLoop", e); + } + } + } +} diff --git a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/status/package-info.java b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/status/package-info.java new file mode 100644 index 00000000000..55dfdf9573d --- /dev/null +++ b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/status/package-info.java @@ -0,0 +1,5 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +@ExportPackage +package com.yahoo.vespa.orchestrator.status; + +import com.yahoo.osgi.annotation.ExportPackage; diff --git a/orchestrator/src/main/resources/configdefinitions/orchestrator.def b/orchestrator/src/main/resources/configdefinitions/orchestrator.def new file mode 100644 index 00000000000..4d569e0f55c --- /dev/null +++ b/orchestrator/src/main/resources/configdefinitions/orchestrator.def @@ -0,0 +1,5 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +namespace=vespa.orchestrator + +# TODO: Change the default to actual latency in real setup. +serviceMonitorConvergenceLatencySeconds int default=0 diff --git a/orchestrator/src/test/application/services.xml b/orchestrator/src/test/application/services.xml new file mode 100644 index 00000000000..8c68f7d0b50 --- /dev/null +++ b/orchestrator/src/test/application/services.xml @@ -0,0 +1,16 @@ +<!-- Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. --> +<services> + <jdisc version="1.0" jetty="true"> + <config name="container.handler.threadpool"> + <maxthreads>10</maxthreads> + </config> + <component id="com.yahoo.vespa.orchestrator.status.InMemoryStatusService" bundle="orchestrator" /> + <component id="com.yahoo.vespa.orchestrator.DummyInstanceLookupService" bundle="orchestrator" /> + <component id="com.yahoo.vespa.orchestrator.OrchestratorImpl" bundle="orchestrator" /> + <component id="com.yahoo.vespa.orchestrator.controller.ClusterControllerClientFactoryMock" bundle="orchestrator" /> + + <rest-api path="orchestrator" jersey2="true"> + <components bundle="orchestrator" /> + </rest-api> + </jdisc> +</services> diff --git a/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/DummyInstanceLookupService.java b/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/DummyInstanceLookupService.java new file mode 100644 index 00000000000..e1b73c1fe65 --- /dev/null +++ b/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/DummyInstanceLookupService.java @@ -0,0 +1,166 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.vespa.orchestrator; + + +import com.yahoo.vespa.applicationmodel.ApplicationInstance; +import com.yahoo.vespa.applicationmodel.ApplicationInstanceId; +import com.yahoo.vespa.applicationmodel.ApplicationInstanceReference; +import com.yahoo.vespa.applicationmodel.ClusterId; +import com.yahoo.vespa.applicationmodel.ConfigId; +import com.yahoo.vespa.applicationmodel.HostName; +import com.yahoo.vespa.applicationmodel.ServiceCluster; +import com.yahoo.vespa.applicationmodel.ServiceInstance; +import com.yahoo.vespa.applicationmodel.ServiceType; +import com.yahoo.vespa.applicationmodel.TenantId; +import com.yahoo.vespa.service.monitor.ServiceMonitorStatus; + +import java.util.HashSet; +import java.util.Optional; +import java.util.Set; +import java.util.stream.Collectors; + +/** + * A hardcoded set of applications with one storage cluster with two nodes each. + * + * @author oyving + * @author smorgrav + */ +public class DummyInstanceLookupService implements InstanceLookupService { + + public static final HostName TEST1_HOST_NAME = new HostName("test1.prod.utpoia-1.vespahosted.ut1.yahoo.com"); + public static final HostName TEST3_HOST_NAME = new HostName("test3.prod.utpoia-1.vespahosted.ut1.yahoo.com"); + public static final HostName TEST6_HOST_NAME = new HostName("test6.prod.us-east-1.vespahosted.ne1.yahoo.com"); + + private static final Set<ApplicationInstance<ServiceMonitorStatus>> apps = new HashSet<>(); + + + static { + apps.add(new ApplicationInstance<>( + new TenantId("test-tenant-id"), + new ApplicationInstanceId("application:instance"), + TestUtil.makeServiceClusterSet( + new ServiceCluster<>( + new ClusterId("test-cluster-id-1"), + new ServiceType("storagenode"), + TestUtil.makeServiceInstanceSet( + new ServiceInstance<>( + new ConfigId("storage/storage/1"), + TEST1_HOST_NAME, + ServiceMonitorStatus.UP), + new ServiceInstance<>( + new ConfigId("storage/storage/2"), + new HostName("test2.prod.utpoia-1.vespahosted.ut1.yahoo.com"), + ServiceMonitorStatus.UP))), + new ServiceCluster<>( + new ClusterId("clustercontroller"), + new ServiceType("container-clustercontroller"), + TestUtil.makeServiceInstanceSet( + new ServiceInstance<>( + new ConfigId("clustercontroller-1"), + new HostName("myclustercontroller.prod.utopia-1.vespahosted.ut1.yahoo.com"), + ServiceMonitorStatus.UP))) + + ) + )); + + apps.add(new ApplicationInstance<>( + new TenantId("mediasearch"), + new ApplicationInstanceId("imagesearch:default"), + TestUtil.makeServiceClusterSet( + new ServiceCluster<>( + new ClusterId("image"), + new ServiceType("storagenode"), + TestUtil.makeServiceInstanceSet( + new ServiceInstance<>( + new ConfigId("storage/storage/3"), + TEST3_HOST_NAME, + ServiceMonitorStatus.UP), + new ServiceInstance<>( + new ConfigId("storage/storage/4"), + new HostName("test4.prod.utpoia-1.vespahosted.ut1.yahoo.com"), + ServiceMonitorStatus.UP))), + new ServiceCluster<>( + new ClusterId("clustercontroller"), + new ServiceType("container-clustercontroller"), + TestUtil.makeServiceInstanceSet( + new ServiceInstance<>( + new ConfigId("clustercontroller-1"), + new HostName("myclustercontroller2.prod.utopia-1.vespahosted.ut1.yahoo.com"), + ServiceMonitorStatus.UP))) + ) + ) + ); + + apps.add(new ApplicationInstance<>( + new TenantId("tenant-id-3"), + new ApplicationInstanceId("application-instance-3:default"), + TestUtil.makeServiceClusterSet( + new ServiceCluster<>( + new ClusterId("cluster-id-3"), + new ServiceType("storagenode"), + TestUtil.makeServiceInstanceSet( + new ServiceInstance<>( + new ConfigId("storage/storage/1"), + TEST6_HOST_NAME, + ServiceMonitorStatus.UP), + new ServiceInstance<>( + new ConfigId("storage/storage/4"), + new HostName("test4.prod.utpoia-1.vespahosted.ut1.yahoo.com"), + ServiceMonitorStatus.UP))), + new ServiceCluster<>( + new ClusterId("clustercontroller"), + new ServiceType("container-clustercontroller"), + TestUtil.makeServiceInstanceSet( + new ServiceInstance<>( + new ConfigId("clustercontroller-1"), + new HostName("myclustercontroller3.prod.utopia-1.vespahosted.ut1.yahoo.com"), + ServiceMonitorStatus.UP))) + ) + )); + } + + + @Override + public Optional<ApplicationInstance<ServiceMonitorStatus>> findInstanceById( + final ApplicationInstanceReference applicationInstanceReference) { + for (ApplicationInstance<ServiceMonitorStatus> app : apps) { + if (app.reference().equals(applicationInstanceReference)) return Optional.of(app); + } + return Optional.empty(); + } + + @Override + public Optional<ApplicationInstance<ServiceMonitorStatus>> findInstanceByHost(HostName hostName) { + for (ApplicationInstance<ServiceMonitorStatus> app : apps) { + for (ServiceCluster<ServiceMonitorStatus> cluster : app.serviceClusters()) { + for (ServiceInstance<ServiceMonitorStatus> service : cluster.serviceInstances()) { + if (hostName.equals(service.hostName())) return Optional.of(app); + } + } + } + return Optional.empty(); + } + + @Override + public Set<ApplicationInstanceReference> knownInstances() { + return apps.stream().map(a -> + new ApplicationInstanceReference(a.tenantId(),a.applicationInstanceId())).collect(Collectors.toSet()); + + } + + public static Set<HostName> getContentHosts(ApplicationInstanceReference appRef) { + Set<HostName> hosts = apps.stream() + .filter(application -> application.reference().equals(appRef)) + .flatMap(appReference -> appReference.serviceClusters().stream()) + .filter(VespaModelUtil::isContent) + .flatMap(serviceCluster -> serviceCluster.serviceInstances().stream()) + .map(ServiceInstance::hostName) + .collect(Collectors.toSet()); + + return hosts; + } + + public static Set<ApplicationInstance<ServiceMonitorStatus>> getApplications() { + return apps; + } +} diff --git a/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/OrchestratorImplTest.java b/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/OrchestratorImplTest.java new file mode 100644 index 00000000000..5c1d9bd45be --- /dev/null +++ b/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/OrchestratorImplTest.java @@ -0,0 +1,300 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.vespa.orchestrator; + +import com.yahoo.config.provision.ApplicationId; +import com.yahoo.vespa.applicationmodel.ApplicationInstance; +import com.yahoo.vespa.applicationmodel.ApplicationInstanceReference; +import com.yahoo.vespa.applicationmodel.HostName; +import com.yahoo.vespa.applicationmodel.ServiceType; +import com.yahoo.vespa.orchestrator.controller.ClusterControllerClientFactoryMock; +import com.yahoo.vespa.orchestrator.policy.BatchHostStateChangeDeniedException; +import com.yahoo.vespa.orchestrator.policy.HostStateChangeDeniedException; +import com.yahoo.vespa.orchestrator.status.HostStatus; +import com.yahoo.vespa.orchestrator.status.InMemoryStatusService; +import com.yahoo.vespa.service.monitor.ServiceMonitorStatus; +import org.junit.After; +import org.junit.Before; +import org.junit.Test; +import org.mockito.InOrder; + +import java.util.Arrays; +import java.util.Iterator; +import java.util.List; + +import static com.yahoo.vespa.orchestrator.status.ApplicationInstanceStatus.ALLOWED_TO_BE_DOWN; +import static com.yahoo.vespa.orchestrator.status.ApplicationInstanceStatus.NO_REMARKS; +import static org.hamcrest.collection.IsEmptyCollection.empty; +import static org.hamcrest.core.Is.is; +import static org.hamcrest.core.IsCollectionContaining.hasItem; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertThat; +import static org.junit.Assert.assertTrue; +import static org.junit.Assert.fail; +import static org.mockito.Mockito.doNothing; +import static org.mockito.Mockito.doThrow; +import static org.mockito.Mockito.inOrder; +import static org.mockito.Mockito.spy; + +/** + * Test Orchestrator with a mock backend (the InMemoryStatusService) + * + * @author <a href="mailto:smorgrav@yahoo-inc.com">Toby</a> + */ +public class OrchestratorImplTest { + + private ApplicationId app1; + private ApplicationId app2; + private HostName app1_host1; + + private OrchestratorImpl orchestrator; + private ClusterControllerClientFactoryMock clustercontroller; + + @Before + public void setUp() throws Exception { + // Extract applications and hosts from dummy instance lookup service + Iterator<ApplicationInstance<ServiceMonitorStatus>> iterator = DummyInstanceLookupService.getApplications().iterator(); + ApplicationInstanceReference app1_ref = iterator.next().reference(); + app1 = OrchestratorUtil.toApplicationId(app1_ref); + app1_host1 = DummyInstanceLookupService.getContentHosts(app1_ref).iterator().next(); + app2 = OrchestratorUtil.toApplicationId(iterator.next().reference()); + + clustercontroller = new ClusterControllerClientFactoryMock(); + orchestrator = new OrchestratorImpl( + clustercontroller, + new InMemoryStatusService(), + new OrchestratorConfig(new OrchestratorConfig.Builder()), + new DummyInstanceLookupService()); + + clustercontroller.setAllDummyNodesAsUp(); + } + + @After + public void tearDown() throws Exception { + orchestrator = null; + clustercontroller = null; + } + + @Test + public void application_has_initially_no_remarks() throws Exception { + assertThat(orchestrator.getApplicationInstanceStatus(app1), is(NO_REMARKS)); + } + + @Test + public void application_can_be_set_in_suspend() throws Exception { + orchestrator.suspend(app1); + assertThat(orchestrator.getApplicationInstanceStatus(app1), is(ALLOWED_TO_BE_DOWN)); + } + + @Test + public void application_can_be_removed_from_suspend() throws Exception { + orchestrator.suspend(app1); + orchestrator.resume(app1); + assertThat(orchestrator.getApplicationInstanceStatus(app1), is(NO_REMARKS)); + } + + @Test + public void appliations_list_returns_empty_initially() throws Exception { + assertThat(orchestrator.getAllSuspendedApplications(), is(empty())); + } + + @Test + public void appliations_list_returns_suspended_apps() throws Exception { + // One suspended app + orchestrator.suspend(app1); + assertThat(orchestrator.getAllSuspendedApplications().size(), is(1)); + assertThat(orchestrator.getAllSuspendedApplications(), hasItem(app1)); + + // Two suspended apps + orchestrator.suspend(app2); + assertThat(orchestrator.getAllSuspendedApplications().size(), is(2)); + assertThat(orchestrator.getAllSuspendedApplications(), hasItem(app1)); + assertThat(orchestrator.getAllSuspendedApplications(), hasItem(app2)); + + // Back to one when resetting one app to no_remarks + orchestrator.resume(app1); + assertThat(orchestrator.getAllSuspendedApplications().size(), is(1)); + assertThat(orchestrator.getAllSuspendedApplications(), hasItem(app2)); + } + + + @Test + public void application_operations_are_idempotent() throws Exception { + // Two suspends + orchestrator.suspend(app1); + orchestrator.suspend(app1); + assertThat(orchestrator.getApplicationInstanceStatus(app1), is(ALLOWED_TO_BE_DOWN)); + assertThat(orchestrator.getApplicationInstanceStatus(app2), is(NO_REMARKS)); + + // Three no_remarks + orchestrator.resume(app1); + orchestrator.resume(app1); + orchestrator.resume(app1); + assertThat(orchestrator.getApplicationInstanceStatus(app1), is(NO_REMARKS)); + assertThat(orchestrator.getApplicationInstanceStatus(app2), is(NO_REMARKS)); + + // Two suspends and two on two applications interleaved + orchestrator.suspend(app2); + orchestrator.resume(app1); + orchestrator.suspend(app2); + orchestrator.resume(app1); + assertThat(orchestrator.getApplicationInstanceStatus(app1), is(NO_REMARKS)); + assertThat(orchestrator.getApplicationInstanceStatus(app2), is(ALLOWED_TO_BE_DOWN)); + } + + + @Test + public void application_suspend_sets_application_nodes_in_maintenance_and_allowed_to_be_down() throws Exception { + // Pre condition + assertEquals(NO_REMARKS, orchestrator.getApplicationInstanceStatus(app1)); + assertEquals(HostStatus.NO_REMARKS, orchestrator.getNodeStatus(app1_host1)); + assertFalse(isInMaintenance(app1, app1_host1)); + + orchestrator.suspend(app1); + + assertEquals(HostStatus.ALLOWED_TO_BE_DOWN, orchestrator.getNodeStatus(app1_host1)); + assertTrue(isInMaintenance(app1, app1_host1)); + } + + @Test + public void node_suspend_while_app_is_resumed_set_allowed_to_be_down_and_set_it_in_maintenance() throws Exception { + // Pre condition + assertEquals(NO_REMARKS, orchestrator.getApplicationInstanceStatus(app1)); + assertEquals(HostStatus.NO_REMARKS, orchestrator.getNodeStatus(app1_host1)); + assertFalse(isInMaintenance(app1, app1_host1)); + + orchestrator.suspend(app1_host1); + + assertEquals(HostStatus.ALLOWED_TO_BE_DOWN, orchestrator.getNodeStatus(app1_host1)); + assertTrue(isInMaintenance(app1, app1_host1)); + } + + @Test + public void node_suspend_while_app_is_suspended_does_nothing() throws Exception { + // Pre condition + orchestrator.suspend(app1); + assertEquals(ALLOWED_TO_BE_DOWN, orchestrator.getApplicationInstanceStatus(app1)); + assertEquals(HostStatus.ALLOWED_TO_BE_DOWN, orchestrator.getNodeStatus(app1_host1)); + assertTrue(isInMaintenance(app1, app1_host1)); + + orchestrator.suspend(app1_host1); + + // Should not change anything + assertEquals(ALLOWED_TO_BE_DOWN, orchestrator.getApplicationInstanceStatus(app1)); + assertEquals(HostStatus.ALLOWED_TO_BE_DOWN, orchestrator.getNodeStatus(app1_host1)); + assertTrue(isInMaintenance(app1, app1_host1)); + } + + @Test + public void node_resume_after_app_is_resumed_removes_allowed_be_down_and_set_it_up() throws Exception { + // Pre condition + orchestrator.suspend(app1); + assertEquals(ALLOWED_TO_BE_DOWN, orchestrator.getApplicationInstanceStatus(app1)); + assertEquals(HostStatus.ALLOWED_TO_BE_DOWN, orchestrator.getNodeStatus(app1_host1)); + assertTrue(isInMaintenance(app1, app1_host1)); + + orchestrator.resume(app1); + orchestrator.resume(app1_host1); + + assertEquals(HostStatus.NO_REMARKS, orchestrator.getNodeStatus(app1_host1)); + assertFalse(isInMaintenance(app1, app1_host1)); + } + + @Test + public void node_resume_while_app_is_suspended_does_nothing() throws Exception { + orchestrator.suspend(app1_host1); + orchestrator.suspend(app1); + + orchestrator.resume(app1_host1); + + assertEquals(HostStatus.ALLOWED_TO_BE_DOWN, orchestrator.getNodeStatus(app1_host1)); + assertTrue(isInMaintenance(app1, app1_host1)); + } + + @Test + public void applicationReferenceHasTenantAndAppInstance() { + InstanceLookupService service = new DummyInstanceLookupService(); + String applicationInstanceId = service.findInstanceByHost(DummyInstanceLookupService.TEST1_HOST_NAME).get() + .reference().toString(); + assertEquals("test-tenant-id:application:instance", applicationInstanceId); + } + + @Test + public void sortHostNamesForSuspend() throws Exception { + HostName parentHostName = new HostName("parentHostName"); + List<HostName> expectedOrder = Arrays.asList( + DummyInstanceLookupService.TEST3_HOST_NAME, + DummyInstanceLookupService.TEST1_HOST_NAME); + + assertEquals(expectedOrder, orchestrator.sortHostNamesForSuspend(Arrays.asList( + DummyInstanceLookupService.TEST1_HOST_NAME, + DummyInstanceLookupService.TEST3_HOST_NAME))); + + assertEquals(expectedOrder, orchestrator.sortHostNamesForSuspend(Arrays.asList( + DummyInstanceLookupService.TEST3_HOST_NAME, + DummyInstanceLookupService.TEST1_HOST_NAME))); + } + + @Test + public void rollbackWorks() throws Exception { + // A spy is preferential because suspendAll() relies on delegating the hard work to suspend() and resume(). + OrchestratorImpl orchestrator = spy(this.orchestrator); + + doNothing().when(orchestrator).suspend(DummyInstanceLookupService.TEST3_HOST_NAME); + + Throwable supensionFailure = new HostStateChangeDeniedException( + DummyInstanceLookupService.TEST6_HOST_NAME, + "some-constraint", + new ServiceType("foo"), + "error message"); + doThrow(supensionFailure).when(orchestrator).suspend(DummyInstanceLookupService.TEST1_HOST_NAME); + + doThrow(new HostStateChangeDeniedException(DummyInstanceLookupService.TEST1_HOST_NAME, "foo1-constraint", new ServiceType("foo1-service"), "foo1-message")) + .when(orchestrator).resume(DummyInstanceLookupService.TEST1_HOST_NAME); + doNothing().when(orchestrator).resume(DummyInstanceLookupService.TEST6_HOST_NAME); + doNothing().when(orchestrator).resume(DummyInstanceLookupService.TEST3_HOST_NAME); + + try { + orchestrator.suspendAll( + new HostName("parentHostname"), + Arrays.asList( + DummyInstanceLookupService.TEST1_HOST_NAME, + DummyInstanceLookupService.TEST3_HOST_NAME, + DummyInstanceLookupService.TEST6_HOST_NAME)); + fail(); + } catch (BatchHostStateChangeDeniedException e) { + assertEquals(e.getSuppressed().length, 1); + assertEquals("Failed to suspend [test3.prod.utpoia-1.vespahosted.ut1.yahoo.com, " + + "test6.prod.us-east-1.vespahosted.ne1.yahoo.com, test1.prod.utpoia-1.vespahosted.ut1.yahoo.com] " + + "with parent host parentHostname: Changing the state of host " + + "test6.prod.us-east-1.vespahosted.ne1.yahoo.com would violate some-constraint for " + + "service type foo: error message; " + + "With suppressed throwable com.yahoo.vespa.orchestrator.policy.HostStateChangeDeniedException: " + + "Changing the state of host test1.prod.utpoia-1.vespahosted.ut1.yahoo.com would violate " + + "foo1-constraint for service type foo1-service: foo1-message", e.getMessage()); + } + + InOrder order = inOrder(orchestrator); + order.verify(orchestrator).suspend(DummyInstanceLookupService.TEST3_HOST_NAME); + order.verify(orchestrator).suspend(DummyInstanceLookupService.TEST6_HOST_NAME); + + // As of 2016-06-07: + // TEST1_HOST_NAME: test-tenant-id:application:instance + // TEST3_HOST_NAME: mediasearch:imagesearch:default + // TEST6_HOST_NAME: tenant-id-3:application-instance-3:default + // Meaning the order is 3, 6, then 1. For rollback/resume the order is reversed. + order.verify(orchestrator).resume(DummyInstanceLookupService.TEST1_HOST_NAME); + order.verify(orchestrator).resume(DummyInstanceLookupService.TEST6_HOST_NAME); + order.verify(orchestrator).resume(DummyInstanceLookupService.TEST3_HOST_NAME); + order.verifyNoMoreInteractions(); + } + + private boolean isInMaintenance(ApplicationId appId, HostName hostName) throws ApplicationIdNotFoundException { + for (ApplicationInstance<ServiceMonitorStatus> app : DummyInstanceLookupService.getApplications()) { + if (app.reference().equals(OrchestratorUtil.toApplicationInstanceReference(appId))) { + return clustercontroller.isInMaintenance(app, hostName); + } + } + throw new ApplicationIdNotFoundException(); + } +} diff --git a/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/OrchestratorUtilTest.java b/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/OrchestratorUtilTest.java new file mode 100644 index 00000000000..0626bf72e60 --- /dev/null +++ b/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/OrchestratorUtilTest.java @@ -0,0 +1,49 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.vespa.orchestrator; + +import com.yahoo.config.provision.ApplicationId; +import com.yahoo.config.provision.ApplicationName; +import com.yahoo.config.provision.InstanceName; +import com.yahoo.config.provision.TenantName; +import com.yahoo.vespa.applicationmodel.ApplicationInstanceId; +import com.yahoo.vespa.applicationmodel.ApplicationInstanceReference; +import com.yahoo.vespa.applicationmodel.TenantId; +import org.junit.Assert; +import org.junit.Test; + +/** + * @author <a href="mailto:smorgrav@yahoo-inc.com">Toby</a> + */ +public class OrchestratorUtilTest { + + private static final ApplicationId APPID_1 = ApplicationId.from( + TenantName.from("mediasearch"), + ApplicationName.from("tumblr-search"), + InstanceName.defaultName()); + + private static final ApplicationInstanceReference APPREF_1 = new ApplicationInstanceReference( + new TenantId("test-tenant"), + new ApplicationInstanceId("test-application:test-environment:test-region:test-instance-key")); + + /** + * Here we don't care how the internal of the different application + * id/reference look like as long as we get back to exactly where we + * started from a round trip. I.e I'm not testing validity of the + * different representations. + */ + @Test + public void applicationid_conversion_are_symmetric() throws Exception { + + // From appId to appRef and back + ApplicationInstanceReference appRef = OrchestratorUtil.toApplicationInstanceReference(APPID_1); + ApplicationId appIdRoundTrip = OrchestratorUtil.toApplicationId(appRef); + + Assert.assertEquals(APPID_1, appIdRoundTrip); + + // From appRef to appId and back + ApplicationId appId = OrchestratorUtil.toApplicationId(APPREF_1); + ApplicationInstanceReference appRefRoundTrip = OrchestratorUtil.toApplicationInstanceReference(appId); + + Assert.assertEquals(APPREF_1, appRefRoundTrip); + } +}
\ No newline at end of file diff --git a/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/TestIds.java b/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/TestIds.java new file mode 100644 index 00000000000..e9d8b498f32 --- /dev/null +++ b/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/TestIds.java @@ -0,0 +1,24 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.vespa.orchestrator; + +import com.yahoo.vespa.applicationmodel.ApplicationInstanceId; +import com.yahoo.vespa.applicationmodel.ApplicationInstanceReference; +import com.yahoo.vespa.applicationmodel.HostName; +import com.yahoo.vespa.applicationmodel.TenantId; + +/** + * @author tonytv + */ +public class TestIds { + public static final ApplicationInstanceReference APPLICATION_INSTANCE_REFERENCE = + new ApplicationInstanceReference( + new TenantId("test-tenant"), + new ApplicationInstanceId("test-application:test-environment:test-region:test-instance-key")); + + public static final ApplicationInstanceReference APPLICATION_INSTANCE_REFERENCE2 = + new ApplicationInstanceReference( + new TenantId("test-tenant2"), + new ApplicationInstanceId("test-application2:test-environment:test-region:test-instance-key")); + + public static final HostName HOST_NAME1 = new HostName("host1.test.corp.yahoo.com"); +} diff --git a/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/TestUtil.java b/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/TestUtil.java new file mode 100644 index 00000000000..244ddfc858d --- /dev/null +++ b/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/TestUtil.java @@ -0,0 +1,37 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.vespa.orchestrator; + +import com.yahoo.vespa.applicationmodel.ConfigId; +import com.yahoo.vespa.applicationmodel.ServiceCluster; +import com.yahoo.vespa.applicationmodel.ServiceInstance; + +import java.util.Arrays; +import java.util.HashSet; +import java.util.Set; + +/** + * Utility methods for creating test setups. + * + * @author <a href="mailto:bakksjo@yahoo-inc.com">Oyvind Bakksjo</a> + */ +public class TestUtil { + @SafeVarargs + public static <S> Set<ServiceInstance<S>> makeServiceInstanceSet( + final ServiceInstance<S>... serviceInstances) { + return new HashSet<>(Arrays.asList(serviceInstances)); + } + + @SafeVarargs + public static <S> Set<ServiceCluster<S>> makeServiceClusterSet( + final ServiceCluster<S>... serviceClusters) { + return new HashSet<>(Arrays.asList(serviceClusters)); + } + + public static ConfigId storageNodeConfigId(int index) { + return new ConfigId("storage/storage/" + index); + } + + public static ConfigId clusterControllerConfigId(int index) { + return new ConfigId("admin/cluster-controllers/" + index); + } +} diff --git a/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/VespaModelUtilTest.java b/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/VespaModelUtilTest.java new file mode 100644 index 00000000000..ab533528cd4 --- /dev/null +++ b/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/VespaModelUtilTest.java @@ -0,0 +1,223 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.vespa.orchestrator; + +import com.yahoo.vespa.applicationmodel.ApplicationInstance; +import com.yahoo.vespa.applicationmodel.ApplicationInstanceId; +import com.yahoo.vespa.applicationmodel.ClusterId; +import com.yahoo.vespa.applicationmodel.ConfigId; +import com.yahoo.vespa.applicationmodel.HostName; +import com.yahoo.vespa.applicationmodel.ServiceCluster; +import com.yahoo.vespa.applicationmodel.ServiceInstance; +import com.yahoo.vespa.applicationmodel.ServiceType; +import com.yahoo.vespa.applicationmodel.TenantId; +import com.yahoo.vespa.service.monitor.ServiceMonitorStatus; +import org.junit.Test; + +import java.util.HashSet; +import java.util.Optional; +import java.util.Set; + +import static com.google.common.collect.Sets.newHashSet; +import static com.yahoo.vespa.orchestrator.TestUtil.makeServiceClusterSet; +import static com.yahoo.vespa.orchestrator.TestUtil.makeServiceInstanceSet; +import static junit.framework.TestCase.assertFalse; +import static junit.framework.TestCase.assertTrue; +import static org.fest.assertions.Assertions.assertThat; + +/** + * @author hakon + */ +public class VespaModelUtilTest { + // Cluster Controller Service Cluster + + private static final ClusterId CONTENT_CLUSTER_ID = new ClusterId("content-cluster-0"); + + public static final HostName controller0Host = new HostName("controller-0"); + + private static final ServiceInstance<ServiceMonitorStatus> controller0 = new ServiceInstance<>( + TestUtil.clusterControllerConfigId(0), + controller0Host, + ServiceMonitorStatus.UP); + private static final ServiceInstance<ServiceMonitorStatus> controller1 = new ServiceInstance<>( + TestUtil.clusterControllerConfigId(1), + new HostName("controller-1"), + ServiceMonitorStatus.UP); + + private static final ServiceCluster<ServiceMonitorStatus> controllerCluster = + new ServiceCluster<>( + new ClusterId(CONTENT_CLUSTER_ID.s() + "-controller"), + VespaModelUtil.CLUSTER_CONTROLLER_SERVICE_TYPE, + makeServiceInstanceSet(controller1, controller0)); + + // Distributor Service Cluster + + private static final ServiceInstance<ServiceMonitorStatus> distributor0 = new ServiceInstance<>( + new ConfigId("distributor-config-id"), + new HostName("distributor-0"), + ServiceMonitorStatus.UP); + + + private static final ServiceCluster<ServiceMonitorStatus> distributorCluster = + new ServiceCluster<>( + CONTENT_CLUSTER_ID, + VespaModelUtil.DISTRIBUTOR_SERVICE_TYPE, + makeServiceInstanceSet(distributor0)); + + // Storage Node Service Cluster + + public static final HostName storage0Host = new HostName("storage-0"); + private static final ServiceInstance<ServiceMonitorStatus> storage0 = new ServiceInstance<>( + new ConfigId("storage-config-id"), + storage0Host, + ServiceMonitorStatus.UP); + + private static final ServiceCluster<ServiceMonitorStatus> storageCluster = + new ServiceCluster<>( + CONTENT_CLUSTER_ID, + VespaModelUtil.STORAGENODE_SERVICE_TYPE, + makeServiceInstanceSet(storage0)); + + // Secondary Distributor Service Cluster + + private static final ServiceInstance<ServiceMonitorStatus> secondaryDistributor0 = new ServiceInstance<>( + new ConfigId("secondary-distributor-config-id"), + new HostName("secondary-distributor-0"), + ServiceMonitorStatus.UP); + + private static final ClusterId SECONDARY_CONTENT_CLUSTER_ID = new ClusterId("secondary-content-cluster-0"); + private static final ServiceCluster<ServiceMonitorStatus> secondaryDistributorCluster = + new ServiceCluster<>( + SECONDARY_CONTENT_CLUSTER_ID, + VespaModelUtil.DISTRIBUTOR_SERVICE_TYPE, + makeServiceInstanceSet(secondaryDistributor0)); + + // Secondary Storage Node Service Cluster + + public static final HostName secondaryStorage0Host = new HostName("secondary-storage-0"); + private static final ServiceInstance<ServiceMonitorStatus> secondaryStorage0 = new ServiceInstance<>( + new ConfigId("secondary-storage-config-id"), + secondaryStorage0Host, + ServiceMonitorStatus.UP); + + private static final ServiceCluster<ServiceMonitorStatus> secondaryStorageCluster = + new ServiceCluster<>( + SECONDARY_CONTENT_CLUSTER_ID, + VespaModelUtil.STORAGENODE_SERVICE_TYPE, + makeServiceInstanceSet(secondaryStorage0)); + + // The Application Instance + + public static final ApplicationInstance<ServiceMonitorStatus> application = + new ApplicationInstance<>( + new TenantId("tenant-0"), + new ApplicationInstanceId("application-0"), + makeServiceClusterSet( + controllerCluster, + distributorCluster, + storageCluster, + secondaryDistributorCluster, + secondaryStorageCluster)); + + private ServiceCluster<?> createServiceCluster(ServiceType serviceType) { + return new ServiceCluster<ServiceMonitorStatus>( + new ClusterId("cluster-id"), + serviceType, + new HashSet<>()); + } + + @Test + public void verifyControllerClusterIsRecognized() { + ServiceCluster<?> cluster = createServiceCluster(VespaModelUtil.CLUSTER_CONTROLLER_SERVICE_TYPE); + assertTrue(VespaModelUtil.isClusterController(cluster)); + } + + @Test + public void verifyNonControllerClusterIsNotRecognized() { + ServiceCluster<?> cluster = createServiceCluster(new ServiceType("foo")); + assertFalse(VespaModelUtil.isClusterController(cluster)); + } + + @Test + public void verifyStorageClusterIsRecognized() { + ServiceCluster<?> cluster = createServiceCluster(VespaModelUtil.STORAGENODE_SERVICE_TYPE); + assertTrue(VespaModelUtil.isStorage(cluster)); + cluster = createServiceCluster(VespaModelUtil.STORAGENODE_SERVICE_TYPE); + assertTrue(VespaModelUtil.isStorage(cluster)); + } + + @Test + public void verifyNonStorageClusterIsNotRecognized() { + ServiceCluster<?> cluster = createServiceCluster(new ServiceType("foo")); + assertFalse(VespaModelUtil.isStorage(cluster)); + } + + @Test + public void verifyContentClusterIsRecognized() { + ServiceCluster<?> cluster = createServiceCluster(VespaModelUtil.DISTRIBUTOR_SERVICE_TYPE); + assertTrue(VespaModelUtil.isContent(cluster)); + cluster = createServiceCluster(VespaModelUtil.STORAGENODE_SERVICE_TYPE); + assertTrue(VespaModelUtil.isContent(cluster)); + cluster = createServiceCluster(VespaModelUtil.SEARCHNODE_SERVICE_TYPE); + assertTrue(VespaModelUtil.isContent(cluster)); + } + + @Test + public void verifyNonContentClusterIsNotRecognized() { + ServiceCluster<?> cluster = createServiceCluster(new ServiceType("foo")); + assertFalse(VespaModelUtil.isContent(cluster)); + } + + @Test + public void testGettingClusterControllerInstances() { + Set<ServiceInstance<?>> controllers = + new HashSet<>(VespaModelUtil.getClusterControllerInstances(application, CONTENT_CLUSTER_ID)); + Set<ServiceInstance<ServiceMonitorStatus>> expectedControllers = newHashSet(controller0, controller1); + + assertThat(controllers).isEqualTo(expectedControllers); + } + + @Test + public void testGetControllerHostName() { + HostName host = VespaModelUtil.getControllerHostName(application, CONTENT_CLUSTER_ID); + assertThat(host).isEqualTo(controller0Host); + } + + @Test + public void testGetContentClusterName() { + ClusterId contentClusterName = VespaModelUtil.getContentClusterName(application, distributor0.hostName()); + assertThat(CONTENT_CLUSTER_ID).isEqualTo(contentClusterName); + } + + @Test + public void testGetContentClusterNameForSecondaryContentCluster() { + ClusterId contentClusterName = VespaModelUtil.getContentClusterName(application, secondaryDistributor0.hostName()); + assertThat(SECONDARY_CONTENT_CLUSTER_ID).isEqualTo(contentClusterName); + } + + @Test + public void testGetStorageNodeAtHost() { + Optional<ServiceInstance<ServiceMonitorStatus>> service = + VespaModelUtil.getStorageNodeAtHost(application, storage0Host); + assertTrue(service.isPresent()); + assertThat(service.get()).isEqualTo(storage0); + } + + @Test + public void testGetStorageNodeAtHostWithUnknownHost() { + Optional<ServiceInstance<ServiceMonitorStatus>> service = + VespaModelUtil.getStorageNodeAtHost(application, new HostName("storage-1")); + assertFalse(service.isPresent()); + } + + @Test + public void testGetClusterControllerIndex() { + ConfigId configId = new ConfigId("admin/cluster-controllers/2"); + assertThat(VespaModelUtil.getClusterControllerIndex(configId)).isEqualTo(2); + } + + @Test + public void testGetStorageNodeIndex() { + ConfigId configId = TestUtil.storageNodeConfigId(3); + assertThat(VespaModelUtil.getStorageNodeIndex(configId)).isEqualTo(3); + } +} diff --git a/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/controller/ClusterControllerClientFactoryMock.java b/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/controller/ClusterControllerClientFactoryMock.java new file mode 100644 index 00000000000..a682bfe8856 --- /dev/null +++ b/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/controller/ClusterControllerClientFactoryMock.java @@ -0,0 +1,74 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.vespa.orchestrator.controller; + +import com.yahoo.vespa.applicationmodel.ApplicationInstance; +import com.yahoo.vespa.applicationmodel.ClusterId; +import com.yahoo.vespa.applicationmodel.HostName; +import com.yahoo.vespa.applicationmodel.ServiceInstance; +import com.yahoo.vespa.orchestrator.DummyInstanceLookupService; +import com.yahoo.vespa.orchestrator.VespaModelUtil; +import com.yahoo.vespa.service.monitor.ServiceMonitorStatus; + +import java.io.IOException; +import java.util.Collection; +import java.util.HashMap; +import java.util.HashSet; +import java.util.Map; +import java.util.Set; + +/** + * Mock implementation of ClusterControllerClient + * <p> + * + * @author <a href="mailto:smorgrav@yahoo-inc.com">Toby</a> + */ +public class ClusterControllerClientFactoryMock implements ClusterControllerClientFactory { + Map<String, ClusterControllerState> nodes = new HashMap<>(); + + public boolean isInMaintenance(ApplicationInstance<ServiceMonitorStatus> appInstance, HostName hostName) { + try { + ClusterId clusterName = VespaModelUtil.getContentClusterName(appInstance, hostName); + int storageNodeIndex = VespaModelUtil.getStorageNodeIndex(appInstance, hostName); + String globalMapKey = clusterName.s() + storageNodeIndex; + return nodes.getOrDefault(globalMapKey, ClusterControllerState.UP) == ClusterControllerState.MAINTENANCE; + } catch (Exception e) { + //Catch all - meant to catch cases where the node is not part of a storage cluster + return false; + } + } + + public void setAllDummyNodesAsUp() { + for (ApplicationInstance<ServiceMonitorStatus> app : DummyInstanceLookupService.getApplications()) { + Set<HostName> hosts = DummyInstanceLookupService.getContentHosts(app.reference()); + for (HostName host : hosts) { + ClusterId clusterName = VespaModelUtil.getContentClusterName(app, host); + int storageNodeIndex = VespaModelUtil.getStorageNodeIndex(app, host); + String globalMapKey = clusterName.s() + storageNodeIndex; + nodes.put(globalMapKey, ClusterControllerState.UP); + } + } + } + + @Override + public ClusterControllerClient createClient(Collection<? extends ServiceInstance<?>> clusterControllers, String clusterName) { + return new ClusterControllerClient() { + + @Override + public ClusterControllerStateResponse setNodeState(int storageNodeIndex, ClusterControllerState wantedState) throws IOException { + nodes.put(clusterName + storageNodeIndex, wantedState); + return new ClusterControllerStateResponse(true, "Yes"); + } + + @Override + public ClusterControllerStateResponse setApplicationState(ClusterControllerState wantedState) throws IOException { + Set<String> keyCopy = new HashSet<>(nodes.keySet()); + for (String s : keyCopy) { + if (s.startsWith(clusterName)) { + nodes.put(s, wantedState); + } + } + return new ClusterControllerStateResponse(true, "It works"); + } + }; + } +} diff --git a/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/controller/ClusterControllerClientTest.java b/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/controller/ClusterControllerClientTest.java new file mode 100644 index 00000000000..68df4dce393 --- /dev/null +++ b/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/controller/ClusterControllerClientTest.java @@ -0,0 +1,38 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.vespa.orchestrator.controller; + +import com.yahoo.vespa.jaxrs.client.JaxRsStrategy; +import com.yahoo.vespa.jaxrs.client.LocalPassThroughJaxRsStrategy; +import org.junit.Test; + +import static org.mockito.Matchers.eq; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.times; +import static org.mockito.Mockito.verify; + +public class ClusterControllerClientTest { + private static final String CLUSTER_NAME = "clusterName"; + private static final int STORAGE_NODE_INDEX = 0; + + @Test + public void correctParametersArePassedThrough() throws Exception { + final ClusterControllerJaxRsApi clusterControllerApi = mock(ClusterControllerJaxRsApi.class); + final JaxRsStrategy<ClusterControllerJaxRsApi> strategyMock = new LocalPassThroughJaxRsStrategy<>(clusterControllerApi); + final ClusterControllerClient clusterControllerClient = new ClusterControllerClientImpl( + strategyMock, + CLUSTER_NAME); + + final ClusterControllerState wantedState = ClusterControllerState.MAINTENANCE; + + clusterControllerClient.setNodeState(STORAGE_NODE_INDEX, wantedState); + + final ClusterControllerStateRequest expectedNodeStateRequest = new ClusterControllerStateRequest( + new ClusterControllerStateRequest.State(wantedState, ClusterControllerClientImpl.REQUEST_REASON), + ClusterControllerStateRequest.Condition.SAFE); + verify(clusterControllerApi, times(1)) + .setNodeState( + eq(CLUSTER_NAME), + eq(STORAGE_NODE_INDEX), + eq(expectedNodeStateRequest)); + } +} diff --git a/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/controller/SingleInstanceClusterControllerClientFactoryTest.java b/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/controller/SingleInstanceClusterControllerClientFactoryTest.java new file mode 100644 index 00000000000..cc217885047 --- /dev/null +++ b/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/controller/SingleInstanceClusterControllerClientFactoryTest.java @@ -0,0 +1,117 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.vespa.orchestrator.controller; + +import com.yahoo.vespa.jaxrs.client.JaxRsClientFactory; +import com.yahoo.vespa.orchestrator.TestUtil; +import com.yahoo.vespa.applicationmodel.ConfigId; +import com.yahoo.vespa.applicationmodel.HostName; +import com.yahoo.vespa.applicationmodel.ServiceInstance; +import com.yahoo.vespa.service.monitor.ServiceMonitorStatus; +import org.junit.Before; +import org.junit.Test; + +import java.util.Collection; +import java.util.Collections; + +import static org.hamcrest.CoreMatchers.anyOf; +import static org.hamcrest.CoreMatchers.equalTo; +import static org.hamcrest.CoreMatchers.is; +import static org.junit.Assert.fail; +import static org.mockito.Matchers.any; +import static org.mockito.Matchers.anyInt; +import static org.mockito.Matchers.anyString; +import static org.mockito.Matchers.argThat; +import static org.mockito.Matchers.eq; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.verify; +import static org.mockito.Mockito.when; + +public class SingleInstanceClusterControllerClientFactoryTest { + private static final int PORT = SingleInstanceClusterControllerClientFactory.CLUSTERCONTROLLER_HARDCODED_PORT; + private static final String PATH = SingleInstanceClusterControllerClientFactory.CLUSTERCONTROLLER_API_PATH; + + private static final HostName HOST_NAME_1 = new HostName("host1"); + private static final HostName HOST_NAME_2 = new HostName("host2"); + private static final HostName HOST_NAME_3 = new HostName("host3"); + + private final ClusterControllerJaxRsApi mockApi = mock(ClusterControllerJaxRsApi.class); + private final JaxRsClientFactory jaxRsClientFactory = mock(JaxRsClientFactory.class); + private final ClusterControllerClientFactory clientFactory + = new SingleInstanceClusterControllerClientFactory(jaxRsClientFactory); + + @Before + public void setup() { + when( + jaxRsClientFactory.createClient( + eq(ClusterControllerJaxRsApi.class), + any(HostName.class), + anyInt(), + anyString())) + .thenReturn(mockApi); + } + + @Test + public void testCreateClientWithNoClusterControllerInstances() throws Exception { + final Collection<ServiceInstance<ServiceMonitorStatus>> clusterControllers = Collections.emptySet(); + + try { + clientFactory.createClient(clusterControllers, "clusterName"); + fail(); + } catch (IllegalArgumentException e) { + // As expected. + } + } + + @Test + public void testCreateClientWithSingleClusterControllerInstance() throws Exception { + final Collection<ServiceInstance<ServiceMonitorStatus>> clusterControllers = Collections.singleton( + new ServiceInstance<>(clusterControllerConfigId(1), HOST_NAME_1, ServiceMonitorStatus.UP)); + + clientFactory.createClient(clusterControllers, "clusterName") + .setNodeState(0, ClusterControllerState.MAINTENANCE); + + verify(jaxRsClientFactory).createClient( + ClusterControllerJaxRsApi.class, + HOST_NAME_1, + PORT, + PATH); + } + + @Test + public void testCreateClientWithTwoNonClusterControllerInstances() throws Exception { + final Collection<ServiceInstance<ServiceMonitorStatus>> clusterControllers = TestUtil.makeServiceInstanceSet( + new ServiceInstance<>(new ConfigId("not-a-cluster-controller-1"), HOST_NAME_1, ServiceMonitorStatus.UP), + new ServiceInstance<>(new ConfigId("not-a-cluster-controller-2"), HOST_NAME_2, ServiceMonitorStatus.UP)); + + try { + clientFactory.createClient(clusterControllers, "clusterName"); + fail(); + } catch (IllegalArgumentException e) { + // As expected. + } + } + + @Test + public void testCreateClientWithThreeClusterControllerInstances() throws Exception { + final Collection<ServiceInstance<ServiceMonitorStatus>> clusterControllers = TestUtil.makeServiceInstanceSet( + new ServiceInstance<>(clusterControllerConfigId(1), HOST_NAME_1, ServiceMonitorStatus.UP), + new ServiceInstance<>(clusterControllerConfigId(2), HOST_NAME_2, ServiceMonitorStatus.UP), + new ServiceInstance<>(clusterControllerConfigId(3), HOST_NAME_3, ServiceMonitorStatus.UP)); + + clientFactory.createClient(clusterControllers, "clusterName") + .setNodeState(0, ClusterControllerState.MAINTENANCE); + + verify(jaxRsClientFactory).createClient( + eq(ClusterControllerJaxRsApi.class), + argThat(is(anyOf( + equalTo(HOST_NAME_1), + equalTo(HOST_NAME_2), + equalTo(HOST_NAME_3)))), + eq(PORT), + eq(PATH)); + } + + private static ConfigId clusterControllerConfigId(final int index) { + return new ConfigId("admin/cluster-controllers/" + index); + } +} diff --git a/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/policy/HostedVespaPolicyTest.java b/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/policy/HostedVespaPolicyTest.java new file mode 100644 index 00000000000..45d605b6d8a --- /dev/null +++ b/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/policy/HostedVespaPolicyTest.java @@ -0,0 +1,738 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.vespa.orchestrator.policy; + + +import com.yahoo.vespa.applicationmodel.ApplicationInstance; +import com.yahoo.vespa.applicationmodel.ApplicationInstanceId; +import com.yahoo.vespa.applicationmodel.ClusterId; +import com.yahoo.vespa.applicationmodel.ConfigId; +import com.yahoo.vespa.applicationmodel.HostName; +import com.yahoo.vespa.applicationmodel.ServiceCluster; +import com.yahoo.vespa.applicationmodel.ServiceInstance; +import com.yahoo.vespa.applicationmodel.ServiceType; +import com.yahoo.vespa.applicationmodel.TenantId; +import com.yahoo.vespa.orchestrator.TestUtil; +import com.yahoo.vespa.orchestrator.VespaModelUtil; +import com.yahoo.vespa.orchestrator.controller.ClusterControllerClient; +import com.yahoo.vespa.orchestrator.controller.ClusterControllerClientFactory; +import com.yahoo.vespa.orchestrator.controller.ClusterControllerState; +import com.yahoo.vespa.orchestrator.controller.ClusterControllerStateResponse; +import com.yahoo.vespa.orchestrator.status.HostStatus; +import com.yahoo.vespa.orchestrator.status.MutableStatusRegistry; +import com.yahoo.vespa.service.monitor.ServiceMonitorStatus; +import org.junit.Test; + +import java.util.HashSet; +import java.util.Optional; +import java.util.Set; + +import static com.yahoo.vespa.orchestrator.TestUtil.makeServiceClusterSet; +import static com.yahoo.vespa.orchestrator.TestUtil.makeServiceInstanceSet; +import static com.yahoo.vespa.service.monitor.ServiceMonitorStatus.DOWN; +import static com.yahoo.vespa.service.monitor.ServiceMonitorStatus.NOT_CHECKED; +import static com.yahoo.vespa.service.monitor.ServiceMonitorStatus.UP; +import static org.fest.assertions.Assertions.assertThat; +import static org.junit.Assert.fail; +import static org.mockito.Matchers.any; +import static org.mockito.Matchers.anyInt; +import static org.mockito.Matchers.eq; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.never; +import static org.mockito.Mockito.times; +import static org.mockito.Mockito.verify; +import static org.mockito.Mockito.when; + +/** + * @author oyving + * @author bakksjo + */ +public class HostedVespaPolicyTest { + private static final TenantId TENANT_ID = new TenantId("tenantId"); + private static final ApplicationInstanceId APPLICATION_INSTANCE_ID = new ApplicationInstanceId("applicationId"); + private static final HostName HOST_NAME_1 = new HostName("host-1"); + private static final HostName HOST_NAME_2 = new HostName("host-2"); + private static final HostName HOST_NAME_3 = new HostName("host-3"); + private static final HostName HOST_NAME_4 = new HostName("host-4"); + private static final HostName HOST_NAME_5 = new HostName("host-5"); + private static final ServiceType SERVICE_TYPE_1 = new ServiceType("service-1"); + private static final ServiceType SERVICE_TYPE_2 = new ServiceType("service-2"); + + private final ClusterControllerClientFactory clusterControllerClientFactory + = mock(ClusterControllerClientFactory.class); + private final ClusterControllerClient client = mock(ClusterControllerClient.class); + { + when(clusterControllerClientFactory.createClient(any(), any())).thenReturn(client); + } + + private final HostedVespaPolicy policy + = new HostedVespaPolicy(clusterControllerClientFactory); + + private final MutableStatusRegistry mutablestatusRegistry = mock(MutableStatusRegistry.class); + { + when(mutablestatusRegistry.getHostStatus(any())).thenReturn(HostStatus.NO_REMARKS); + } + + @Test + public void test_policy_everyone_agrees_everything_is_up() throws Exception { + final ApplicationInstance<ServiceMonitorStatus> applicationInstance = new AppBuilder() + .addCluster(new ClusterBuilder(SERVICE_TYPE_1) + .instance(HOST_NAME_1, UP) + .instance(HOST_NAME_2, UP) + .instance(HOST_NAME_3, UP)) + .build(); + + policy.grantSuspensionRequest( + applicationInstance, + HOST_NAME_1, + mutablestatusRegistry + ); + + verify(mutablestatusRegistry, times(1)).setHostState(HOST_NAME_1, HostStatus.ALLOWED_TO_BE_DOWN); + } + + private void grantWithAdminCluster( + ServiceMonitorStatus statusParallelInstanceOtherHost, + ServiceMonitorStatus statusInstanceOtherHost, + ServiceType serviceType, + boolean expectGranted) throws HostStateChangeDeniedException { + final ApplicationInstance<ServiceMonitorStatus> applicationInstance = new AppBuilder() + .addCluster(new ClusterBuilder(SERVICE_TYPE_1) + .instance(HOST_NAME_1, UP) + .instance(HOST_NAME_2, statusInstanceOtherHost) + .instance(HOST_NAME_3, UP)) + .addCluster(new ClusterBuilder(VespaModelUtil.ADMIN_CLUSTER_ID, serviceType) + .instance(HOST_NAME_1, UP) + .instance(HOST_NAME_2, statusParallelInstanceOtherHost)) + .build(); + + if (expectGranted) { + policy.grantSuspensionRequest( + applicationInstance, + HOST_NAME_1, + mutablestatusRegistry + ); + + verify(mutablestatusRegistry, times(1)).setHostState(HOST_NAME_1, HostStatus.ALLOWED_TO_BE_DOWN); + } else { + try { + policy.grantSuspensionRequest( + applicationInstance, + HOST_NAME_1, + mutablestatusRegistry); + fail(); + } catch (HostStateChangeDeniedException e) { + // As expected. + assertThat(e.getConstraintName()).isEqualTo(HostedVespaPolicy.ENOUGH_SERVICES_UP_CONSTRAINT); + } + + verify(mutablestatusRegistry, never()).setHostState(any(), any()); + } + } + + @Test + public void test_parallel_cluster_down_is_ok() throws Exception { + grantWithAdminCluster(DOWN, UP, new ServiceType("some-service-type"), true); + } + + @Test + public void test_slobrok_cluster_down_is_not_ok() throws Exception { + grantWithAdminCluster(DOWN, UP, VespaModelUtil.SLOBROK_SERVICE_TYPE, false); + } + + @Test + public void test_other_cluster_instance_down_is_not_ok() throws Exception { + grantWithAdminCluster(DOWN, DOWN, new ServiceType("some-service-type"), false); + } + + @Test + public void test_all_up_is_ok() throws Exception { + grantWithAdminCluster(UP, UP, new ServiceType("some-service-type"), true); + } + + @Test + public void test_policy_other_host_allowed_to_be_down() { + final ApplicationInstance<ServiceMonitorStatus> applicationInstance = new AppBuilder() + .addCluster(new ClusterBuilder(SERVICE_TYPE_1) + .instance(HOST_NAME_1, UP) + .instance(HOST_NAME_2, UP) + .instance(HOST_NAME_3, UP) + .instance(HOST_NAME_4, UP) + .instance(HOST_NAME_5, UP)) + .build(); + + when(mutablestatusRegistry.getHostStatus(eq(HOST_NAME_2))).thenReturn(HostStatus.ALLOWED_TO_BE_DOWN); + + try { + policy.grantSuspensionRequest( + applicationInstance, + HOST_NAME_3, + mutablestatusRegistry); + fail(); + } catch (HostStateChangeDeniedException e) { + // As expected. + assertThat(e.getConstraintName()).isEqualTo(HostedVespaPolicy.ENOUGH_SERVICES_UP_CONSTRAINT); + assertThat(e.getServiceType()).isEqualTo(SERVICE_TYPE_1); + } + + verify(mutablestatusRegistry, never()).setHostState(any(), any()); + } + + @Test + public void test_policy_this_host_allowed_to_be_down() throws Exception { + final ApplicationInstance<ServiceMonitorStatus> applicationInstance = new AppBuilder() + .addCluster(new ClusterBuilder(SERVICE_TYPE_1) + .instance(HOST_NAME_1, UP) + .instance(HOST_NAME_2, UP) + .instance(HOST_NAME_3, UP) + .instance(HOST_NAME_4, UP) + .instance(HOST_NAME_5, UP)) + .build(); + + when(mutablestatusRegistry.getHostStatus(eq(HOST_NAME_3))).thenReturn(HostStatus.ALLOWED_TO_BE_DOWN); + + policy.grantSuspensionRequest(applicationInstance, HOST_NAME_3, mutablestatusRegistry); + + verify(mutablestatusRegistry, times(1)).setHostState(HOST_NAME_3, HostStatus.ALLOWED_TO_BE_DOWN); + } + + @Test + public void from_five_to_ten_percent_suspension() throws Exception { + final ApplicationInstance<ServiceMonitorStatus> applicationInstance = new AppBuilder() + .addCluster(new ClusterBuilder(SERVICE_TYPE_1) + .instance(HOST_NAME_1, UP) + .instance(HOST_NAME_1, UP) + .instance(HOST_NAME_1, UP) + .instance(HOST_NAME_1, UP) + .instance(HOST_NAME_1, UP) + .instance(HOST_NAME_1, UP) + .instance(HOST_NAME_2, UP) + .instance(HOST_NAME_3, UP) + .instance(HOST_NAME_4, UP) + .instance(HOST_NAME_4, UP) + .instance(HOST_NAME_4, UP) + .instance(HOST_NAME_4, UP) + .instance(HOST_NAME_4, UP) + .instance(HOST_NAME_4, UP) + .instance(HOST_NAME_5, UP) + .instance(HOST_NAME_5, UP) + .instance(HOST_NAME_5, UP) + .instance(HOST_NAME_5, UP) + .instance(HOST_NAME_5, UP) + .instance(HOST_NAME_5, UP)) + .build(); + + when(mutablestatusRegistry.getHostStatus(eq(HOST_NAME_2))).thenReturn(HostStatus.ALLOWED_TO_BE_DOWN); + + policy.grantSuspensionRequest( + applicationInstance, + HOST_NAME_3, + mutablestatusRegistry); + + verify(mutablestatusRegistry, times(1)).setHostState(HOST_NAME_3, HostStatus.ALLOWED_TO_BE_DOWN); + } + + @Test + public void from_ten_to_fifteen_percent_suspension() { + final ApplicationInstance<ServiceMonitorStatus> applicationInstance = new AppBuilder() + .addCluster(new ClusterBuilder(SERVICE_TYPE_1) + .instance(HOST_NAME_1, UP) + .instance(HOST_NAME_1, UP) + .instance(HOST_NAME_1, UP) + .instance(HOST_NAME_1, UP) + .instance(HOST_NAME_1, UP) + .instance(HOST_NAME_1, UP) + .instance(HOST_NAME_2, UP) + .instance(HOST_NAME_2, UP) + .instance(HOST_NAME_3, UP) + .instance(HOST_NAME_4, UP) + .instance(HOST_NAME_4, UP) + .instance(HOST_NAME_4, UP) + .instance(HOST_NAME_4, UP) + .instance(HOST_NAME_4, UP) + .instance(HOST_NAME_5, UP) + .instance(HOST_NAME_5, UP) + .instance(HOST_NAME_5, UP) + .instance(HOST_NAME_5, UP) + .instance(HOST_NAME_5, UP) + .instance(HOST_NAME_5, UP)) + .build(); + + when(mutablestatusRegistry.getHostStatus(eq(HOST_NAME_2))).thenReturn(HostStatus.ALLOWED_TO_BE_DOWN); + + try { + policy.grantSuspensionRequest( + applicationInstance, + HOST_NAME_3, + mutablestatusRegistry); + fail(); + } catch (HostStateChangeDeniedException e) { + // As expected. + assertThat(e.getConstraintName()).isEqualTo(HostedVespaPolicy.ENOUGH_SERVICES_UP_CONSTRAINT); + assertThat(e.getServiceType()).isEqualTo(SERVICE_TYPE_1); + } + + verify(mutablestatusRegistry, never()).setHostState(any(), any()); + } + + @Test + public void from_five_to_fifteen_percent_suspension() { + final ApplicationInstance<ServiceMonitorStatus> applicationInstance = new AppBuilder() + .addCluster(new ClusterBuilder(SERVICE_TYPE_1) + .instance(HOST_NAME_1, UP) + .instance(HOST_NAME_1, UP) + .instance(HOST_NAME_1, UP) + .instance(HOST_NAME_1, UP) + .instance(HOST_NAME_1, UP) + .instance(HOST_NAME_1, UP) + .instance(HOST_NAME_2, UP) + .instance(HOST_NAME_3, UP) + .instance(HOST_NAME_3, UP) + .instance(HOST_NAME_4, UP) + .instance(HOST_NAME_4, UP) + .instance(HOST_NAME_4, UP) + .instance(HOST_NAME_4, UP) + .instance(HOST_NAME_4, UP) + .instance(HOST_NAME_5, UP) + .instance(HOST_NAME_5, UP) + .instance(HOST_NAME_5, UP) + .instance(HOST_NAME_5, UP) + .instance(HOST_NAME_5, UP) + .instance(HOST_NAME_5, UP)) + .build(); + + when(mutablestatusRegistry.getHostStatus(eq(HOST_NAME_2))).thenReturn(HostStatus.ALLOWED_TO_BE_DOWN); + + try { + policy.grantSuspensionRequest( + applicationInstance, + HOST_NAME_3, + mutablestatusRegistry); + fail(); + } catch (HostStateChangeDeniedException e) { + // As expected. + assertThat(e.getConstraintName()).isEqualTo(HostedVespaPolicy.ENOUGH_SERVICES_UP_CONSTRAINT); + assertThat(e.getServiceType()).isEqualTo(SERVICE_TYPE_1); + } + + verify(mutablestatusRegistry, never()).setHostState(any(), any()); + } + + @Test + public void test_policy_no_services() throws Exception { + final ApplicationInstance<ServiceMonitorStatus> applicationInstance = new AppBuilder().build(); + + HostName hostName = new HostName("test-hostname"); + policy.grantSuspensionRequest( + applicationInstance, + hostName, + mutablestatusRegistry + ); + + verify(mutablestatusRegistry, times(1)).setHostState(hostName, HostStatus.ALLOWED_TO_BE_DOWN); + } + + // The cluster name happens to be the cluster id of any of the content service clusters. + private static final String CONTENT_CLUSTER_NAME = "content-cluster-id"; + private static final HostName CLUSTER_CONTROLLER_HOST = new HostName("controller-0"); + private static final HostName STORAGE_NODE_HOST = new HostName("storage-2"); + private static final int STORAGE_NODE_INDEX = 2; + + private static final ServiceCluster<ServiceMonitorStatus> CLUSTER_CONTROLLER_SERVICE_CLUSTER = new ServiceCluster<>( + new ClusterId("cluster-0"), + VespaModelUtil.CLUSTER_CONTROLLER_SERVICE_TYPE, + makeServiceInstanceSet( + new ServiceInstance<>( + TestUtil.clusterControllerConfigId(0), + CLUSTER_CONTROLLER_HOST, + UP))); + + private static final ServiceCluster<ServiceMonitorStatus> DISTRIBUTOR_SERVICE_CLUSTER = new ServiceCluster<>( + new ClusterId(CONTENT_CLUSTER_NAME), + VespaModelUtil.DISTRIBUTOR_SERVICE_TYPE, + makeServiceInstanceSet( + new ServiceInstance<>( + new ConfigId("distributor-id-1"), + new HostName("distributor-1"), + UP))); + + private static final ServiceCluster<ServiceMonitorStatus> STORAGE_SERVICE_CLUSTER = new ServiceCluster<>( + new ClusterId(CONTENT_CLUSTER_NAME), + VespaModelUtil.STORAGENODE_SERVICE_TYPE, + makeServiceInstanceSet( + new ServiceInstance<>( + TestUtil.storageNodeConfigId(STORAGE_NODE_INDEX), + STORAGE_NODE_HOST, + UP))); + + private static final ApplicationInstance<ServiceMonitorStatus> APPLICATION_INSTANCE = + new ApplicationInstance<>( + TENANT_ID, + APPLICATION_INSTANCE_ID, + makeServiceClusterSet( + CLUSTER_CONTROLLER_SERVICE_CLUSTER, + DISTRIBUTOR_SERVICE_CLUSTER, + STORAGE_SERVICE_CLUSTER)); + + // The grantSuspensionRequest() and releaseSuspensionGrant() functions happen to have similar signature, + // which allows us to reuse test code for testing both functions. The actual call to one of these two functions + // is encapsulated into the following functional interface. + interface PolicyFunction { + void grant( + final HostedVespaPolicy policy, + final ApplicationInstance<ServiceMonitorStatus> applicationInstance, + final HostName hostName, + final MutableStatusRegistry hostStatusRegistry) throws HostStateChangeDeniedException; + } + + /** + * Since grantSuspensionRequest and releaseSuspensionGrant is quite similar, this test util contains the bulk + * of the test code used to test their common functionality. + * + * @param grantFunction Encapsulates the grant function to call + * @param currentHostStatus The current HostStatus of the host + * @param expectedNodeStateSentToClusterController The NodeState the test expects to be sent to the controller, + * or null if no CC request is expected to be sent. + * @param expectedHostStateSetOnHostStatusService The HostState the test expects to be set on the host service. + */ + private void testCommonGrantFunctionality( + PolicyFunction grantFunction, + ApplicationInstance<ServiceMonitorStatus> application, + HostStatus currentHostStatus, + Optional<ClusterControllerState> expectedNodeStateSentToClusterController, + HostStatus expectedHostStateSetOnHostStatusService) throws Exception { + // There is only one service running on the host, which is a storage node. + // Therefore, the corresponding cluster controller will have to be contacted + // to ask for permission. + + ClusterControllerStateResponse response = new ClusterControllerStateResponse(true, "ok"); + // MOTD: anyInt() MUST be used for an int field, otherwise a NullPointerException is thrown! + // In general, special anyX() must be used for primitive fields. + when(client.setNodeState(anyInt(), any())).thenReturn(response); + + when(mutablestatusRegistry.getHostStatus(any())).thenReturn(currentHostStatus); + + // Execution phase. + grantFunction.grant(policy, application, STORAGE_NODE_HOST, mutablestatusRegistry); + + // Verification phase. + + if (expectedNodeStateSentToClusterController.isPresent()) { + verify(clusterControllerClientFactory, times(1)) + .createClient( + CLUSTER_CONTROLLER_SERVICE_CLUSTER.serviceInstances(), + CONTENT_CLUSTER_NAME); + verify(client, times(1)) + .setNodeState( + STORAGE_NODE_INDEX, + expectedNodeStateSentToClusterController.get()); + } else { + verify(client, never()).setNodeState(anyInt(), any()); + } + + verify(mutablestatusRegistry, times(1)) + .setHostState( + STORAGE_NODE_HOST, + expectedHostStateSetOnHostStatusService); + } + + @Test + public void test_defer_to_controller() throws Exception { + HostStatus currentHostStatus = HostStatus.NO_REMARKS; + ClusterControllerState expectedNodeStateSentToClusterController = ClusterControllerState.MAINTENANCE; + HostStatus expectedHostStateSetOnHostStatusService = HostStatus.ALLOWED_TO_BE_DOWN; + testCommonGrantFunctionality( + HostedVespaPolicy::grantSuspensionRequest, + APPLICATION_INSTANCE, + currentHostStatus, + Optional.of(expectedNodeStateSentToClusterController), + expectedHostStateSetOnHostStatusService); + } + + @Test + public void test_release_suspension_grant_gives_no_remarks() throws Exception { + HostStatus currentHostStatus = HostStatus.ALLOWED_TO_BE_DOWN; + ClusterControllerState expectedNodeStateSentToClusterController = ClusterControllerState.UP; + HostStatus expectedHostStateSetOnHostStatusService = HostStatus.NO_REMARKS; + testCommonGrantFunctionality( + HostedVespaPolicy::releaseSuspensionGrant, + APPLICATION_INSTANCE, + currentHostStatus, + Optional.of(expectedNodeStateSentToClusterController), + expectedHostStateSetOnHostStatusService); + } + + @Test + public void okToSuspendHostWithNoConfiguredServices() throws Exception { + final ApplicationInstance<ServiceMonitorStatus> applicationInstance = new AppBuilder() + .addCluster(new ClusterBuilder(SERVICE_TYPE_1) + .instance(HOST_NAME_1, DOWN) + .instance(HOST_NAME_2, DOWN)) + .addCluster(new ClusterBuilder(SERVICE_TYPE_2) + .instance(HOST_NAME_4, DOWN) + .instance(HOST_NAME_5, DOWN)) + .build(); + + policy.grantSuspensionRequest(applicationInstance, HOST_NAME_3, mutablestatusRegistry); + } + + @Test + public void okToSuspendHostWithAllItsServicesDownEvenIfOthersAreDownToo() throws Exception { + final ApplicationInstance<ServiceMonitorStatus> applicationInstance = new AppBuilder() + .addCluster(new ClusterBuilder(SERVICE_TYPE_1) + .instance(HOST_NAME_1, UP) + .instance(HOST_NAME_2, DOWN) + .instance(HOST_NAME_3, DOWN)) + .addCluster(new ClusterBuilder(SERVICE_TYPE_2) + .instance(HOST_NAME_3, DOWN) + .instance(HOST_NAME_4, DOWN) + .instance(HOST_NAME_5, UP)) + .build(); + + policy.grantSuspensionRequest(applicationInstance, HOST_NAME_3, mutablestatusRegistry); + } + + @Test + public void okToSuspendStorageNodeWhenStorageIsDown() throws Exception { + ServiceMonitorStatus storageNodeStatus = DOWN; + final ApplicationInstance<ServiceMonitorStatus> applicationInstance = new AppBuilder() + .addCluster(new StorageClusterBuilder() + // DOWN storage service => ok to suspend and no cluster controller call + .instance(STORAGE_NODE_HOST, DOWN, STORAGE_NODE_INDEX) + .instance(HOST_NAME_2, DOWN, STORAGE_NODE_INDEX + 1) + .instance(HOST_NAME_3, DOWN, STORAGE_NODE_INDEX + 2)) + .addCluster(CLUSTER_CONTROLLER_SERVICE_CLUSTER) + .addCluster(DISTRIBUTOR_SERVICE_CLUSTER) + // This service has one down service on another host, which should + // not block us from suspension because STORAGE_NODE_HOST down too. + .addCluster(new ClusterBuilder(SERVICE_TYPE_2) + .instance(STORAGE_NODE_HOST, DOWN) + .instance(HOST_NAME_4, DOWN) + .instance(HOST_NAME_5, UP)) + .build(); + + HostStatus currentHostStatus = HostStatus.NO_REMARKS; + Optional<ClusterControllerState> dontExpectAnyCallsToClusterController = Optional.empty(); + testCommonGrantFunctionality( + HostedVespaPolicy::grantSuspensionRequest, + applicationInstance, + currentHostStatus, + dontExpectAnyCallsToClusterController, + HostStatus.ALLOWED_TO_BE_DOWN); + } + + @Test + public void denySuspendOfStorageIfOthersAreDown() throws Exception { + // If the storage service is up, but other hosts' storage services are down, + // we should be denied permission to suspend. This behavior is common for + // storage service and non-storage service (they differ when it comes to + // the cluster controller). + ServiceMonitorStatus storageNodeStatus = UP; + + final ApplicationInstance<ServiceMonitorStatus> applicationInstance = new AppBuilder() + .addCluster(new StorageClusterBuilder() + .instance(STORAGE_NODE_HOST, storageNodeStatus, STORAGE_NODE_INDEX) + .instance(HOST_NAME_2, DOWN, STORAGE_NODE_INDEX + 1) + .instance(HOST_NAME_3, DOWN, STORAGE_NODE_INDEX + 2)) + .addCluster(CLUSTER_CONTROLLER_SERVICE_CLUSTER) + .addCluster(DISTRIBUTOR_SERVICE_CLUSTER) + .addCluster(new ClusterBuilder(SERVICE_TYPE_2) + .instance(STORAGE_NODE_HOST, DOWN) + .instance(HOST_NAME_4, DOWN) + .instance(HOST_NAME_5, UP)) + .build(); + + when(mutablestatusRegistry.getHostStatus(any())).thenReturn(HostStatus.NO_REMARKS); + + try { + policy.grantSuspensionRequest(applicationInstance, STORAGE_NODE_HOST, mutablestatusRegistry); + fail(); + } catch (HostStateChangeDeniedException e) { + // As expected. + assertThat(e.getConstraintName()).isEqualTo(HostedVespaPolicy.ENOUGH_SERVICES_UP_CONSTRAINT); + assertThat(e.getServiceType()).isEqualTo(VespaModelUtil.STORAGENODE_SERVICE_TYPE); + } + } + + // In this test we verify the storage service cluster suspend policy of allowing at most 1 + // storage service to be effectively down. The normal policy (and the one used previously for storage) + // is to allow 10%. Therefore, the test verifies we disallow suspending 2 hosts = 5% (some random number <10%). + // + // Since the Orchestrator doesn't allow suspending the host, the Orchestrator doesn't even bother calling the + // Cluster Controller. The CC also has a policy of max 1, so it's just an optimization and safety guard. + @Test + public void dontBotherCallingClusterControllerIfOtherStorageNodesAreDown() throws Exception { + StorageClusterBuilder clusterBuilder = new StorageClusterBuilder(); + for (int i = 0; i < 40; ++i) { + clusterBuilder.instance(new HostName("host-" + i), UP, i); + } + ApplicationInstance<ServiceMonitorStatus> applicationInstance = + new AppBuilder().addCluster(clusterBuilder).build(); + + HostName host_1 = new HostName("host-1"); + when(mutablestatusRegistry.getHostStatus(eq(host_1))).thenReturn(HostStatus.NO_REMARKS); + + HostName host_2 = new HostName("host-2"); + when(mutablestatusRegistry.getHostStatus(eq(host_2))).thenReturn(HostStatus.ALLOWED_TO_BE_DOWN); + + try { + policy.grantSuspensionRequest(applicationInstance, host_1, mutablestatusRegistry); + fail(); + } catch (HostStateChangeDeniedException e) { + // As expected. + assertThat(e.getConstraintName()) + .isEqualTo(HostedVespaPolicy.ENOUGH_SERVICES_UP_CONSTRAINT); + assertThat(e.getServiceType()).isEqualTo(VespaModelUtil.STORAGENODE_SERVICE_TYPE); + } + + verify(mutablestatusRegistry, never()).setHostState(any(), any()); + } + + @Test + public void ownServiceInstanceDown() throws Exception { + final ApplicationInstance<ServiceMonitorStatus> applicationInstance = new AppBuilder() + .addCluster(new ClusterBuilder(SERVICE_TYPE_1) + .instance(HOST_NAME_1, UP) + .instance(HOST_NAME_2, DOWN) + .instance(HOST_NAME_3, DOWN)) + .build(); + + policy.grantSuspensionRequest(applicationInstance, HOST_NAME_3, mutablestatusRegistry); + } + + @Test + public void ownServiceInstanceDown_otherServiceIsAllNotChecked() throws Exception { + final ApplicationInstance<ServiceMonitorStatus> applicationInstance = new AppBuilder() + .addCluster(new ClusterBuilder(SERVICE_TYPE_1) + .instance(HOST_NAME_1, UP) + .instance(HOST_NAME_2, DOWN) + .instance(HOST_NAME_3, DOWN)) + .addCluster(new ClusterBuilder(SERVICE_TYPE_2) + .instance(HOST_NAME_3, NOT_CHECKED) + .instance(HOST_NAME_4, NOT_CHECKED) + .instance(HOST_NAME_5, NOT_CHECKED)) + .build(); + + policy.grantSuspensionRequest(applicationInstance, HOST_NAME_3, mutablestatusRegistry); + } + + @Test + public void ownServiceInstanceDown_otherServiceIsAllNotChecked_oneHostDown() throws Exception { + final ApplicationInstance<ServiceMonitorStatus> applicationInstance = new AppBuilder() + .addCluster(new ClusterBuilder(SERVICE_TYPE_1) + .instance(HOST_NAME_1, UP) + .instance(HOST_NAME_2, DOWN) + .instance(HOST_NAME_3, DOWN)) + .addCluster(new ClusterBuilder(SERVICE_TYPE_2) + .instance(HOST_NAME_3, NOT_CHECKED) + .instance(HOST_NAME_4, NOT_CHECKED) + .instance(HOST_NAME_5, NOT_CHECKED)) + .build(); + + when(mutablestatusRegistry.getHostStatus(eq(HOST_NAME_4))).thenReturn(HostStatus.ALLOWED_TO_BE_DOWN); + try { + policy.grantSuspensionRequest(applicationInstance, HOST_NAME_3, mutablestatusRegistry); + fail("Should not be allowed to set " + HOST_NAME_3 + " down when " + HOST_NAME_4 + " is already down."); + } catch (HostStateChangeDeniedException e) { + // As expected. + assertThat(e.getConstraintName()).isEqualTo(HostedVespaPolicy.ENOUGH_SERVICES_UP_CONSTRAINT); + assertThat(e.getServiceType()).isEqualTo(SERVICE_TYPE_2); + } + } + + @Test + public void ownServiceInstanceDown_otherServiceIsAllUp() throws Exception { + final ApplicationInstance<ServiceMonitorStatus> applicationInstance = new AppBuilder() + .addCluster(new ClusterBuilder(SERVICE_TYPE_1) + .instance(HOST_NAME_1, UP) + .instance(HOST_NAME_2, DOWN) + .instance(HOST_NAME_3, DOWN)) + .addCluster(new ClusterBuilder(SERVICE_TYPE_2) + .instance(HOST_NAME_3, UP) + .instance(HOST_NAME_4, UP) + .instance(HOST_NAME_5, UP)) + .build(); + + policy.grantSuspensionRequest(applicationInstance, HOST_NAME_3, mutablestatusRegistry); + } + + @Test + public void hostHasTwoInstances_oneDownOneUp() throws Exception { + final ApplicationInstance<ServiceMonitorStatus> applicationInstance = new AppBuilder() + .addCluster(new ClusterBuilder(SERVICE_TYPE_1) + .instance(HOST_NAME_1, UP) + .instance(HOST_NAME_2, UP) + .instance(HOST_NAME_3, UP) + .instance(HOST_NAME_3, DOWN)) + .build(); + + policy.grantSuspensionRequest(applicationInstance, HOST_NAME_3, mutablestatusRegistry); + } + + // Helper classes for terseness. + + private static class AppBuilder { + private final Set<ServiceCluster<ServiceMonitorStatus>> serviceClusters = new HashSet<>(); + + public AppBuilder addCluster(final ServiceCluster<ServiceMonitorStatus> cluster) { + serviceClusters.add(cluster); + return this; + } + + public AppBuilder addCluster(final ClusterBuilder clusterBuilder) { + serviceClusters.add(clusterBuilder.build()); + return this; + } + + public AppBuilder addCluster(final StorageClusterBuilder clusterBuilder) { + serviceClusters.add(clusterBuilder.build()); + return this; + } + + public ApplicationInstance<ServiceMonitorStatus> build() { + return new ApplicationInstance<>( + TENANT_ID, + APPLICATION_INSTANCE_ID, + serviceClusters); + } + } + + private static class ClusterBuilder { + private final ServiceType serviceType; + private final Set<ServiceInstance<ServiceMonitorStatus>> instances = new HashSet<>(); + private final ClusterId clusterId; + private int instanceIndex = 0; + + public ClusterBuilder(final ClusterId clusterId, final ServiceType serviceType) { + this.clusterId = clusterId; + this.serviceType = serviceType; + } + + public ClusterBuilder(final ServiceType serviceType) { + this.clusterId = new ClusterId("clusterId"); + this.serviceType = serviceType; + } + + public ClusterBuilder instance(final HostName hostName, final ServiceMonitorStatus status) { + instances.add(new ServiceInstance<>(new ConfigId("configId-" + instanceIndex), hostName, status)); + ++instanceIndex; + return this; + } + + public ServiceCluster<ServiceMonitorStatus> build() { + return new ServiceCluster<>(clusterId, serviceType, instances); + } + } + + private static class StorageClusterBuilder { + private final Set<ServiceInstance<ServiceMonitorStatus>> instances = new HashSet<>(); + + public StorageClusterBuilder instance(final HostName hostName, final ServiceMonitorStatus status, int index) { + instances.add(new ServiceInstance<>(TestUtil.storageNodeConfigId(index), hostName, status)); + return this; + } + + public ServiceCluster<ServiceMonitorStatus> build() { + return new ServiceCluster<>(new ClusterId(CONTENT_CLUSTER_NAME), VespaModelUtil.STORAGENODE_SERVICE_TYPE, instances); + } + } +} diff --git a/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/resources/ApplicationSuspensionResourceTest.java b/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/resources/ApplicationSuspensionResourceTest.java new file mode 100644 index 00000000000..b8396a51e1e --- /dev/null +++ b/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/resources/ApplicationSuspensionResourceTest.java @@ -0,0 +1,143 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.vespa.orchestrator.resources; + +import com.yahoo.application.Application; +import com.yahoo.application.Networking; +import com.yahoo.container.Container; +import com.yahoo.jdisc.http.server.jetty.JettyHttpServer; +import org.junit.After; +import org.junit.Before; +import org.junit.Ignore; +import org.junit.Test; + +import javax.ws.rs.client.Client; +import javax.ws.rs.client.ClientBuilder; +import javax.ws.rs.client.Entity; +import javax.ws.rs.client.WebTarget; +import javax.ws.rs.core.GenericType; +import javax.ws.rs.core.MediaType; +import javax.ws.rs.core.Response; +import java.net.URI; +import java.nio.file.Paths; +import java.util.Set; + +import static org.junit.Assert.assertEquals; + +/** + * Tests the implementation of the orchestrators ApplicationAPI. + * + * @author <a href="mailto:smorgrav@yahoo-inc.com">Toby</a> + */ +public class ApplicationSuspensionResourceTest { + + static final String BASE_PATH = "/orchestrator/v1/suspensions/applications"; + static final String RESOURCE_1 = "mediasearch:imagesearch:default"; + static final String RESOURCE_2 = "test-tenant-id:application:instance"; + static final String INVALID_RESOURCE_NAME = "something_without_colons"; + + Application jdiscApplication; + WebTarget webTarget; + + @Before + public void setup() throws Exception { + jdiscApplication = Application.fromApplicationPackage(Paths.get("src/test/application"), + Networking.enable); + Client client = ClientBuilder.newClient(); + + JettyHttpServer serverProvider = (JettyHttpServer) Container.get().getServerProviderRegistry().allComponents().get(0); + String url = "http://localhost:" + serverProvider.getListenPort() + BASE_PATH; + webTarget = client.target(new URI(url)); + } + + @After + public void teardown() throws Exception { + jdiscApplication.close(); + webTarget = null; + } + + @Ignore + @Test + public void run_application_locally_for_manual_browser_testing() throws Exception { + System.out.println(webTarget.getUri()); + Thread.sleep(3600 * 1000); + } + + @Test + public void get_all_suspended_applications_return_empty_list_initially() throws Exception { + Response reply = webTarget.request().get(); + assertEquals(200, reply.getStatus()); + assertEquals("[]", reply.readEntity(String.class)); + } + + @Test + public void invalid_application_id_throws_http_400() throws Exception { + Response reply = webTarget.request().post(Entity.entity(INVALID_RESOURCE_NAME,MediaType.APPLICATION_JSON_TYPE)); + assertEquals(400, reply.getStatus()); + } + + @Test + public void get_application_status_returns_404_for_notsuspended_and_204_for_suspended() throws Exception { + // Get on application that is not suspended + Response reply = webTarget.path(RESOURCE_1).request().get(); + assertEquals(404, reply.getStatus()); + + // Post application + reply = webTarget.request().post(Entity.entity(RESOURCE_1,MediaType.APPLICATION_JSON_TYPE)); + assertEquals(204, reply.getStatus()); + + // Get on the application that now should be in suspended + reply = webTarget.path(RESOURCE_1).request().get(); + assertEquals(204, reply.getStatus()); + } + + + @Test + public void delete_works_on_suspended_and_not_suspended_applications() throws Exception { + // Delete an application that is not suspended + Response reply = webTarget.path(RESOURCE_1).request().delete(); + assertEquals(204, reply.getStatus()); + + // Put application in suspend + reply = webTarget.request().post(Entity.entity(RESOURCE_1,MediaType.APPLICATION_JSON_TYPE)); + assertEquals(204, reply.getStatus()); + + // Check that it is in suspend + reply = webTarget.path(RESOURCE_1).request(MediaType.APPLICATION_JSON).get(); + assertEquals(204, reply.getStatus()); + + // Delete it + reply = webTarget.path(RESOURCE_1).request().delete(); + assertEquals(204, reply.getStatus()); + + // Check that it is not in suspend anymore + reply = webTarget.path(RESOURCE_1).request(MediaType.APPLICATION_JSON).get(); + assertEquals(404, reply.getStatus()); + } + + @Test + public void list_applications_returns_the_correct_list_of_suspended_applications() throws Exception { + // Test that initially we have the empty set + Response reply = webTarget.request(MediaType.APPLICATION_JSON).get(); + assertEquals(200, reply.getStatus()); + assertEquals("[]", reply.readEntity(String.class)); + + // Add a couple of applications to maintenance + webTarget.request().post(Entity.entity(RESOURCE_1,MediaType.APPLICATION_JSON_TYPE)); + webTarget.request().post(Entity.entity(RESOURCE_2,MediaType.APPLICATION_JSON_TYPE)); + assertEquals(200, reply.getStatus()); + + // Test that we get them back + Set<String> responses = webTarget.request(MediaType.APPLICATION_JSON_TYPE) + .get(new GenericType<Set<String>>() {}); + assertEquals(2, responses.size()); + + // Remove suspend for the first resource + webTarget.path(RESOURCE_1).request().delete(); + + // Test that we are back to the start with the empty set + responses = webTarget.request(MediaType.APPLICATION_JSON_TYPE) + .get(new GenericType<Set<String>>() {}); + assertEquals(1, responses.size()); + assertEquals(RESOURCE_2, responses.iterator().next()); + } +}
\ No newline at end of file diff --git a/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/resources/HostResourceTest.java b/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/resources/HostResourceTest.java new file mode 100644 index 00000000000..fb046f978c7 --- /dev/null +++ b/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/resources/HostResourceTest.java @@ -0,0 +1,227 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.vespa.orchestrator.resources; + +import com.yahoo.vespa.applicationmodel.ApplicationInstance; +import com.yahoo.vespa.applicationmodel.ApplicationInstanceId; +import com.yahoo.vespa.applicationmodel.ApplicationInstanceReference; +import com.yahoo.vespa.applicationmodel.HostName; +import com.yahoo.vespa.applicationmodel.ServiceType; +import com.yahoo.vespa.applicationmodel.TenantId; +import com.yahoo.vespa.orchestrator.InstanceLookupService; +import com.yahoo.vespa.orchestrator.OrchestratorImpl; +import com.yahoo.vespa.orchestrator.controller.ClusterControllerClientFactoryMock; +import com.yahoo.vespa.orchestrator.policy.HostStateChangeDeniedException; +import com.yahoo.vespa.orchestrator.policy.Policy; +import com.yahoo.vespa.orchestrator.restapi.wire.BatchHostSuspendRequest; +import com.yahoo.vespa.orchestrator.restapi.wire.BatchOperationResult; +import com.yahoo.vespa.orchestrator.restapi.wire.UpdateHostResponse; +import com.yahoo.vespa.orchestrator.status.ApplicationInstanceStatus; +import com.yahoo.vespa.orchestrator.status.HostStatus; +import com.yahoo.vespa.orchestrator.status.MutableStatusRegistry; +import com.yahoo.vespa.orchestrator.status.StatusService; +import com.yahoo.vespa.service.monitor.ServiceMonitorStatus; +import org.junit.Test; + +import javax.ws.rs.WebApplicationException; +import java.util.Arrays; +import java.util.Collections; +import java.util.Optional; +import java.util.Set; + +import static com.yahoo.vespa.orchestrator.TestUtil.makeServiceClusterSet; +import static org.fest.assertions.Assertions.assertThat; +import static org.fest.assertions.Fail.fail; +import static org.mockito.Matchers.any; +import static org.mockito.Matchers.eq; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.when; + +public class HostResourceTest { + private static final int SERVICE_MONITOR_CONVERGENCE_LATENCY_SECONDS = 0; + private static final TenantId TENANT_ID = new TenantId("tenantId"); + private static final ApplicationInstanceId APPLICATION_INSTANCE_ID = new ApplicationInstanceId("applicationId"); + private static final ApplicationInstanceReference APPLICATION_INSTANCE_REFERENCE = + new ApplicationInstanceReference(TENANT_ID, APPLICATION_INSTANCE_ID); + + private static final StatusService EVERY_HOST_IS_UP_HOST_STATUS_SERVICE = mock(StatusService.class); + private static final MutableStatusRegistry EVERY_HOST_IS_UP_MUTABLE_HOST_STATUS_REGISTRY = mock(MutableStatusRegistry.class); + static { + when(EVERY_HOST_IS_UP_HOST_STATUS_SERVICE.forApplicationInstance(eq(APPLICATION_INSTANCE_REFERENCE))) + .thenReturn(EVERY_HOST_IS_UP_MUTABLE_HOST_STATUS_REGISTRY); + when(EVERY_HOST_IS_UP_HOST_STATUS_SERVICE.lockApplicationInstance_forCurrentThreadOnly(eq(APPLICATION_INSTANCE_REFERENCE))) + .thenReturn(EVERY_HOST_IS_UP_MUTABLE_HOST_STATUS_REGISTRY); + when(EVERY_HOST_IS_UP_MUTABLE_HOST_STATUS_REGISTRY.getHostStatus(any())) + .thenReturn(HostStatus.NO_REMARKS); + when(EVERY_HOST_IS_UP_MUTABLE_HOST_STATUS_REGISTRY.getApplicationInstanceStatus()) + .thenReturn(ApplicationInstanceStatus.NO_REMARKS); + } + + private static final InstanceLookupService mockInstanceLookupService = mock(InstanceLookupService.class); + static { + when(mockInstanceLookupService.findInstanceByHost(any())) + .thenReturn(Optional.of( + new ApplicationInstance<>( + TENANT_ID, + APPLICATION_INSTANCE_ID, + makeServiceClusterSet()))); + } + + + private static final InstanceLookupService alwaysEmptyInstanceLookUpService = new InstanceLookupService() { + @Override + public Optional<ApplicationInstance<ServiceMonitorStatus>> findInstanceById( + final ApplicationInstanceReference applicationInstanceReference) { + return Optional.empty(); + } + + @Override + public Optional<ApplicationInstance<ServiceMonitorStatus>> findInstanceByHost(final HostName hostName) { + return Optional.empty(); + } + + @Override + public Set<ApplicationInstanceReference> knownInstances() { + return Collections.emptySet(); + } + }; + + private static class AlwaysAllowPolicy implements Policy { + @Override + public void grantSuspensionRequest( + ApplicationInstance<ServiceMonitorStatus> applicationInstance, + HostName hostName, + MutableStatusRegistry hostStatusRegistry) { + } + @Override + public void releaseSuspensionGrant( + ApplicationInstance<ServiceMonitorStatus> applicationInstance, + HostName hostName, + MutableStatusRegistry hostStatusRegistry) { + } + } + + private static final OrchestratorImpl alwaysAllowOrchestrator = new OrchestratorImpl( + new AlwaysAllowPolicy(), + new ClusterControllerClientFactoryMock(), + EVERY_HOST_IS_UP_HOST_STATUS_SERVICE, mockInstanceLookupService, + SERVICE_MONITOR_CONVERGENCE_LATENCY_SECONDS + ); + + private static final OrchestratorImpl hostNotFoundOrchestrator = new OrchestratorImpl( + new AlwaysAllowPolicy(), + new ClusterControllerClientFactoryMock(), + EVERY_HOST_IS_UP_HOST_STATUS_SERVICE, alwaysEmptyInstanceLookUpService, + SERVICE_MONITOR_CONVERGENCE_LATENCY_SECONDS + ); + + @Test + public void returns_200_on_success() throws Exception { + HostResource hostResource = + new HostResource(alwaysAllowOrchestrator); + + final String hostName = "hostname"; + + UpdateHostResponse response = hostResource.suspend(hostName); + + assertThat(response.hostname()).isEqualTo(hostName); + } + + @Test + public void returns_200_on_success_batch() throws Exception { + HostSuspensionResource hostSuspensionResource = new HostSuspensionResource(alwaysAllowOrchestrator); + BatchHostSuspendRequest request = + new BatchHostSuspendRequest("parentHostname", Arrays.asList("hostname1", "hostname2")); + BatchOperationResult response = hostSuspensionResource.suspendAll(request); + assertThat(response.success()); + } + + @Test + public void throws_404_when_host_unknown() throws Exception { + try { + HostResource hostResource = + new HostResource(hostNotFoundOrchestrator); + hostResource.suspend("hostname"); + fail(); + } catch (WebApplicationException w) { + assertThat(w.getResponse().getStatus()).isEqualTo(404); + } + } + + // Note: Missing host is 404 for a single-host, but 400 for multi-host (batch). + // This is so because the hostname is part of the URL path for single-host, while the + // hostnames are part of the request body for multi-host. + @Test + public void throws_400_when_host_unknown_for_batch() throws Exception { + try { + HostSuspensionResource hostSuspensionResource = new HostSuspensionResource(hostNotFoundOrchestrator); + BatchHostSuspendRequest request = + new BatchHostSuspendRequest("parentHostname", Arrays.asList("hostname1", "hostname2")); + hostSuspensionResource.suspendAll(request); + fail(); + } catch (WebApplicationException w) { + assertThat(w.getResponse().getStatus()).isEqualTo(400); + } + } + + private static class AlwaysFailPolicy implements Policy { + @Override + public void grantSuspensionRequest( + ApplicationInstance<ServiceMonitorStatus> applicationInstance, + HostName hostName, + MutableStatusRegistry hostStatusRegistry) throws HostStateChangeDeniedException { + doThrow(); + } + @Override + public void releaseSuspensionGrant( + ApplicationInstance<ServiceMonitorStatus> applicationInstance, + HostName hostName, + MutableStatusRegistry hostStatusRegistry) throws HostStateChangeDeniedException { + doThrow(); + } + + private static void doThrow() throws HostStateChangeDeniedException { + throw new HostStateChangeDeniedException( + new HostName("some-host"), + "impossible-policy", + new ServiceType("silly-service"), + "This policy rejects all requests"); + } + } + + @Test + public void throws_409_when_request_rejected_by_policies() throws Exception { + final OrchestratorImpl alwaysRejectResolver = new OrchestratorImpl( + new AlwaysFailPolicy(), + new ClusterControllerClientFactoryMock(), + EVERY_HOST_IS_UP_HOST_STATUS_SERVICE,mockInstanceLookupService, + SERVICE_MONITOR_CONVERGENCE_LATENCY_SECONDS); + + try { + HostResource hostResource = new HostResource(alwaysRejectResolver); + hostResource.suspend("hostname"); + fail(); + } catch (WebApplicationException w) { + assertThat(w.getResponse().getStatus()).isEqualTo(409); + } + } + + @Test + public void throws_409_when_request_rejected_by_policies_for_batch() throws Exception { + final OrchestratorImpl alwaysRejectResolver = new OrchestratorImpl( + new AlwaysFailPolicy(), + new ClusterControllerClientFactoryMock(), + EVERY_HOST_IS_UP_HOST_STATUS_SERVICE, + mockInstanceLookupService, + SERVICE_MONITOR_CONVERGENCE_LATENCY_SECONDS); + + try { + HostSuspensionResource hostSuspensionResource = new HostSuspensionResource(alwaysRejectResolver); + BatchHostSuspendRequest request = + new BatchHostSuspendRequest("parentHostname", Arrays.asList("hostname1", "hostname2")); + hostSuspensionResource.suspendAll(request); + fail(); + } catch (WebApplicationException w) { + assertThat(w.getResponse().getStatus()).isEqualTo(409); + } + } +} diff --git a/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/status/ZookeeperStatusServiceTest.java b/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/status/ZookeeperStatusServiceTest.java new file mode 100644 index 00000000000..ac73e264477 --- /dev/null +++ b/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/status/ZookeeperStatusServiceTest.java @@ -0,0 +1,323 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.vespa.orchestrator.status; + +import com.yahoo.log.LogLevel; +import com.yahoo.vespa.applicationmodel.ApplicationInstanceReference; +import com.yahoo.vespa.orchestrator.TestIds; +import org.apache.commons.lang.exception.ExceptionUtils; +import org.apache.curator.SessionFailRetryLoop.SessionFailedException; +import org.apache.curator.framework.CuratorFramework; +import org.apache.curator.framework.CuratorFrameworkFactory; +import org.apache.curator.retry.ExponentialBackoffRetry; +import org.apache.curator.test.KillSession; +import org.apache.curator.test.TestingServer; +import org.hamcrest.Description; +import org.hamcrest.Matcher; +import org.hamcrest.TypeSafeMatcher; +import org.junit.After; +import org.junit.Before; +import org.junit.Test; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; +import java.util.List; +import java.util.Optional; +import java.util.Set; +import java.util.concurrent.CompletableFuture; +import java.util.concurrent.ExecutionException; +import java.util.concurrent.Future; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.TimeoutException; +import java.util.logging.Logger; + +import static org.hamcrest.core.Is.is; +import static org.hamcrest.core.IsCollectionContaining.hasItem; +import static org.junit.Assert.assertThat; +import static org.junit.Assert.fail; + +public class ZookeeperStatusServiceTest { + private TestingServer testingServer; + private ZookeeperStatusService zookeeperStatusService; + private CuratorFramework curatorFramework; + + @Before + public void setUp() throws Exception { + Logger.getLogger("").setLevel(LogLevel.WARNING); + + testingServer = new TestingServer(); + curatorFramework = createConnectedCuratorFramework(testingServer); + zookeeperStatusService = new ZookeeperStatusService(curatorFramework); + } + + private static CuratorFramework createConnectedCuratorFramework(TestingServer server) throws InterruptedException { + CuratorFramework curatorFramework = CuratorFrameworkFactory.builder() + .connectString(server.getConnectString()) + .retryPolicy(new ExponentialBackoffRetry(1000, 3)) + .build(); + + curatorFramework.start(); + curatorFramework.blockUntilConnected(1, TimeUnit.MINUTES); + return curatorFramework; + } + + @After + public void tearDown() throws Exception { + if (curatorFramework != null) { //teardown is called even if setUp fails. + curatorFramework.close(); + } + if (testingServer != null) { + testingServer.close(); + } + } + + @Test + public void host_state_for_unknown_hosts_is_no_remarks() { + assertThat( + zookeeperStatusService.forApplicationInstance(TestIds.APPLICATION_INSTANCE_REFERENCE) + .getHostStatus(TestIds.HOST_NAME1), + is(HostStatus.NO_REMARKS)); + } + + @Test + public void setting_host_state_is_idempotent() { + try (MutableStatusRegistry statusRegistry = zookeeperStatusService.lockApplicationInstance_forCurrentThreadOnly( + TestIds.APPLICATION_INSTANCE_REFERENCE)) { + + //shuffling to catch "clean database" failures for all cases. + for (HostStatus hostStatus: shuffledList(HostStatus.values())) { + doTimes(2, () -> { + statusRegistry.setHostState( + TestIds.HOST_NAME1, + hostStatus); + + assertThat(statusRegistry.getHostStatus( + TestIds.HOST_NAME1), + is(hostStatus)); + }); + } + } + } + + @Test + public void locks_are_exclusive() throws Exception { + try (CuratorFramework curatorFramework2 = createConnectedCuratorFramework(testingServer)) { + ZookeeperStatusService zookeeperStatusService2 = new ZookeeperStatusService(curatorFramework2); + + final CompletableFuture<Void> lockedSuccessfullyFuture; + try (MutableStatusRegistry statusRegistry = zookeeperStatusService.lockApplicationInstance_forCurrentThreadOnly( + TestIds.APPLICATION_INSTANCE_REFERENCE)) { + + lockedSuccessfullyFuture = CompletableFuture.runAsync(() -> { + try (MutableStatusRegistry statusRegistry2 = zookeeperStatusService2 + .lockApplicationInstance_forCurrentThreadOnly(TestIds.APPLICATION_INSTANCE_REFERENCE)) + { + } + }); + + try { + lockedSuccessfullyFuture.get(3, TimeUnit.SECONDS); + fail("Both zookeeper host status services locked simultaneously for the same application instance"); + } catch (TimeoutException ignored) { + } + } + + lockedSuccessfullyFuture.get(1, TimeUnit.MINUTES); + } + } + + @Test + public void session_expiry_when_holding_lock_causes_operations_to_fail() throws Exception { + try (MutableStatusRegistry statusRegistry = zookeeperStatusService.lockApplicationInstance_forCurrentThreadOnly( + TestIds.APPLICATION_INSTANCE_REFERENCE)) { + + KillSession.kill(curatorFramework.getZookeeperClient().getZooKeeper(), testingServer.getConnectString()); + + assertSessionFailed(() -> + statusRegistry.setHostState( + TestIds.HOST_NAME1, + HostStatus.ALLOWED_TO_BE_DOWN)); + + + assertSessionFailed(() -> + statusRegistry.getHostStatus( + TestIds.HOST_NAME1)); + + } + } + + @Test + public void failing_to_get_lock_closes_SessionFailRetryLoop() throws Exception { + try (CuratorFramework curatorFramework2 = createConnectedCuratorFramework(testingServer)) { + ZookeeperStatusService zookeeperStatusService2 = new ZookeeperStatusService(curatorFramework2); + + try (MutableStatusRegistry statusRegistry = zookeeperStatusService.lockApplicationInstance_forCurrentThreadOnly( + TestIds.APPLICATION_INSTANCE_REFERENCE)) { + + //must run in separate thread, since having 2 locks in the same thread fails + CompletableFuture<Void> resultOfZkOperationAfterLockFailure = CompletableFuture.runAsync(() -> { + try { + zookeeperStatusService2.lockApplicationInstance_forCurrentThreadOnly( + TestIds.APPLICATION_INSTANCE_REFERENCE, + 1, TimeUnit.SECONDS); + fail("Both zookeeper host status services locked simultaneously for the same application instance"); + } catch (RuntimeException e) { + } + + killSession(curatorFramework2, testingServer); + + //Throws SessionFailedException if the SessionFailRetryLoop has not been closed. + zookeeperStatusService2.forApplicationInstance(TestIds.APPLICATION_INSTANCE_REFERENCE) + .getHostStatus(TestIds.HOST_NAME1); + }); + + assertThat(resultOfZkOperationAfterLockFailure, notHoldsException()); + } + } + } + + //IsNot does not delegate to matcher.describeMismatch. See the related issue + //https://code.google.com/p/hamcrest/issues/detail?id=107 Confusing failure description when using negation + //Creating not(holdsException) directly instead. + private Matcher<Future<?>> notHoldsException() { + return new TypeSafeMatcher<Future<?>>() { + @Override + protected boolean matchesSafely(Future<?> item) { + return !getException(item).isPresent(); + } + + private Optional<Throwable> getException(Future<?> item) { + try { + item.get(); + return Optional.empty(); + } catch (ExecutionException e) { + return Optional.of(e.getCause()); + } catch (InterruptedException e) { + return Optional.of(e); + } + } + + @Override + public void describeTo(Description description) { + description.appendText("notHoldsException()"); + } + + @Override + protected void describeMismatchSafely(Future<?> item, Description mismatchDescription) { + getException(item).ifPresent( throwable -> + mismatchDescription + .appendText("Got exception: ") + .appendText(ExceptionUtils.getMessage(throwable)) + .appendText(ExceptionUtils.getFullStackTrace(throwable))); + } + }; + } + + private static void killSession(CuratorFramework curatorFramework, TestingServer testingServer) { + try { + KillSession.kill(curatorFramework.getZookeeperClient().getZooKeeper(), testingServer.getConnectString()); + } catch (Exception e) { + throw new RuntimeException("Failed killing session. ", e); + } + } + + /** + * This requirement is due to limitations in SessionFailRetryLoop + */ + @Test(expected = AssertionError.class) + public void multiple_locks_in_a_single_thread_gives_error() throws InterruptedException { + try (CuratorFramework curatorFramework2 = createConnectedCuratorFramework(testingServer)) { + ZookeeperStatusService zookeeperStatusService2 = new ZookeeperStatusService(curatorFramework2); + + try (MutableStatusRegistry statusRegistry1 = zookeeperStatusService + .lockApplicationInstance_forCurrentThreadOnly(TestIds.APPLICATION_INSTANCE_REFERENCE); + MutableStatusRegistry statusRegistry2 = zookeeperStatusService2 + .lockApplicationInstance_forCurrentThreadOnly(TestIds.APPLICATION_INSTANCE_REFERENCE2)) + { + } + } + } + + @Test + public void suspend_and_resume_application_works_and_is_symmetric() { + + // Initial state is NO_REMARK + assertThat( + zookeeperStatusService + .forApplicationInstance(TestIds.APPLICATION_INSTANCE_REFERENCE) + .getApplicationInstanceStatus(), + is(ApplicationInstanceStatus.NO_REMARKS)); + + // Suspend + try (MutableStatusRegistry statusRegistry = zookeeperStatusService.lockApplicationInstance_forCurrentThreadOnly( + TestIds.APPLICATION_INSTANCE_REFERENCE)) { + statusRegistry.setApplicationInstanceStatus(ApplicationInstanceStatus.ALLOWED_TO_BE_DOWN); + } + + assertThat( + zookeeperStatusService + .forApplicationInstance(TestIds.APPLICATION_INSTANCE_REFERENCE) + .getApplicationInstanceStatus(), + is(ApplicationInstanceStatus.ALLOWED_TO_BE_DOWN)); + + // Resume + try (MutableStatusRegistry statusRegistry = zookeeperStatusService.lockApplicationInstance_forCurrentThreadOnly( + TestIds.APPLICATION_INSTANCE_REFERENCE)) { + statusRegistry.setApplicationInstanceStatus(ApplicationInstanceStatus.NO_REMARKS); + } + + assertThat( + zookeeperStatusService + .forApplicationInstance(TestIds.APPLICATION_INSTANCE_REFERENCE) + .getApplicationInstanceStatus(), + is(ApplicationInstanceStatus.NO_REMARKS)); + } + + @Test + public void suspending_two_applications_returns_two_applications() { + Set<ApplicationInstanceReference> suspendedApps + = zookeeperStatusService.getAllSuspendedApplications(); + assertThat(suspendedApps.size(), is(0)); + + try (MutableStatusRegistry statusRegistry = zookeeperStatusService.lockApplicationInstance_forCurrentThreadOnly( + TestIds.APPLICATION_INSTANCE_REFERENCE)) { + statusRegistry.setApplicationInstanceStatus(ApplicationInstanceStatus.ALLOWED_TO_BE_DOWN); + } + + try (MutableStatusRegistry statusRegistry = zookeeperStatusService.lockApplicationInstance_forCurrentThreadOnly( + TestIds.APPLICATION_INSTANCE_REFERENCE2)) { + statusRegistry.setApplicationInstanceStatus(ApplicationInstanceStatus.ALLOWED_TO_BE_DOWN); + } + + suspendedApps = zookeeperStatusService.getAllSuspendedApplications(); + assertThat(suspendedApps.size(), is(2)); + assertThat(suspendedApps, hasItem(TestIds.APPLICATION_INSTANCE_REFERENCE)); + assertThat(suspendedApps, hasItem(TestIds.APPLICATION_INSTANCE_REFERENCE2)); + } + + private static void assertSessionFailed(Runnable statusServiceOperations) { + try { + statusServiceOperations.run(); + fail("Expected session expired exception"); + } catch (RuntimeException e) { + if (!(e.getCause() instanceof SessionFailedException)) { + throw e; + } + } + } + + //TODO: move to vespajlib + private static <T> List<T> shuffledList(T[] values) { + //new ArrayList necessary to avoid "write through" behaviour + List<T> list = new ArrayList<>(Arrays.asList(values)); + Collections.shuffle(list); + return list; + } + + //TODO: move to vespajlib + private static void doTimes(int numberOfIterations, Runnable runnable) { + for (int i = 0; i < numberOfIterations; i++) { + runnable.run(); + } + } +} |