aboutsummaryrefslogtreecommitdiffstats
path: root/orchestrator
diff options
context:
space:
mode:
authorJon Bratseth <bratseth@yahoo-inc.com>2016-06-15 23:09:44 +0200
committerJon Bratseth <bratseth@yahoo-inc.com>2016-06-15 23:09:44 +0200
commit72231250ed81e10d66bfe70701e64fa5fe50f712 (patch)
tree2728bba1131a6f6e5bdf95afec7d7ff9358dac50 /orchestrator
Publish
Diffstat (limited to 'orchestrator')
-rw-r--r--orchestrator/OWNERS1
-rw-r--r--orchestrator/README1
-rw-r--r--orchestrator/README.md10
-rw-r--r--orchestrator/pom.xml155
-rw-r--r--orchestrator/src/main/java/com/yahoo/vespa/orchestrator/ApplicationIdNotFoundException.java10
-rw-r--r--orchestrator/src/main/java/com/yahoo/vespa/orchestrator/ApplicationStateChangeDeniedException.java22
-rw-r--r--orchestrator/src/main/java/com/yahoo/vespa/orchestrator/BatchHostNameNotFoundException.java15
-rw-r--r--orchestrator/src/main/java/com/yahoo/vespa/orchestrator/BatchInternalErrorException.java15
-rw-r--r--orchestrator/src/main/java/com/yahoo/vespa/orchestrator/HostNameNotFoundException.java15
-rw-r--r--orchestrator/src/main/java/com/yahoo/vespa/orchestrator/InstanceLookupService.java19
-rw-r--r--orchestrator/src/main/java/com/yahoo/vespa/orchestrator/OrchestrationException.java29
-rw-r--r--orchestrator/src/main/java/com/yahoo/vespa/orchestrator/Orchestrator.java98
-rw-r--r--orchestrator/src/main/java/com/yahoo/vespa/orchestrator/OrchestratorImpl.java344
-rw-r--r--orchestrator/src/main/java/com/yahoo/vespa/orchestrator/OrchestratorUtil.java144
-rw-r--r--orchestrator/src/main/java/com/yahoo/vespa/orchestrator/ServiceMonitorInstanceLookupService.java71
-rw-r--r--orchestrator/src/main/java/com/yahoo/vespa/orchestrator/VespaModelUtil.java227
-rw-r--r--orchestrator/src/main/java/com/yahoo/vespa/orchestrator/controller/ClusterControllerClient.java24
-rw-r--r--orchestrator/src/main/java/com/yahoo/vespa/orchestrator/controller/ClusterControllerClientFactory.java13
-rw-r--r--orchestrator/src/main/java/com/yahoo/vespa/orchestrator/controller/ClusterControllerClientImpl.java70
-rw-r--r--orchestrator/src/main/java/com/yahoo/vespa/orchestrator/controller/ClusterControllerJaxRsApi.java31
-rw-r--r--orchestrator/src/main/java/com/yahoo/vespa/orchestrator/controller/ClusterControllerState.java28
-rw-r--r--orchestrator/src/main/java/com/yahoo/vespa/orchestrator/controller/ClusterControllerStateRequest.java95
-rw-r--r--orchestrator/src/main/java/com/yahoo/vespa/orchestrator/controller/ClusterControllerStateResponse.java25
-rw-r--r--orchestrator/src/main/java/com/yahoo/vespa/orchestrator/controller/RetryingClusterControllerClientFactory.java52
-rw-r--r--orchestrator/src/main/java/com/yahoo/vespa/orchestrator/controller/SingleInstanceClusterControllerClientFactory.java60
-rw-r--r--orchestrator/src/main/java/com/yahoo/vespa/orchestrator/package-info.java5
-rw-r--r--orchestrator/src/main/java/com/yahoo/vespa/orchestrator/policy/BatchHostStateChangeDeniedException.java17
-rw-r--r--orchestrator/src/main/java/com/yahoo/vespa/orchestrator/policy/HostStateChangeDeniedException.java51
-rw-r--r--orchestrator/src/main/java/com/yahoo/vespa/orchestrator/policy/HostedVespaPolicy.java232
-rw-r--r--orchestrator/src/main/java/com/yahoo/vespa/orchestrator/policy/Policy.java32
-rw-r--r--orchestrator/src/main/java/com/yahoo/vespa/orchestrator/policy/ServiceClusterSuspendPolicy.java33
-rw-r--r--orchestrator/src/main/java/com/yahoo/vespa/orchestrator/resources/ApplicationSuspensionResource.java121
-rw-r--r--orchestrator/src/main/java/com/yahoo/vespa/orchestrator/resources/HostResource.java94
-rw-r--r--orchestrator/src/main/java/com/yahoo/vespa/orchestrator/resources/HostSuspensionResource.java77
-rw-r--r--orchestrator/src/main/java/com/yahoo/vespa/orchestrator/resources/InstanceResource.java81
-rw-r--r--orchestrator/src/main/java/com/yahoo/vespa/orchestrator/resources/InstanceStatusResponse.java31
-rw-r--r--orchestrator/src/main/java/com/yahoo/vespa/orchestrator/status/ApplicationInstanceStatus.java20
-rw-r--r--orchestrator/src/main/java/com/yahoo/vespa/orchestrator/status/HostStatus.java12
-rw-r--r--orchestrator/src/main/java/com/yahoo/vespa/orchestrator/status/InMemoryStatusService.java120
-rw-r--r--orchestrator/src/main/java/com/yahoo/vespa/orchestrator/status/MutableStatusRegistry.java31
-rw-r--r--orchestrator/src/main/java/com/yahoo/vespa/orchestrator/status/NoThrow.java19
-rw-r--r--orchestrator/src/main/java/com/yahoo/vespa/orchestrator/status/ReadOnlyStatusRegistry.java23
-rw-r--r--orchestrator/src/main/java/com/yahoo/vespa/orchestrator/status/StatusService.java64
-rw-r--r--orchestrator/src/main/java/com/yahoo/vespa/orchestrator/status/ZookeeperStatusService.java373
-rw-r--r--orchestrator/src/main/java/com/yahoo/vespa/orchestrator/status/package-info.java5
-rw-r--r--orchestrator/src/main/resources/configdefinitions/orchestrator.def5
-rw-r--r--orchestrator/src/test/application/services.xml16
-rw-r--r--orchestrator/src/test/java/com/yahoo/vespa/orchestrator/DummyInstanceLookupService.java166
-rw-r--r--orchestrator/src/test/java/com/yahoo/vespa/orchestrator/OrchestratorImplTest.java300
-rw-r--r--orchestrator/src/test/java/com/yahoo/vespa/orchestrator/OrchestratorUtilTest.java49
-rw-r--r--orchestrator/src/test/java/com/yahoo/vespa/orchestrator/TestIds.java24
-rw-r--r--orchestrator/src/test/java/com/yahoo/vespa/orchestrator/TestUtil.java37
-rw-r--r--orchestrator/src/test/java/com/yahoo/vespa/orchestrator/VespaModelUtilTest.java223
-rw-r--r--orchestrator/src/test/java/com/yahoo/vespa/orchestrator/controller/ClusterControllerClientFactoryMock.java74
-rw-r--r--orchestrator/src/test/java/com/yahoo/vespa/orchestrator/controller/ClusterControllerClientTest.java38
-rw-r--r--orchestrator/src/test/java/com/yahoo/vespa/orchestrator/controller/SingleInstanceClusterControllerClientFactoryTest.java117
-rw-r--r--orchestrator/src/test/java/com/yahoo/vespa/orchestrator/policy/HostedVespaPolicyTest.java738
-rw-r--r--orchestrator/src/test/java/com/yahoo/vespa/orchestrator/resources/ApplicationSuspensionResourceTest.java143
-rw-r--r--orchestrator/src/test/java/com/yahoo/vespa/orchestrator/resources/HostResourceTest.java227
-rw-r--r--orchestrator/src/test/java/com/yahoo/vespa/orchestrator/status/ZookeeperStatusServiceTest.java323
60 files changed, 5495 insertions, 0 deletions
diff --git a/orchestrator/OWNERS b/orchestrator/OWNERS
new file mode 100644
index 00000000000..3d08c49311b
--- /dev/null
+++ b/orchestrator/OWNERS
@@ -0,0 +1 @@
+hakon
diff --git a/orchestrator/README b/orchestrator/README
new file mode 100644
index 00000000000..e30b29fa236
--- /dev/null
+++ b/orchestrator/README
@@ -0,0 +1 @@
+Determines whether it is safe for a node to halt services for upgrade/restart
diff --git a/orchestrator/README.md b/orchestrator/README.md
new file mode 100644
index 00000000000..455bd265fcc
--- /dev/null
+++ b/orchestrator/README.md
@@ -0,0 +1,10 @@
+# Orchestrator, a.k.a. Maestro
+A service to facilitate safe and staggered restart and upgrades of services in a Vespa instance.
+It uses consolidated information from Slobrok and the application model to decide if a hosts
+should be allowed to stop its services.
+
+## TODO:
+* Constraint on requests on start-up.
+* Constraint on requests after permitting host to go down (it should last at least as long as Slobrok heartbeat cycle).
+* Implement caching of host-down decisions.
+* Instance resource, exposing the orchestrators current knowledge \ No newline at end of file
diff --git a/orchestrator/pom.xml b/orchestrator/pom.xml
new file mode 100644
index 00000000000..7ad5393aa31
--- /dev/null
+++ b/orchestrator/pom.xml
@@ -0,0 +1,155 @@
+<?xml version="1.0"?>
+<!-- Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -->
+<project xmlns="http://maven.apache.org/POM/4.0.0"
+ xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+ xsi:schemaLocation="http://maven.apache.org/POM/4.0.0
+ http://maven.apache.org/xsd/maven-4.0.0.xsd">
+ <modelVersion>4.0.0</modelVersion>
+ <parent>
+ <groupId>com.yahoo.vespa</groupId>
+ <artifactId>parent</artifactId>
+ <version>6-SNAPSHOT</version>
+ <relativePath>../parent/pom.xml</relativePath>
+ </parent>
+ <artifactId>orchestrator</artifactId>
+ <version>6-SNAPSHOT</version>
+ <packaging>container-plugin</packaging>
+ <name>${project.artifactId}</name>
+ <dependencies>
+ <dependency>
+ <groupId>com.yahoo.vespa</groupId>
+ <artifactId>container-dev</artifactId>
+ <version>${project.version}</version>
+ <scope>provided</scope>
+ </dependency>
+ <dependency>
+ <groupId>com.yahoo.vespa</groupId>
+ <artifactId>service-monitor</artifactId>
+ <version>${project.version}</version>
+ <scope>compile</scope> <!-- TODO: Unbundle, but fix JSON serialization of model objects first. -->
+ </dependency>
+ <dependency>
+ <groupId>com.yahoo.vespa</groupId>
+ <artifactId>orchestrator-restapi</artifactId>
+ <version>${project.version}</version>
+ <scope>compile</scope>
+ </dependency>
+ <dependency>
+ <groupId>com.google.auto.value</groupId>
+ <artifactId>auto-value</artifactId>
+ <version>1.0-rc1</version>
+ <scope>provided</scope>
+ </dependency>
+ <dependency>
+ <groupId>com.yahoo.vespa</groupId>
+ <artifactId>jaxrs_client_utils</artifactId>
+ <version>${project.version}</version>
+ <scope>compile</scope>
+ </dependency>
+ <dependency>
+ <groupId>com.yahoo.vespa</groupId>
+ <artifactId>application</artifactId>
+ <version>${project.version}</version>
+ <scope>test</scope>
+ </dependency>
+ <dependency>
+ <groupId>junit</groupId>
+ <artifactId>junit</artifactId>
+ <scope>test</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.mockito</groupId>
+ <artifactId>mockito-core</artifactId>
+ <scope>test</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.easytesting</groupId>
+ <artifactId>fest-assert</artifactId>
+ <version>1.4</version>
+ <scope>test</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.scala-lang</groupId>
+ <artifactId>scala-library</artifactId>
+ <scope>compile</scope>
+ </dependency>
+ <dependency>
+ <groupId>commons-lang</groupId>
+ <artifactId>commons-lang</artifactId>
+ </dependency>
+ <dependency>
+ <groupId>com.yahoo.vespa</groupId>
+ <artifactId>config-provisioning</artifactId>
+ <version>${project.version}</version>
+ <scope>provided</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.glassfish.jersey.ext</groupId>
+ <artifactId>jersey-proxy-client</artifactId>
+ <exclusions>
+ <exclusion>
+ <groupId>org.glassfish.jersey.core</groupId>
+ <artifactId>jersey-common</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>javax.ws.rs</groupId>
+ <artifactId>javax.ws.rs-api</artifactId>
+ </exclusion>
+ </exclusions>
+ </dependency>
+ <dependency>
+ <groupId>com.fasterxml.jackson.module</groupId>
+ <artifactId>jackson-module-scala_${scala.major-version}</artifactId>
+ <version>${jackson2.version}</version>
+ <exclusions>
+ <exclusion>
+ <groupId>com.fasterxml.jackson.core</groupId>
+ <artifactId>jackson-core</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>com.fasterxml.jackson.core</groupId>
+ <artifactId>jackson-annotations</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>com.fasterxml.jackson.core</groupId>
+ <artifactId>jackson-databind</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>com.google.code.findbugs</groupId>
+ <artifactId>findbugs</artifactId>
+ </exclusion>
+ </exclusions>
+ </dependency>
+ <dependency>
+ <groupId>com.fasterxml.jackson.core</groupId>
+ <artifactId>jackson-annotations</artifactId>
+ <version>${jackson2.version}</version>
+ <scope>provided</scope>
+ </dependency>
+ <dependency>
+ <groupId>com.yahoo.vespa</groupId>
+ <artifactId>zkfacade</artifactId>
+ <version>${project.version}</version>
+ <scope>provided</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.curator</groupId>
+ <artifactId>curator-test</artifactId>
+ <scope>test</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.hamcrest</groupId>
+ <artifactId>hamcrest-all</artifactId>
+ <scope>test</scope>
+ </dependency>
+ </dependencies>
+ <build>
+ <plugins>
+ <plugin>
+ <groupId>com.yahoo.vespa</groupId>
+ <artifactId>bundle-plugin</artifactId>
+ <extensions>true</extensions>
+ </plugin>
+ </plugins>
+ </build>
+</project>
diff --git a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/ApplicationIdNotFoundException.java b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/ApplicationIdNotFoundException.java
new file mode 100644
index 00000000000..0e4535a3a51
--- /dev/null
+++ b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/ApplicationIdNotFoundException.java
@@ -0,0 +1,10 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.orchestrator;
+
+/**
+ * Thrown when applicationId is invalid or not found.
+ *
+ * @author <a href="mailto:smorgrav@yahoo-inc.com">Toby</a>
+ */
+public class ApplicationIdNotFoundException extends Exception {
+}
diff --git a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/ApplicationStateChangeDeniedException.java b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/ApplicationStateChangeDeniedException.java
new file mode 100644
index 00000000000..28f92b3dca2
--- /dev/null
+++ b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/ApplicationStateChangeDeniedException.java
@@ -0,0 +1,22 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.orchestrator;
+
+/**
+ * Exception covering all cases where the state change could not
+ * be executed.
+ *
+ * @author <a href="mailto:smorgrav@yahoo-inc.com">Toby</a>
+ */
+public class ApplicationStateChangeDeniedException extends Exception {
+
+ final String reason;
+
+ public ApplicationStateChangeDeniedException() {
+ this("Unknown");
+ }
+
+ public ApplicationStateChangeDeniedException(String reason) {
+ super();
+ this.reason = reason;
+ }
+}
diff --git a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/BatchHostNameNotFoundException.java b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/BatchHostNameNotFoundException.java
new file mode 100644
index 00000000000..6da27678b21
--- /dev/null
+++ b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/BatchHostNameNotFoundException.java
@@ -0,0 +1,15 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.orchestrator;
+
+import com.yahoo.vespa.applicationmodel.HostName;
+
+import java.util.List;
+
+public class BatchHostNameNotFoundException extends OrchestrationException {
+ public BatchHostNameNotFoundException(HostName parentHostname,
+ List<HostName> hostNames,
+ HostNameNotFoundException e) {
+ super("Failed to suspend " + hostNames + " with parent host "
+ + parentHostname + ": " + e.getMessage(), e);
+ }
+}
diff --git a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/BatchInternalErrorException.java b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/BatchInternalErrorException.java
new file mode 100644
index 00000000000..7bd0237d37b
--- /dev/null
+++ b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/BatchInternalErrorException.java
@@ -0,0 +1,15 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.orchestrator;
+
+import com.yahoo.vespa.applicationmodel.HostName;
+
+import java.util.List;
+
+public class BatchInternalErrorException extends OrchestrationException {
+ public BatchInternalErrorException(HostName parentHostname,
+ List<HostName> orderedHostNames,
+ RuntimeException e) {
+ super("Failed to suspend " + orderedHostNames + " with parent host "
+ + parentHostname + ": " + e.getMessage(), e);
+ }
+}
diff --git a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/HostNameNotFoundException.java b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/HostNameNotFoundException.java
new file mode 100644
index 00000000000..5919f974234
--- /dev/null
+++ b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/HostNameNotFoundException.java
@@ -0,0 +1,15 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.orchestrator;
+
+import com.yahoo.vespa.applicationmodel.HostName;
+
+/**
+ * Exception thrown if hostname is not found in the system (i.e node repo)
+ *
+ * @author <a href="mailto:smorgrav@yahoo-inc.com">Toby</a>
+ */
+public class HostNameNotFoundException extends OrchestrationException {
+ public HostNameNotFoundException(HostName hostName) {
+ super("Hostname " + hostName + " not found in any instances");
+ }
+}
diff --git a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/InstanceLookupService.java b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/InstanceLookupService.java
new file mode 100644
index 00000000000..11221e3a464
--- /dev/null
+++ b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/InstanceLookupService.java
@@ -0,0 +1,19 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.orchestrator;
+
+import com.yahoo.vespa.applicationmodel.ApplicationInstanceReference;
+import com.yahoo.vespa.applicationmodel.ApplicationInstance;
+import com.yahoo.vespa.applicationmodel.HostName;
+import com.yahoo.vespa.service.monitor.ServiceMonitorStatus;
+
+import java.util.Optional;
+import java.util.Set;
+
+/**
+ * @author oyving
+ */
+public interface InstanceLookupService {
+ Optional<ApplicationInstance<ServiceMonitorStatus>> findInstanceById(ApplicationInstanceReference applicationInstanceReference);
+ Optional<ApplicationInstance<ServiceMonitorStatus>> findInstanceByHost(HostName hostName);
+ Set<ApplicationInstanceReference> knownInstances();
+}
diff --git a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/OrchestrationException.java b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/OrchestrationException.java
new file mode 100644
index 00000000000..600fe8bfe8b
--- /dev/null
+++ b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/OrchestrationException.java
@@ -0,0 +1,29 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.orchestrator;
+
+import java.util.Arrays;
+
+public class OrchestrationException extends Exception {
+ public OrchestrationException(Throwable cause) {
+ super(cause);
+ }
+
+ public OrchestrationException(String message) {
+ super(message);
+ }
+
+ public OrchestrationException(String message, Throwable cause) {
+ super(message, cause);
+ }
+
+ // Overrides getMessage() to include suppressed Throwables, which is useful to see which
+ // "resumes" of a failed suspend succeeded.
+ @Override
+ public String getMessage() {
+ StringBuilder builder = new StringBuilder();
+ builder.append(super.getMessage());
+ Throwable[] suppressedThrowables = getSuppressed();
+ Arrays.stream(suppressedThrowables).forEach(t -> builder.append("; With suppressed throwable " + t));
+ return builder.toString();
+ }
+}
diff --git a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/Orchestrator.java b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/Orchestrator.java
new file mode 100644
index 00000000000..a7db8e42e46
--- /dev/null
+++ b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/Orchestrator.java
@@ -0,0 +1,98 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.orchestrator;
+
+import com.yahoo.config.provision.ApplicationId;
+import com.yahoo.vespa.applicationmodel.HostName;
+import com.yahoo.vespa.orchestrator.policy.BatchHostStateChangeDeniedException;
+import com.yahoo.vespa.orchestrator.policy.HostStateChangeDeniedException;
+import com.yahoo.vespa.orchestrator.status.ApplicationInstanceStatus;
+import com.yahoo.vespa.orchestrator.status.HostStatus;
+
+import java.util.List;
+import java.util.Set;
+
+/**
+ * The orchestrator is used to coordinate the need of vespa services to restart or
+ * disconnect from normal operations for debugging or maintenance. We are not coordinating on the service
+ * level but rather on the granularity of host and application instance.
+ *
+ * (A host will have multiple services and an application will have multiple hots)
+ *
+ * A policy decides how many hosts can go down at the same time based on which services that runs
+ * on the hosts, the redundancy in the system, coverage requirements and potentially more
+ * (see policies for details).
+ *
+ * An application level suspend - enables all services to go down at the same time and bypasses the
+ * host level state and the host level policies.
+ * This is used for parallel upgrade and larger maintenance tasks.
+ *
+ * @author <a href="mailto:smorgrav@yahoo-inc.com">Toby</a>
+ */
+public interface Orchestrator {
+
+ /**
+ * Get the status of a given node. If no state is recorded
+ * then this will return the status 'No Remarks'
+ *
+ * @param hostName The FQDN which are used in the noderepo.
+ * @return The enum describing the current state.
+ * @throws HostNameNotFoundException if hostName is unrecognized (in node repo)
+ */
+ HostStatus getNodeStatus(final HostName hostName) throws HostNameNotFoundException;
+
+ /**
+ * Resume normal operation for this host.
+ *
+ * @param hostName The FQDN
+ * @throws HostStateChangeDeniedException if the request cannot be meet due to policy constraints.
+ * @throws HostNameNotFoundException if the hostName is not recognized in the system (node repo)
+ */
+ void resume(final HostName hostName) throws HostStateChangeDeniedException, HostNameNotFoundException;
+
+ /**
+ * Suspend normal operations for this host.
+ *
+ * @param hostName The FQDN
+ * @throws HostStateChangeDeniedException if the request cannot be meet due to policy constraints.
+ * @throws HostNameNotFoundException if the hostName is not recognized in the system (node repo)
+ */
+ void suspend(final HostName hostName) throws HostStateChangeDeniedException, HostNameNotFoundException;
+
+ /**
+ * Get the orchestrator status of the application instance.
+ *
+ * @param appId Identifier of the application to check
+ * @return The enum describing the current state.
+ */
+ ApplicationInstanceStatus getApplicationInstanceStatus(final ApplicationId appId) throws ApplicationIdNotFoundException;
+
+ /**
+ * Returns all application instances that are suspended. The intention is to use this
+ * for visualization, informational and debugging purposes.
+ *
+ * @return A Map between the application instance and its status.
+ */
+ Set<ApplicationId> getAllSuspendedApplications();
+
+ /**
+ * Resume normal orchestration for hosts belonging to this application.
+ *
+ * @param appId Identifier of the application to resume
+ */
+ void resume(final ApplicationId appId) throws ApplicationStateChangeDeniedException, ApplicationIdNotFoundException;
+
+
+ /**
+ * Suspend orchestration for hosts belonging to this application.
+ * I.e all suspend requests for its hosts will succeed.
+ *
+ * @param appId Identifier of the application to resume
+ */
+ void suspend(final ApplicationId appId) throws ApplicationStateChangeDeniedException, ApplicationIdNotFoundException;
+
+ /**
+ * Suspend all hosts. On failure, all hosts are resumed before exiting the method with an exception.
+ */
+ void suspendAll(HostName parentHostname, List<HostName> hostNames)
+ throws BatchInternalErrorException, BatchHostStateChangeDeniedException, BatchHostNameNotFoundException;
+}
diff --git a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/OrchestratorImpl.java b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/OrchestratorImpl.java
new file mode 100644
index 00000000000..ad72a43ff72
--- /dev/null
+++ b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/OrchestratorImpl.java
@@ -0,0 +1,344 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.orchestrator;
+
+import com.google.inject.Inject;
+import com.yahoo.config.provision.ApplicationId;
+import com.yahoo.log.LogLevel;
+import com.yahoo.vespa.applicationmodel.ApplicationInstance;
+import com.yahoo.vespa.applicationmodel.ApplicationInstanceReference;
+import com.yahoo.vespa.applicationmodel.ClusterId;
+import com.yahoo.vespa.applicationmodel.HostName;
+import com.yahoo.vespa.applicationmodel.ServiceCluster;
+import com.yahoo.vespa.orchestrator.controller.ClusterControllerClient;
+import com.yahoo.vespa.orchestrator.controller.ClusterControllerClientFactory;
+import com.yahoo.vespa.orchestrator.controller.ClusterControllerState;
+import com.yahoo.vespa.orchestrator.controller.ClusterControllerStateResponse;
+import com.yahoo.vespa.orchestrator.policy.BatchHostStateChangeDeniedException;
+import com.yahoo.vespa.orchestrator.policy.HostStateChangeDeniedException;
+import com.yahoo.vespa.orchestrator.policy.HostedVespaPolicy;
+import com.yahoo.vespa.orchestrator.policy.Policy;
+import com.yahoo.vespa.orchestrator.status.ApplicationInstanceStatus;
+import com.yahoo.vespa.orchestrator.status.HostStatus;
+import com.yahoo.vespa.orchestrator.status.MutableStatusRegistry;
+import com.yahoo.vespa.orchestrator.status.StatusService;
+import com.yahoo.vespa.service.monitor.ServiceMonitorStatus;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+import java.util.concurrent.TimeUnit;
+import java.util.logging.Logger;
+import java.util.stream.Collectors;
+
+/**
+ * @author oyving
+ * @author <a href="mailto:smorgrav@yahoo-inc.com">Toby</a>
+ */
+public class OrchestratorImpl implements Orchestrator {
+
+
+ private static final Logger log = Logger.getLogger(OrchestratorImpl.class.getName());
+
+ private final Policy policy;
+ private final StatusService statusService;
+ private final InstanceLookupService instanceLookupService;
+ private final int serviceMonitorConvergenceLatencySeconds;
+ private final ClusterControllerClientFactory clusterControllerClientFactory;
+
+ @Inject
+ public OrchestratorImpl(
+ final ClusterControllerClientFactory clusterControllerClientFactory,
+ final StatusService statusService,
+ final OrchestratorConfig orchestratorConfig,
+ final InstanceLookupService instanceLookupService)
+ {
+ this(new HostedVespaPolicy(clusterControllerClientFactory),
+ clusterControllerClientFactory,
+ statusService,
+ instanceLookupService,
+ orchestratorConfig.serviceMonitorConvergenceLatencySeconds());
+ }
+
+ public OrchestratorImpl(
+ final Policy policy,
+ final ClusterControllerClientFactory clusterControllerClientFactory,
+ final StatusService statusService,
+ final InstanceLookupService instanceLookupService,
+ final int serviceMonitorConvergenceLatencySeconds)
+ {
+ this.policy = policy;
+ this.clusterControllerClientFactory = clusterControllerClientFactory;
+ this.statusService = statusService;
+ this.serviceMonitorConvergenceLatencySeconds = serviceMonitorConvergenceLatencySeconds;
+ this.instanceLookupService = instanceLookupService;
+
+ }
+
+ @Override
+ public HostStatus getNodeStatus(HostName hostName) throws HostNameNotFoundException {
+ return getNodeStatus(getApplicationInstance(hostName).reference(),hostName);
+ }
+
+ @Override
+ public void resume(HostName hostName) throws HostStateChangeDeniedException, HostNameNotFoundException {
+ /**
+ * When making a state transition to this state, we have to consider that if the host has been in
+ * ALLOWED_TO_BE_DOWN state, services on the host may recently have been stopped (and, presumably, started).
+ * Service monitoring may not have had enough time to detect that services were stopped,
+ * and may therefore mistakenly report services as up, even if they still haven't initialized and
+ * are not yet ready for serving. Erroneously reporting both host and services as up causes a race
+ * where services on other hosts may be stopped prematurely. A delay here ensures that service
+ * monitoring will have had time to catch up. Since we don't want do the delay with the lock held,
+ * and the host status service's locking functionality does not support something like condition
+ * variables or Object.wait(), we break out here, releasing the lock before delaying.
+ */
+ sleep(serviceMonitorConvergenceLatencySeconds, TimeUnit.SECONDS);
+
+ ApplicationInstance<ServiceMonitorStatus> appInstance = getApplicationInstance(hostName);
+
+ try (MutableStatusRegistry statusRegistry = statusService.lockApplicationInstance_forCurrentThreadOnly(appInstance.reference())) {
+ final HostStatus currentHostState = statusRegistry.getHostStatus(hostName);
+
+ if (HostStatus.NO_REMARKS == currentHostState) {
+ return;
+ }
+
+ ApplicationInstanceStatus appStatus = statusService.forApplicationInstance(appInstance.reference()).getApplicationInstanceStatus();
+ if (appStatus == ApplicationInstanceStatus.NO_REMARKS) {
+ policy.releaseSuspensionGrant(appInstance, hostName, statusRegistry);
+ }
+ }
+ }
+
+ @Override
+ public void suspend(HostName hostName) throws HostStateChangeDeniedException, HostNameNotFoundException {
+ ApplicationInstance<ServiceMonitorStatus> appInstance = getApplicationInstance(hostName);
+
+
+ try (MutableStatusRegistry hostStatusRegistry = statusService.lockApplicationInstance_forCurrentThreadOnly(appInstance.reference())) {
+ final HostStatus currentHostState = hostStatusRegistry.getHostStatus(hostName);
+
+ if (HostStatus.ALLOWED_TO_BE_DOWN == currentHostState) {
+ return;
+ }
+
+ ApplicationInstanceStatus appStatus = statusService.forApplicationInstance(appInstance.reference()).getApplicationInstanceStatus();
+ if (appStatus == ApplicationInstanceStatus.NO_REMARKS) {
+ policy.grantSuspensionRequest(appInstance, hostName, hostStatusRegistry);
+ }
+ }
+ }
+
+ @Override
+ public ApplicationInstanceStatus getApplicationInstanceStatus(
+ final ApplicationId appId) throws ApplicationIdNotFoundException {
+ ApplicationInstanceReference appRef = OrchestratorUtil.toApplicationInstanceReference(appId);
+ return statusService.forApplicationInstance(appRef).getApplicationInstanceStatus();
+ }
+
+ @Override
+ public Set<ApplicationId> getAllSuspendedApplications() {
+ Set<ApplicationInstanceReference> refSet = statusService.getAllSuspendedApplications();
+ return refSet.stream().map(OrchestratorUtil::toApplicationId).collect(Collectors.toSet());
+ }
+
+ @Override
+ public void resume(ApplicationId appId) throws ApplicationIdNotFoundException, ApplicationStateChangeDeniedException {
+ setApplicationStatus(appId, ApplicationInstanceStatus.NO_REMARKS);
+ }
+
+ @Override
+ public void suspend(ApplicationId appId) throws ApplicationIdNotFoundException, ApplicationStateChangeDeniedException {
+ setApplicationStatus(appId, ApplicationInstanceStatus.ALLOWED_TO_BE_DOWN);
+ }
+
+ @Override
+ public void suspendAll(HostName parentHostname, List<HostName> hostNames)
+ throws BatchHostStateChangeDeniedException, BatchHostNameNotFoundException, BatchInternalErrorException {
+ try {
+ hostNames = sortHostNamesForSuspend(hostNames);
+ } catch (HostNameNotFoundException e) {
+ throw new BatchHostNameNotFoundException(parentHostname, hostNames, e);
+ }
+
+ for (HostName hostName : hostNames) {
+ try {
+ suspend(hostName);
+ } catch (HostStateChangeDeniedException e) {
+ BatchHostStateChangeDeniedException exception =
+ new BatchHostStateChangeDeniedException(parentHostname, hostNames, e);
+ rollbackSuspendAll(hostNames, exception);
+ throw exception;
+ } catch (HostNameNotFoundException e) {
+ // Should never get here since since we would have received HostNameNotFoundException earlier.
+ BatchHostNameNotFoundException exception =
+ new BatchHostNameNotFoundException(parentHostname, hostNames, e);
+ rollbackSuspendAll(hostNames, exception);
+ throw exception;
+ } catch (RuntimeException e) {
+ BatchInternalErrorException exception =
+ new BatchInternalErrorException(parentHostname, hostNames, e);
+ rollbackSuspendAll(hostNames, exception);
+ throw exception;
+ }
+ }
+ }
+
+ private void rollbackSuspendAll(List<HostName> orderedHostNames, Exception exception) {
+ List<HostName> reverseOrderedHostNames = new ArrayList<>(orderedHostNames);
+ Collections.reverse(reverseOrderedHostNames);
+ for (HostName hostName : reverseOrderedHostNames) {
+ try {
+ resume(hostName);
+ } catch (HostStateChangeDeniedException | HostNameNotFoundException | RuntimeException e) {
+ // We're forced to ignore these since we're already rolling back a suspension.
+ exception.addSuppressed(e);
+ }
+ }
+ }
+
+ /**
+ * PROBLEM
+ * Take the example of 2 Docker hosts:
+ * - Docker host 1 has two nodes A1 and B1, belonging to the application with
+ * a globally unique ID A and B, respectively.
+ * - Similarly, Docker host 2 has two nodes running content nodes A2 and B2,
+ * and we assume both A1 and A2 (and B1 and B2) have services within the same service cluster.
+ *
+ * Suppose both Docker hosts wanting to reboot, and
+ * - Docker host 1 asks to suspend A1 and B1, while
+ * - Docker host 2 asks to suspend B2 and A2.
+ *
+ * The Orchestrator may allow suspend of A1 and B2, before requesting the suspension of B1 and A2.
+ * None of these can be suspended (assuming max 1 suspended content node per content cluster),
+ * and so both requests for suspension will fail.
+ *
+ * Note that it's not a deadlock - both client will fail immediately and resume both A1 and B2 before
+ * responding to the client, and if host 1 asks later w/o host 2 asking at the same time,
+ * it will be given permission to suspend. However if both hosts were to request in lock-step,
+ * there would be starvation. And in general, it would fail requests for suspension more
+ * than necessary.
+ *
+ * SOLUTION
+ * The solution we're using is to order the hostnames by the globally unique application instance ID,
+ * e.g. hosted-vespa:routing:dev:ci-corp-us-east-1:default. In the example above, it would guarantee
+ * Docker host 2 would ensure ask to suspend B2 before A2. We take care of that ordering here.
+ */
+ public List<HostName> sortHostNamesForSuspend(List<HostName> hostNames) throws HostNameNotFoundException {
+ Map<HostName, ApplicationInstanceReference> applicationReferences = new HashMap<>(hostNames.size());
+ for (HostName hostName : hostNames) {
+ ApplicationInstance<?> appInstance = getApplicationInstance(hostName);
+ applicationReferences.put(hostName, appInstance.reference());
+ }
+
+ List<HostName> orderedHostNames = hostNames.stream()
+ .sorted((leftHostname, rightHostname) -> compareHostNamesForSuspend(leftHostname, rightHostname, applicationReferences))
+ .collect(Collectors.toList());
+
+ return orderedHostNames;
+ }
+
+ private int compareHostNamesForSuspend(
+ HostName leftHostname,
+ HostName rightHostname,
+ Map<HostName, ApplicationInstanceReference> applicationReferences) {
+ ApplicationInstanceReference leftApplicationReference = applicationReferences.get(leftHostname);
+ assert leftApplicationReference != null;
+
+ ApplicationInstanceReference rightApplicationReference = applicationReferences.get(rightHostname);
+ assert rightApplicationReference != null;
+
+ // ApplicationInstanceReference.toString() is e.g. "hosted-vespa:routing:dev:ci-corp-us-east-1:default"
+ int diff = leftApplicationReference.toString().compareTo(rightApplicationReference.toString());
+ if (diff != 0) {
+ return diff;
+ }
+
+ return leftHostname.toString().compareTo(rightHostname.toString());
+ }
+
+ private HostStatus getNodeStatus(
+ final ApplicationInstanceReference applicationRef,
+ final HostName hostName) {
+ return statusService.forApplicationInstance(applicationRef).getHostStatus(hostName);
+ }
+
+ private void setApplicationStatus(
+ final ApplicationId appId,
+ final ApplicationInstanceStatus status) throws ApplicationStateChangeDeniedException, ApplicationIdNotFoundException{
+
+ ApplicationInstanceReference appRef = OrchestratorUtil.toApplicationInstanceReference(appId);
+ try (MutableStatusRegistry statusRegistry =
+ statusService.lockApplicationInstance_forCurrentThreadOnly(appRef)) {
+
+ // Short-circuit if already in wanted state
+ if (status == statusRegistry.getApplicationInstanceStatus()) return;
+
+ // Set content clusters for this application in maintenance on suspend
+ if (status == ApplicationInstanceStatus.ALLOWED_TO_BE_DOWN) {
+ ApplicationInstance<ServiceMonitorStatus> application = getApplicationInstance(appRef);
+
+ // Mark it allowed to be down before we manipulate the clustercontroller
+ OrchestratorUtil.getHostsUsedByApplicationInstance(application).stream()
+ .forEach(h -> statusRegistry.setHostState(h, HostStatus.ALLOWED_TO_BE_DOWN));
+
+ // If the clustercontroller throws an error the nodes will be marked as allowed to be down
+ // and be set back up on next resume invocation.
+ setClusterStateInController(application, ClusterControllerState.MAINTENANCE);
+ }
+
+ statusRegistry.setApplicationInstanceStatus(status);
+ }
+ }
+
+ private void setClusterStateInController(
+ final ApplicationInstance<ServiceMonitorStatus> application,
+ final ClusterControllerState state) throws ApplicationStateChangeDeniedException, ApplicationIdNotFoundException {
+
+ // Get all content clusters for this application
+ Set<ClusterId> contentClusterIds = application.serviceClusters().stream()
+ .filter(VespaModelUtil::isContent)
+ .map(ServiceCluster::clusterId)
+ .collect(Collectors.toSet());
+
+ // For all content clusters set in maintenance
+ log.log(LogLevel.INFO, String.format("Setting content clusters %s for application %s to %s",
+ contentClusterIds,application.applicationInstanceId(),state));
+ for (ClusterId clusterId : contentClusterIds) {
+ final ClusterControllerClient client = clusterControllerClientFactory.createClient(
+ VespaModelUtil.getClusterControllerInstances(application, clusterId),
+ clusterId.s());
+ try {
+ ClusterControllerStateResponse response = client.setApplicationState(state);
+ if (!response.wasModified) {
+ String msg = String.format("Fail to set application %s, cluster name %s to cluster state %s due to: %s",
+ application.applicationInstanceId(), clusterId, state, response.reason);
+ throw new ApplicationStateChangeDeniedException(msg);
+ }
+ } catch (IOException e) {
+ throw new ApplicationStateChangeDeniedException(e.getMessage());
+ }
+ }
+ }
+
+ private ApplicationInstance<ServiceMonitorStatus> getApplicationInstance(HostName hostName) throws HostNameNotFoundException{
+ return instanceLookupService.findInstanceByHost(hostName).orElseThrow(
+ () -> new HostNameNotFoundException(hostName));
+ }
+
+ private ApplicationInstance<ServiceMonitorStatus> getApplicationInstance(ApplicationInstanceReference appRef) throws ApplicationIdNotFoundException {
+ return instanceLookupService.findInstanceById(appRef).orElseThrow(ApplicationIdNotFoundException::new);
+ }
+
+ private static void sleep(final long time, final TimeUnit timeUnit) {
+ try {
+ Thread.sleep(timeUnit.toMillis(time));
+ } catch (InterruptedException e) {
+ throw new RuntimeException("Unexpectedly interrupted", e);
+ }
+ }
+}
diff --git a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/OrchestratorUtil.java b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/OrchestratorUtil.java
new file mode 100644
index 00000000000..ca7bd058169
--- /dev/null
+++ b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/OrchestratorUtil.java
@@ -0,0 +1,144 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.orchestrator;
+
+import com.yahoo.config.provision.ApplicationId;
+import com.yahoo.config.provision.ApplicationName;
+import com.yahoo.config.provision.InstanceName;
+import com.yahoo.config.provision.TenantName;
+import com.yahoo.vespa.orchestrator.status.HostStatus;
+import com.yahoo.vespa.orchestrator.status.ReadOnlyStatusRegistry;
+import com.yahoo.vespa.applicationmodel.ApplicationInstance;
+import com.yahoo.vespa.applicationmodel.ApplicationInstanceId;
+import com.yahoo.vespa.applicationmodel.ApplicationInstanceReference;
+import com.yahoo.vespa.applicationmodel.HostName;
+import com.yahoo.vespa.applicationmodel.ServiceCluster;
+import com.yahoo.vespa.applicationmodel.ServiceInstance;
+import com.yahoo.vespa.applicationmodel.TenantId;
+
+import java.util.Collection;
+import java.util.Map;
+import java.util.Set;
+import java.util.function.Function;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+import java.util.stream.Collectors;
+import java.util.stream.Stream;
+
+import static java.util.stream.Collectors.toMap;
+import static java.util.stream.Collectors.toSet;
+
+/**
+ * Utility methods for working with service monitor model entity objects.
+ *
+ * @author <a href="mailto:bakksjo@yahoo-inc.com">Oyvind Bakksjo</a>
+ */
+public class OrchestratorUtil {
+ // Utility class, not to be instantiated.
+ private OrchestratorUtil() {}
+
+ public static Set<HostName> getHostsUsedByApplicationInstance(final ApplicationInstance<?> applicationInstance) {
+ return applicationInstance.serviceClusters().stream()
+ .flatMap(serviceCluster -> getHostsUsedByServiceCluster(serviceCluster).stream())
+ .collect(toSet());
+ }
+
+ public static Set<HostName> getHostsUsedByServiceCluster(final ServiceCluster<?> serviceCluster) {
+ return serviceCluster.serviceInstances().stream()
+ .map(ServiceInstance::hostName)
+ .collect(toSet());
+ }
+
+ public static <T> Set<ServiceCluster<T>> getServiceClustersUsingHost(
+ final Collection<ServiceCluster<T>> serviceClusters,
+ final HostName hostName) {
+ return serviceClusters.stream()
+ .filter(serviceCluster -> hasServiceInstanceOnHost(serviceCluster, hostName))
+ .collect(toSet());
+ }
+
+ public static Map<HostName, HostStatus> getHostStatusMap(
+ final Collection<HostName> hosts,
+ final ReadOnlyStatusRegistry hostStatusService) {
+ return hosts.stream()
+ .collect(Collectors.toMap(
+ hostName -> hostName,
+ hostName -> hostStatusService.getHostStatus(hostName)));
+ }
+
+ private static boolean hasServiceInstanceOnHost(
+ final ServiceCluster<?> serviceCluster,
+ final HostName hostName) {
+ return serviceInstancesOnHost(serviceCluster, hostName).count() > 0;
+ }
+
+ public static <T> Stream<ServiceInstance<T>> serviceInstancesOnHost(
+ final ServiceCluster<T> serviceCluster,
+ final HostName hostName) {
+ return serviceCluster.serviceInstances().stream()
+ .filter(instance -> instance.hostName().equals(hostName));
+ }
+
+ public static <K, V1, V2> Map<K, V2> mapValues(
+ final Map<K, V1> map,
+ final Function<V1, V2> valueConverter) {
+ return map.entrySet().stream()
+ .collect(toMap(Map.Entry::getKey, entry -> valueConverter.apply(entry.getValue())));
+ }
+
+ private static final Pattern APPLICATION_INSTANCE_REFERENCE_REST_FORMAT_PATTERN = Pattern.compile("^([^:]+):(.+)$");
+
+ /** Returns an ApplicationInstanceReference constructed from the serialized format used in the REST API. */
+ public static ApplicationInstanceReference parseAppInstanceReference(final String restFormat) {
+ if (restFormat == null) {
+ throw new IllegalArgumentException("Could not construct instance id from null string");
+ }
+
+ final Matcher matcher = APPLICATION_INSTANCE_REFERENCE_REST_FORMAT_PATTERN.matcher(restFormat);
+ if (!matcher.matches()) {
+ throw new IllegalArgumentException("Could not construct instance id from string \"" + restFormat +"\"");
+ }
+
+ final TenantId tenantId = new TenantId(matcher.group(1));
+ final ApplicationInstanceId applicationInstanceId = new ApplicationInstanceId(matcher.group(2));
+ return new ApplicationInstanceReference(tenantId, applicationInstanceId);
+ }
+
+ public static String toRestApiFormat(final ApplicationInstanceReference applicationInstanceReference) {
+ return applicationInstanceReference.tenantId() + ":" + applicationInstanceReference.applicationInstanceId();
+ }
+
+
+ public static ApplicationInstanceReference toApplicationInstanceReference(ApplicationId appId) {
+ TenantId tenantId = new TenantId(appId.tenant().toString());
+
+ String appName = appId.application().toString();
+ String instanceName = appId.instance().toString();
+ ApplicationInstanceId appInstanceId = new ApplicationInstanceId(appName + ":" + instanceName);
+
+ return new ApplicationInstanceReference(tenantId,appInstanceId);
+ }
+
+ public static ApplicationId toApplicationId(ApplicationInstanceReference appRef) {
+ TenantName tenantName = TenantName.from(appRef.tenantId().toString());
+
+ // Now for the application/instance pair we need to split this
+ String appNameStr = appRef.applicationInstanceId().toString();
+ String[] appNameParts = appNameStr.split(":");
+
+ // We assume a valid application reference has at lest two parts appname:instancename
+ // TODO is this assumption valid?
+ if (appNameParts.length < 2) {
+ // TODO Since this is used internally we should perhapes use another exception type?
+ throw new IllegalArgumentException("Application reference not valid: " + appRef);
+ }
+
+ // Last part of string is the instance name
+ InstanceName instanceName = InstanceName.from(appNameParts[appNameParts.length-1]);
+
+ // The rest is application
+ int whereAppNameEnds = appNameStr.lastIndexOf(":");
+ ApplicationName appName = ApplicationName.from(appNameStr.substring(0, whereAppNameEnds));
+
+ return ApplicationId.from(tenantName, appName, instanceName);
+ }
+}
diff --git a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/ServiceMonitorInstanceLookupService.java b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/ServiceMonitorInstanceLookupService.java
new file mode 100644
index 00000000000..a421db2732c
--- /dev/null
+++ b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/ServiceMonitorInstanceLookupService.java
@@ -0,0 +1,71 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.orchestrator;
+
+import com.google.inject.Inject;
+import com.yahoo.vespa.applicationmodel.ApplicationInstanceReference;
+import com.yahoo.vespa.applicationmodel.ApplicationInstance;
+import com.yahoo.vespa.applicationmodel.HostName;
+import com.yahoo.vespa.service.monitor.ServiceMonitorStatus;
+import com.yahoo.vespa.service.monitor.SlobrokAndConfigIntersector;
+
+import java.util.List;
+import java.util.Map;
+import java.util.Optional;
+import java.util.Set;
+import java.util.stream.Collectors;
+
+/**
+ * Uses slobrok data (a.k.a. heartbeat) to implement {@link InstanceLookupService}.
+ *
+ * @author <a href="mailto:bakksjo@yahoo-inc.com">Oyvind Bakksjo</a>
+ */
+public class ServiceMonitorInstanceLookupService implements InstanceLookupService {
+ private final SlobrokAndConfigIntersector slobrokAndConfigIntersector;
+
+ @Inject
+ public ServiceMonitorInstanceLookupService(final SlobrokAndConfigIntersector slobrokAndConfigIntersector) {
+ this.slobrokAndConfigIntersector = slobrokAndConfigIntersector;
+ }
+
+ @Override
+ public Optional<ApplicationInstance<ServiceMonitorStatus>> findInstanceById(
+ final ApplicationInstanceReference applicationInstanceReference) {
+ final Map<ApplicationInstanceReference, ApplicationInstance<ServiceMonitorStatus>> instanceMap
+ = slobrokAndConfigIntersector.queryStatusOfAllApplicationInstances();
+ return Optional.ofNullable(instanceMap.get(applicationInstanceReference));
+ }
+
+ @Override
+ public Optional<ApplicationInstance<ServiceMonitorStatus>> findInstanceByHost(HostName hostName) {
+ final Map<ApplicationInstanceReference, ApplicationInstance<ServiceMonitorStatus>> instanceMap
+ = slobrokAndConfigIntersector.queryStatusOfAllApplicationInstances();
+ final List<ApplicationInstance<ServiceMonitorStatus>> applicationInstancesUsingHost = instanceMap.entrySet().stream()
+ .filter(entry -> applicationInstanceUsesHost(entry.getValue(), hostName))
+ .map(Map.Entry::getValue)
+ .collect(Collectors.toList());
+ if (applicationInstancesUsingHost.isEmpty()) {
+ return Optional.empty();
+ }
+ if (applicationInstancesUsingHost.size() > 1) {
+ throw new AssertionError(
+ "Major assumption broken: Multiple application instances contain host " + hostName.s()
+ + ": " + applicationInstancesUsingHost);
+ }
+ return Optional.of(applicationInstancesUsingHost.get(0));
+ }
+
+ @Override
+ public Set<ApplicationInstanceReference> knownInstances() {
+ return slobrokAndConfigIntersector.queryStatusOfAllApplicationInstances().keySet();
+ }
+
+ private static boolean applicationInstanceUsesHost(
+ final ApplicationInstance<ServiceMonitorStatus> applicationInstance,
+ final HostName hostName) {
+ return applicationInstance.serviceClusters().stream()
+ .anyMatch(serviceCluster ->
+ serviceCluster.serviceInstances().stream()
+ .anyMatch(serviceInstance ->
+ serviceInstance.hostName().equals(hostName)));
+ }
+}
diff --git a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/VespaModelUtil.java b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/VespaModelUtil.java
new file mode 100644
index 00000000000..148c9463f62
--- /dev/null
+++ b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/VespaModelUtil.java
@@ -0,0 +1,227 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.orchestrator;
+
+import com.yahoo.vespa.applicationmodel.ApplicationInstance;
+import com.yahoo.vespa.applicationmodel.ClusterId;
+import com.yahoo.vespa.applicationmodel.ConfigId;
+import com.yahoo.vespa.applicationmodel.HostName;
+import com.yahoo.vespa.applicationmodel.ServiceCluster;
+import com.yahoo.vespa.applicationmodel.ServiceInstance;
+import com.yahoo.vespa.applicationmodel.ServiceType;
+
+import java.util.Collection;
+import java.util.Comparator;
+import java.util.Objects;
+import java.util.Optional;
+import java.util.Set;
+import java.util.logging.Logger;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+import java.util.stream.Collectors;
+
+import static com.yahoo.collections.CollectionUtil.first;
+
+/**
+ * Utility methods for working with Vespa-specific model entities (see OrchestratorUtil
+ * for more generic model utilities).
+ *
+ * @author hakon
+ */
+public class VespaModelUtil {
+ private static final Logger log = Logger.getLogger(VespaModelUtil.class.getName());
+
+ public static final ClusterId ADMIN_CLUSTER_ID = new ClusterId("admin");
+
+ public static final ServiceType SLOBROK_SERVICE_TYPE = new ServiceType("slobrok");
+ public static final ServiceType CLUSTER_CONTROLLER_SERVICE_TYPE = new ServiceType("container-clustercontroller");
+ public static final ServiceType DISTRIBUTOR_SERVICE_TYPE = new ServiceType("distributor");
+ public static final ServiceType SEARCHNODE_SERVICE_TYPE = new ServiceType("searchnode");
+ public static final ServiceType STORAGENODE_SERVICE_TYPE = new ServiceType("storagenode");
+
+ // @return true iff the service cluster refers to a cluster controller service cluster.
+ public static boolean isClusterController(final ServiceCluster<?> cluster) {
+ return CLUSTER_CONTROLLER_SERVICE_TYPE.equals(cluster.serviceType());
+ }
+
+ /**
+ * Note that a search node service cluster (service type searchnode aka proton) is
+ * always accompanied by a storage node service cluster, but not vice versa.
+ *
+ * @return true iff the service cluster consists of storage nodes (proton or vds).
+ */
+ public static boolean isStorage(ServiceCluster<?> cluster) {
+ return STORAGENODE_SERVICE_TYPE.equals(cluster.serviceType());
+ }
+
+ /**
+ * @return true iff the service cluster is a content service cluster.
+ */
+ public static boolean isContent(final ServiceCluster<?> cluster) {
+ return DISTRIBUTOR_SERVICE_TYPE.equals(cluster.serviceType()) ||
+ SEARCHNODE_SERVICE_TYPE.equals(cluster.serviceType()) ||
+ STORAGENODE_SERVICE_TYPE.equals(cluster.serviceType());
+ }
+
+ /**
+ * @return The set of all Cluster Controller service instances for the application.
+ */
+ public static <T> Set<ServiceInstance<T>> getClusterControllerInstances(
+ ApplicationInstance<T> application,
+ ClusterId contentClusterId)
+ {
+ final Set<ServiceCluster<T>> controllerClusters = getClusterControllerServiceClusters(application);
+
+ Collection<ServiceCluster<T>> controllerClustersForContentCluster = filter(controllerClusters, contentClusterId);
+
+ if (controllerClustersForContentCluster.size() == 1) {
+ return first(controllerClustersForContentCluster).serviceInstances();
+ } else if (controllerClusters.size() == 1) {
+ ServiceCluster<T> cluster = first(controllerClusters);
+ log.warning("No cluster controller cluster for content cluster " + contentClusterId
+ + ", using the only cluster controller cluster available: " + cluster.clusterId());
+
+ return cluster.serviceInstances();
+ } else {
+ throw new RuntimeException("Failed getting cluster controller for content cluster " + contentClusterId +
+ ". Available clusters = " + controllerClusters +
+ ", matching clusters = " + controllerClustersForContentCluster);
+ }
+ }
+
+ private static <T> Collection<ServiceCluster<T>> filter(
+ Set<ServiceCluster<T>> controllerClusters,
+ ClusterId contentClusterId) {
+ ClusterId clusterControllerClusterId = new ClusterId(contentClusterId.s() + "-controllers");
+
+ return controllerClusters.stream().
+ filter(cluster -> cluster.clusterId().equals(clusterControllerClusterId)).
+ collect(Collectors.toList());
+ }
+
+ public static <T> Set<ServiceCluster<T>> getClusterControllerServiceClusters(
+ final ApplicationInstance<T> application) {
+ return application.serviceClusters().stream()
+ .filter(VespaModelUtil::isClusterController)
+ .collect(Collectors.toSet());
+ }
+
+ /**
+ * @return Host name for a Cluster Controller that is likely to be the master, is !isPresent() if
+ * no cluster controller was found.
+ * @throws java.lang.IllegalArgumentException if there are no cluster controller instances.
+ */
+ public static HostName getControllerHostName(ApplicationInstance<?> application, ClusterId contentClusterId) {
+ // It happens that the master Cluster Controller is the one with the lowest index, if up.
+ final ServiceInstance<?> serviceInstance = getClusterControllerInstances(application, contentClusterId)
+ .stream()
+ .min(Comparator.comparing(instance -> getClusterControllerIndex(instance.configId())))
+ .orElseThrow(() ->
+ new IllegalArgumentException("No cluster controllers found in application " + application));
+ return serviceInstance.hostName();
+ }
+
+ /**
+ * A content cluster consists of many content-related service clusters, like distributor and storagenode.
+ * All of the service clusters within a content cluster have the same service cluster ID,
+ * which is also called the content ID (specified in services.xml) and also cluster name
+ * (terminology used in Cluster Controller). The cluster name is used when referring to
+ * content cluster resources through the HTTP REST on the Cluster Controller.
+ *
+ * There may be many content clusters within an application. But only one content cluster may be
+ * present on any single host,
+ *
+ * @return The cluster name managed by a Cluster Controller.
+ * @throws IllegalArgumentException if there is not exactly one content cluster name.
+ */
+ public static ClusterId getContentClusterName(ApplicationInstance<?> application, HostName hostName) {
+ Set<ClusterId> contentClusterIdsOnHost = application.serviceClusters().stream()
+ .filter(VespaModelUtil::isContent)
+ .filter(cluster -> clusterHasInstanceOnHost(cluster, hostName))
+ .map(ServiceCluster::clusterId)
+ .collect(Collectors.toSet());
+
+ if (contentClusterIdsOnHost.size() != 1) {
+ throw new IllegalArgumentException("Expected exactly one content cluster within application " +
+ application.applicationInstanceId() + " and host " + hostName + ", but found " +
+ contentClusterIdsOnHost.size() + ": " + contentClusterIdsOnHost + ", application: " +
+ application);
+ }
+
+ return contentClusterIdsOnHost.iterator().next();
+ }
+
+ private static boolean clusterHasInstanceOnHost(ServiceCluster<?> cluster, HostName hostName) {
+ return cluster.serviceInstances().stream().anyMatch(service -> Objects.equals(hostName, service.hostName()));
+ }
+
+ /**
+ * @return The node index of the storage node running on the host.
+ * @throws java.lang.IllegalArgumentException if there is not exactly one storage node running on the host,
+ * or if the index of that storage node could not be found.
+ */
+ public static <T> int getStorageNodeIndex(ApplicationInstance<T> application, HostName hostName) {
+ Optional<ServiceInstance<T>> storageNode = getStorageNodeAtHost(application, hostName);
+ if (!storageNode.isPresent()) {
+ throw new IllegalArgumentException("Failed to find a storage node for application " +
+ application.applicationInstanceId() + " at host " + hostName);
+ }
+
+ return getStorageNodeIndex(storageNode.get().configId());
+ }
+
+ public static <T> Optional<ServiceInstance<T>> getStorageNodeAtHost(
+ ApplicationInstance<T> application, HostName hostName) {
+ Set<ServiceInstance<T>> storageNodesOnHost = application.serviceClusters().stream()
+ .filter(VespaModelUtil::isStorage)
+ .flatMap(cluster -> cluster.serviceInstances().stream())
+ .filter(service -> service.hostName().equals(hostName))
+ .collect(Collectors.toSet());
+
+ if (storageNodesOnHost.isEmpty()) {
+ return Optional.empty();
+ }
+
+ if (storageNodesOnHost.size() > 1) {
+ throw new RuntimeException("Expected application " + application.applicationInstanceId() +
+ " to have exactly one storage node service on host " + hostName + " but got " +
+ storageNodesOnHost.size() + ": " + storageNodesOnHost);
+ }
+
+ return storageNodesOnHost.stream().findAny();
+ }
+
+ // See getClusterControllerIndex()
+ private static final Pattern CONTROLLER_INDEX_PATTERN =
+ Pattern.compile("admin/cluster-controllers/(\\d+)");
+
+ /**
+ * @param configId Must be of the form admin/cluster-controllers/2
+ * @return the Cluster Controller index given its config ID.
+ * @throws java.lang.IllegalArgumentException if the config ID is not of the proper format.
+ */
+ public static int getClusterControllerIndex(ConfigId configId) {
+ Matcher matcher = CONTROLLER_INDEX_PATTERN.matcher(configId.s());
+ if (!matcher.matches()) {
+ throw new IllegalArgumentException("Unable to extract cluster controller index from config ID " + configId);
+ }
+
+ return Integer.valueOf(matcher.group(1));
+ }
+
+ // See getStorageNodeIndex()
+ private static final Pattern STORAGE_NODE_INDEX_PATTERN = Pattern.compile(".*/(\\d+)");
+
+ /**
+ * @param configId Config ID is of the form "storage/storage/3", where 3 is the storage node index.
+ * @return The storage node index.
+ * @throws java.lang.IllegalArgumentException If configId does not have the required form.
+ */
+ public static int getStorageNodeIndex(ConfigId configId) {
+ Matcher matcher = STORAGE_NODE_INDEX_PATTERN.matcher(configId.s());
+ if (!matcher.matches()) {
+ throw new IllegalArgumentException("Unable to extract node index from config ID " + configId);
+ }
+
+ return Integer.valueOf(matcher.group(1));
+ }
+}
diff --git a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/controller/ClusterControllerClient.java b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/controller/ClusterControllerClient.java
new file mode 100644
index 00000000000..5352c81a7a1
--- /dev/null
+++ b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/controller/ClusterControllerClient.java
@@ -0,0 +1,24 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.orchestrator.controller;
+
+import java.io.IOException;
+
+/**
+ * @author <a href="mailto:bakksjo@yahoo-inc.com">Oyvind Bakksjo</a>
+ */
+public interface ClusterControllerClient {
+
+ /**
+ * Requests that a cluster controller sets the requested node to the requested state.
+ *
+ * @throws IOException if there was a problem communicating with the cluster controller
+ */
+ ClusterControllerStateResponse setNodeState(final int storageNodeIndex, final ClusterControllerState wantedState) throws IOException;
+
+ /**
+ * Requests that a cluster controller sets the requested node to the requested state.
+ *
+ * @throws IOException if there was a problem communicating with the cluster controller
+ */
+ ClusterControllerStateResponse setApplicationState(final ClusterControllerState wantedState) throws IOException;
+}
diff --git a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/controller/ClusterControllerClientFactory.java b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/controller/ClusterControllerClientFactory.java
new file mode 100644
index 00000000000..b30a3daab31
--- /dev/null
+++ b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/controller/ClusterControllerClientFactory.java
@@ -0,0 +1,13 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.orchestrator.controller;
+
+import com.yahoo.vespa.applicationmodel.ServiceInstance;
+
+import java.util.Collection;
+
+/**
+ * @author <a href="mailto:bakksjo@yahoo-inc.com">Oyvind Bakksjo</a>
+ */
+public interface ClusterControllerClientFactory {
+ ClusterControllerClient createClient(Collection<? extends ServiceInstance<?>> clusterControllers, String clusterName);
+}
diff --git a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/controller/ClusterControllerClientImpl.java b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/controller/ClusterControllerClientImpl.java
new file mode 100644
index 00000000000..79f91ff4255
--- /dev/null
+++ b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/controller/ClusterControllerClientImpl.java
@@ -0,0 +1,70 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.orchestrator.controller;
+
+import com.yahoo.vespa.jaxrs.client.JaxRsStrategy;
+
+import java.io.IOException;
+
+/**
+ * Default implementation of the ClusterControllerClient.
+ *
+ * @author <a href="mailto:smorgrav@yahoo-inc.com">Toby</a>
+ */
+public class ClusterControllerClientImpl implements ClusterControllerClient{
+ public static final String REQUEST_REASON = "Orchestrator";
+
+ private final JaxRsStrategy<ClusterControllerJaxRsApi> clusterControllerApi;
+ private final String clusterName;
+
+ public ClusterControllerClientImpl(
+ final JaxRsStrategy<ClusterControllerJaxRsApi> clusterControllerApi,
+ final String clusterName) {
+ this.clusterName = clusterName;
+ this.clusterControllerApi = clusterControllerApi;
+ }
+
+ /**
+ * Requests that a cluster controller sets the requested node to the requested state.
+ *
+ * @throws IOException if there was a problem communicating with the cluster controller
+ */
+ @Override
+ public ClusterControllerStateResponse setNodeState(final int storageNodeIndex, final ClusterControllerState wantedState) throws IOException {
+ final ClusterControllerStateRequest.State state = new ClusterControllerStateRequest.State(wantedState, REQUEST_REASON);
+ final ClusterControllerStateRequest stateRequest = new ClusterControllerStateRequest(state, ClusterControllerStateRequest.Condition.SAFE);
+
+ try {
+ return clusterControllerApi.apply(api ->
+ api.setNodeState(clusterName, storageNodeIndex, stateRequest));
+ } catch (IOException e) {
+ final String message = String.format(
+ "Giving up setting %s for cluster %s",
+ stateRequest,
+ clusterName);
+
+ throw new IOException(message, e);
+ }
+ }
+
+ /**
+ * Requests that a cluster controller sets the requested node to the requested state.
+ *
+ * @throws IOException if there was a problem communicating with the cluster controller
+ */
+ @Override
+ public ClusterControllerStateResponse setApplicationState(final ClusterControllerState wantedState) throws IOException {
+ final ClusterControllerStateRequest.State state = new ClusterControllerStateRequest.State(wantedState, REQUEST_REASON);
+ final ClusterControllerStateRequest stateRequest = new ClusterControllerStateRequest(state, ClusterControllerStateRequest.Condition.FORCE);
+
+ try {
+ return clusterControllerApi.apply(api -> api.setClusterState(clusterName,stateRequest));
+ } catch (IOException e) {
+ final String message = String.format(
+ "Giving up setting %s for cluster %s",
+ stateRequest,
+ clusterName);
+
+ throw new IOException(message, e);
+ }
+ }
+}
diff --git a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/controller/ClusterControllerJaxRsApi.java b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/controller/ClusterControllerJaxRsApi.java
new file mode 100644
index 00000000000..e49173d84bf
--- /dev/null
+++ b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/controller/ClusterControllerJaxRsApi.java
@@ -0,0 +1,31 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.orchestrator.controller;
+
+import javax.ws.rs.Consumes;
+import javax.ws.rs.POST;
+import javax.ws.rs.Path;
+import javax.ws.rs.PathParam;
+import javax.ws.rs.Produces;
+import javax.ws.rs.core.MediaType;
+
+/**
+ * @author hakon
+ */
+public interface ClusterControllerJaxRsApi {
+ @POST
+ @Path("/cluster/v2/{clusterName}/storage/{storageNodeIndex}")
+ @Consumes(MediaType.APPLICATION_JSON)
+ @Produces(MediaType.APPLICATION_JSON)
+ ClusterControllerStateResponse setNodeState(
+ @PathParam("clusterName") String clusterName,
+ @PathParam("storageNodeIndex") int storageNodeIndex,
+ ClusterControllerStateRequest request);
+
+ @POST
+ @Path("/cluster/v2/{clusterName}/storage")
+ @Consumes(MediaType.APPLICATION_JSON)
+ @Produces(MediaType.APPLICATION_JSON)
+ ClusterControllerStateResponse setClusterState(
+ @PathParam("clusterName") String clusterName,
+ ClusterControllerStateRequest request);
+}
diff --git a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/controller/ClusterControllerState.java b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/controller/ClusterControllerState.java
new file mode 100644
index 00000000000..58f1ef32b10
--- /dev/null
+++ b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/controller/ClusterControllerState.java
@@ -0,0 +1,28 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.orchestrator.controller;
+
+import com.fasterxml.jackson.annotation.JsonValue;
+
+/**
+ * This denotes the different states passable through the set-node-state API against the ClusterController.
+ * In the cluster controller, it maps to com.yahoo.vdslib.state.State. Consider using that instead (however
+ * that class is already fairly complicated, and may perhaps best be screened from JSON annotations - the only
+ * thing we need is the enum &lt; - &gt; String conversions).
+ *
+ * @author hakon
+ */
+public enum ClusterControllerState {
+ MAINTENANCE("maintenance"),
+ UP("up");
+
+ private final String wireName;
+
+ ClusterControllerState(final String wireName) {
+ this.wireName = wireName;
+ }
+
+ @JsonValue
+ public String getWireName() {
+ return wireName;
+ }
+}
diff --git a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/controller/ClusterControllerStateRequest.java b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/controller/ClusterControllerStateRequest.java
new file mode 100644
index 00000000000..870ba79158c
--- /dev/null
+++ b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/controller/ClusterControllerStateRequest.java
@@ -0,0 +1,95 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.orchestrator.controller;
+
+import com.fasterxml.jackson.annotation.JsonProperty;
+
+import java.util.Collections;
+import java.util.Map;
+import java.util.Objects;
+
+/**
+ * @author hakon
+ */
+public class ClusterControllerStateRequest {
+
+ @JsonProperty("state")
+ public final Map<String, State> state;
+
+ @JsonProperty("condition")
+ public final Condition condition;
+
+ public ClusterControllerStateRequest(State currentState, Condition condition) {
+ Map<String, State> state = Collections.singletonMap("user", currentState);
+ this.state = Collections.unmodifiableMap(state);
+ this.condition = condition;
+ }
+
+ @Override
+ public boolean equals(Object object) {
+ if (!(object instanceof ClusterControllerStateRequest)) {
+ return false;
+ }
+
+ final ClusterControllerStateRequest that = (ClusterControllerStateRequest) object;
+ return Objects.equals(this.state, that.state)
+ && Objects.equals(this.condition, that.condition);
+ }
+
+ @Override
+ public int hashCode() {
+ return Objects.hash(state, condition);
+ }
+
+ @Override
+ public String toString() {
+ return "NodeStateRequest {"
+ + " condition=" + condition
+ + " state=" + state
+ + " }";
+ }
+
+ public static class State {
+ @JsonProperty("state")
+ public final ClusterControllerState state;
+
+ /**
+ * The reason the client is making the request to set the node state.
+ * Useful for logging in the Cluster Controller.
+ */
+ @JsonProperty("reason")
+ public final String reason;
+
+ public State(ClusterControllerState state, String reason) {
+ this.state = state;
+ this.reason = reason;
+ }
+
+ @Override
+ public boolean equals(Object object) {
+ if (!(object instanceof State)) {
+ return false;
+ }
+
+ State that = (State) object;
+ return this.state.equals(that.state) &&
+ this.reason.equals(that.reason);
+ }
+
+ @Override
+ public int hashCode() {
+ int hash = 1;
+ hash = 17 * hash + state.hashCode();
+ hash = 13 * hash + reason.hashCode();
+ return hash;
+ }
+
+ @Override
+ public String toString() {
+ return "reason: " + reason.toString() + ", state: " + state.toString();
+ }
+ }
+
+ public enum Condition {
+ FORCE, SAFE;
+ }
+}
diff --git a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/controller/ClusterControllerStateResponse.java b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/controller/ClusterControllerStateResponse.java
new file mode 100644
index 00000000000..16d64566b8e
--- /dev/null
+++ b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/controller/ClusterControllerStateResponse.java
@@ -0,0 +1,25 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.orchestrator.controller;
+
+import com.fasterxml.jackson.annotation.JsonCreator;
+import com.fasterxml.jackson.annotation.JsonProperty;
+
+/**
+ * The response returned by the cluster controller's set-node-state APIs.
+ *
+ * @author hakon
+ */
+public class ClusterControllerStateResponse {
+ @JsonProperty("wasModified")
+ public final boolean wasModified;
+
+ @JsonProperty("reason")
+ public final String reason;
+
+ @JsonCreator
+ public ClusterControllerStateResponse(@JsonProperty("wasModified") boolean wasModified,
+ @JsonProperty("reason") String reason) {
+ this.wasModified = wasModified;
+ this.reason = reason;
+ }
+}
diff --git a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/controller/RetryingClusterControllerClientFactory.java b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/controller/RetryingClusterControllerClientFactory.java
new file mode 100644
index 00000000000..2c432f00a28
--- /dev/null
+++ b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/controller/RetryingClusterControllerClientFactory.java
@@ -0,0 +1,52 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.orchestrator.controller;
+
+import com.google.inject.Inject;
+import com.yahoo.vespa.applicationmodel.HostName;
+import com.yahoo.vespa.applicationmodel.ServiceInstance;
+import com.yahoo.vespa.jaxrs.client.JaxRsClientFactory;
+import com.yahoo.vespa.jaxrs.client.JaxRsStrategy;
+import com.yahoo.vespa.jaxrs.client.JaxRsStrategyFactory;
+import com.yahoo.vespa.jaxrs.client.JerseyJaxRsClientFactory;
+
+import java.util.Collection;
+import java.util.Set;
+import java.util.stream.Collectors;
+
+/**
+ * @author <a href="mailto:bakksjo@yahoo-inc.com">Oyvind Bakksjo</a>
+ */
+public class RetryingClusterControllerClientFactory implements ClusterControllerClientFactory {
+ // TODO: Figure this port out dynamically.
+ public static final int HARDCODED_CLUSTERCONTROLLER_PORT = 19050;
+ public static final String CLUSTERCONTROLLER_API_PATH = "/";
+ private static final int CLUSTER_CONTROLLER_CONNECT_TIMEOUT_MS = 1000;
+ private static final int CLUSTER_CONTROLLER_READ_TIMEOUT_MS = 1000;
+
+ private JaxRsClientFactory jaxRsClientFactory;
+
+ @Inject
+ public RetryingClusterControllerClientFactory() {
+ this(new JerseyJaxRsClientFactory(
+ CLUSTER_CONTROLLER_CONNECT_TIMEOUT_MS,
+ CLUSTER_CONTROLLER_READ_TIMEOUT_MS));
+ }
+
+ public RetryingClusterControllerClientFactory(
+ final JaxRsClientFactory jaxRsClientFactory) {
+ this.jaxRsClientFactory = jaxRsClientFactory;
+ }
+
+ @Override
+ public ClusterControllerClient createClient(
+ final Collection<? extends ServiceInstance<?>> clusterControllers,
+ final String clusterName) {
+ final Set<HostName> hostNames = clusterControllers.stream()
+ .map(ServiceInstance::hostName)
+ .collect(Collectors.toSet());
+ final JaxRsStrategy<ClusterControllerJaxRsApi> jaxRsApi
+ = new JaxRsStrategyFactory(hostNames, HARDCODED_CLUSTERCONTROLLER_PORT, jaxRsClientFactory)
+ .apiWithRetries(ClusterControllerJaxRsApi.class, CLUSTERCONTROLLER_API_PATH);
+ return new ClusterControllerClientImpl(jaxRsApi, clusterName);
+ }
+}
diff --git a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/controller/SingleInstanceClusterControllerClientFactory.java b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/controller/SingleInstanceClusterControllerClientFactory.java
new file mode 100644
index 00000000000..711d819e348
--- /dev/null
+++ b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/controller/SingleInstanceClusterControllerClientFactory.java
@@ -0,0 +1,60 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.orchestrator.controller;
+
+import com.yahoo.log.LogLevel;
+import com.yahoo.vespa.jaxrs.client.JaxRsClientFactory;
+import com.yahoo.vespa.jaxrs.client.JaxRsStrategy;
+import com.yahoo.vespa.jaxrs.client.NoRetryJaxRsStrategy;
+import com.yahoo.vespa.applicationmodel.HostName;
+import com.yahoo.vespa.applicationmodel.ServiceInstance;
+
+import java.util.Collection;
+import java.util.Comparator;
+import java.util.logging.Logger;
+
+import static com.yahoo.vespa.orchestrator.VespaModelUtil.getClusterControllerIndex;
+
+/**
+ * @author <a href="mailto:bakksjo@yahoo-inc.com">Oyvind Bakksjo</a>
+ */
+public class SingleInstanceClusterControllerClientFactory implements ClusterControllerClientFactory {
+ public static final int CLUSTERCONTROLLER_HARDCODED_PORT = 19050;
+ public static final String CLUSTERCONTROLLER_API_PATH = "/";
+
+ private static final Logger log = Logger.getLogger(SingleInstanceClusterControllerClientFactory.class.getName());
+
+ private static final Comparator<ServiceInstance<?>> CLUSTER_CONTROLLER_INDEX_COMPARATOR = Comparator.comparing(
+ serviceInstance ->
+ getClusterControllerIndex(serviceInstance.configId()));
+
+ private JaxRsClientFactory jaxRsClientFactory;
+
+ public SingleInstanceClusterControllerClientFactory(
+ final JaxRsClientFactory jaxRsClientFactory) {
+ this.jaxRsClientFactory = jaxRsClientFactory;
+ }
+
+ @Override
+ public ClusterControllerClient createClient(
+ final Collection<? extends ServiceInstance<?>> clusterControllers,
+ final String clusterName) {
+ final ServiceInstance<?> serviceInstance = clusterControllers.stream()
+ .min(CLUSTER_CONTROLLER_INDEX_COMPARATOR)
+ .orElseThrow(() -> new IllegalArgumentException("No cluster controller instances found"));
+ final HostName controllerHostName = serviceInstance.hostName();
+ final int port = CLUSTERCONTROLLER_HARDCODED_PORT; // TODO: Get this from service monitor.
+
+ log.log(LogLevel.DEBUG, () ->
+ "For cluster '" + clusterName + "' with controllers " + clusterControllers
+ + ", creating api client for " + controllerHostName.s() + ":" + port);
+
+ final JaxRsStrategy<ClusterControllerJaxRsApi> strategy = new NoRetryJaxRsStrategy<>(
+ controllerHostName,
+ port,
+ jaxRsClientFactory,
+ ClusterControllerJaxRsApi.class,
+ CLUSTERCONTROLLER_API_PATH);
+
+ return new ClusterControllerClientImpl(strategy, clusterName);
+ }
+}
diff --git a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/package-info.java b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/package-info.java
new file mode 100644
index 00000000000..f426d9266b1
--- /dev/null
+++ b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/package-info.java
@@ -0,0 +1,5 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+@ExportPackage
+package com.yahoo.vespa.orchestrator;
+
+import com.yahoo.osgi.annotation.ExportPackage;
diff --git a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/policy/BatchHostStateChangeDeniedException.java b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/policy/BatchHostStateChangeDeniedException.java
new file mode 100644
index 00000000000..affe9557013
--- /dev/null
+++ b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/policy/BatchHostStateChangeDeniedException.java
@@ -0,0 +1,17 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.orchestrator.policy;
+
+import com.yahoo.vespa.applicationmodel.HostName;
+import com.yahoo.vespa.orchestrator.OrchestrationException;
+
+import java.util.List;
+
+public class BatchHostStateChangeDeniedException extends OrchestrationException {
+ public BatchHostStateChangeDeniedException(HostName parentHostname,
+ List<HostName> orderedHostNames,
+ HostStateChangeDeniedException e) {
+ super("Failed to suspend " + orderedHostNames + " with parent host "
+ + parentHostname + ": " + e.getMessage(), e);
+
+ }
+}
diff --git a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/policy/HostStateChangeDeniedException.java b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/policy/HostStateChangeDeniedException.java
new file mode 100644
index 00000000000..6e12de9bfe7
--- /dev/null
+++ b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/policy/HostStateChangeDeniedException.java
@@ -0,0 +1,51 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.orchestrator.policy;
+
+import com.yahoo.vespa.applicationmodel.HostName;
+import com.yahoo.vespa.applicationmodel.ServiceType;
+import com.yahoo.vespa.orchestrator.OrchestrationException;
+
+/**
+ * @author <a href="mailto:bakksjo@yahoo-inc.com">Oyvind Bakksjo</a>
+ */
+public class HostStateChangeDeniedException extends OrchestrationException {
+ private final String constraintName;
+ private final ServiceType serviceType;
+
+ public HostStateChangeDeniedException(
+ final HostName hostName,
+ final String constraintName,
+ final ServiceType serviceType,
+ final String message) {
+ super(createMessage(hostName, constraintName, serviceType, message));
+ this.constraintName = constraintName;
+ this.serviceType = serviceType;
+ }
+
+ public HostStateChangeDeniedException(
+ final HostName hostName,
+ final String constraintName,
+ final ServiceType serviceType,
+ final String message,
+ final Throwable cause) {
+ super(createMessage(hostName, constraintName, serviceType, message), cause);
+ this.constraintName = constraintName;
+ this.serviceType = serviceType;
+ }
+
+ private static String createMessage(final HostName hostName,
+ final String constraintName,
+ final ServiceType serviceType,
+ final String message) {
+ return "Changing the state of host " + hostName + " would violate " + constraintName
+ + " for service type " + serviceType + ": " + message;
+ }
+
+ public String getConstraintName() {
+ return constraintName;
+ }
+
+ public ServiceType getServiceType() {
+ return serviceType;
+ }
+}
diff --git a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/policy/HostedVespaPolicy.java b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/policy/HostedVespaPolicy.java
new file mode 100644
index 00000000000..88674301b21
--- /dev/null
+++ b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/policy/HostedVespaPolicy.java
@@ -0,0 +1,232 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.orchestrator.policy;
+
+import com.yahoo.log.LogLevel;
+import com.yahoo.vespa.applicationmodel.ApplicationInstance;
+import com.yahoo.vespa.applicationmodel.ClusterId;
+import com.yahoo.vespa.applicationmodel.HostName;
+import com.yahoo.vespa.applicationmodel.ServiceCluster;
+import com.yahoo.vespa.applicationmodel.ServiceInstance;
+import com.yahoo.vespa.orchestrator.VespaModelUtil;
+import com.yahoo.vespa.orchestrator.controller.ClusterControllerClient;
+import com.yahoo.vespa.orchestrator.controller.ClusterControllerClientFactory;
+import com.yahoo.vespa.orchestrator.controller.ClusterControllerState;
+import com.yahoo.vespa.orchestrator.controller.ClusterControllerStateResponse;
+import com.yahoo.vespa.orchestrator.status.HostStatus;
+import com.yahoo.vespa.orchestrator.status.MutableStatusRegistry;
+import com.yahoo.vespa.service.monitor.ServiceMonitorStatus;
+
+import java.io.IOException;
+import java.util.Collections;
+import java.util.HashSet;
+import java.util.Map;
+import java.util.Set;
+import java.util.logging.Logger;
+import java.util.stream.Collectors;
+
+import static com.yahoo.vespa.orchestrator.OrchestratorUtil.getHostStatusMap;
+import static com.yahoo.vespa.orchestrator.OrchestratorUtil.getHostsUsedByApplicationInstance;
+import static com.yahoo.vespa.orchestrator.OrchestratorUtil.getServiceClustersUsingHost;
+
+/**
+ * @author oyving
+ */
+
+public class HostedVespaPolicy implements Policy {
+
+ public static final String ENOUGH_SERVICES_UP_CONSTRAINT = "enough-services-up";
+ public static final String SET_NODE_STATE_CONSTRAINT = "controller-set-node-state";
+ public static final String CLUSTER_CONTROLLER_AVAILABLE_CONSTRAINT = "controller-available";
+
+ private static final Logger log = Logger.getLogger(HostedVespaPolicy.class.getName());
+
+ private final ClusterControllerClientFactory clusterControllerClientFactory;
+
+ public HostedVespaPolicy(ClusterControllerClientFactory clusterControllerClientFactory) {
+ this.clusterControllerClientFactory = clusterControllerClientFactory;
+ }
+
+ private static long numContentServiceClusters(Set<? extends ServiceCluster<?>> serviceClustersOnHost) {
+ return serviceClustersOnHost.stream().filter(VespaModelUtil::isContent).count();
+ }
+
+
+ @Override
+ public void grantSuspensionRequest(
+ final ApplicationInstance<ServiceMonitorStatus> applicationInstance,
+ final HostName hostName,
+ final MutableStatusRegistry hostStatusService) throws HostStateChangeDeniedException {
+
+ Set<ServiceCluster<ServiceMonitorStatus>> serviceClustersOnHost =
+ getServiceClustersUsingHost(applicationInstance.serviceClusters(), hostName);
+
+ final Map<HostName, HostStatus> hostStatusMap = getHostStatusMap(
+ getHostsUsedByApplicationInstance(applicationInstance),
+ hostStatusService);
+
+ boolean hasUpStorageInstance = false;
+ for (final ServiceCluster<ServiceMonitorStatus> serviceCluster : serviceClustersOnHost) {
+ final Set<ServiceInstance<ServiceMonitorStatus>> instancesOnThisHost;
+ final Set<ServiceInstance<ServiceMonitorStatus>> instancesOnOtherHosts;
+ {
+ final Map<Boolean, Set<ServiceInstance<ServiceMonitorStatus>>> serviceInstancesByLocality =
+ serviceCluster.serviceInstances().stream()
+ .collect(
+ Collectors.groupingBy(
+ instance -> instance.hostName().equals(hostName),
+ Collectors.toSet()));
+ instancesOnThisHost = serviceInstancesByLocality.getOrDefault(true, Collections.emptySet());
+ instancesOnOtherHosts = serviceInstancesByLocality.getOrDefault(false, Collections.emptySet());
+ }
+
+ if (VespaModelUtil.isStorage(serviceCluster)) {
+ final boolean thisHostHasSomeUpInstances = instancesOnThisHost.stream()
+ .map(ServiceInstance::serviceStatus)
+ .anyMatch(status -> status == ServiceMonitorStatus.UP);
+ if (thisHostHasSomeUpInstances) {
+ hasUpStorageInstance = true;
+ }
+ }
+
+ final boolean thisHostHasOnlyDownInstances = instancesOnThisHost.stream()
+ .map(ServiceInstance::serviceStatus)
+ .allMatch(status -> status == ServiceMonitorStatus.DOWN);
+ if (thisHostHasOnlyDownInstances) {
+ // Suspending this host will not make a difference for this cluster, so no need to investigate further.
+ continue;
+ }
+
+ final Set<ServiceInstance<ServiceMonitorStatus>> possiblyDownInstancesOnOtherHosts =
+ instancesOnOtherHosts.stream()
+ .filter(instance -> effectivelyDown(instance, hostStatusMap))
+ .collect(Collectors.toSet());
+ if (possiblyDownInstancesOnOtherHosts.isEmpty()) {
+ // This short-circuits the percentage calculation below and ensures that we can always upgrade
+ // any cluster by allowing one host at the time to be suspended, no matter what percentage of
+ // the cluster that host amounts to.
+ continue;
+ }
+
+ // Now calculate what the service suspension percentage will be if we suspend this host.
+ final int numServiceInstancesTotal = serviceCluster.serviceInstances().size();
+ final int numInstancesThatWillBeSuspended = union(possiblyDownInstancesOnOtherHosts, instancesOnThisHost).size();
+ final int percentThatWillBeSuspended = numInstancesThatWillBeSuspended * 100 / numServiceInstancesTotal;
+ final int suspendPercentageAllowed = ServiceClusterSuspendPolicy.getSuspendPercentageAllowed(serviceCluster);
+ if (percentThatWillBeSuspended > suspendPercentageAllowed) {
+ // It may seem like this may in some cases prevent upgrading, especially for small clusters (where the
+ // percentage of service instances affected by suspending a single host may easily exceed the allowed
+ // suspension percentage). Note that we always allow progress by allowing a single host to suspend.
+ // See previous section.
+ final int currentSuspensionPercentage
+ = possiblyDownInstancesOnOtherHosts.size() * 100 / numServiceInstancesTotal;
+ final Set<HostName> otherHostsWithThisServiceCluster = instancesOnOtherHosts.stream()
+ .map(ServiceInstance::hostName)
+ .collect(Collectors.toSet());
+ final Set<HostName> hostsAllowedToBeDown = hostStatusMap.entrySet().stream()
+ .filter(entry -> entry.getValue() == HostStatus.ALLOWED_TO_BE_DOWN)
+ .map(Map.Entry::getKey)
+ .collect(Collectors.toSet());
+ final Set<HostName> otherHostsAllowedToBeDown
+ = intersection(otherHostsWithThisServiceCluster, hostsAllowedToBeDown);
+ throw new HostStateChangeDeniedException(
+ hostName,
+ ENOUGH_SERVICES_UP_CONSTRAINT,
+ serviceCluster.serviceType(),
+ "Suspension percentage would increase from " + currentSuspensionPercentage
+ + "% to " + percentThatWillBeSuspended
+ + "%, over the limit of " + suspendPercentageAllowed + "%."
+ + " These instances may be down: " + possiblyDownInstancesOnOtherHosts
+ + " and these hosts are allowed to be down: " + otherHostsAllowedToBeDown
+ );
+ }
+ }
+
+ if (hasUpStorageInstance) {
+ // If there is an UP storage service on the host, we need to make sure
+ // there's sufficient redundancy before allowing the suspension. This will
+ // also avoid redistribution (which is unavoidable if the storage instance
+ // is already down).
+ setNodeStateInController(applicationInstance, hostName, ClusterControllerState.MAINTENANCE);
+ }
+
+
+ // We have "go" for suspending the services on the host,store decision.
+ hostStatusService.setHostState(hostName, HostStatus.ALLOWED_TO_BE_DOWN);
+ log.log(LogLevel.INFO, hostName + " is now allowed to be down (suspended)");
+ }
+
+ private static <T> Set<T> union(final Set<T> setA, Set<T> setB) {
+ final Set<T> union = new HashSet<>(setA);
+ union.addAll(setB);
+ return union;
+ }
+
+ private static <T> Set<T> intersection(final Set<T> setA, Set<T> setB) {
+ final Set<T> intersection = new HashSet<>(setA);
+ intersection.retainAll(setB);
+ return intersection;
+ }
+
+ @Override
+ public void releaseSuspensionGrant(
+ final ApplicationInstance<ServiceMonitorStatus> applicationInstance,
+ final HostName hostName,
+ final MutableStatusRegistry hostStatusService) throws HostStateChangeDeniedException {
+ Set<ServiceCluster<ServiceMonitorStatus>> serviceClustersOnHost =
+ getServiceClustersUsingHost(applicationInstance.serviceClusters(), hostName);
+
+ // TODO: Always defer to Cluster Controller whether it's OK to resume host (if content node).
+ if (numContentServiceClusters(serviceClustersOnHost) > 0) {
+ setNodeStateInController(applicationInstance, hostName, ClusterControllerState.UP);
+ }
+ hostStatusService.setHostState(hostName, HostStatus.NO_REMARKS);
+ log.log(LogLevel.INFO, hostName + " is no longer allowed to be down (resumed)");
+ }
+
+ private static boolean effectivelyDown(
+ final ServiceInstance<ServiceMonitorStatus> serviceInstance,
+ final Map<HostName, HostStatus> hostStatusMap) {
+ final ServiceMonitorStatus instanceStatus = serviceInstance.serviceStatus();
+ final HostStatus hostStatus = hostStatusMap.get(serviceInstance.hostName());
+ return hostStatus == HostStatus.ALLOWED_TO_BE_DOWN || instanceStatus == ServiceMonitorStatus.DOWN;
+ }
+
+ private void setNodeStateInController(
+ ApplicationInstance<?> application,
+ HostName hostName,
+ ClusterControllerState nodeState) throws HostStateChangeDeniedException {
+ ClusterId contentClusterId = VespaModelUtil.getContentClusterName(application, hostName);
+ Set<? extends ServiceInstance<?>> clusterControllers = VespaModelUtil.getClusterControllerInstances(application, contentClusterId);
+ final ClusterControllerClient client = clusterControllerClientFactory.createClient(
+ clusterControllers,
+ contentClusterId.s());
+ int nodeIndex = VespaModelUtil.getStorageNodeIndex(application, hostName);
+
+ log.log(LogLevel.DEBUG,
+ "application " + application.applicationInstanceId() +
+ ", host " + hostName +
+ ", cluster name " + contentClusterId +
+ ", node index " + nodeIndex +
+ ", node state " + nodeState);
+
+ final ClusterControllerStateResponse response;
+ try {
+ response = client.setNodeState(nodeIndex, nodeState);
+ } catch (IOException e) {
+ throw new HostStateChangeDeniedException(
+ hostName,
+ CLUSTER_CONTROLLER_AVAILABLE_CONSTRAINT,
+ VespaModelUtil.CLUSTER_CONTROLLER_SERVICE_TYPE,
+ "Failed to communicate with cluster controllers " + clusterControllers,
+ e);
+ }
+
+ if (!response.wasModified) {
+ throw new HostStateChangeDeniedException(
+ hostName,
+ SET_NODE_STATE_CONSTRAINT,
+ VespaModelUtil.CLUSTER_CONTROLLER_SERVICE_TYPE,
+ "Failed to set state to " + nodeState + " in controller: " + response.reason);
+ }
+ }
+}
diff --git a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/policy/Policy.java b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/policy/Policy.java
new file mode 100644
index 00000000000..53992436ea5
--- /dev/null
+++ b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/policy/Policy.java
@@ -0,0 +1,32 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.orchestrator.policy;
+
+import com.yahoo.vespa.orchestrator.status.MutableStatusRegistry;
+import com.yahoo.vespa.applicationmodel.ApplicationInstance;
+import com.yahoo.vespa.applicationmodel.HostName;
+import com.yahoo.vespa.service.monitor.ServiceMonitorStatus;
+
+/**
+ * @author oyving
+ */
+public interface Policy {
+ /**
+ * Decide whether to grant a request for temporarily suspending the services on a host.
+ *
+ * @throws HostStateChangeDeniedException if the grant was not given.
+ */
+ void grantSuspensionRequest(
+ ApplicationInstance<ServiceMonitorStatus> applicationInstance,
+ HostName hostName,
+ MutableStatusRegistry hostStatusService) throws HostStateChangeDeniedException;
+
+ /**
+ * Release an earlier grant for suspension.
+ *
+ * @throws HostStateChangeDeniedException if the release failed.
+ */
+ void releaseSuspensionGrant(
+ ApplicationInstance<ServiceMonitorStatus> applicationInstance,
+ HostName hostName,
+ MutableStatusRegistry hostStatusService) throws HostStateChangeDeniedException;
+}
diff --git a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/policy/ServiceClusterSuspendPolicy.java b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/policy/ServiceClusterSuspendPolicy.java
new file mode 100644
index 00000000000..b2fdd7878d3
--- /dev/null
+++ b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/policy/ServiceClusterSuspendPolicy.java
@@ -0,0 +1,33 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.orchestrator.policy;
+
+import com.yahoo.vespa.orchestrator.VespaModelUtil;
+import com.yahoo.vespa.applicationmodel.ServiceCluster;
+
+/**
+ * @author hakon
+ * @author bakksjo
+ */
+public final class ServiceClusterSuspendPolicy {
+ private static final int SUSPENSION_ALLOW_MINIMAL = 0;
+ private static final int SUSPENSION_ALLOW_TEN_PERCENT = 10;
+ private static final int SUSPENSION_ALLOW_ALL = 100;
+
+ private ServiceClusterSuspendPolicy() {} // Disallow instantiation.
+
+ public static int getSuspendPercentageAllowed(final ServiceCluster<?> serviceCluster) {
+ if (VespaModelUtil.ADMIN_CLUSTER_ID.equals(serviceCluster.clusterId())) {
+ if (VespaModelUtil.SLOBROK_SERVICE_TYPE.equals(serviceCluster.serviceType())) {
+ return SUSPENSION_ALLOW_MINIMAL;
+ }
+
+ return SUSPENSION_ALLOW_ALL;
+ }
+
+ if (VespaModelUtil.isStorage(serviceCluster)) {
+ return SUSPENSION_ALLOW_MINIMAL;
+ }
+
+ return SUSPENSION_ALLOW_TEN_PERCENT;
+ }
+}
diff --git a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/resources/ApplicationSuspensionResource.java b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/resources/ApplicationSuspensionResource.java
new file mode 100644
index 00000000000..f3d87db5d20
--- /dev/null
+++ b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/resources/ApplicationSuspensionResource.java
@@ -0,0 +1,121 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.orchestrator.resources;
+
+import com.yahoo.config.provision.ApplicationId;
+import com.yahoo.container.jaxrs.annotation.Component;
+import com.yahoo.log.LogLevel;
+import com.yahoo.vespa.orchestrator.ApplicationIdNotFoundException;
+import com.yahoo.vespa.orchestrator.ApplicationStateChangeDeniedException;
+import com.yahoo.vespa.orchestrator.OrchestratorImpl;
+import com.yahoo.vespa.orchestrator.restapi.ApplicationSuspensionApi;
+import com.yahoo.vespa.orchestrator.status.ApplicationInstanceStatus;
+
+import javax.inject.Inject;
+import javax.ws.rs.BadRequestException;
+import javax.ws.rs.InternalServerErrorException;
+import javax.ws.rs.NotFoundException;
+import javax.ws.rs.Path;
+import javax.ws.rs.WebApplicationException;
+import javax.ws.rs.core.Response;
+import java.util.Set;
+import java.util.logging.Logger;
+import java.util.stream.Collectors;
+
+/**
+ * @author <a href="mailto:smorgrav@yahoo-inc.com">Toby</a>
+ */
+@Path(ApplicationSuspensionApi.PATH_PREFIX)
+public class ApplicationSuspensionResource implements ApplicationSuspensionApi {
+ private static final Logger log = Logger.getLogger(ApplicationSuspensionResource.class.getName());
+
+ private final OrchestratorImpl orchestrator;
+
+ @Inject
+ public ApplicationSuspensionResource(
+ @Component OrchestratorImpl orchestrator) {
+ this.orchestrator = orchestrator;
+ }
+
+ @Override
+ public Set<String> getApplications() {
+ Set<ApplicationId> refs = orchestrator.getAllSuspendedApplications();
+ return refs.stream().map(ApplicationId::serializedForm).collect(Collectors.toSet());
+ }
+
+ @Override
+ public void getApplication(String applicationIdString) {
+ ApplicationId appId = toApplicationId(applicationIdString);
+ ApplicationInstanceStatus status;
+
+ try {
+ status = orchestrator.getApplicationInstanceStatus(appId);
+ } catch (ApplicationIdNotFoundException e) {
+ throw new NotFoundException("Application " + applicationIdString + " could not be found");
+ }
+
+ if (status.equals(ApplicationInstanceStatus.NO_REMARKS)) {
+ throw new NotFoundException("Application " + applicationIdString + " is not suspended");
+ }
+
+ // Return void as we have nothing to return except 204 No
+ // Content. Unfortunately, Jersey outputs a warning for this case:
+ //
+ // The following warnings have been detected: HINT: A HTTP GET
+ // method, public void com.yahoo.vespa.orchestrator.resources.
+ // ApplicationSuspensionResource.getApplication(java.lang.String),
+ // returns a void type. It can be intentional and perfectly fine,
+ // but it is a little uncommon that GET method returns always "204
+ // No Content"
+ //
+ // We have whitelisted the warning for our systemtests.
+ //
+ // bakksjo has a pending jersey PR fix that avoids making the hint
+ // become a warning:
+ // https://github.com/jersey/jersey/pull/212
+ //
+ // TODO: Remove whitelisting and this comment once jersey has been
+ // fixed.
+ }
+
+ @Override
+ public void suspend(String applicationIdString) {
+ ApplicationId applicationId = toApplicationId(applicationIdString);
+ try {
+ orchestrator.suspend(applicationId);
+ } catch (ApplicationIdNotFoundException e) {
+ log.log(LogLevel.INFO, "ApplicationId " + applicationIdString + " not found.", e);
+ throw new NotFoundException(e);
+ } catch (ApplicationStateChangeDeniedException e) {
+ log.log(LogLevel.INFO, "Suspend for " + applicationIdString + " failed.", e);
+ throw new WebApplicationException(Response.Status.CONFLICT);
+ } catch (RuntimeException e) {
+ log.log(LogLevel.INFO, "Suspend for " + applicationIdString + " failed from unknown reasons", e);
+ throw new InternalServerErrorException(e);
+ }
+ }
+
+ @Override
+ public void resume(String applicationIdString) {
+ ApplicationId applicationId = toApplicationId(applicationIdString);
+ try {
+ orchestrator.resume(applicationId);
+ } catch (ApplicationIdNotFoundException e) {
+ log.log(LogLevel.INFO, "ApplicationId " + applicationIdString + " not found.", e);
+ throw new NotFoundException(e);
+ } catch (ApplicationStateChangeDeniedException e) {
+ log.log(LogLevel.INFO, "Suspend for " + applicationIdString + " failed.", e);
+ throw new WebApplicationException(Response.Status.CONFLICT);
+ } catch (RuntimeException e) {
+ log.log(LogLevel.INFO, "Suspend for " + applicationIdString + " failed from unknown reasons", e);
+ throw new InternalServerErrorException(e);
+ }
+ }
+
+ private ApplicationId toApplicationId(String applicationIdString) {
+ try {
+ return ApplicationId.fromSerializedForm(null, applicationIdString);
+ } catch (IllegalArgumentException e) {
+ throw new BadRequestException(e);
+ }
+ }
+}
diff --git a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/resources/HostResource.java b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/resources/HostResource.java
new file mode 100644
index 00000000000..63689037a12
--- /dev/null
+++ b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/resources/HostResource.java
@@ -0,0 +1,94 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.orchestrator.resources;
+
+import com.yahoo.container.jaxrs.annotation.Component;
+import com.yahoo.log.LogLevel;
+import com.yahoo.vespa.orchestrator.HostNameNotFoundException;
+import com.yahoo.vespa.orchestrator.Orchestrator;
+import com.yahoo.vespa.orchestrator.OrchestratorImpl;
+import com.yahoo.vespa.orchestrator.policy.HostStateChangeDeniedException;
+import com.yahoo.vespa.orchestrator.restapi.wire.GetHostResponse;
+import com.yahoo.vespa.orchestrator.restapi.HostApi;
+import com.yahoo.vespa.orchestrator.restapi.wire.HostStateChangeDenialReason;
+import com.yahoo.vespa.orchestrator.restapi.wire.UpdateHostResponse;
+import com.yahoo.vespa.orchestrator.status.HostStatus;
+import com.yahoo.vespa.applicationmodel.HostName;
+import org.apache.commons.lang.exception.ExceptionUtils;
+
+import java.util.logging.Logger;
+
+import javax.inject.Inject;
+import javax.ws.rs.NotFoundException;
+import javax.ws.rs.Path;
+import javax.ws.rs.WebApplicationException;
+import javax.ws.rs.core.MediaType;
+import javax.ws.rs.core.Response;
+
+/**
+ * @author oyving
+ */
+@Path(HostApi.PATH_PREFIX)
+public class HostResource implements HostApi {
+ private static final Logger log = Logger.getLogger(HostResource.class.getName());
+
+ private final Orchestrator orchestrator;
+
+ @Inject
+ public HostResource(@Component Orchestrator orchestrator) {
+ this.orchestrator = orchestrator;
+ }
+
+ @Override
+ public GetHostResponse getHost(final String hostNameString) {
+ final HostName hostName = new HostName(hostNameString);
+ try {
+ HostStatus status = orchestrator.getNodeStatus(hostName);
+ return new GetHostResponse(hostName.s(), status.name());
+ } catch (HostNameNotFoundException e) {
+ throw new NotFoundException(e);
+ }
+ }
+
+ @Override
+ public UpdateHostResponse suspend(final String hostNameString) {
+ final HostName hostName = new HostName(hostNameString);
+ try {
+ orchestrator.suspend(hostName);
+ } catch (HostNameNotFoundException e) {
+ throw new NotFoundException(e);
+ } catch (HostStateChangeDeniedException e) {
+ log.log(LogLevel.DEBUG, "Suspend for " + hostName + " failed.", e);
+ throw webExceptionWithDenialReason(hostName, e);
+ }
+ return new UpdateHostResponse(hostName.s(), null);
+ }
+
+ @Override
+ public UpdateHostResponse resume(final String hostNameString) {
+ final HostName hostName = new HostName(hostNameString);
+ try {
+ orchestrator.resume(hostName);
+ } catch (HostNameNotFoundException e) {
+ throw new NotFoundException(e);
+ } catch (HostStateChangeDeniedException e) {
+ log.log(LogLevel.DEBUG, "Resume for " + hostName + " failed.", e);
+ throw webExceptionWithDenialReason(hostName, e);
+ }
+ return new UpdateHostResponse(hostName.s(), null);
+ }
+
+ private static WebApplicationException webExceptionWithDenialReason(HostName hostName, HostStateChangeDeniedException e) {
+ final HostStateChangeDenialReason hostStateChangeDenialReason = new HostStateChangeDenialReason(
+ e.getConstraintName(), e.getServiceType().s(), e.getMessage());
+ final UpdateHostResponse response = new UpdateHostResponse(hostName.s(), hostStateChangeDenialReason);
+ return new WebApplicationException(
+ hostStateChangeDenialReason.toString(),
+ e,
+ Response.status(Response.Status.CONFLICT)
+ .entity(response)
+ .type(MediaType.APPLICATION_JSON_TYPE)
+ .build());
+
+ }
+}
+
diff --git a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/resources/HostSuspensionResource.java b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/resources/HostSuspensionResource.java
new file mode 100644
index 00000000000..855217c298a
--- /dev/null
+++ b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/resources/HostSuspensionResource.java
@@ -0,0 +1,77 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.orchestrator.resources;
+
+import com.yahoo.container.jaxrs.annotation.Component;
+import com.yahoo.log.LogLevel;
+import com.yahoo.vespa.applicationmodel.HostName;
+import com.yahoo.vespa.orchestrator.BatchHostNameNotFoundException;
+import com.yahoo.vespa.orchestrator.BatchInternalErrorException;
+import com.yahoo.vespa.orchestrator.Orchestrator;
+import com.yahoo.vespa.orchestrator.policy.BatchHostStateChangeDeniedException;
+import com.yahoo.vespa.orchestrator.restapi.HostSuspensionApi;
+import com.yahoo.vespa.orchestrator.restapi.wire.BatchHostSuspendRequest;
+import com.yahoo.vespa.orchestrator.restapi.wire.BatchOperationResult;
+
+import javax.inject.Inject;
+import javax.ws.rs.Path;
+import javax.ws.rs.WebApplicationException;
+import javax.ws.rs.core.MediaType;
+import javax.ws.rs.core.Response;
+import java.util.List;
+import java.util.logging.Logger;
+import java.util.stream.Collectors;
+
+@Path(HostSuspensionApi.PATH_PREFIX)
+public class HostSuspensionResource implements HostSuspensionApi {
+ private static final Logger log = Logger.getLogger(HostSuspensionResource.class.getName());
+
+ private final Orchestrator orchestrator;
+
+ @Inject
+ public HostSuspensionResource(@Component Orchestrator orchestrator) {
+ this.orchestrator = orchestrator;
+ }
+
+ @Override
+ public BatchOperationResult suspendAll(BatchHostSuspendRequest request) throws WebApplicationException {
+ if (!request.getParentHostname().isPresent()) {
+ String message = "parentHostname missing in request: " + request;
+ log.log(LogLevel.DEBUG, message);
+ throw createWebApplicationException(message, Response.Status.BAD_REQUEST);
+ }
+ HostName parentHostname = new HostName(request.getParentHostname().get());
+
+ if (!request.getHostnames().isPresent()) {
+ String message = "hostnames missing in request: " + request;
+ log.log(LogLevel.DEBUG, message);
+ throw createWebApplicationException(message, Response.Status.BAD_REQUEST);
+ }
+ List<HostName> hostNames = request.getHostnames().get().stream().map(HostName::new).collect(Collectors.toList());
+
+ try {
+ orchestrator.suspendAll(parentHostname, hostNames);
+ } catch (BatchHostStateChangeDeniedException e) {
+ log.log(LogLevel.DEBUG, "Failed to suspend nodes " + hostNames + " with parent host " + parentHostname, e);
+ throw createWebApplicationException(e.getMessage(), Response.Status.CONFLICT);
+ } catch (BatchHostNameNotFoundException e) {
+ log.log(LogLevel.DEBUG, "Failed to suspend nodes " + hostNames + " with parent host " + parentHostname, e);
+ // Note that we're returning BAD_REQUEST instead of NOT_FOUND because the resource identified
+ // by the URL path was found. It's one of the hostnames in the request it failed to find.
+ throw createWebApplicationException(e.getMessage(), Response.Status.BAD_REQUEST);
+ } catch (BatchInternalErrorException e) {
+ log.log(LogLevel.DEBUG, "Failed to suspend nodes " + hostNames + " with parent host " + parentHostname, e);
+ throw createWebApplicationException(e.getMessage(), Response.Status.INTERNAL_SERVER_ERROR);
+ }
+
+ log.log(LogLevel.DEBUG, "Suspended " + hostNames + " with parent " + parentHostname);
+ return BatchOperationResult.successResult();
+ }
+
+ private WebApplicationException createWebApplicationException(String errorMessage, Response.Status status) {
+ return new WebApplicationException(
+ Response.status(status)
+ .entity(new BatchOperationResult(errorMessage))
+ .type(MediaType.APPLICATION_JSON_TYPE)
+ .build());
+ }
+}
diff --git a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/resources/InstanceResource.java b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/resources/InstanceResource.java
new file mode 100644
index 00000000000..8b3862207b9
--- /dev/null
+++ b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/resources/InstanceResource.java
@@ -0,0 +1,81 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.orchestrator.resources;
+
+import com.yahoo.container.jaxrs.annotation.Component;
+import com.yahoo.vespa.orchestrator.InstanceLookupService;
+import com.yahoo.vespa.orchestrator.OrchestratorUtil;
+import com.yahoo.vespa.orchestrator.status.HostStatus;
+import com.yahoo.vespa.orchestrator.status.StatusService;
+import com.yahoo.vespa.applicationmodel.ApplicationInstance;
+import com.yahoo.vespa.applicationmodel.ApplicationInstanceReference;
+import com.yahoo.vespa.applicationmodel.HostName;
+import com.yahoo.vespa.service.monitor.ServiceMonitorStatus;
+
+import javax.inject.Inject;
+import javax.ws.rs.GET;
+import javax.ws.rs.Path;
+import javax.ws.rs.PathParam;
+import javax.ws.rs.Produces;
+import javax.ws.rs.WebApplicationException;
+import javax.ws.rs.core.MediaType;
+import javax.ws.rs.core.Response;
+
+import java.util.Map;
+import java.util.Set;
+
+import static com.yahoo.vespa.orchestrator.OrchestratorUtil.getHostStatusMap;
+import static com.yahoo.vespa.orchestrator.OrchestratorUtil.getHostsUsedByApplicationInstance;
+import static com.yahoo.vespa.orchestrator.OrchestratorUtil.parseAppInstanceReference;
+
+/**
+ * Provides a read-only API for looking into the current state as seen by the Orchestrator.
+ * This API can be unstable and is not meant to be used programmatically.
+ *
+ * @author andreer
+ * @author bakksjo
+ */
+@Path("/v1/instances")
+public class InstanceResource {
+
+ private final StatusService statusService;
+ private final InstanceLookupService instanceLookupService;
+
+ @Inject
+ public InstanceResource(
+ @Component final InstanceLookupService instanceLookupService,
+ @Component final StatusService statusService) {
+ this.instanceLookupService = instanceLookupService;
+ this.statusService = statusService;
+ }
+
+ @GET
+ @Produces(MediaType.APPLICATION_JSON)
+ public Set<ApplicationInstanceReference> getAllInstances() {
+ return instanceLookupService.knownInstances();
+ }
+
+ @GET
+ @Path("/{instanceId}")
+ @Produces(MediaType.APPLICATION_JSON)
+ public InstanceStatusResponse getInstance(@PathParam("instanceId") String instanceIdString) {
+ final ApplicationInstanceReference instanceId;
+ try {
+ instanceId = parseAppInstanceReference(instanceIdString);
+ } catch (IllegalArgumentException e) {
+ throw new WebApplicationException(Response.status(Response.Status.BAD_REQUEST).build());
+ }
+
+ final ApplicationInstance<ServiceMonitorStatus> applicationInstance
+ = instanceLookupService.findInstanceById(instanceId)
+ .orElseThrow(() -> new WebApplicationException(Response.status(Response.Status.NOT_FOUND).build()));
+
+ final Set<HostName> hostsUsedByApplicationInstance = getHostsUsedByApplicationInstance(applicationInstance);
+ final Map<HostName, HostStatus> hostStatusMap = getHostStatusMap(
+ hostsUsedByApplicationInstance,
+ statusService.forApplicationInstance(instanceId));
+ final Map<HostName, String> hostStatusStringMap = OrchestratorUtil.mapValues(
+ hostStatusMap,
+ HostStatus::name);
+ return InstanceStatusResponse.create(applicationInstance, hostStatusStringMap);
+ }
+}
diff --git a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/resources/InstanceStatusResponse.java b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/resources/InstanceStatusResponse.java
new file mode 100644
index 00000000000..c38b8ac0437
--- /dev/null
+++ b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/resources/InstanceStatusResponse.java
@@ -0,0 +1,31 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.orchestrator.resources;
+
+import com.fasterxml.jackson.annotation.JsonProperty;
+import com.google.auto.value.AutoValue;
+
+import com.yahoo.vespa.applicationmodel.ApplicationInstance;
+import com.yahoo.vespa.applicationmodel.HostName;
+import com.yahoo.vespa.service.monitor.ServiceMonitorStatus;
+
+import java.util.Map;
+
+/*
+ * @author andreer
+ */
+@AutoValue
+public abstract class InstanceStatusResponse {
+
+ @JsonProperty("applicationInstance")
+ public abstract ApplicationInstance<ServiceMonitorStatus> applicationInstance();
+
+ @JsonProperty("hostStates")
+ public abstract Map<HostName, String> hostStates();
+
+ public static InstanceStatusResponse create(
+ ApplicationInstance<ServiceMonitorStatus> applicationInstance,
+ Map<HostName, String> hostStates) {
+ return new AutoValue_InstanceStatusResponse(applicationInstance, hostStates);
+ }
+
+}
diff --git a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/status/ApplicationInstanceStatus.java b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/status/ApplicationInstanceStatus.java
new file mode 100644
index 00000000000..9c53fb5f5ef
--- /dev/null
+++ b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/status/ApplicationInstanceStatus.java
@@ -0,0 +1,20 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.orchestrator.status;
+
+/**
+ * Enumeration of the orchestrator status on the application level.
+ *
+ * The naming and conventions follows the same pattern as with the HostStatus and is:
+ *
+ * When the node is suspended - the orchestration state is 'allowed to be down'. The application
+ * is not necessarily suspended pr. se but it is allowed to start suspending - or is back up from suspension
+ * but the flag is not revoked yet.
+ *
+ * @see HostStatus
+ * @author andreer
+ * @author <a href="mailto:smorgrav@yahoo-inc.com">Toby</a>
+ */
+public enum ApplicationInstanceStatus {
+ NO_REMARKS,
+ ALLOWED_TO_BE_DOWN
+}
diff --git a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/status/HostStatus.java b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/status/HostStatus.java
new file mode 100644
index 00000000000..f7dc183ae92
--- /dev/null
+++ b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/status/HostStatus.java
@@ -0,0 +1,12 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.orchestrator.status;
+
+/**
+ * Enumeration of the different status' a host can have.
+ *
+ * @author oyving
+ */
+public enum HostStatus {
+ NO_REMARKS,
+ ALLOWED_TO_BE_DOWN;
+}
diff --git a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/status/InMemoryStatusService.java b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/status/InMemoryStatusService.java
new file mode 100644
index 00000000000..cbe5295799b
--- /dev/null
+++ b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/status/InMemoryStatusService.java
@@ -0,0 +1,120 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.orchestrator.status;
+
+import com.yahoo.vespa.applicationmodel.ApplicationInstanceReference;
+import com.yahoo.vespa.applicationmodel.HostName;
+
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Map;
+import java.util.Set;
+import java.util.concurrent.locks.Lock;
+import java.util.concurrent.locks.ReentrantLock;
+
+/**
+ * Implementation of the StatusService interface for testing.
+ *
+ * @author oyving
+ */
+public class InMemoryStatusService implements StatusService {
+
+ private final Map<HostName, HostStatus> hostServiceStatus = new HashMap<>();
+ private final Set<ApplicationInstanceReference> applicationStatus = new HashSet<>();
+ private final LockService<ApplicationInstanceReference> instanceLockService = new LockService<>();
+
+ private void setHostStatus(
+ HostName hostName,
+ HostStatus status) {
+
+ hostServiceStatus.put(hostName, status);
+ }
+
+ @Override
+ public ReadOnlyStatusRegistry forApplicationInstance(
+ final ApplicationInstanceReference applicationInstanceReference) {
+ return new ReadOnlyStatusRegistry() {
+ @Override
+ public HostStatus getHostStatus(final HostName hostName) {
+ return hostServiceStatus.getOrDefault(hostName, HostStatus.NO_REMARKS);
+ }
+
+ @Override
+ public ApplicationInstanceStatus getApplicationInstanceStatus() {
+ return applicationStatus.contains(applicationInstanceReference) ?
+ ApplicationInstanceStatus.ALLOWED_TO_BE_DOWN : ApplicationInstanceStatus.NO_REMARKS;
+ }
+ };
+ }
+
+ @Override
+ public MutableStatusRegistry lockApplicationInstance_forCurrentThreadOnly(
+ final ApplicationInstanceReference applicationInstanceReference) {
+ final Lock lock = instanceLockService.get(applicationInstanceReference);
+ return new InMemoryMutableStatusRegistry(lock, applicationInstanceReference);
+ }
+
+ @Override
+ public Set<ApplicationInstanceReference> getAllSuspendedApplications() {
+ return applicationStatus;
+ }
+
+ private class InMemoryMutableStatusRegistry implements MutableStatusRegistry {
+ private final Lock lockHandle;
+ private final ApplicationInstanceReference ref;
+
+ public InMemoryMutableStatusRegistry(final Lock lockHandle,
+ final ApplicationInstanceReference ref) {
+ this.lockHandle = lockHandle;
+ this.ref = ref;
+ }
+
+ @Override
+ public void setHostState(HostName hostName, HostStatus status) {
+ setHostStatus(hostName, status);
+ }
+
+ @Override
+ public void setApplicationInstanceStatus(ApplicationInstanceStatus applicationInstanceStatus) {
+ if (applicationInstanceStatus == ApplicationInstanceStatus.NO_REMARKS) {
+ applicationStatus.remove(ref);
+ } else {
+ applicationStatus.add(ref);
+ }
+ }
+
+ @Override
+ public HostStatus getHostStatus(HostName hostName) {
+ return hostServiceStatus.getOrDefault(hostName, HostStatus.NO_REMARKS);
+ }
+
+ @Override
+ public ApplicationInstanceStatus getApplicationInstanceStatus() {
+ return applicationStatus.contains(ref) ? ApplicationInstanceStatus.ALLOWED_TO_BE_DOWN :
+ ApplicationInstanceStatus.NO_REMARKS;
+ }
+
+ @Override
+ public void close() {
+ //lockHandle.unlock(); TODO this casues illeal state monitor exception - how to use it properly
+ }
+ }
+
+ private static class LockService<T> {
+ private final Map<T, Lock> locks;
+
+ public LockService() {
+ this.locks = new HashMap<>();
+ }
+
+ public Lock get(T lockId) {
+ synchronized (this) {
+ Lock lock = locks.computeIfAbsent(
+ lockId,
+ id -> new ReentrantLock()
+ );
+
+ return lock;
+ }
+ }
+ }
+}
diff --git a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/status/MutableStatusRegistry.java b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/status/MutableStatusRegistry.java
new file mode 100644
index 00000000000..dc46a352fe7
--- /dev/null
+++ b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/status/MutableStatusRegistry.java
@@ -0,0 +1,31 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.orchestrator.status;
+
+import com.yahoo.vespa.applicationmodel.HostName;
+
+/**
+ * Registry of the suspension and host statuses for an application instance.
+ *
+ * @author oyving
+ * @author tonytv
+ * @author bakksjo
+ */
+public interface MutableStatusRegistry extends ReadOnlyStatusRegistry, AutoCloseable {
+ /**
+ * Sets the state for the given host.
+ */
+ void setHostState(HostName hostName, HostStatus status);
+
+ /**
+ * Sets the orchestration status for the application instance.
+ */
+ void setApplicationInstanceStatus(ApplicationInstanceStatus applicationInstanceStatus);
+
+ /**
+ * We don't want {@link AutoCloseable#close()} to throw an exception (what to do about it anyway?),
+ * so we override it here to strip the exception from the signature.
+ */
+ @Override
+ @NoThrow
+ void close();
+}
diff --git a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/status/NoThrow.java b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/status/NoThrow.java
new file mode 100644
index 00000000000..0b465ea0846
--- /dev/null
+++ b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/status/NoThrow.java
@@ -0,0 +1,19 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.orchestrator.status;
+
+import java.lang.annotation.ElementType;
+import java.lang.annotation.Retention;
+import java.lang.annotation.RetentionPolicy;
+import java.lang.annotation.Target;
+
+/**
+ * Used to annotate methods that do not throw Exceptions.
+ * They are still allowed to throw Errors, such as AssertionError
+ *
+ * TODO: move to vespajlib or find a suitable replacement
+ * @author tonytv
+ */
+@Target(ElementType.METHOD)
+@Retention(RetentionPolicy.SOURCE)
+@interface NoThrow {}
+
diff --git a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/status/ReadOnlyStatusRegistry.java b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/status/ReadOnlyStatusRegistry.java
new file mode 100644
index 00000000000..c8f9bbf208b
--- /dev/null
+++ b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/status/ReadOnlyStatusRegistry.java
@@ -0,0 +1,23 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.orchestrator.status;
+
+import com.yahoo.vespa.applicationmodel.HostName;
+
+/**
+ * Read-only view of statuses for the application instance and its hosts.
+ *
+ * @author oyving
+ * @author tonytv
+ * @author bakksjo
+ */
+public interface ReadOnlyStatusRegistry {
+ /**
+ * Gets the current state for the given host.
+ */
+ HostStatus getHostStatus(HostName hostName);
+
+ /**
+ * Gets the current status for the application instance.
+ */
+ ApplicationInstanceStatus getApplicationInstanceStatus();
+}
diff --git a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/status/StatusService.java b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/status/StatusService.java
new file mode 100644
index 00000000000..927b2f9fb6a
--- /dev/null
+++ b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/status/StatusService.java
@@ -0,0 +1,64 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.orchestrator.status;
+
+import com.yahoo.vespa.applicationmodel.ApplicationInstanceReference;
+
+import java.util.Set;
+
+/**
+ * Service that can produce registries for the suspension of an application
+ * and its hosts.
+ *
+ * The registry classes are pr application instance.
+ * TODO Remove readonly registry class (replace with actual methods) - only adds complexity.
+ *
+ * @author oyving
+ * @author tonytv
+ * @author smorgrav
+ */
+public interface StatusService {
+ /**
+ * Returns a readable host status registry for the given application instance. No locking is involved,
+ * so this call will never block. However, since it is possible that mutations are going on simultaneously
+ * with accessing this registry, the view obtained through the returned registry must be considered to be
+ * possibly inconsistent snapshot values. It is not recommended that this method is used for anything other
+ * than monitoring, logging, debugging, etc. It should never be used for multi-step operations (e.g.
+ * read-then-write) where consistency is required. For those cases, use
+ * {@link #lockApplicationInstance_forCurrentThreadOnly(ApplicationInstanceReference)}.
+ */
+ ReadOnlyStatusRegistry forApplicationInstance(ApplicationInstanceReference applicationInstanceReference);
+
+ /**
+ * Returns a mutable host status registry for a locked application instance. All operations performed on
+ * the returned registry are executed in the context of a lock, including read operations. Hence, multi-step
+ * operations (e.g. read-then-write) are guaranteed to be consistent.
+ *
+ * Some limitations/caveats apply for certain implementations, and since clients of this API must be aware of
+ * these limitations/caveats when using those implementations, they are expressed here, at interface level
+ * rather than at implementation level, because the interface represents the lowest common denominator
+ * of guarantees offered by implementations. Specifically, it is the zookeeper-based implementation's semantics
+ * that "leak through" in this spec. Now, to the specific caveats:
+ *
+ * Locking this application instance only guarantees that the holder is the only one that can mutate host statuses
+ * for the application instance.
+ * It is _not_ safe to assume that there is only one entity holding the lock for a given application instance
+ * reference at any given time.
+ *
+ * You cannot have multiple locks in a single thread, even if they are for different application instances,
+ * (i.e. different HostStatusRegistry instances). (This is due to a limitation in SessionFailRetryLoop.)
+ *
+ * While read-then-write-operations are consistent (i.e. the current value doesn't change between the read
+ * and the write), it is possible that the lock is lost before it is explicitly released by the code. In
+ * this case, subsequent mutating operations will fail, but previous mutating operations are NOT rolled back.
+ * This may leave the registry in an inconsistent state (as judged by the client code).
+ */
+ MutableStatusRegistry lockApplicationInstance_forCurrentThreadOnly(ApplicationInstanceReference applicationInstanceReference);
+
+ /**
+ * Returns all application instances that are allowed to be down. The intention is to use this
+ * for visualization, informational and debugging purposes.
+ *
+ * @return A Map between the application instance and its status.
+ */
+ Set<ApplicationInstanceReference> getAllSuspendedApplications();
+}
diff --git a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/status/ZookeeperStatusService.java b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/status/ZookeeperStatusService.java
new file mode 100644
index 00000000000..d9df5800ee8
--- /dev/null
+++ b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/status/ZookeeperStatusService.java
@@ -0,0 +1,373 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.orchestrator.status;
+
+import com.yahoo.container.jaxrs.annotation.Component;
+import com.yahoo.log.LogLevel;
+import com.yahoo.vespa.applicationmodel.ApplicationInstanceReference;
+import com.yahoo.vespa.applicationmodel.HostName;
+import com.yahoo.vespa.curator.Curator;
+import com.yahoo.vespa.orchestrator.OrchestratorUtil;
+import org.apache.curator.SessionFailRetryLoop;
+import org.apache.curator.SessionFailRetryLoop.Mode;
+import org.apache.curator.SessionFailRetryLoop.SessionFailedException;
+import org.apache.curator.framework.CuratorFramework;
+import org.apache.curator.framework.recipes.locks.InterProcessSemaphoreMutex;
+import org.apache.zookeeper.KeeperException.NoNodeException;
+import org.apache.zookeeper.KeeperException.NodeExistsException;
+import org.apache.zookeeper.data.Stat;
+
+import javax.annotation.concurrent.GuardedBy;
+import javax.inject.Inject;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Map;
+import java.util.Set;
+import java.util.concurrent.TimeUnit;
+import java.util.concurrent.TimeoutException;
+import java.util.logging.Logger;
+
+/**
+ * Stores instance suspension status and which hosts are allowed to go down in zookeeper.
+ *
+ * TODO: expiry of old application instances
+ * @author tonytv
+ */
+public class ZookeeperStatusService implements StatusService {
+ private static final Logger log = Logger.getLogger(ZookeeperStatusService.class.getName());
+
+ //For debug purposes only: Used to check that operations depending on a lock is done from a single thread,
+ //and that a threads doing operations actually owns a corresponding lock,
+ //and that a single thread only owns a single lock (across all ZookeeperStatusServices)
+ @GuardedBy("threadsHoldingLock")
+ private static final Map<Thread, ApplicationInstanceReference> threadsHoldingLock = new HashMap<>();
+
+ final static String HOST_STATUS_BASE_PATH = "/vespa/host-status-service";
+ final static String APPLICATION_STATUS_BASE_PATH = "/vespa/application-status-service";
+
+ private final CuratorFramework curatorFramework;
+
+ @Inject
+ public ZookeeperStatusService(@Component Curator curator) {
+ this(curator.framework());
+ }
+
+ /**
+ * Called via public constructor on directly on testing.
+ */
+ ZookeeperStatusService(CuratorFramework curatorFramework) {
+ this.curatorFramework = curatorFramework;
+ }
+
+ @Override
+ public ReadOnlyStatusRegistry forApplicationInstance(
+ final ApplicationInstanceReference applicationInstanceReference) {
+ return new ReadOnlyStatusRegistry() {
+ @Override
+ public HostStatus getHostStatus(final HostName hostName) {
+ return getInternalHostStatus(applicationInstanceReference, hostName);
+ }
+
+ @Override
+ public ApplicationInstanceStatus getApplicationInstanceStatus() {
+ return getInternalApplicationInstanceStatus(applicationInstanceReference);
+ }
+ };
+ }
+
+ /**
+ * 1) locks the status service for an application instance.
+ * 2) fails all operations in this thread when the session is lost,
+ * since session loss might cause the lock to be lost.
+ * Since it only fails operations in this thread,
+ * all operations depending on a lock, including the locking itself, must be done in this thread.
+ * Note that since it is the thread that fails, all status operations in this thread will fail
+ * even if they're not supposed to be guarded by this lock
+ * (i.e. the request is for another applicationInstanceReference)
+ */
+ @Override
+ public MutableStatusRegistry lockApplicationInstance_forCurrentThreadOnly(
+ final ApplicationInstanceReference applicationInstanceReference) {
+ return lockApplicationInstance_forCurrentThreadOnly(applicationInstanceReference, 10, TimeUnit.SECONDS);
+ }
+
+ @Override
+ public Set<ApplicationInstanceReference> getAllSuspendedApplications() {
+ try {
+ Set<ApplicationInstanceReference> resultSet = new HashSet<>();
+
+ // Return empty set if the base path does not exist
+ Stat stat = curatorFramework.checkExists().forPath(APPLICATION_STATUS_BASE_PATH);
+ if (stat == null) return resultSet;
+
+ // The path exist and we may have children
+ for (String appRefStr : curatorFramework.getChildren().forPath(APPLICATION_STATUS_BASE_PATH)) {
+ ApplicationInstanceReference appRef = OrchestratorUtil.parseAppInstanceReference(appRefStr);
+ resultSet.add(appRef);
+ }
+
+ return resultSet;
+ } catch (Exception e) {
+ log.log(LogLevel.DEBUG, "Something went wrong while listing out applications in suspend.", e);
+ throw new RuntimeException(e);
+ }
+ }
+
+ MutableStatusRegistry lockApplicationInstance_forCurrentThreadOnly(
+ ApplicationInstanceReference applicationInstanceReference,
+ long timeout,
+ TimeUnit timeoutTimeUnit) {
+
+ final Thread currentThread = Thread.currentThread();
+
+ //Due to limitations in SessionFailRetryLoop.
+ assertThreadDoesNotHoldLock(currentThread,
+ "Can't lock " + applicationInstanceReference);
+
+ try {
+ SessionFailRetryLoop sessionFailRetryLoop =
+ curatorFramework.getZookeeperClient().newSessionFailRetryLoop(Mode.FAIL);
+ sessionFailRetryLoop.start();
+ try {
+ String lockPath = applicationInstanceLockPath(applicationInstanceReference);
+ InterProcessSemaphoreMutex mutex = acquireMutexOrThrow(timeout, timeoutTimeUnit, lockPath);
+
+ synchronized (threadsHoldingLock) {
+ threadsHoldingLock.put(currentThread, applicationInstanceReference);
+ }
+
+ return new ZkMutableStatusRegistry(mutex, sessionFailRetryLoop, applicationInstanceReference, currentThread);
+ } catch (Throwable t) {
+ sessionFailRetryLoop.close();
+ throw t;
+ }
+ } catch (Exception e) {
+ //TODO: IOException with explanation
+ throw new RuntimeException(e);
+ }
+ }
+
+ private void assertThreadDoesNotHoldLock(Thread currentThread, String message) {
+ synchronized (threadsHoldingLock) {
+ if (threadsHoldingLock.containsKey(currentThread)) {
+ throw new AssertionError(message + ", already have a lock on " + threadsHoldingLock.get(currentThread));
+ }
+ }
+ }
+
+ private InterProcessSemaphoreMutex acquireMutexOrThrow(long timeout, TimeUnit timeoutTimeUnit, String lockPath) throws Exception {
+ InterProcessSemaphoreMutex mutex = new InterProcessSemaphoreMutex(curatorFramework, lockPath);
+
+ log.log(LogLevel.DEBUG, "Waiting for lock on " + lockPath);
+ boolean acquired = mutex.acquire(timeout, timeoutTimeUnit);
+ if (!acquired) {
+ log.log(LogLevel.DEBUG, "Timed out waiting for lock on " + lockPath);
+ throw new TimeoutException();
+ }
+ log.log(LogLevel.DEBUG, "Successfully acquired lock on " + lockPath);
+ return mutex;
+ }
+
+ private void setHostStatus(
+ ApplicationInstanceReference applicationInstanceReference,
+ HostName hostName,
+ HostStatus status) {
+ assertThreadHoldsLock(applicationInstanceReference);
+
+ String path = hostAllowedDownPath(applicationInstanceReference, hostName);
+
+ try {
+ switch (status) {
+ case NO_REMARKS:
+ deleteNode_ignoreNoNodeException(path,
+ "Host already has state NO_REMARKS, path = " + path);
+ break;
+ case ALLOWED_TO_BE_DOWN:
+ createNode_ignoreNodeExistsException(path,
+ "Host already has state ALLOWED_TO_BE_DOWN, path = " + path);
+ }
+ } catch (Exception e) {
+ //TODO: IOException with explanation
+ throw new RuntimeException(e);
+ }
+ }
+
+ private static void assertThreadHoldsLock(ApplicationInstanceReference applicationInstanceReference) {
+ synchronized (threadsHoldingLock) {
+ ApplicationInstanceReference lockedApplicationInstanceReference =
+ threadsHoldingLock.get(Thread.currentThread());
+
+ if (lockedApplicationInstanceReference == null) {
+ throw new AssertionError("The current thread does not own any status service locks. " +
+ "Application Instance = " + applicationInstanceReference);
+ }
+
+ if (!lockedApplicationInstanceReference.equals(applicationInstanceReference)) {
+ throw new AssertionError("The current thread does not have a lock on " +
+ "application instance " + applicationInstanceReference +
+ ", but instead have a lock on " + lockedApplicationInstanceReference);
+ }
+ }
+ }
+
+ private void deleteNode_ignoreNoNodeException(String path, String debugLogMessageIfNotExists) throws Exception {
+ try {
+ curatorFramework.delete().forPath(path);
+ } catch (NoNodeException e) {
+ log.log(LogLevel.DEBUG, debugLogMessageIfNotExists, e);
+ }
+ }
+
+ private void createNode_ignoreNodeExistsException(String path, String debugLogMessageIfExists) throws Exception {
+ try {
+ curatorFramework.create()
+ .creatingParentsIfNeeded()
+ .forPath(path);
+ } catch (NodeExistsException e) {
+ log.log(LogLevel.DEBUG, debugLogMessageIfExists, e);
+ }
+ }
+
+ //TODO: Eliminate repeated calls to getHostStatus, replace with bulk operation.
+ private HostStatus getInternalHostStatus(ApplicationInstanceReference applicationInstanceReference, HostName hostName) {
+ try {
+ Stat statOrNull = curatorFramework.checkExists().forPath(
+ hostAllowedDownPath(applicationInstanceReference, hostName));
+
+ return (statOrNull == null) ? HostStatus.NO_REMARKS : HostStatus.ALLOWED_TO_BE_DOWN;
+ } catch (Exception e) {
+ //TODO: IOException with explanation - Should we only catch IOExceptions or are they a special case?
+ throw new RuntimeException(e);
+ }
+ }
+
+ /** Common implementation for the two internal classes that sets ApplicationInstanceStatus. */
+ private ApplicationInstanceStatus getInternalApplicationInstanceStatus(ApplicationInstanceReference applicationInstanceReference) {
+ try {
+ Stat statOrNull = curatorFramework.checkExists().forPath(
+ applicationInstanceSuspendedPath(applicationInstanceReference));
+
+ return (statOrNull == null) ? ApplicationInstanceStatus.NO_REMARKS : ApplicationInstanceStatus.ALLOWED_TO_BE_DOWN;
+ } catch (Exception e) {
+ throw new RuntimeException(e);
+ }
+ }
+
+ private HostStatus getHostStatusWithLock(
+ final ApplicationInstanceReference applicationInstanceReference,
+ final HostName hostName) {
+ assertThreadHoldsLock(applicationInstanceReference);
+ return getInternalHostStatus(applicationInstanceReference, hostName);
+ }
+
+ private static String applicationInstancePath(ApplicationInstanceReference applicationInstanceReference) {
+ return HOST_STATUS_BASE_PATH + '/' +
+ applicationInstanceReference.tenantId() + ":" + applicationInstanceReference.applicationInstanceId();
+ }
+
+ private static String hostsAllowedDownPath(ApplicationInstanceReference applicationInstanceReference) {
+ return applicationInstancePath(applicationInstanceReference) + "/hosts-allowed-down";
+ }
+
+ private static String applicationInstanceLockPath(ApplicationInstanceReference applicationInstanceReference) {
+ return applicationInstancePath(applicationInstanceReference) + "/lock";
+ }
+
+ private String applicationInstanceSuspendedPath(ApplicationInstanceReference applicationInstanceReference) {
+ return APPLICATION_STATUS_BASE_PATH + "/" + OrchestratorUtil.toRestApiFormat(applicationInstanceReference);
+ }
+
+ private static String hostAllowedDownPath(ApplicationInstanceReference applicationInstanceReference, HostName hostname) {
+ return hostsAllowedDownPath(applicationInstanceReference) + '/' + hostname.s();
+ }
+
+ private class ZkMutableStatusRegistry implements MutableStatusRegistry {
+ private final InterProcessSemaphoreMutex mutex;
+ private final SessionFailRetryLoop sessionFailRetryLoop;
+ private final ApplicationInstanceReference applicationInstanceReference;
+ private final Thread lockingThread;
+
+ public ZkMutableStatusRegistry(
+ InterProcessSemaphoreMutex mutex,
+ SessionFailRetryLoop sessionFailRetryLoop,
+ ApplicationInstanceReference applicationInstanceReference,
+ Thread lockingThread) {
+
+ this.mutex = mutex;
+ this.sessionFailRetryLoop = sessionFailRetryLoop;
+ this.applicationInstanceReference = applicationInstanceReference;
+ this.lockingThread = lockingThread;
+ }
+
+ @Override
+ public void setHostState(final HostName hostName, final HostStatus status) {
+ setHostStatus(applicationInstanceReference, hostName, status);
+ }
+
+ @Override
+ public void setApplicationInstanceStatus(ApplicationInstanceStatus applicationInstanceStatus) {
+ assertThreadHoldsLock(applicationInstanceReference);
+
+ String path = applicationInstanceSuspendedPath(applicationInstanceReference);
+
+ try {
+ switch (applicationInstanceStatus) {
+ case NO_REMARKS:
+ deleteNode_ignoreNoNodeException(path,
+ "Instance is already in state NO_REMARKS, path = " + path);
+ break;
+ case ALLOWED_TO_BE_DOWN:
+ createNode_ignoreNodeExistsException(path,
+ "Instance is already in state ALLOWED_TO_BE_DOWN, path = " + path);
+ break;
+ }
+ } catch (Exception e) {
+ throw new RuntimeException(e);
+ }
+ }
+
+ @Override
+ public HostStatus getHostStatus(final HostName hostName) {
+ return getHostStatusWithLock(applicationInstanceReference, hostName);
+ }
+
+ @Override
+ public ApplicationInstanceStatus getApplicationInstanceStatus() {
+ return getInternalApplicationInstanceStatus(applicationInstanceReference);
+ }
+
+ @Override
+ @NoThrow
+ public void close() {
+ synchronized (threadsHoldingLock) {
+ threadsHoldingLock.remove(lockingThread, applicationInstanceReference);
+ }
+
+ try {
+ mutex.release();
+ } catch (Exception e) {
+ if (e.getCause() instanceof SessionFailedException) {
+ log.log(LogLevel.DEBUG, "Session expired, mutex should be freed automatically", e);
+ } else {
+ //Failing to unlock the mutex should not fail the request,
+ //since the status database has already been updated at this point.
+ log.log(LogLevel.WARNING, "Failed unlocking application instance " + applicationInstanceReference, e);
+ }
+ }
+
+ //Similar precondition checked in sessionFailRetryLoop.close,
+ //but this has more useful debug output.
+ if (lockingThread != Thread.currentThread()) {
+ throw new AssertionError("LockHandle should only be used from a single thread. "
+ + "Application instance = " + applicationInstanceReference
+ + " Locking thread = " + lockingThread
+ + " Current thread = " + Thread.currentThread());
+ }
+
+ try {
+ sessionFailRetryLoop.close();
+ } catch (Exception e) {
+ log.log(LogLevel.ERROR, "Failed closing SessionRetryLoop", e);
+ }
+ }
+ }
+}
diff --git a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/status/package-info.java b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/status/package-info.java
new file mode 100644
index 00000000000..55dfdf9573d
--- /dev/null
+++ b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/status/package-info.java
@@ -0,0 +1,5 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+@ExportPackage
+package com.yahoo.vespa.orchestrator.status;
+
+import com.yahoo.osgi.annotation.ExportPackage;
diff --git a/orchestrator/src/main/resources/configdefinitions/orchestrator.def b/orchestrator/src/main/resources/configdefinitions/orchestrator.def
new file mode 100644
index 00000000000..4d569e0f55c
--- /dev/null
+++ b/orchestrator/src/main/resources/configdefinitions/orchestrator.def
@@ -0,0 +1,5 @@
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+namespace=vespa.orchestrator
+
+# TODO: Change the default to actual latency in real setup.
+serviceMonitorConvergenceLatencySeconds int default=0
diff --git a/orchestrator/src/test/application/services.xml b/orchestrator/src/test/application/services.xml
new file mode 100644
index 00000000000..8c68f7d0b50
--- /dev/null
+++ b/orchestrator/src/test/application/services.xml
@@ -0,0 +1,16 @@
+<!-- Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -->
+<services>
+ <jdisc version="1.0" jetty="true">
+ <config name="container.handler.threadpool">
+ <maxthreads>10</maxthreads>
+ </config>
+ <component id="com.yahoo.vespa.orchestrator.status.InMemoryStatusService" bundle="orchestrator" />
+ <component id="com.yahoo.vespa.orchestrator.DummyInstanceLookupService" bundle="orchestrator" />
+ <component id="com.yahoo.vespa.orchestrator.OrchestratorImpl" bundle="orchestrator" />
+ <component id="com.yahoo.vespa.orchestrator.controller.ClusterControllerClientFactoryMock" bundle="orchestrator" />
+
+ <rest-api path="orchestrator" jersey2="true">
+ <components bundle="orchestrator" />
+ </rest-api>
+ </jdisc>
+</services>
diff --git a/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/DummyInstanceLookupService.java b/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/DummyInstanceLookupService.java
new file mode 100644
index 00000000000..e1b73c1fe65
--- /dev/null
+++ b/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/DummyInstanceLookupService.java
@@ -0,0 +1,166 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.orchestrator;
+
+
+import com.yahoo.vespa.applicationmodel.ApplicationInstance;
+import com.yahoo.vespa.applicationmodel.ApplicationInstanceId;
+import com.yahoo.vespa.applicationmodel.ApplicationInstanceReference;
+import com.yahoo.vespa.applicationmodel.ClusterId;
+import com.yahoo.vespa.applicationmodel.ConfigId;
+import com.yahoo.vespa.applicationmodel.HostName;
+import com.yahoo.vespa.applicationmodel.ServiceCluster;
+import com.yahoo.vespa.applicationmodel.ServiceInstance;
+import com.yahoo.vespa.applicationmodel.ServiceType;
+import com.yahoo.vespa.applicationmodel.TenantId;
+import com.yahoo.vespa.service.monitor.ServiceMonitorStatus;
+
+import java.util.HashSet;
+import java.util.Optional;
+import java.util.Set;
+import java.util.stream.Collectors;
+
+/**
+ * A hardcoded set of applications with one storage cluster with two nodes each.
+ *
+ * @author oyving
+ * @author smorgrav
+ */
+public class DummyInstanceLookupService implements InstanceLookupService {
+
+ public static final HostName TEST1_HOST_NAME = new HostName("test1.prod.utpoia-1.vespahosted.ut1.yahoo.com");
+ public static final HostName TEST3_HOST_NAME = new HostName("test3.prod.utpoia-1.vespahosted.ut1.yahoo.com");
+ public static final HostName TEST6_HOST_NAME = new HostName("test6.prod.us-east-1.vespahosted.ne1.yahoo.com");
+
+ private static final Set<ApplicationInstance<ServiceMonitorStatus>> apps = new HashSet<>();
+
+
+ static {
+ apps.add(new ApplicationInstance<>(
+ new TenantId("test-tenant-id"),
+ new ApplicationInstanceId("application:instance"),
+ TestUtil.makeServiceClusterSet(
+ new ServiceCluster<>(
+ new ClusterId("test-cluster-id-1"),
+ new ServiceType("storagenode"),
+ TestUtil.makeServiceInstanceSet(
+ new ServiceInstance<>(
+ new ConfigId("storage/storage/1"),
+ TEST1_HOST_NAME,
+ ServiceMonitorStatus.UP),
+ new ServiceInstance<>(
+ new ConfigId("storage/storage/2"),
+ new HostName("test2.prod.utpoia-1.vespahosted.ut1.yahoo.com"),
+ ServiceMonitorStatus.UP))),
+ new ServiceCluster<>(
+ new ClusterId("clustercontroller"),
+ new ServiceType("container-clustercontroller"),
+ TestUtil.makeServiceInstanceSet(
+ new ServiceInstance<>(
+ new ConfigId("clustercontroller-1"),
+ new HostName("myclustercontroller.prod.utopia-1.vespahosted.ut1.yahoo.com"),
+ ServiceMonitorStatus.UP)))
+
+ )
+ ));
+
+ apps.add(new ApplicationInstance<>(
+ new TenantId("mediasearch"),
+ new ApplicationInstanceId("imagesearch:default"),
+ TestUtil.makeServiceClusterSet(
+ new ServiceCluster<>(
+ new ClusterId("image"),
+ new ServiceType("storagenode"),
+ TestUtil.makeServiceInstanceSet(
+ new ServiceInstance<>(
+ new ConfigId("storage/storage/3"),
+ TEST3_HOST_NAME,
+ ServiceMonitorStatus.UP),
+ new ServiceInstance<>(
+ new ConfigId("storage/storage/4"),
+ new HostName("test4.prod.utpoia-1.vespahosted.ut1.yahoo.com"),
+ ServiceMonitorStatus.UP))),
+ new ServiceCluster<>(
+ new ClusterId("clustercontroller"),
+ new ServiceType("container-clustercontroller"),
+ TestUtil.makeServiceInstanceSet(
+ new ServiceInstance<>(
+ new ConfigId("clustercontroller-1"),
+ new HostName("myclustercontroller2.prod.utopia-1.vespahosted.ut1.yahoo.com"),
+ ServiceMonitorStatus.UP)))
+ )
+ )
+ );
+
+ apps.add(new ApplicationInstance<>(
+ new TenantId("tenant-id-3"),
+ new ApplicationInstanceId("application-instance-3:default"),
+ TestUtil.makeServiceClusterSet(
+ new ServiceCluster<>(
+ new ClusterId("cluster-id-3"),
+ new ServiceType("storagenode"),
+ TestUtil.makeServiceInstanceSet(
+ new ServiceInstance<>(
+ new ConfigId("storage/storage/1"),
+ TEST6_HOST_NAME,
+ ServiceMonitorStatus.UP),
+ new ServiceInstance<>(
+ new ConfigId("storage/storage/4"),
+ new HostName("test4.prod.utpoia-1.vespahosted.ut1.yahoo.com"),
+ ServiceMonitorStatus.UP))),
+ new ServiceCluster<>(
+ new ClusterId("clustercontroller"),
+ new ServiceType("container-clustercontroller"),
+ TestUtil.makeServiceInstanceSet(
+ new ServiceInstance<>(
+ new ConfigId("clustercontroller-1"),
+ new HostName("myclustercontroller3.prod.utopia-1.vespahosted.ut1.yahoo.com"),
+ ServiceMonitorStatus.UP)))
+ )
+ ));
+ }
+
+
+ @Override
+ public Optional<ApplicationInstance<ServiceMonitorStatus>> findInstanceById(
+ final ApplicationInstanceReference applicationInstanceReference) {
+ for (ApplicationInstance<ServiceMonitorStatus> app : apps) {
+ if (app.reference().equals(applicationInstanceReference)) return Optional.of(app);
+ }
+ return Optional.empty();
+ }
+
+ @Override
+ public Optional<ApplicationInstance<ServiceMonitorStatus>> findInstanceByHost(HostName hostName) {
+ for (ApplicationInstance<ServiceMonitorStatus> app : apps) {
+ for (ServiceCluster<ServiceMonitorStatus> cluster : app.serviceClusters()) {
+ for (ServiceInstance<ServiceMonitorStatus> service : cluster.serviceInstances()) {
+ if (hostName.equals(service.hostName())) return Optional.of(app);
+ }
+ }
+ }
+ return Optional.empty();
+ }
+
+ @Override
+ public Set<ApplicationInstanceReference> knownInstances() {
+ return apps.stream().map(a ->
+ new ApplicationInstanceReference(a.tenantId(),a.applicationInstanceId())).collect(Collectors.toSet());
+
+ }
+
+ public static Set<HostName> getContentHosts(ApplicationInstanceReference appRef) {
+ Set<HostName> hosts = apps.stream()
+ .filter(application -> application.reference().equals(appRef))
+ .flatMap(appReference -> appReference.serviceClusters().stream())
+ .filter(VespaModelUtil::isContent)
+ .flatMap(serviceCluster -> serviceCluster.serviceInstances().stream())
+ .map(ServiceInstance::hostName)
+ .collect(Collectors.toSet());
+
+ return hosts;
+ }
+
+ public static Set<ApplicationInstance<ServiceMonitorStatus>> getApplications() {
+ return apps;
+ }
+}
diff --git a/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/OrchestratorImplTest.java b/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/OrchestratorImplTest.java
new file mode 100644
index 00000000000..5c1d9bd45be
--- /dev/null
+++ b/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/OrchestratorImplTest.java
@@ -0,0 +1,300 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.orchestrator;
+
+import com.yahoo.config.provision.ApplicationId;
+import com.yahoo.vespa.applicationmodel.ApplicationInstance;
+import com.yahoo.vespa.applicationmodel.ApplicationInstanceReference;
+import com.yahoo.vespa.applicationmodel.HostName;
+import com.yahoo.vespa.applicationmodel.ServiceType;
+import com.yahoo.vespa.orchestrator.controller.ClusterControllerClientFactoryMock;
+import com.yahoo.vespa.orchestrator.policy.BatchHostStateChangeDeniedException;
+import com.yahoo.vespa.orchestrator.policy.HostStateChangeDeniedException;
+import com.yahoo.vespa.orchestrator.status.HostStatus;
+import com.yahoo.vespa.orchestrator.status.InMemoryStatusService;
+import com.yahoo.vespa.service.monitor.ServiceMonitorStatus;
+import org.junit.After;
+import org.junit.Before;
+import org.junit.Test;
+import org.mockito.InOrder;
+
+import java.util.Arrays;
+import java.util.Iterator;
+import java.util.List;
+
+import static com.yahoo.vespa.orchestrator.status.ApplicationInstanceStatus.ALLOWED_TO_BE_DOWN;
+import static com.yahoo.vespa.orchestrator.status.ApplicationInstanceStatus.NO_REMARKS;
+import static org.hamcrest.collection.IsEmptyCollection.empty;
+import static org.hamcrest.core.Is.is;
+import static org.hamcrest.core.IsCollectionContaining.hasItem;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertThat;
+import static org.junit.Assert.assertTrue;
+import static org.junit.Assert.fail;
+import static org.mockito.Mockito.doNothing;
+import static org.mockito.Mockito.doThrow;
+import static org.mockito.Mockito.inOrder;
+import static org.mockito.Mockito.spy;
+
+/**
+ * Test Orchestrator with a mock backend (the InMemoryStatusService)
+ *
+ * @author <a href="mailto:smorgrav@yahoo-inc.com">Toby</a>
+ */
+public class OrchestratorImplTest {
+
+ private ApplicationId app1;
+ private ApplicationId app2;
+ private HostName app1_host1;
+
+ private OrchestratorImpl orchestrator;
+ private ClusterControllerClientFactoryMock clustercontroller;
+
+ @Before
+ public void setUp() throws Exception {
+ // Extract applications and hosts from dummy instance lookup service
+ Iterator<ApplicationInstance<ServiceMonitorStatus>> iterator = DummyInstanceLookupService.getApplications().iterator();
+ ApplicationInstanceReference app1_ref = iterator.next().reference();
+ app1 = OrchestratorUtil.toApplicationId(app1_ref);
+ app1_host1 = DummyInstanceLookupService.getContentHosts(app1_ref).iterator().next();
+ app2 = OrchestratorUtil.toApplicationId(iterator.next().reference());
+
+ clustercontroller = new ClusterControllerClientFactoryMock();
+ orchestrator = new OrchestratorImpl(
+ clustercontroller,
+ new InMemoryStatusService(),
+ new OrchestratorConfig(new OrchestratorConfig.Builder()),
+ new DummyInstanceLookupService());
+
+ clustercontroller.setAllDummyNodesAsUp();
+ }
+
+ @After
+ public void tearDown() throws Exception {
+ orchestrator = null;
+ clustercontroller = null;
+ }
+
+ @Test
+ public void application_has_initially_no_remarks() throws Exception {
+ assertThat(orchestrator.getApplicationInstanceStatus(app1), is(NO_REMARKS));
+ }
+
+ @Test
+ public void application_can_be_set_in_suspend() throws Exception {
+ orchestrator.suspend(app1);
+ assertThat(orchestrator.getApplicationInstanceStatus(app1), is(ALLOWED_TO_BE_DOWN));
+ }
+
+ @Test
+ public void application_can_be_removed_from_suspend() throws Exception {
+ orchestrator.suspend(app1);
+ orchestrator.resume(app1);
+ assertThat(orchestrator.getApplicationInstanceStatus(app1), is(NO_REMARKS));
+ }
+
+ @Test
+ public void appliations_list_returns_empty_initially() throws Exception {
+ assertThat(orchestrator.getAllSuspendedApplications(), is(empty()));
+ }
+
+ @Test
+ public void appliations_list_returns_suspended_apps() throws Exception {
+ // One suspended app
+ orchestrator.suspend(app1);
+ assertThat(orchestrator.getAllSuspendedApplications().size(), is(1));
+ assertThat(orchestrator.getAllSuspendedApplications(), hasItem(app1));
+
+ // Two suspended apps
+ orchestrator.suspend(app2);
+ assertThat(orchestrator.getAllSuspendedApplications().size(), is(2));
+ assertThat(orchestrator.getAllSuspendedApplications(), hasItem(app1));
+ assertThat(orchestrator.getAllSuspendedApplications(), hasItem(app2));
+
+ // Back to one when resetting one app to no_remarks
+ orchestrator.resume(app1);
+ assertThat(orchestrator.getAllSuspendedApplications().size(), is(1));
+ assertThat(orchestrator.getAllSuspendedApplications(), hasItem(app2));
+ }
+
+
+ @Test
+ public void application_operations_are_idempotent() throws Exception {
+ // Two suspends
+ orchestrator.suspend(app1);
+ orchestrator.suspend(app1);
+ assertThat(orchestrator.getApplicationInstanceStatus(app1), is(ALLOWED_TO_BE_DOWN));
+ assertThat(orchestrator.getApplicationInstanceStatus(app2), is(NO_REMARKS));
+
+ // Three no_remarks
+ orchestrator.resume(app1);
+ orchestrator.resume(app1);
+ orchestrator.resume(app1);
+ assertThat(orchestrator.getApplicationInstanceStatus(app1), is(NO_REMARKS));
+ assertThat(orchestrator.getApplicationInstanceStatus(app2), is(NO_REMARKS));
+
+ // Two suspends and two on two applications interleaved
+ orchestrator.suspend(app2);
+ orchestrator.resume(app1);
+ orchestrator.suspend(app2);
+ orchestrator.resume(app1);
+ assertThat(orchestrator.getApplicationInstanceStatus(app1), is(NO_REMARKS));
+ assertThat(orchestrator.getApplicationInstanceStatus(app2), is(ALLOWED_TO_BE_DOWN));
+ }
+
+
+ @Test
+ public void application_suspend_sets_application_nodes_in_maintenance_and_allowed_to_be_down() throws Exception {
+ // Pre condition
+ assertEquals(NO_REMARKS, orchestrator.getApplicationInstanceStatus(app1));
+ assertEquals(HostStatus.NO_REMARKS, orchestrator.getNodeStatus(app1_host1));
+ assertFalse(isInMaintenance(app1, app1_host1));
+
+ orchestrator.suspend(app1);
+
+ assertEquals(HostStatus.ALLOWED_TO_BE_DOWN, orchestrator.getNodeStatus(app1_host1));
+ assertTrue(isInMaintenance(app1, app1_host1));
+ }
+
+ @Test
+ public void node_suspend_while_app_is_resumed_set_allowed_to_be_down_and_set_it_in_maintenance() throws Exception {
+ // Pre condition
+ assertEquals(NO_REMARKS, orchestrator.getApplicationInstanceStatus(app1));
+ assertEquals(HostStatus.NO_REMARKS, orchestrator.getNodeStatus(app1_host1));
+ assertFalse(isInMaintenance(app1, app1_host1));
+
+ orchestrator.suspend(app1_host1);
+
+ assertEquals(HostStatus.ALLOWED_TO_BE_DOWN, orchestrator.getNodeStatus(app1_host1));
+ assertTrue(isInMaintenance(app1, app1_host1));
+ }
+
+ @Test
+ public void node_suspend_while_app_is_suspended_does_nothing() throws Exception {
+ // Pre condition
+ orchestrator.suspend(app1);
+ assertEquals(ALLOWED_TO_BE_DOWN, orchestrator.getApplicationInstanceStatus(app1));
+ assertEquals(HostStatus.ALLOWED_TO_BE_DOWN, orchestrator.getNodeStatus(app1_host1));
+ assertTrue(isInMaintenance(app1, app1_host1));
+
+ orchestrator.suspend(app1_host1);
+
+ // Should not change anything
+ assertEquals(ALLOWED_TO_BE_DOWN, orchestrator.getApplicationInstanceStatus(app1));
+ assertEquals(HostStatus.ALLOWED_TO_BE_DOWN, orchestrator.getNodeStatus(app1_host1));
+ assertTrue(isInMaintenance(app1, app1_host1));
+ }
+
+ @Test
+ public void node_resume_after_app_is_resumed_removes_allowed_be_down_and_set_it_up() throws Exception {
+ // Pre condition
+ orchestrator.suspend(app1);
+ assertEquals(ALLOWED_TO_BE_DOWN, orchestrator.getApplicationInstanceStatus(app1));
+ assertEquals(HostStatus.ALLOWED_TO_BE_DOWN, orchestrator.getNodeStatus(app1_host1));
+ assertTrue(isInMaintenance(app1, app1_host1));
+
+ orchestrator.resume(app1);
+ orchestrator.resume(app1_host1);
+
+ assertEquals(HostStatus.NO_REMARKS, orchestrator.getNodeStatus(app1_host1));
+ assertFalse(isInMaintenance(app1, app1_host1));
+ }
+
+ @Test
+ public void node_resume_while_app_is_suspended_does_nothing() throws Exception {
+ orchestrator.suspend(app1_host1);
+ orchestrator.suspend(app1);
+
+ orchestrator.resume(app1_host1);
+
+ assertEquals(HostStatus.ALLOWED_TO_BE_DOWN, orchestrator.getNodeStatus(app1_host1));
+ assertTrue(isInMaintenance(app1, app1_host1));
+ }
+
+ @Test
+ public void applicationReferenceHasTenantAndAppInstance() {
+ InstanceLookupService service = new DummyInstanceLookupService();
+ String applicationInstanceId = service.findInstanceByHost(DummyInstanceLookupService.TEST1_HOST_NAME).get()
+ .reference().toString();
+ assertEquals("test-tenant-id:application:instance", applicationInstanceId);
+ }
+
+ @Test
+ public void sortHostNamesForSuspend() throws Exception {
+ HostName parentHostName = new HostName("parentHostName");
+ List<HostName> expectedOrder = Arrays.asList(
+ DummyInstanceLookupService.TEST3_HOST_NAME,
+ DummyInstanceLookupService.TEST1_HOST_NAME);
+
+ assertEquals(expectedOrder, orchestrator.sortHostNamesForSuspend(Arrays.asList(
+ DummyInstanceLookupService.TEST1_HOST_NAME,
+ DummyInstanceLookupService.TEST3_HOST_NAME)));
+
+ assertEquals(expectedOrder, orchestrator.sortHostNamesForSuspend(Arrays.asList(
+ DummyInstanceLookupService.TEST3_HOST_NAME,
+ DummyInstanceLookupService.TEST1_HOST_NAME)));
+ }
+
+ @Test
+ public void rollbackWorks() throws Exception {
+ // A spy is preferential because suspendAll() relies on delegating the hard work to suspend() and resume().
+ OrchestratorImpl orchestrator = spy(this.orchestrator);
+
+ doNothing().when(orchestrator).suspend(DummyInstanceLookupService.TEST3_HOST_NAME);
+
+ Throwable supensionFailure = new HostStateChangeDeniedException(
+ DummyInstanceLookupService.TEST6_HOST_NAME,
+ "some-constraint",
+ new ServiceType("foo"),
+ "error message");
+ doThrow(supensionFailure).when(orchestrator).suspend(DummyInstanceLookupService.TEST1_HOST_NAME);
+
+ doThrow(new HostStateChangeDeniedException(DummyInstanceLookupService.TEST1_HOST_NAME, "foo1-constraint", new ServiceType("foo1-service"), "foo1-message"))
+ .when(orchestrator).resume(DummyInstanceLookupService.TEST1_HOST_NAME);
+ doNothing().when(orchestrator).resume(DummyInstanceLookupService.TEST6_HOST_NAME);
+ doNothing().when(orchestrator).resume(DummyInstanceLookupService.TEST3_HOST_NAME);
+
+ try {
+ orchestrator.suspendAll(
+ new HostName("parentHostname"),
+ Arrays.asList(
+ DummyInstanceLookupService.TEST1_HOST_NAME,
+ DummyInstanceLookupService.TEST3_HOST_NAME,
+ DummyInstanceLookupService.TEST6_HOST_NAME));
+ fail();
+ } catch (BatchHostStateChangeDeniedException e) {
+ assertEquals(e.getSuppressed().length, 1);
+ assertEquals("Failed to suspend [test3.prod.utpoia-1.vespahosted.ut1.yahoo.com, " +
+ "test6.prod.us-east-1.vespahosted.ne1.yahoo.com, test1.prod.utpoia-1.vespahosted.ut1.yahoo.com] " +
+ "with parent host parentHostname: Changing the state of host " +
+ "test6.prod.us-east-1.vespahosted.ne1.yahoo.com would violate some-constraint for " +
+ "service type foo: error message; " +
+ "With suppressed throwable com.yahoo.vespa.orchestrator.policy.HostStateChangeDeniedException: " +
+ "Changing the state of host test1.prod.utpoia-1.vespahosted.ut1.yahoo.com would violate " +
+ "foo1-constraint for service type foo1-service: foo1-message", e.getMessage());
+ }
+
+ InOrder order = inOrder(orchestrator);
+ order.verify(orchestrator).suspend(DummyInstanceLookupService.TEST3_HOST_NAME);
+ order.verify(orchestrator).suspend(DummyInstanceLookupService.TEST6_HOST_NAME);
+
+ // As of 2016-06-07:
+ // TEST1_HOST_NAME: test-tenant-id:application:instance
+ // TEST3_HOST_NAME: mediasearch:imagesearch:default
+ // TEST6_HOST_NAME: tenant-id-3:application-instance-3:default
+ // Meaning the order is 3, 6, then 1. For rollback/resume the order is reversed.
+ order.verify(orchestrator).resume(DummyInstanceLookupService.TEST1_HOST_NAME);
+ order.verify(orchestrator).resume(DummyInstanceLookupService.TEST6_HOST_NAME);
+ order.verify(orchestrator).resume(DummyInstanceLookupService.TEST3_HOST_NAME);
+ order.verifyNoMoreInteractions();
+ }
+
+ private boolean isInMaintenance(ApplicationId appId, HostName hostName) throws ApplicationIdNotFoundException {
+ for (ApplicationInstance<ServiceMonitorStatus> app : DummyInstanceLookupService.getApplications()) {
+ if (app.reference().equals(OrchestratorUtil.toApplicationInstanceReference(appId))) {
+ return clustercontroller.isInMaintenance(app, hostName);
+ }
+ }
+ throw new ApplicationIdNotFoundException();
+ }
+}
diff --git a/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/OrchestratorUtilTest.java b/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/OrchestratorUtilTest.java
new file mode 100644
index 00000000000..0626bf72e60
--- /dev/null
+++ b/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/OrchestratorUtilTest.java
@@ -0,0 +1,49 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.orchestrator;
+
+import com.yahoo.config.provision.ApplicationId;
+import com.yahoo.config.provision.ApplicationName;
+import com.yahoo.config.provision.InstanceName;
+import com.yahoo.config.provision.TenantName;
+import com.yahoo.vespa.applicationmodel.ApplicationInstanceId;
+import com.yahoo.vespa.applicationmodel.ApplicationInstanceReference;
+import com.yahoo.vespa.applicationmodel.TenantId;
+import org.junit.Assert;
+import org.junit.Test;
+
+/**
+ * @author <a href="mailto:smorgrav@yahoo-inc.com">Toby</a>
+ */
+public class OrchestratorUtilTest {
+
+ private static final ApplicationId APPID_1 = ApplicationId.from(
+ TenantName.from("mediasearch"),
+ ApplicationName.from("tumblr-search"),
+ InstanceName.defaultName());
+
+ private static final ApplicationInstanceReference APPREF_1 = new ApplicationInstanceReference(
+ new TenantId("test-tenant"),
+ new ApplicationInstanceId("test-application:test-environment:test-region:test-instance-key"));
+
+ /**
+ * Here we don't care how the internal of the different application
+ * id/reference look like as long as we get back to exactly where we
+ * started from a round trip. I.e I'm not testing validity of the
+ * different representations.
+ */
+ @Test
+ public void applicationid_conversion_are_symmetric() throws Exception {
+
+ // From appId to appRef and back
+ ApplicationInstanceReference appRef = OrchestratorUtil.toApplicationInstanceReference(APPID_1);
+ ApplicationId appIdRoundTrip = OrchestratorUtil.toApplicationId(appRef);
+
+ Assert.assertEquals(APPID_1, appIdRoundTrip);
+
+ // From appRef to appId and back
+ ApplicationId appId = OrchestratorUtil.toApplicationId(APPREF_1);
+ ApplicationInstanceReference appRefRoundTrip = OrchestratorUtil.toApplicationInstanceReference(appId);
+
+ Assert.assertEquals(APPREF_1, appRefRoundTrip);
+ }
+} \ No newline at end of file
diff --git a/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/TestIds.java b/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/TestIds.java
new file mode 100644
index 00000000000..e9d8b498f32
--- /dev/null
+++ b/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/TestIds.java
@@ -0,0 +1,24 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.orchestrator;
+
+import com.yahoo.vespa.applicationmodel.ApplicationInstanceId;
+import com.yahoo.vespa.applicationmodel.ApplicationInstanceReference;
+import com.yahoo.vespa.applicationmodel.HostName;
+import com.yahoo.vespa.applicationmodel.TenantId;
+
+/**
+ * @author tonytv
+ */
+public class TestIds {
+ public static final ApplicationInstanceReference APPLICATION_INSTANCE_REFERENCE =
+ new ApplicationInstanceReference(
+ new TenantId("test-tenant"),
+ new ApplicationInstanceId("test-application:test-environment:test-region:test-instance-key"));
+
+ public static final ApplicationInstanceReference APPLICATION_INSTANCE_REFERENCE2 =
+ new ApplicationInstanceReference(
+ new TenantId("test-tenant2"),
+ new ApplicationInstanceId("test-application2:test-environment:test-region:test-instance-key"));
+
+ public static final HostName HOST_NAME1 = new HostName("host1.test.corp.yahoo.com");
+}
diff --git a/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/TestUtil.java b/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/TestUtil.java
new file mode 100644
index 00000000000..244ddfc858d
--- /dev/null
+++ b/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/TestUtil.java
@@ -0,0 +1,37 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.orchestrator;
+
+import com.yahoo.vespa.applicationmodel.ConfigId;
+import com.yahoo.vespa.applicationmodel.ServiceCluster;
+import com.yahoo.vespa.applicationmodel.ServiceInstance;
+
+import java.util.Arrays;
+import java.util.HashSet;
+import java.util.Set;
+
+/**
+ * Utility methods for creating test setups.
+ *
+ * @author <a href="mailto:bakksjo@yahoo-inc.com">Oyvind Bakksjo</a>
+ */
+public class TestUtil {
+ @SafeVarargs
+ public static <S> Set<ServiceInstance<S>> makeServiceInstanceSet(
+ final ServiceInstance<S>... serviceInstances) {
+ return new HashSet<>(Arrays.asList(serviceInstances));
+ }
+
+ @SafeVarargs
+ public static <S> Set<ServiceCluster<S>> makeServiceClusterSet(
+ final ServiceCluster<S>... serviceClusters) {
+ return new HashSet<>(Arrays.asList(serviceClusters));
+ }
+
+ public static ConfigId storageNodeConfigId(int index) {
+ return new ConfigId("storage/storage/" + index);
+ }
+
+ public static ConfigId clusterControllerConfigId(int index) {
+ return new ConfigId("admin/cluster-controllers/" + index);
+ }
+}
diff --git a/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/VespaModelUtilTest.java b/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/VespaModelUtilTest.java
new file mode 100644
index 00000000000..ab533528cd4
--- /dev/null
+++ b/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/VespaModelUtilTest.java
@@ -0,0 +1,223 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.orchestrator;
+
+import com.yahoo.vespa.applicationmodel.ApplicationInstance;
+import com.yahoo.vespa.applicationmodel.ApplicationInstanceId;
+import com.yahoo.vespa.applicationmodel.ClusterId;
+import com.yahoo.vespa.applicationmodel.ConfigId;
+import com.yahoo.vespa.applicationmodel.HostName;
+import com.yahoo.vespa.applicationmodel.ServiceCluster;
+import com.yahoo.vespa.applicationmodel.ServiceInstance;
+import com.yahoo.vespa.applicationmodel.ServiceType;
+import com.yahoo.vespa.applicationmodel.TenantId;
+import com.yahoo.vespa.service.monitor.ServiceMonitorStatus;
+import org.junit.Test;
+
+import java.util.HashSet;
+import java.util.Optional;
+import java.util.Set;
+
+import static com.google.common.collect.Sets.newHashSet;
+import static com.yahoo.vespa.orchestrator.TestUtil.makeServiceClusterSet;
+import static com.yahoo.vespa.orchestrator.TestUtil.makeServiceInstanceSet;
+import static junit.framework.TestCase.assertFalse;
+import static junit.framework.TestCase.assertTrue;
+import static org.fest.assertions.Assertions.assertThat;
+
+/**
+ * @author hakon
+ */
+public class VespaModelUtilTest {
+ // Cluster Controller Service Cluster
+
+ private static final ClusterId CONTENT_CLUSTER_ID = new ClusterId("content-cluster-0");
+
+ public static final HostName controller0Host = new HostName("controller-0");
+
+ private static final ServiceInstance<ServiceMonitorStatus> controller0 = new ServiceInstance<>(
+ TestUtil.clusterControllerConfigId(0),
+ controller0Host,
+ ServiceMonitorStatus.UP);
+ private static final ServiceInstance<ServiceMonitorStatus> controller1 = new ServiceInstance<>(
+ TestUtil.clusterControllerConfigId(1),
+ new HostName("controller-1"),
+ ServiceMonitorStatus.UP);
+
+ private static final ServiceCluster<ServiceMonitorStatus> controllerCluster =
+ new ServiceCluster<>(
+ new ClusterId(CONTENT_CLUSTER_ID.s() + "-controller"),
+ VespaModelUtil.CLUSTER_CONTROLLER_SERVICE_TYPE,
+ makeServiceInstanceSet(controller1, controller0));
+
+ // Distributor Service Cluster
+
+ private static final ServiceInstance<ServiceMonitorStatus> distributor0 = new ServiceInstance<>(
+ new ConfigId("distributor-config-id"),
+ new HostName("distributor-0"),
+ ServiceMonitorStatus.UP);
+
+
+ private static final ServiceCluster<ServiceMonitorStatus> distributorCluster =
+ new ServiceCluster<>(
+ CONTENT_CLUSTER_ID,
+ VespaModelUtil.DISTRIBUTOR_SERVICE_TYPE,
+ makeServiceInstanceSet(distributor0));
+
+ // Storage Node Service Cluster
+
+ public static final HostName storage0Host = new HostName("storage-0");
+ private static final ServiceInstance<ServiceMonitorStatus> storage0 = new ServiceInstance<>(
+ new ConfigId("storage-config-id"),
+ storage0Host,
+ ServiceMonitorStatus.UP);
+
+ private static final ServiceCluster<ServiceMonitorStatus> storageCluster =
+ new ServiceCluster<>(
+ CONTENT_CLUSTER_ID,
+ VespaModelUtil.STORAGENODE_SERVICE_TYPE,
+ makeServiceInstanceSet(storage0));
+
+ // Secondary Distributor Service Cluster
+
+ private static final ServiceInstance<ServiceMonitorStatus> secondaryDistributor0 = new ServiceInstance<>(
+ new ConfigId("secondary-distributor-config-id"),
+ new HostName("secondary-distributor-0"),
+ ServiceMonitorStatus.UP);
+
+ private static final ClusterId SECONDARY_CONTENT_CLUSTER_ID = new ClusterId("secondary-content-cluster-0");
+ private static final ServiceCluster<ServiceMonitorStatus> secondaryDistributorCluster =
+ new ServiceCluster<>(
+ SECONDARY_CONTENT_CLUSTER_ID,
+ VespaModelUtil.DISTRIBUTOR_SERVICE_TYPE,
+ makeServiceInstanceSet(secondaryDistributor0));
+
+ // Secondary Storage Node Service Cluster
+
+ public static final HostName secondaryStorage0Host = new HostName("secondary-storage-0");
+ private static final ServiceInstance<ServiceMonitorStatus> secondaryStorage0 = new ServiceInstance<>(
+ new ConfigId("secondary-storage-config-id"),
+ secondaryStorage0Host,
+ ServiceMonitorStatus.UP);
+
+ private static final ServiceCluster<ServiceMonitorStatus> secondaryStorageCluster =
+ new ServiceCluster<>(
+ SECONDARY_CONTENT_CLUSTER_ID,
+ VespaModelUtil.STORAGENODE_SERVICE_TYPE,
+ makeServiceInstanceSet(secondaryStorage0));
+
+ // The Application Instance
+
+ public static final ApplicationInstance<ServiceMonitorStatus> application =
+ new ApplicationInstance<>(
+ new TenantId("tenant-0"),
+ new ApplicationInstanceId("application-0"),
+ makeServiceClusterSet(
+ controllerCluster,
+ distributorCluster,
+ storageCluster,
+ secondaryDistributorCluster,
+ secondaryStorageCluster));
+
+ private ServiceCluster<?> createServiceCluster(ServiceType serviceType) {
+ return new ServiceCluster<ServiceMonitorStatus>(
+ new ClusterId("cluster-id"),
+ serviceType,
+ new HashSet<>());
+ }
+
+ @Test
+ public void verifyControllerClusterIsRecognized() {
+ ServiceCluster<?> cluster = createServiceCluster(VespaModelUtil.CLUSTER_CONTROLLER_SERVICE_TYPE);
+ assertTrue(VespaModelUtil.isClusterController(cluster));
+ }
+
+ @Test
+ public void verifyNonControllerClusterIsNotRecognized() {
+ ServiceCluster<?> cluster = createServiceCluster(new ServiceType("foo"));
+ assertFalse(VespaModelUtil.isClusterController(cluster));
+ }
+
+ @Test
+ public void verifyStorageClusterIsRecognized() {
+ ServiceCluster<?> cluster = createServiceCluster(VespaModelUtil.STORAGENODE_SERVICE_TYPE);
+ assertTrue(VespaModelUtil.isStorage(cluster));
+ cluster = createServiceCluster(VespaModelUtil.STORAGENODE_SERVICE_TYPE);
+ assertTrue(VespaModelUtil.isStorage(cluster));
+ }
+
+ @Test
+ public void verifyNonStorageClusterIsNotRecognized() {
+ ServiceCluster<?> cluster = createServiceCluster(new ServiceType("foo"));
+ assertFalse(VespaModelUtil.isStorage(cluster));
+ }
+
+ @Test
+ public void verifyContentClusterIsRecognized() {
+ ServiceCluster<?> cluster = createServiceCluster(VespaModelUtil.DISTRIBUTOR_SERVICE_TYPE);
+ assertTrue(VespaModelUtil.isContent(cluster));
+ cluster = createServiceCluster(VespaModelUtil.STORAGENODE_SERVICE_TYPE);
+ assertTrue(VespaModelUtil.isContent(cluster));
+ cluster = createServiceCluster(VespaModelUtil.SEARCHNODE_SERVICE_TYPE);
+ assertTrue(VespaModelUtil.isContent(cluster));
+ }
+
+ @Test
+ public void verifyNonContentClusterIsNotRecognized() {
+ ServiceCluster<?> cluster = createServiceCluster(new ServiceType("foo"));
+ assertFalse(VespaModelUtil.isContent(cluster));
+ }
+
+ @Test
+ public void testGettingClusterControllerInstances() {
+ Set<ServiceInstance<?>> controllers =
+ new HashSet<>(VespaModelUtil.getClusterControllerInstances(application, CONTENT_CLUSTER_ID));
+ Set<ServiceInstance<ServiceMonitorStatus>> expectedControllers = newHashSet(controller0, controller1);
+
+ assertThat(controllers).isEqualTo(expectedControllers);
+ }
+
+ @Test
+ public void testGetControllerHostName() {
+ HostName host = VespaModelUtil.getControllerHostName(application, CONTENT_CLUSTER_ID);
+ assertThat(host).isEqualTo(controller0Host);
+ }
+
+ @Test
+ public void testGetContentClusterName() {
+ ClusterId contentClusterName = VespaModelUtil.getContentClusterName(application, distributor0.hostName());
+ assertThat(CONTENT_CLUSTER_ID).isEqualTo(contentClusterName);
+ }
+
+ @Test
+ public void testGetContentClusterNameForSecondaryContentCluster() {
+ ClusterId contentClusterName = VespaModelUtil.getContentClusterName(application, secondaryDistributor0.hostName());
+ assertThat(SECONDARY_CONTENT_CLUSTER_ID).isEqualTo(contentClusterName);
+ }
+
+ @Test
+ public void testGetStorageNodeAtHost() {
+ Optional<ServiceInstance<ServiceMonitorStatus>> service =
+ VespaModelUtil.getStorageNodeAtHost(application, storage0Host);
+ assertTrue(service.isPresent());
+ assertThat(service.get()).isEqualTo(storage0);
+ }
+
+ @Test
+ public void testGetStorageNodeAtHostWithUnknownHost() {
+ Optional<ServiceInstance<ServiceMonitorStatus>> service =
+ VespaModelUtil.getStorageNodeAtHost(application, new HostName("storage-1"));
+ assertFalse(service.isPresent());
+ }
+
+ @Test
+ public void testGetClusterControllerIndex() {
+ ConfigId configId = new ConfigId("admin/cluster-controllers/2");
+ assertThat(VespaModelUtil.getClusterControllerIndex(configId)).isEqualTo(2);
+ }
+
+ @Test
+ public void testGetStorageNodeIndex() {
+ ConfigId configId = TestUtil.storageNodeConfigId(3);
+ assertThat(VespaModelUtil.getStorageNodeIndex(configId)).isEqualTo(3);
+ }
+}
diff --git a/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/controller/ClusterControllerClientFactoryMock.java b/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/controller/ClusterControllerClientFactoryMock.java
new file mode 100644
index 00000000000..a682bfe8856
--- /dev/null
+++ b/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/controller/ClusterControllerClientFactoryMock.java
@@ -0,0 +1,74 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.orchestrator.controller;
+
+import com.yahoo.vespa.applicationmodel.ApplicationInstance;
+import com.yahoo.vespa.applicationmodel.ClusterId;
+import com.yahoo.vespa.applicationmodel.HostName;
+import com.yahoo.vespa.applicationmodel.ServiceInstance;
+import com.yahoo.vespa.orchestrator.DummyInstanceLookupService;
+import com.yahoo.vespa.orchestrator.VespaModelUtil;
+import com.yahoo.vespa.service.monitor.ServiceMonitorStatus;
+
+import java.io.IOException;
+import java.util.Collection;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Map;
+import java.util.Set;
+
+/**
+ * Mock implementation of ClusterControllerClient
+ * <p>
+ *
+ * @author <a href="mailto:smorgrav@yahoo-inc.com">Toby</a>
+ */
+public class ClusterControllerClientFactoryMock implements ClusterControllerClientFactory {
+ Map<String, ClusterControllerState> nodes = new HashMap<>();
+
+ public boolean isInMaintenance(ApplicationInstance<ServiceMonitorStatus> appInstance, HostName hostName) {
+ try {
+ ClusterId clusterName = VespaModelUtil.getContentClusterName(appInstance, hostName);
+ int storageNodeIndex = VespaModelUtil.getStorageNodeIndex(appInstance, hostName);
+ String globalMapKey = clusterName.s() + storageNodeIndex;
+ return nodes.getOrDefault(globalMapKey, ClusterControllerState.UP) == ClusterControllerState.MAINTENANCE;
+ } catch (Exception e) {
+ //Catch all - meant to catch cases where the node is not part of a storage cluster
+ return false;
+ }
+ }
+
+ public void setAllDummyNodesAsUp() {
+ for (ApplicationInstance<ServiceMonitorStatus> app : DummyInstanceLookupService.getApplications()) {
+ Set<HostName> hosts = DummyInstanceLookupService.getContentHosts(app.reference());
+ for (HostName host : hosts) {
+ ClusterId clusterName = VespaModelUtil.getContentClusterName(app, host);
+ int storageNodeIndex = VespaModelUtil.getStorageNodeIndex(app, host);
+ String globalMapKey = clusterName.s() + storageNodeIndex;
+ nodes.put(globalMapKey, ClusterControllerState.UP);
+ }
+ }
+ }
+
+ @Override
+ public ClusterControllerClient createClient(Collection<? extends ServiceInstance<?>> clusterControllers, String clusterName) {
+ return new ClusterControllerClient() {
+
+ @Override
+ public ClusterControllerStateResponse setNodeState(int storageNodeIndex, ClusterControllerState wantedState) throws IOException {
+ nodes.put(clusterName + storageNodeIndex, wantedState);
+ return new ClusterControllerStateResponse(true, "Yes");
+ }
+
+ @Override
+ public ClusterControllerStateResponse setApplicationState(ClusterControllerState wantedState) throws IOException {
+ Set<String> keyCopy = new HashSet<>(nodes.keySet());
+ for (String s : keyCopy) {
+ if (s.startsWith(clusterName)) {
+ nodes.put(s, wantedState);
+ }
+ }
+ return new ClusterControllerStateResponse(true, "It works");
+ }
+ };
+ }
+}
diff --git a/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/controller/ClusterControllerClientTest.java b/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/controller/ClusterControllerClientTest.java
new file mode 100644
index 00000000000..68df4dce393
--- /dev/null
+++ b/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/controller/ClusterControllerClientTest.java
@@ -0,0 +1,38 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.orchestrator.controller;
+
+import com.yahoo.vespa.jaxrs.client.JaxRsStrategy;
+import com.yahoo.vespa.jaxrs.client.LocalPassThroughJaxRsStrategy;
+import org.junit.Test;
+
+import static org.mockito.Matchers.eq;
+import static org.mockito.Mockito.mock;
+import static org.mockito.Mockito.times;
+import static org.mockito.Mockito.verify;
+
+public class ClusterControllerClientTest {
+ private static final String CLUSTER_NAME = "clusterName";
+ private static final int STORAGE_NODE_INDEX = 0;
+
+ @Test
+ public void correctParametersArePassedThrough() throws Exception {
+ final ClusterControllerJaxRsApi clusterControllerApi = mock(ClusterControllerJaxRsApi.class);
+ final JaxRsStrategy<ClusterControllerJaxRsApi> strategyMock = new LocalPassThroughJaxRsStrategy<>(clusterControllerApi);
+ final ClusterControllerClient clusterControllerClient = new ClusterControllerClientImpl(
+ strategyMock,
+ CLUSTER_NAME);
+
+ final ClusterControllerState wantedState = ClusterControllerState.MAINTENANCE;
+
+ clusterControllerClient.setNodeState(STORAGE_NODE_INDEX, wantedState);
+
+ final ClusterControllerStateRequest expectedNodeStateRequest = new ClusterControllerStateRequest(
+ new ClusterControllerStateRequest.State(wantedState, ClusterControllerClientImpl.REQUEST_REASON),
+ ClusterControllerStateRequest.Condition.SAFE);
+ verify(clusterControllerApi, times(1))
+ .setNodeState(
+ eq(CLUSTER_NAME),
+ eq(STORAGE_NODE_INDEX),
+ eq(expectedNodeStateRequest));
+ }
+}
diff --git a/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/controller/SingleInstanceClusterControllerClientFactoryTest.java b/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/controller/SingleInstanceClusterControllerClientFactoryTest.java
new file mode 100644
index 00000000000..cc217885047
--- /dev/null
+++ b/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/controller/SingleInstanceClusterControllerClientFactoryTest.java
@@ -0,0 +1,117 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.orchestrator.controller;
+
+import com.yahoo.vespa.jaxrs.client.JaxRsClientFactory;
+import com.yahoo.vespa.orchestrator.TestUtil;
+import com.yahoo.vespa.applicationmodel.ConfigId;
+import com.yahoo.vespa.applicationmodel.HostName;
+import com.yahoo.vespa.applicationmodel.ServiceInstance;
+import com.yahoo.vespa.service.monitor.ServiceMonitorStatus;
+import org.junit.Before;
+import org.junit.Test;
+
+import java.util.Collection;
+import java.util.Collections;
+
+import static org.hamcrest.CoreMatchers.anyOf;
+import static org.hamcrest.CoreMatchers.equalTo;
+import static org.hamcrest.CoreMatchers.is;
+import static org.junit.Assert.fail;
+import static org.mockito.Matchers.any;
+import static org.mockito.Matchers.anyInt;
+import static org.mockito.Matchers.anyString;
+import static org.mockito.Matchers.argThat;
+import static org.mockito.Matchers.eq;
+import static org.mockito.Mockito.mock;
+import static org.mockito.Mockito.verify;
+import static org.mockito.Mockito.when;
+
+public class SingleInstanceClusterControllerClientFactoryTest {
+ private static final int PORT = SingleInstanceClusterControllerClientFactory.CLUSTERCONTROLLER_HARDCODED_PORT;
+ private static final String PATH = SingleInstanceClusterControllerClientFactory.CLUSTERCONTROLLER_API_PATH;
+
+ private static final HostName HOST_NAME_1 = new HostName("host1");
+ private static final HostName HOST_NAME_2 = new HostName("host2");
+ private static final HostName HOST_NAME_3 = new HostName("host3");
+
+ private final ClusterControllerJaxRsApi mockApi = mock(ClusterControllerJaxRsApi.class);
+ private final JaxRsClientFactory jaxRsClientFactory = mock(JaxRsClientFactory.class);
+ private final ClusterControllerClientFactory clientFactory
+ = new SingleInstanceClusterControllerClientFactory(jaxRsClientFactory);
+
+ @Before
+ public void setup() {
+ when(
+ jaxRsClientFactory.createClient(
+ eq(ClusterControllerJaxRsApi.class),
+ any(HostName.class),
+ anyInt(),
+ anyString()))
+ .thenReturn(mockApi);
+ }
+
+ @Test
+ public void testCreateClientWithNoClusterControllerInstances() throws Exception {
+ final Collection<ServiceInstance<ServiceMonitorStatus>> clusterControllers = Collections.emptySet();
+
+ try {
+ clientFactory.createClient(clusterControllers, "clusterName");
+ fail();
+ } catch (IllegalArgumentException e) {
+ // As expected.
+ }
+ }
+
+ @Test
+ public void testCreateClientWithSingleClusterControllerInstance() throws Exception {
+ final Collection<ServiceInstance<ServiceMonitorStatus>> clusterControllers = Collections.singleton(
+ new ServiceInstance<>(clusterControllerConfigId(1), HOST_NAME_1, ServiceMonitorStatus.UP));
+
+ clientFactory.createClient(clusterControllers, "clusterName")
+ .setNodeState(0, ClusterControllerState.MAINTENANCE);
+
+ verify(jaxRsClientFactory).createClient(
+ ClusterControllerJaxRsApi.class,
+ HOST_NAME_1,
+ PORT,
+ PATH);
+ }
+
+ @Test
+ public void testCreateClientWithTwoNonClusterControllerInstances() throws Exception {
+ final Collection<ServiceInstance<ServiceMonitorStatus>> clusterControllers = TestUtil.makeServiceInstanceSet(
+ new ServiceInstance<>(new ConfigId("not-a-cluster-controller-1"), HOST_NAME_1, ServiceMonitorStatus.UP),
+ new ServiceInstance<>(new ConfigId("not-a-cluster-controller-2"), HOST_NAME_2, ServiceMonitorStatus.UP));
+
+ try {
+ clientFactory.createClient(clusterControllers, "clusterName");
+ fail();
+ } catch (IllegalArgumentException e) {
+ // As expected.
+ }
+ }
+
+ @Test
+ public void testCreateClientWithThreeClusterControllerInstances() throws Exception {
+ final Collection<ServiceInstance<ServiceMonitorStatus>> clusterControllers = TestUtil.makeServiceInstanceSet(
+ new ServiceInstance<>(clusterControllerConfigId(1), HOST_NAME_1, ServiceMonitorStatus.UP),
+ new ServiceInstance<>(clusterControllerConfigId(2), HOST_NAME_2, ServiceMonitorStatus.UP),
+ new ServiceInstance<>(clusterControllerConfigId(3), HOST_NAME_3, ServiceMonitorStatus.UP));
+
+ clientFactory.createClient(clusterControllers, "clusterName")
+ .setNodeState(0, ClusterControllerState.MAINTENANCE);
+
+ verify(jaxRsClientFactory).createClient(
+ eq(ClusterControllerJaxRsApi.class),
+ argThat(is(anyOf(
+ equalTo(HOST_NAME_1),
+ equalTo(HOST_NAME_2),
+ equalTo(HOST_NAME_3)))),
+ eq(PORT),
+ eq(PATH));
+ }
+
+ private static ConfigId clusterControllerConfigId(final int index) {
+ return new ConfigId("admin/cluster-controllers/" + index);
+ }
+}
diff --git a/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/policy/HostedVespaPolicyTest.java b/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/policy/HostedVespaPolicyTest.java
new file mode 100644
index 00000000000..45d605b6d8a
--- /dev/null
+++ b/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/policy/HostedVespaPolicyTest.java
@@ -0,0 +1,738 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.orchestrator.policy;
+
+
+import com.yahoo.vespa.applicationmodel.ApplicationInstance;
+import com.yahoo.vespa.applicationmodel.ApplicationInstanceId;
+import com.yahoo.vespa.applicationmodel.ClusterId;
+import com.yahoo.vespa.applicationmodel.ConfigId;
+import com.yahoo.vespa.applicationmodel.HostName;
+import com.yahoo.vespa.applicationmodel.ServiceCluster;
+import com.yahoo.vespa.applicationmodel.ServiceInstance;
+import com.yahoo.vespa.applicationmodel.ServiceType;
+import com.yahoo.vespa.applicationmodel.TenantId;
+import com.yahoo.vespa.orchestrator.TestUtil;
+import com.yahoo.vespa.orchestrator.VespaModelUtil;
+import com.yahoo.vespa.orchestrator.controller.ClusterControllerClient;
+import com.yahoo.vespa.orchestrator.controller.ClusterControllerClientFactory;
+import com.yahoo.vespa.orchestrator.controller.ClusterControllerState;
+import com.yahoo.vespa.orchestrator.controller.ClusterControllerStateResponse;
+import com.yahoo.vespa.orchestrator.status.HostStatus;
+import com.yahoo.vespa.orchestrator.status.MutableStatusRegistry;
+import com.yahoo.vespa.service.monitor.ServiceMonitorStatus;
+import org.junit.Test;
+
+import java.util.HashSet;
+import java.util.Optional;
+import java.util.Set;
+
+import static com.yahoo.vespa.orchestrator.TestUtil.makeServiceClusterSet;
+import static com.yahoo.vespa.orchestrator.TestUtil.makeServiceInstanceSet;
+import static com.yahoo.vespa.service.monitor.ServiceMonitorStatus.DOWN;
+import static com.yahoo.vespa.service.monitor.ServiceMonitorStatus.NOT_CHECKED;
+import static com.yahoo.vespa.service.monitor.ServiceMonitorStatus.UP;
+import static org.fest.assertions.Assertions.assertThat;
+import static org.junit.Assert.fail;
+import static org.mockito.Matchers.any;
+import static org.mockito.Matchers.anyInt;
+import static org.mockito.Matchers.eq;
+import static org.mockito.Mockito.mock;
+import static org.mockito.Mockito.never;
+import static org.mockito.Mockito.times;
+import static org.mockito.Mockito.verify;
+import static org.mockito.Mockito.when;
+
+/**
+ * @author oyving
+ * @author bakksjo
+ */
+public class HostedVespaPolicyTest {
+ private static final TenantId TENANT_ID = new TenantId("tenantId");
+ private static final ApplicationInstanceId APPLICATION_INSTANCE_ID = new ApplicationInstanceId("applicationId");
+ private static final HostName HOST_NAME_1 = new HostName("host-1");
+ private static final HostName HOST_NAME_2 = new HostName("host-2");
+ private static final HostName HOST_NAME_3 = new HostName("host-3");
+ private static final HostName HOST_NAME_4 = new HostName("host-4");
+ private static final HostName HOST_NAME_5 = new HostName("host-5");
+ private static final ServiceType SERVICE_TYPE_1 = new ServiceType("service-1");
+ private static final ServiceType SERVICE_TYPE_2 = new ServiceType("service-2");
+
+ private final ClusterControllerClientFactory clusterControllerClientFactory
+ = mock(ClusterControllerClientFactory.class);
+ private final ClusterControllerClient client = mock(ClusterControllerClient.class);
+ {
+ when(clusterControllerClientFactory.createClient(any(), any())).thenReturn(client);
+ }
+
+ private final HostedVespaPolicy policy
+ = new HostedVespaPolicy(clusterControllerClientFactory);
+
+ private final MutableStatusRegistry mutablestatusRegistry = mock(MutableStatusRegistry.class);
+ {
+ when(mutablestatusRegistry.getHostStatus(any())).thenReturn(HostStatus.NO_REMARKS);
+ }
+
+ @Test
+ public void test_policy_everyone_agrees_everything_is_up() throws Exception {
+ final ApplicationInstance<ServiceMonitorStatus> applicationInstance = new AppBuilder()
+ .addCluster(new ClusterBuilder(SERVICE_TYPE_1)
+ .instance(HOST_NAME_1, UP)
+ .instance(HOST_NAME_2, UP)
+ .instance(HOST_NAME_3, UP))
+ .build();
+
+ policy.grantSuspensionRequest(
+ applicationInstance,
+ HOST_NAME_1,
+ mutablestatusRegistry
+ );
+
+ verify(mutablestatusRegistry, times(1)).setHostState(HOST_NAME_1, HostStatus.ALLOWED_TO_BE_DOWN);
+ }
+
+ private void grantWithAdminCluster(
+ ServiceMonitorStatus statusParallelInstanceOtherHost,
+ ServiceMonitorStatus statusInstanceOtherHost,
+ ServiceType serviceType,
+ boolean expectGranted) throws HostStateChangeDeniedException {
+ final ApplicationInstance<ServiceMonitorStatus> applicationInstance = new AppBuilder()
+ .addCluster(new ClusterBuilder(SERVICE_TYPE_1)
+ .instance(HOST_NAME_1, UP)
+ .instance(HOST_NAME_2, statusInstanceOtherHost)
+ .instance(HOST_NAME_3, UP))
+ .addCluster(new ClusterBuilder(VespaModelUtil.ADMIN_CLUSTER_ID, serviceType)
+ .instance(HOST_NAME_1, UP)
+ .instance(HOST_NAME_2, statusParallelInstanceOtherHost))
+ .build();
+
+ if (expectGranted) {
+ policy.grantSuspensionRequest(
+ applicationInstance,
+ HOST_NAME_1,
+ mutablestatusRegistry
+ );
+
+ verify(mutablestatusRegistry, times(1)).setHostState(HOST_NAME_1, HostStatus.ALLOWED_TO_BE_DOWN);
+ } else {
+ try {
+ policy.grantSuspensionRequest(
+ applicationInstance,
+ HOST_NAME_1,
+ mutablestatusRegistry);
+ fail();
+ } catch (HostStateChangeDeniedException e) {
+ // As expected.
+ assertThat(e.getConstraintName()).isEqualTo(HostedVespaPolicy.ENOUGH_SERVICES_UP_CONSTRAINT);
+ }
+
+ verify(mutablestatusRegistry, never()).setHostState(any(), any());
+ }
+ }
+
+ @Test
+ public void test_parallel_cluster_down_is_ok() throws Exception {
+ grantWithAdminCluster(DOWN, UP, new ServiceType("some-service-type"), true);
+ }
+
+ @Test
+ public void test_slobrok_cluster_down_is_not_ok() throws Exception {
+ grantWithAdminCluster(DOWN, UP, VespaModelUtil.SLOBROK_SERVICE_TYPE, false);
+ }
+
+ @Test
+ public void test_other_cluster_instance_down_is_not_ok() throws Exception {
+ grantWithAdminCluster(DOWN, DOWN, new ServiceType("some-service-type"), false);
+ }
+
+ @Test
+ public void test_all_up_is_ok() throws Exception {
+ grantWithAdminCluster(UP, UP, new ServiceType("some-service-type"), true);
+ }
+
+ @Test
+ public void test_policy_other_host_allowed_to_be_down() {
+ final ApplicationInstance<ServiceMonitorStatus> applicationInstance = new AppBuilder()
+ .addCluster(new ClusterBuilder(SERVICE_TYPE_1)
+ .instance(HOST_NAME_1, UP)
+ .instance(HOST_NAME_2, UP)
+ .instance(HOST_NAME_3, UP)
+ .instance(HOST_NAME_4, UP)
+ .instance(HOST_NAME_5, UP))
+ .build();
+
+ when(mutablestatusRegistry.getHostStatus(eq(HOST_NAME_2))).thenReturn(HostStatus.ALLOWED_TO_BE_DOWN);
+
+ try {
+ policy.grantSuspensionRequest(
+ applicationInstance,
+ HOST_NAME_3,
+ mutablestatusRegistry);
+ fail();
+ } catch (HostStateChangeDeniedException e) {
+ // As expected.
+ assertThat(e.getConstraintName()).isEqualTo(HostedVespaPolicy.ENOUGH_SERVICES_UP_CONSTRAINT);
+ assertThat(e.getServiceType()).isEqualTo(SERVICE_TYPE_1);
+ }
+
+ verify(mutablestatusRegistry, never()).setHostState(any(), any());
+ }
+
+ @Test
+ public void test_policy_this_host_allowed_to_be_down() throws Exception {
+ final ApplicationInstance<ServiceMonitorStatus> applicationInstance = new AppBuilder()
+ .addCluster(new ClusterBuilder(SERVICE_TYPE_1)
+ .instance(HOST_NAME_1, UP)
+ .instance(HOST_NAME_2, UP)
+ .instance(HOST_NAME_3, UP)
+ .instance(HOST_NAME_4, UP)
+ .instance(HOST_NAME_5, UP))
+ .build();
+
+ when(mutablestatusRegistry.getHostStatus(eq(HOST_NAME_3))).thenReturn(HostStatus.ALLOWED_TO_BE_DOWN);
+
+ policy.grantSuspensionRequest(applicationInstance, HOST_NAME_3, mutablestatusRegistry);
+
+ verify(mutablestatusRegistry, times(1)).setHostState(HOST_NAME_3, HostStatus.ALLOWED_TO_BE_DOWN);
+ }
+
+ @Test
+ public void from_five_to_ten_percent_suspension() throws Exception {
+ final ApplicationInstance<ServiceMonitorStatus> applicationInstance = new AppBuilder()
+ .addCluster(new ClusterBuilder(SERVICE_TYPE_1)
+ .instance(HOST_NAME_1, UP)
+ .instance(HOST_NAME_1, UP)
+ .instance(HOST_NAME_1, UP)
+ .instance(HOST_NAME_1, UP)
+ .instance(HOST_NAME_1, UP)
+ .instance(HOST_NAME_1, UP)
+ .instance(HOST_NAME_2, UP)
+ .instance(HOST_NAME_3, UP)
+ .instance(HOST_NAME_4, UP)
+ .instance(HOST_NAME_4, UP)
+ .instance(HOST_NAME_4, UP)
+ .instance(HOST_NAME_4, UP)
+ .instance(HOST_NAME_4, UP)
+ .instance(HOST_NAME_4, UP)
+ .instance(HOST_NAME_5, UP)
+ .instance(HOST_NAME_5, UP)
+ .instance(HOST_NAME_5, UP)
+ .instance(HOST_NAME_5, UP)
+ .instance(HOST_NAME_5, UP)
+ .instance(HOST_NAME_5, UP))
+ .build();
+
+ when(mutablestatusRegistry.getHostStatus(eq(HOST_NAME_2))).thenReturn(HostStatus.ALLOWED_TO_BE_DOWN);
+
+ policy.grantSuspensionRequest(
+ applicationInstance,
+ HOST_NAME_3,
+ mutablestatusRegistry);
+
+ verify(mutablestatusRegistry, times(1)).setHostState(HOST_NAME_3, HostStatus.ALLOWED_TO_BE_DOWN);
+ }
+
+ @Test
+ public void from_ten_to_fifteen_percent_suspension() {
+ final ApplicationInstance<ServiceMonitorStatus> applicationInstance = new AppBuilder()
+ .addCluster(new ClusterBuilder(SERVICE_TYPE_1)
+ .instance(HOST_NAME_1, UP)
+ .instance(HOST_NAME_1, UP)
+ .instance(HOST_NAME_1, UP)
+ .instance(HOST_NAME_1, UP)
+ .instance(HOST_NAME_1, UP)
+ .instance(HOST_NAME_1, UP)
+ .instance(HOST_NAME_2, UP)
+ .instance(HOST_NAME_2, UP)
+ .instance(HOST_NAME_3, UP)
+ .instance(HOST_NAME_4, UP)
+ .instance(HOST_NAME_4, UP)
+ .instance(HOST_NAME_4, UP)
+ .instance(HOST_NAME_4, UP)
+ .instance(HOST_NAME_4, UP)
+ .instance(HOST_NAME_5, UP)
+ .instance(HOST_NAME_5, UP)
+ .instance(HOST_NAME_5, UP)
+ .instance(HOST_NAME_5, UP)
+ .instance(HOST_NAME_5, UP)
+ .instance(HOST_NAME_5, UP))
+ .build();
+
+ when(mutablestatusRegistry.getHostStatus(eq(HOST_NAME_2))).thenReturn(HostStatus.ALLOWED_TO_BE_DOWN);
+
+ try {
+ policy.grantSuspensionRequest(
+ applicationInstance,
+ HOST_NAME_3,
+ mutablestatusRegistry);
+ fail();
+ } catch (HostStateChangeDeniedException e) {
+ // As expected.
+ assertThat(e.getConstraintName()).isEqualTo(HostedVespaPolicy.ENOUGH_SERVICES_UP_CONSTRAINT);
+ assertThat(e.getServiceType()).isEqualTo(SERVICE_TYPE_1);
+ }
+
+ verify(mutablestatusRegistry, never()).setHostState(any(), any());
+ }
+
+ @Test
+ public void from_five_to_fifteen_percent_suspension() {
+ final ApplicationInstance<ServiceMonitorStatus> applicationInstance = new AppBuilder()
+ .addCluster(new ClusterBuilder(SERVICE_TYPE_1)
+ .instance(HOST_NAME_1, UP)
+ .instance(HOST_NAME_1, UP)
+ .instance(HOST_NAME_1, UP)
+ .instance(HOST_NAME_1, UP)
+ .instance(HOST_NAME_1, UP)
+ .instance(HOST_NAME_1, UP)
+ .instance(HOST_NAME_2, UP)
+ .instance(HOST_NAME_3, UP)
+ .instance(HOST_NAME_3, UP)
+ .instance(HOST_NAME_4, UP)
+ .instance(HOST_NAME_4, UP)
+ .instance(HOST_NAME_4, UP)
+ .instance(HOST_NAME_4, UP)
+ .instance(HOST_NAME_4, UP)
+ .instance(HOST_NAME_5, UP)
+ .instance(HOST_NAME_5, UP)
+ .instance(HOST_NAME_5, UP)
+ .instance(HOST_NAME_5, UP)
+ .instance(HOST_NAME_5, UP)
+ .instance(HOST_NAME_5, UP))
+ .build();
+
+ when(mutablestatusRegistry.getHostStatus(eq(HOST_NAME_2))).thenReturn(HostStatus.ALLOWED_TO_BE_DOWN);
+
+ try {
+ policy.grantSuspensionRequest(
+ applicationInstance,
+ HOST_NAME_3,
+ mutablestatusRegistry);
+ fail();
+ } catch (HostStateChangeDeniedException e) {
+ // As expected.
+ assertThat(e.getConstraintName()).isEqualTo(HostedVespaPolicy.ENOUGH_SERVICES_UP_CONSTRAINT);
+ assertThat(e.getServiceType()).isEqualTo(SERVICE_TYPE_1);
+ }
+
+ verify(mutablestatusRegistry, never()).setHostState(any(), any());
+ }
+
+ @Test
+ public void test_policy_no_services() throws Exception {
+ final ApplicationInstance<ServiceMonitorStatus> applicationInstance = new AppBuilder().build();
+
+ HostName hostName = new HostName("test-hostname");
+ policy.grantSuspensionRequest(
+ applicationInstance,
+ hostName,
+ mutablestatusRegistry
+ );
+
+ verify(mutablestatusRegistry, times(1)).setHostState(hostName, HostStatus.ALLOWED_TO_BE_DOWN);
+ }
+
+ // The cluster name happens to be the cluster id of any of the content service clusters.
+ private static final String CONTENT_CLUSTER_NAME = "content-cluster-id";
+ private static final HostName CLUSTER_CONTROLLER_HOST = new HostName("controller-0");
+ private static final HostName STORAGE_NODE_HOST = new HostName("storage-2");
+ private static final int STORAGE_NODE_INDEX = 2;
+
+ private static final ServiceCluster<ServiceMonitorStatus> CLUSTER_CONTROLLER_SERVICE_CLUSTER = new ServiceCluster<>(
+ new ClusterId("cluster-0"),
+ VespaModelUtil.CLUSTER_CONTROLLER_SERVICE_TYPE,
+ makeServiceInstanceSet(
+ new ServiceInstance<>(
+ TestUtil.clusterControllerConfigId(0),
+ CLUSTER_CONTROLLER_HOST,
+ UP)));
+
+ private static final ServiceCluster<ServiceMonitorStatus> DISTRIBUTOR_SERVICE_CLUSTER = new ServiceCluster<>(
+ new ClusterId(CONTENT_CLUSTER_NAME),
+ VespaModelUtil.DISTRIBUTOR_SERVICE_TYPE,
+ makeServiceInstanceSet(
+ new ServiceInstance<>(
+ new ConfigId("distributor-id-1"),
+ new HostName("distributor-1"),
+ UP)));
+
+ private static final ServiceCluster<ServiceMonitorStatus> STORAGE_SERVICE_CLUSTER = new ServiceCluster<>(
+ new ClusterId(CONTENT_CLUSTER_NAME),
+ VespaModelUtil.STORAGENODE_SERVICE_TYPE,
+ makeServiceInstanceSet(
+ new ServiceInstance<>(
+ TestUtil.storageNodeConfigId(STORAGE_NODE_INDEX),
+ STORAGE_NODE_HOST,
+ UP)));
+
+ private static final ApplicationInstance<ServiceMonitorStatus> APPLICATION_INSTANCE =
+ new ApplicationInstance<>(
+ TENANT_ID,
+ APPLICATION_INSTANCE_ID,
+ makeServiceClusterSet(
+ CLUSTER_CONTROLLER_SERVICE_CLUSTER,
+ DISTRIBUTOR_SERVICE_CLUSTER,
+ STORAGE_SERVICE_CLUSTER));
+
+ // The grantSuspensionRequest() and releaseSuspensionGrant() functions happen to have similar signature,
+ // which allows us to reuse test code for testing both functions. The actual call to one of these two functions
+ // is encapsulated into the following functional interface.
+ interface PolicyFunction {
+ void grant(
+ final HostedVespaPolicy policy,
+ final ApplicationInstance<ServiceMonitorStatus> applicationInstance,
+ final HostName hostName,
+ final MutableStatusRegistry hostStatusRegistry) throws HostStateChangeDeniedException;
+ }
+
+ /**
+ * Since grantSuspensionRequest and releaseSuspensionGrant is quite similar, this test util contains the bulk
+ * of the test code used to test their common functionality.
+ *
+ * @param grantFunction Encapsulates the grant function to call
+ * @param currentHostStatus The current HostStatus of the host
+ * @param expectedNodeStateSentToClusterController The NodeState the test expects to be sent to the controller,
+ * or null if no CC request is expected to be sent.
+ * @param expectedHostStateSetOnHostStatusService The HostState the test expects to be set on the host service.
+ */
+ private void testCommonGrantFunctionality(
+ PolicyFunction grantFunction,
+ ApplicationInstance<ServiceMonitorStatus> application,
+ HostStatus currentHostStatus,
+ Optional<ClusterControllerState> expectedNodeStateSentToClusterController,
+ HostStatus expectedHostStateSetOnHostStatusService) throws Exception {
+ // There is only one service running on the host, which is a storage node.
+ // Therefore, the corresponding cluster controller will have to be contacted
+ // to ask for permission.
+
+ ClusterControllerStateResponse response = new ClusterControllerStateResponse(true, "ok");
+ // MOTD: anyInt() MUST be used for an int field, otherwise a NullPointerException is thrown!
+ // In general, special anyX() must be used for primitive fields.
+ when(client.setNodeState(anyInt(), any())).thenReturn(response);
+
+ when(mutablestatusRegistry.getHostStatus(any())).thenReturn(currentHostStatus);
+
+ // Execution phase.
+ grantFunction.grant(policy, application, STORAGE_NODE_HOST, mutablestatusRegistry);
+
+ // Verification phase.
+
+ if (expectedNodeStateSentToClusterController.isPresent()) {
+ verify(clusterControllerClientFactory, times(1))
+ .createClient(
+ CLUSTER_CONTROLLER_SERVICE_CLUSTER.serviceInstances(),
+ CONTENT_CLUSTER_NAME);
+ verify(client, times(1))
+ .setNodeState(
+ STORAGE_NODE_INDEX,
+ expectedNodeStateSentToClusterController.get());
+ } else {
+ verify(client, never()).setNodeState(anyInt(), any());
+ }
+
+ verify(mutablestatusRegistry, times(1))
+ .setHostState(
+ STORAGE_NODE_HOST,
+ expectedHostStateSetOnHostStatusService);
+ }
+
+ @Test
+ public void test_defer_to_controller() throws Exception {
+ HostStatus currentHostStatus = HostStatus.NO_REMARKS;
+ ClusterControllerState expectedNodeStateSentToClusterController = ClusterControllerState.MAINTENANCE;
+ HostStatus expectedHostStateSetOnHostStatusService = HostStatus.ALLOWED_TO_BE_DOWN;
+ testCommonGrantFunctionality(
+ HostedVespaPolicy::grantSuspensionRequest,
+ APPLICATION_INSTANCE,
+ currentHostStatus,
+ Optional.of(expectedNodeStateSentToClusterController),
+ expectedHostStateSetOnHostStatusService);
+ }
+
+ @Test
+ public void test_release_suspension_grant_gives_no_remarks() throws Exception {
+ HostStatus currentHostStatus = HostStatus.ALLOWED_TO_BE_DOWN;
+ ClusterControllerState expectedNodeStateSentToClusterController = ClusterControllerState.UP;
+ HostStatus expectedHostStateSetOnHostStatusService = HostStatus.NO_REMARKS;
+ testCommonGrantFunctionality(
+ HostedVespaPolicy::releaseSuspensionGrant,
+ APPLICATION_INSTANCE,
+ currentHostStatus,
+ Optional.of(expectedNodeStateSentToClusterController),
+ expectedHostStateSetOnHostStatusService);
+ }
+
+ @Test
+ public void okToSuspendHostWithNoConfiguredServices() throws Exception {
+ final ApplicationInstance<ServiceMonitorStatus> applicationInstance = new AppBuilder()
+ .addCluster(new ClusterBuilder(SERVICE_TYPE_1)
+ .instance(HOST_NAME_1, DOWN)
+ .instance(HOST_NAME_2, DOWN))
+ .addCluster(new ClusterBuilder(SERVICE_TYPE_2)
+ .instance(HOST_NAME_4, DOWN)
+ .instance(HOST_NAME_5, DOWN))
+ .build();
+
+ policy.grantSuspensionRequest(applicationInstance, HOST_NAME_3, mutablestatusRegistry);
+ }
+
+ @Test
+ public void okToSuspendHostWithAllItsServicesDownEvenIfOthersAreDownToo() throws Exception {
+ final ApplicationInstance<ServiceMonitorStatus> applicationInstance = new AppBuilder()
+ .addCluster(new ClusterBuilder(SERVICE_TYPE_1)
+ .instance(HOST_NAME_1, UP)
+ .instance(HOST_NAME_2, DOWN)
+ .instance(HOST_NAME_3, DOWN))
+ .addCluster(new ClusterBuilder(SERVICE_TYPE_2)
+ .instance(HOST_NAME_3, DOWN)
+ .instance(HOST_NAME_4, DOWN)
+ .instance(HOST_NAME_5, UP))
+ .build();
+
+ policy.grantSuspensionRequest(applicationInstance, HOST_NAME_3, mutablestatusRegistry);
+ }
+
+ @Test
+ public void okToSuspendStorageNodeWhenStorageIsDown() throws Exception {
+ ServiceMonitorStatus storageNodeStatus = DOWN;
+ final ApplicationInstance<ServiceMonitorStatus> applicationInstance = new AppBuilder()
+ .addCluster(new StorageClusterBuilder()
+ // DOWN storage service => ok to suspend and no cluster controller call
+ .instance(STORAGE_NODE_HOST, DOWN, STORAGE_NODE_INDEX)
+ .instance(HOST_NAME_2, DOWN, STORAGE_NODE_INDEX + 1)
+ .instance(HOST_NAME_3, DOWN, STORAGE_NODE_INDEX + 2))
+ .addCluster(CLUSTER_CONTROLLER_SERVICE_CLUSTER)
+ .addCluster(DISTRIBUTOR_SERVICE_CLUSTER)
+ // This service has one down service on another host, which should
+ // not block us from suspension because STORAGE_NODE_HOST down too.
+ .addCluster(new ClusterBuilder(SERVICE_TYPE_2)
+ .instance(STORAGE_NODE_HOST, DOWN)
+ .instance(HOST_NAME_4, DOWN)
+ .instance(HOST_NAME_5, UP))
+ .build();
+
+ HostStatus currentHostStatus = HostStatus.NO_REMARKS;
+ Optional<ClusterControllerState> dontExpectAnyCallsToClusterController = Optional.empty();
+ testCommonGrantFunctionality(
+ HostedVespaPolicy::grantSuspensionRequest,
+ applicationInstance,
+ currentHostStatus,
+ dontExpectAnyCallsToClusterController,
+ HostStatus.ALLOWED_TO_BE_DOWN);
+ }
+
+ @Test
+ public void denySuspendOfStorageIfOthersAreDown() throws Exception {
+ // If the storage service is up, but other hosts' storage services are down,
+ // we should be denied permission to suspend. This behavior is common for
+ // storage service and non-storage service (they differ when it comes to
+ // the cluster controller).
+ ServiceMonitorStatus storageNodeStatus = UP;
+
+ final ApplicationInstance<ServiceMonitorStatus> applicationInstance = new AppBuilder()
+ .addCluster(new StorageClusterBuilder()
+ .instance(STORAGE_NODE_HOST, storageNodeStatus, STORAGE_NODE_INDEX)
+ .instance(HOST_NAME_2, DOWN, STORAGE_NODE_INDEX + 1)
+ .instance(HOST_NAME_3, DOWN, STORAGE_NODE_INDEX + 2))
+ .addCluster(CLUSTER_CONTROLLER_SERVICE_CLUSTER)
+ .addCluster(DISTRIBUTOR_SERVICE_CLUSTER)
+ .addCluster(new ClusterBuilder(SERVICE_TYPE_2)
+ .instance(STORAGE_NODE_HOST, DOWN)
+ .instance(HOST_NAME_4, DOWN)
+ .instance(HOST_NAME_5, UP))
+ .build();
+
+ when(mutablestatusRegistry.getHostStatus(any())).thenReturn(HostStatus.NO_REMARKS);
+
+ try {
+ policy.grantSuspensionRequest(applicationInstance, STORAGE_NODE_HOST, mutablestatusRegistry);
+ fail();
+ } catch (HostStateChangeDeniedException e) {
+ // As expected.
+ assertThat(e.getConstraintName()).isEqualTo(HostedVespaPolicy.ENOUGH_SERVICES_UP_CONSTRAINT);
+ assertThat(e.getServiceType()).isEqualTo(VespaModelUtil.STORAGENODE_SERVICE_TYPE);
+ }
+ }
+
+ // In this test we verify the storage service cluster suspend policy of allowing at most 1
+ // storage service to be effectively down. The normal policy (and the one used previously for storage)
+ // is to allow 10%. Therefore, the test verifies we disallow suspending 2 hosts = 5% (some random number <10%).
+ //
+ // Since the Orchestrator doesn't allow suspending the host, the Orchestrator doesn't even bother calling the
+ // Cluster Controller. The CC also has a policy of max 1, so it's just an optimization and safety guard.
+ @Test
+ public void dontBotherCallingClusterControllerIfOtherStorageNodesAreDown() throws Exception {
+ StorageClusterBuilder clusterBuilder = new StorageClusterBuilder();
+ for (int i = 0; i < 40; ++i) {
+ clusterBuilder.instance(new HostName("host-" + i), UP, i);
+ }
+ ApplicationInstance<ServiceMonitorStatus> applicationInstance =
+ new AppBuilder().addCluster(clusterBuilder).build();
+
+ HostName host_1 = new HostName("host-1");
+ when(mutablestatusRegistry.getHostStatus(eq(host_1))).thenReturn(HostStatus.NO_REMARKS);
+
+ HostName host_2 = new HostName("host-2");
+ when(mutablestatusRegistry.getHostStatus(eq(host_2))).thenReturn(HostStatus.ALLOWED_TO_BE_DOWN);
+
+ try {
+ policy.grantSuspensionRequest(applicationInstance, host_1, mutablestatusRegistry);
+ fail();
+ } catch (HostStateChangeDeniedException e) {
+ // As expected.
+ assertThat(e.getConstraintName())
+ .isEqualTo(HostedVespaPolicy.ENOUGH_SERVICES_UP_CONSTRAINT);
+ assertThat(e.getServiceType()).isEqualTo(VespaModelUtil.STORAGENODE_SERVICE_TYPE);
+ }
+
+ verify(mutablestatusRegistry, never()).setHostState(any(), any());
+ }
+
+ @Test
+ public void ownServiceInstanceDown() throws Exception {
+ final ApplicationInstance<ServiceMonitorStatus> applicationInstance = new AppBuilder()
+ .addCluster(new ClusterBuilder(SERVICE_TYPE_1)
+ .instance(HOST_NAME_1, UP)
+ .instance(HOST_NAME_2, DOWN)
+ .instance(HOST_NAME_3, DOWN))
+ .build();
+
+ policy.grantSuspensionRequest(applicationInstance, HOST_NAME_3, mutablestatusRegistry);
+ }
+
+ @Test
+ public void ownServiceInstanceDown_otherServiceIsAllNotChecked() throws Exception {
+ final ApplicationInstance<ServiceMonitorStatus> applicationInstance = new AppBuilder()
+ .addCluster(new ClusterBuilder(SERVICE_TYPE_1)
+ .instance(HOST_NAME_1, UP)
+ .instance(HOST_NAME_2, DOWN)
+ .instance(HOST_NAME_3, DOWN))
+ .addCluster(new ClusterBuilder(SERVICE_TYPE_2)
+ .instance(HOST_NAME_3, NOT_CHECKED)
+ .instance(HOST_NAME_4, NOT_CHECKED)
+ .instance(HOST_NAME_5, NOT_CHECKED))
+ .build();
+
+ policy.grantSuspensionRequest(applicationInstance, HOST_NAME_3, mutablestatusRegistry);
+ }
+
+ @Test
+ public void ownServiceInstanceDown_otherServiceIsAllNotChecked_oneHostDown() throws Exception {
+ final ApplicationInstance<ServiceMonitorStatus> applicationInstance = new AppBuilder()
+ .addCluster(new ClusterBuilder(SERVICE_TYPE_1)
+ .instance(HOST_NAME_1, UP)
+ .instance(HOST_NAME_2, DOWN)
+ .instance(HOST_NAME_3, DOWN))
+ .addCluster(new ClusterBuilder(SERVICE_TYPE_2)
+ .instance(HOST_NAME_3, NOT_CHECKED)
+ .instance(HOST_NAME_4, NOT_CHECKED)
+ .instance(HOST_NAME_5, NOT_CHECKED))
+ .build();
+
+ when(mutablestatusRegistry.getHostStatus(eq(HOST_NAME_4))).thenReturn(HostStatus.ALLOWED_TO_BE_DOWN);
+ try {
+ policy.grantSuspensionRequest(applicationInstance, HOST_NAME_3, mutablestatusRegistry);
+ fail("Should not be allowed to set " + HOST_NAME_3 + " down when " + HOST_NAME_4 + " is already down.");
+ } catch (HostStateChangeDeniedException e) {
+ // As expected.
+ assertThat(e.getConstraintName()).isEqualTo(HostedVespaPolicy.ENOUGH_SERVICES_UP_CONSTRAINT);
+ assertThat(e.getServiceType()).isEqualTo(SERVICE_TYPE_2);
+ }
+ }
+
+ @Test
+ public void ownServiceInstanceDown_otherServiceIsAllUp() throws Exception {
+ final ApplicationInstance<ServiceMonitorStatus> applicationInstance = new AppBuilder()
+ .addCluster(new ClusterBuilder(SERVICE_TYPE_1)
+ .instance(HOST_NAME_1, UP)
+ .instance(HOST_NAME_2, DOWN)
+ .instance(HOST_NAME_3, DOWN))
+ .addCluster(new ClusterBuilder(SERVICE_TYPE_2)
+ .instance(HOST_NAME_3, UP)
+ .instance(HOST_NAME_4, UP)
+ .instance(HOST_NAME_5, UP))
+ .build();
+
+ policy.grantSuspensionRequest(applicationInstance, HOST_NAME_3, mutablestatusRegistry);
+ }
+
+ @Test
+ public void hostHasTwoInstances_oneDownOneUp() throws Exception {
+ final ApplicationInstance<ServiceMonitorStatus> applicationInstance = new AppBuilder()
+ .addCluster(new ClusterBuilder(SERVICE_TYPE_1)
+ .instance(HOST_NAME_1, UP)
+ .instance(HOST_NAME_2, UP)
+ .instance(HOST_NAME_3, UP)
+ .instance(HOST_NAME_3, DOWN))
+ .build();
+
+ policy.grantSuspensionRequest(applicationInstance, HOST_NAME_3, mutablestatusRegistry);
+ }
+
+ // Helper classes for terseness.
+
+ private static class AppBuilder {
+ private final Set<ServiceCluster<ServiceMonitorStatus>> serviceClusters = new HashSet<>();
+
+ public AppBuilder addCluster(final ServiceCluster<ServiceMonitorStatus> cluster) {
+ serviceClusters.add(cluster);
+ return this;
+ }
+
+ public AppBuilder addCluster(final ClusterBuilder clusterBuilder) {
+ serviceClusters.add(clusterBuilder.build());
+ return this;
+ }
+
+ public AppBuilder addCluster(final StorageClusterBuilder clusterBuilder) {
+ serviceClusters.add(clusterBuilder.build());
+ return this;
+ }
+
+ public ApplicationInstance<ServiceMonitorStatus> build() {
+ return new ApplicationInstance<>(
+ TENANT_ID,
+ APPLICATION_INSTANCE_ID,
+ serviceClusters);
+ }
+ }
+
+ private static class ClusterBuilder {
+ private final ServiceType serviceType;
+ private final Set<ServiceInstance<ServiceMonitorStatus>> instances = new HashSet<>();
+ private final ClusterId clusterId;
+ private int instanceIndex = 0;
+
+ public ClusterBuilder(final ClusterId clusterId, final ServiceType serviceType) {
+ this.clusterId = clusterId;
+ this.serviceType = serviceType;
+ }
+
+ public ClusterBuilder(final ServiceType serviceType) {
+ this.clusterId = new ClusterId("clusterId");
+ this.serviceType = serviceType;
+ }
+
+ public ClusterBuilder instance(final HostName hostName, final ServiceMonitorStatus status) {
+ instances.add(new ServiceInstance<>(new ConfigId("configId-" + instanceIndex), hostName, status));
+ ++instanceIndex;
+ return this;
+ }
+
+ public ServiceCluster<ServiceMonitorStatus> build() {
+ return new ServiceCluster<>(clusterId, serviceType, instances);
+ }
+ }
+
+ private static class StorageClusterBuilder {
+ private final Set<ServiceInstance<ServiceMonitorStatus>> instances = new HashSet<>();
+
+ public StorageClusterBuilder instance(final HostName hostName, final ServiceMonitorStatus status, int index) {
+ instances.add(new ServiceInstance<>(TestUtil.storageNodeConfigId(index), hostName, status));
+ return this;
+ }
+
+ public ServiceCluster<ServiceMonitorStatus> build() {
+ return new ServiceCluster<>(new ClusterId(CONTENT_CLUSTER_NAME), VespaModelUtil.STORAGENODE_SERVICE_TYPE, instances);
+ }
+ }
+}
diff --git a/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/resources/ApplicationSuspensionResourceTest.java b/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/resources/ApplicationSuspensionResourceTest.java
new file mode 100644
index 00000000000..b8396a51e1e
--- /dev/null
+++ b/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/resources/ApplicationSuspensionResourceTest.java
@@ -0,0 +1,143 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.orchestrator.resources;
+
+import com.yahoo.application.Application;
+import com.yahoo.application.Networking;
+import com.yahoo.container.Container;
+import com.yahoo.jdisc.http.server.jetty.JettyHttpServer;
+import org.junit.After;
+import org.junit.Before;
+import org.junit.Ignore;
+import org.junit.Test;
+
+import javax.ws.rs.client.Client;
+import javax.ws.rs.client.ClientBuilder;
+import javax.ws.rs.client.Entity;
+import javax.ws.rs.client.WebTarget;
+import javax.ws.rs.core.GenericType;
+import javax.ws.rs.core.MediaType;
+import javax.ws.rs.core.Response;
+import java.net.URI;
+import java.nio.file.Paths;
+import java.util.Set;
+
+import static org.junit.Assert.assertEquals;
+
+/**
+ * Tests the implementation of the orchestrators ApplicationAPI.
+ *
+ * @author <a href="mailto:smorgrav@yahoo-inc.com">Toby</a>
+ */
+public class ApplicationSuspensionResourceTest {
+
+ static final String BASE_PATH = "/orchestrator/v1/suspensions/applications";
+ static final String RESOURCE_1 = "mediasearch:imagesearch:default";
+ static final String RESOURCE_2 = "test-tenant-id:application:instance";
+ static final String INVALID_RESOURCE_NAME = "something_without_colons";
+
+ Application jdiscApplication;
+ WebTarget webTarget;
+
+ @Before
+ public void setup() throws Exception {
+ jdiscApplication = Application.fromApplicationPackage(Paths.get("src/test/application"),
+ Networking.enable);
+ Client client = ClientBuilder.newClient();
+
+ JettyHttpServer serverProvider = (JettyHttpServer) Container.get().getServerProviderRegistry().allComponents().get(0);
+ String url = "http://localhost:" + serverProvider.getListenPort() + BASE_PATH;
+ webTarget = client.target(new URI(url));
+ }
+
+ @After
+ public void teardown() throws Exception {
+ jdiscApplication.close();
+ webTarget = null;
+ }
+
+ @Ignore
+ @Test
+ public void run_application_locally_for_manual_browser_testing() throws Exception {
+ System.out.println(webTarget.getUri());
+ Thread.sleep(3600 * 1000);
+ }
+
+ @Test
+ public void get_all_suspended_applications_return_empty_list_initially() throws Exception {
+ Response reply = webTarget.request().get();
+ assertEquals(200, reply.getStatus());
+ assertEquals("[]", reply.readEntity(String.class));
+ }
+
+ @Test
+ public void invalid_application_id_throws_http_400() throws Exception {
+ Response reply = webTarget.request().post(Entity.entity(INVALID_RESOURCE_NAME,MediaType.APPLICATION_JSON_TYPE));
+ assertEquals(400, reply.getStatus());
+ }
+
+ @Test
+ public void get_application_status_returns_404_for_notsuspended_and_204_for_suspended() throws Exception {
+ // Get on application that is not suspended
+ Response reply = webTarget.path(RESOURCE_1).request().get();
+ assertEquals(404, reply.getStatus());
+
+ // Post application
+ reply = webTarget.request().post(Entity.entity(RESOURCE_1,MediaType.APPLICATION_JSON_TYPE));
+ assertEquals(204, reply.getStatus());
+
+ // Get on the application that now should be in suspended
+ reply = webTarget.path(RESOURCE_1).request().get();
+ assertEquals(204, reply.getStatus());
+ }
+
+
+ @Test
+ public void delete_works_on_suspended_and_not_suspended_applications() throws Exception {
+ // Delete an application that is not suspended
+ Response reply = webTarget.path(RESOURCE_1).request().delete();
+ assertEquals(204, reply.getStatus());
+
+ // Put application in suspend
+ reply = webTarget.request().post(Entity.entity(RESOURCE_1,MediaType.APPLICATION_JSON_TYPE));
+ assertEquals(204, reply.getStatus());
+
+ // Check that it is in suspend
+ reply = webTarget.path(RESOURCE_1).request(MediaType.APPLICATION_JSON).get();
+ assertEquals(204, reply.getStatus());
+
+ // Delete it
+ reply = webTarget.path(RESOURCE_1).request().delete();
+ assertEquals(204, reply.getStatus());
+
+ // Check that it is not in suspend anymore
+ reply = webTarget.path(RESOURCE_1).request(MediaType.APPLICATION_JSON).get();
+ assertEquals(404, reply.getStatus());
+ }
+
+ @Test
+ public void list_applications_returns_the_correct_list_of_suspended_applications() throws Exception {
+ // Test that initially we have the empty set
+ Response reply = webTarget.request(MediaType.APPLICATION_JSON).get();
+ assertEquals(200, reply.getStatus());
+ assertEquals("[]", reply.readEntity(String.class));
+
+ // Add a couple of applications to maintenance
+ webTarget.request().post(Entity.entity(RESOURCE_1,MediaType.APPLICATION_JSON_TYPE));
+ webTarget.request().post(Entity.entity(RESOURCE_2,MediaType.APPLICATION_JSON_TYPE));
+ assertEquals(200, reply.getStatus());
+
+ // Test that we get them back
+ Set<String> responses = webTarget.request(MediaType.APPLICATION_JSON_TYPE)
+ .get(new GenericType<Set<String>>() {});
+ assertEquals(2, responses.size());
+
+ // Remove suspend for the first resource
+ webTarget.path(RESOURCE_1).request().delete();
+
+ // Test that we are back to the start with the empty set
+ responses = webTarget.request(MediaType.APPLICATION_JSON_TYPE)
+ .get(new GenericType<Set<String>>() {});
+ assertEquals(1, responses.size());
+ assertEquals(RESOURCE_2, responses.iterator().next());
+ }
+} \ No newline at end of file
diff --git a/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/resources/HostResourceTest.java b/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/resources/HostResourceTest.java
new file mode 100644
index 00000000000..fb046f978c7
--- /dev/null
+++ b/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/resources/HostResourceTest.java
@@ -0,0 +1,227 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.orchestrator.resources;
+
+import com.yahoo.vespa.applicationmodel.ApplicationInstance;
+import com.yahoo.vespa.applicationmodel.ApplicationInstanceId;
+import com.yahoo.vespa.applicationmodel.ApplicationInstanceReference;
+import com.yahoo.vespa.applicationmodel.HostName;
+import com.yahoo.vespa.applicationmodel.ServiceType;
+import com.yahoo.vespa.applicationmodel.TenantId;
+import com.yahoo.vespa.orchestrator.InstanceLookupService;
+import com.yahoo.vespa.orchestrator.OrchestratorImpl;
+import com.yahoo.vespa.orchestrator.controller.ClusterControllerClientFactoryMock;
+import com.yahoo.vespa.orchestrator.policy.HostStateChangeDeniedException;
+import com.yahoo.vespa.orchestrator.policy.Policy;
+import com.yahoo.vespa.orchestrator.restapi.wire.BatchHostSuspendRequest;
+import com.yahoo.vespa.orchestrator.restapi.wire.BatchOperationResult;
+import com.yahoo.vespa.orchestrator.restapi.wire.UpdateHostResponse;
+import com.yahoo.vespa.orchestrator.status.ApplicationInstanceStatus;
+import com.yahoo.vespa.orchestrator.status.HostStatus;
+import com.yahoo.vespa.orchestrator.status.MutableStatusRegistry;
+import com.yahoo.vespa.orchestrator.status.StatusService;
+import com.yahoo.vespa.service.monitor.ServiceMonitorStatus;
+import org.junit.Test;
+
+import javax.ws.rs.WebApplicationException;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.Optional;
+import java.util.Set;
+
+import static com.yahoo.vespa.orchestrator.TestUtil.makeServiceClusterSet;
+import static org.fest.assertions.Assertions.assertThat;
+import static org.fest.assertions.Fail.fail;
+import static org.mockito.Matchers.any;
+import static org.mockito.Matchers.eq;
+import static org.mockito.Mockito.mock;
+import static org.mockito.Mockito.when;
+
+public class HostResourceTest {
+ private static final int SERVICE_MONITOR_CONVERGENCE_LATENCY_SECONDS = 0;
+ private static final TenantId TENANT_ID = new TenantId("tenantId");
+ private static final ApplicationInstanceId APPLICATION_INSTANCE_ID = new ApplicationInstanceId("applicationId");
+ private static final ApplicationInstanceReference APPLICATION_INSTANCE_REFERENCE =
+ new ApplicationInstanceReference(TENANT_ID, APPLICATION_INSTANCE_ID);
+
+ private static final StatusService EVERY_HOST_IS_UP_HOST_STATUS_SERVICE = mock(StatusService.class);
+ private static final MutableStatusRegistry EVERY_HOST_IS_UP_MUTABLE_HOST_STATUS_REGISTRY = mock(MutableStatusRegistry.class);
+ static {
+ when(EVERY_HOST_IS_UP_HOST_STATUS_SERVICE.forApplicationInstance(eq(APPLICATION_INSTANCE_REFERENCE)))
+ .thenReturn(EVERY_HOST_IS_UP_MUTABLE_HOST_STATUS_REGISTRY);
+ when(EVERY_HOST_IS_UP_HOST_STATUS_SERVICE.lockApplicationInstance_forCurrentThreadOnly(eq(APPLICATION_INSTANCE_REFERENCE)))
+ .thenReturn(EVERY_HOST_IS_UP_MUTABLE_HOST_STATUS_REGISTRY);
+ when(EVERY_HOST_IS_UP_MUTABLE_HOST_STATUS_REGISTRY.getHostStatus(any()))
+ .thenReturn(HostStatus.NO_REMARKS);
+ when(EVERY_HOST_IS_UP_MUTABLE_HOST_STATUS_REGISTRY.getApplicationInstanceStatus())
+ .thenReturn(ApplicationInstanceStatus.NO_REMARKS);
+ }
+
+ private static final InstanceLookupService mockInstanceLookupService = mock(InstanceLookupService.class);
+ static {
+ when(mockInstanceLookupService.findInstanceByHost(any()))
+ .thenReturn(Optional.of(
+ new ApplicationInstance<>(
+ TENANT_ID,
+ APPLICATION_INSTANCE_ID,
+ makeServiceClusterSet())));
+ }
+
+
+ private static final InstanceLookupService alwaysEmptyInstanceLookUpService = new InstanceLookupService() {
+ @Override
+ public Optional<ApplicationInstance<ServiceMonitorStatus>> findInstanceById(
+ final ApplicationInstanceReference applicationInstanceReference) {
+ return Optional.empty();
+ }
+
+ @Override
+ public Optional<ApplicationInstance<ServiceMonitorStatus>> findInstanceByHost(final HostName hostName) {
+ return Optional.empty();
+ }
+
+ @Override
+ public Set<ApplicationInstanceReference> knownInstances() {
+ return Collections.emptySet();
+ }
+ };
+
+ private static class AlwaysAllowPolicy implements Policy {
+ @Override
+ public void grantSuspensionRequest(
+ ApplicationInstance<ServiceMonitorStatus> applicationInstance,
+ HostName hostName,
+ MutableStatusRegistry hostStatusRegistry) {
+ }
+ @Override
+ public void releaseSuspensionGrant(
+ ApplicationInstance<ServiceMonitorStatus> applicationInstance,
+ HostName hostName,
+ MutableStatusRegistry hostStatusRegistry) {
+ }
+ }
+
+ private static final OrchestratorImpl alwaysAllowOrchestrator = new OrchestratorImpl(
+ new AlwaysAllowPolicy(),
+ new ClusterControllerClientFactoryMock(),
+ EVERY_HOST_IS_UP_HOST_STATUS_SERVICE, mockInstanceLookupService,
+ SERVICE_MONITOR_CONVERGENCE_LATENCY_SECONDS
+ );
+
+ private static final OrchestratorImpl hostNotFoundOrchestrator = new OrchestratorImpl(
+ new AlwaysAllowPolicy(),
+ new ClusterControllerClientFactoryMock(),
+ EVERY_HOST_IS_UP_HOST_STATUS_SERVICE, alwaysEmptyInstanceLookUpService,
+ SERVICE_MONITOR_CONVERGENCE_LATENCY_SECONDS
+ );
+
+ @Test
+ public void returns_200_on_success() throws Exception {
+ HostResource hostResource =
+ new HostResource(alwaysAllowOrchestrator);
+
+ final String hostName = "hostname";
+
+ UpdateHostResponse response = hostResource.suspend(hostName);
+
+ assertThat(response.hostname()).isEqualTo(hostName);
+ }
+
+ @Test
+ public void returns_200_on_success_batch() throws Exception {
+ HostSuspensionResource hostSuspensionResource = new HostSuspensionResource(alwaysAllowOrchestrator);
+ BatchHostSuspendRequest request =
+ new BatchHostSuspendRequest("parentHostname", Arrays.asList("hostname1", "hostname2"));
+ BatchOperationResult response = hostSuspensionResource.suspendAll(request);
+ assertThat(response.success());
+ }
+
+ @Test
+ public void throws_404_when_host_unknown() throws Exception {
+ try {
+ HostResource hostResource =
+ new HostResource(hostNotFoundOrchestrator);
+ hostResource.suspend("hostname");
+ fail();
+ } catch (WebApplicationException w) {
+ assertThat(w.getResponse().getStatus()).isEqualTo(404);
+ }
+ }
+
+ // Note: Missing host is 404 for a single-host, but 400 for multi-host (batch).
+ // This is so because the hostname is part of the URL path for single-host, while the
+ // hostnames are part of the request body for multi-host.
+ @Test
+ public void throws_400_when_host_unknown_for_batch() throws Exception {
+ try {
+ HostSuspensionResource hostSuspensionResource = new HostSuspensionResource(hostNotFoundOrchestrator);
+ BatchHostSuspendRequest request =
+ new BatchHostSuspendRequest("parentHostname", Arrays.asList("hostname1", "hostname2"));
+ hostSuspensionResource.suspendAll(request);
+ fail();
+ } catch (WebApplicationException w) {
+ assertThat(w.getResponse().getStatus()).isEqualTo(400);
+ }
+ }
+
+ private static class AlwaysFailPolicy implements Policy {
+ @Override
+ public void grantSuspensionRequest(
+ ApplicationInstance<ServiceMonitorStatus> applicationInstance,
+ HostName hostName,
+ MutableStatusRegistry hostStatusRegistry) throws HostStateChangeDeniedException {
+ doThrow();
+ }
+ @Override
+ public void releaseSuspensionGrant(
+ ApplicationInstance<ServiceMonitorStatus> applicationInstance,
+ HostName hostName,
+ MutableStatusRegistry hostStatusRegistry) throws HostStateChangeDeniedException {
+ doThrow();
+ }
+
+ private static void doThrow() throws HostStateChangeDeniedException {
+ throw new HostStateChangeDeniedException(
+ new HostName("some-host"),
+ "impossible-policy",
+ new ServiceType("silly-service"),
+ "This policy rejects all requests");
+ }
+ }
+
+ @Test
+ public void throws_409_when_request_rejected_by_policies() throws Exception {
+ final OrchestratorImpl alwaysRejectResolver = new OrchestratorImpl(
+ new AlwaysFailPolicy(),
+ new ClusterControllerClientFactoryMock(),
+ EVERY_HOST_IS_UP_HOST_STATUS_SERVICE,mockInstanceLookupService,
+ SERVICE_MONITOR_CONVERGENCE_LATENCY_SECONDS);
+
+ try {
+ HostResource hostResource = new HostResource(alwaysRejectResolver);
+ hostResource.suspend("hostname");
+ fail();
+ } catch (WebApplicationException w) {
+ assertThat(w.getResponse().getStatus()).isEqualTo(409);
+ }
+ }
+
+ @Test
+ public void throws_409_when_request_rejected_by_policies_for_batch() throws Exception {
+ final OrchestratorImpl alwaysRejectResolver = new OrchestratorImpl(
+ new AlwaysFailPolicy(),
+ new ClusterControllerClientFactoryMock(),
+ EVERY_HOST_IS_UP_HOST_STATUS_SERVICE,
+ mockInstanceLookupService,
+ SERVICE_MONITOR_CONVERGENCE_LATENCY_SECONDS);
+
+ try {
+ HostSuspensionResource hostSuspensionResource = new HostSuspensionResource(alwaysRejectResolver);
+ BatchHostSuspendRequest request =
+ new BatchHostSuspendRequest("parentHostname", Arrays.asList("hostname1", "hostname2"));
+ hostSuspensionResource.suspendAll(request);
+ fail();
+ } catch (WebApplicationException w) {
+ assertThat(w.getResponse().getStatus()).isEqualTo(409);
+ }
+ }
+}
diff --git a/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/status/ZookeeperStatusServiceTest.java b/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/status/ZookeeperStatusServiceTest.java
new file mode 100644
index 00000000000..ac73e264477
--- /dev/null
+++ b/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/status/ZookeeperStatusServiceTest.java
@@ -0,0 +1,323 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.orchestrator.status;
+
+import com.yahoo.log.LogLevel;
+import com.yahoo.vespa.applicationmodel.ApplicationInstanceReference;
+import com.yahoo.vespa.orchestrator.TestIds;
+import org.apache.commons.lang.exception.ExceptionUtils;
+import org.apache.curator.SessionFailRetryLoop.SessionFailedException;
+import org.apache.curator.framework.CuratorFramework;
+import org.apache.curator.framework.CuratorFrameworkFactory;
+import org.apache.curator.retry.ExponentialBackoffRetry;
+import org.apache.curator.test.KillSession;
+import org.apache.curator.test.TestingServer;
+import org.hamcrest.Description;
+import org.hamcrest.Matcher;
+import org.hamcrest.TypeSafeMatcher;
+import org.junit.After;
+import org.junit.Before;
+import org.junit.Test;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.List;
+import java.util.Optional;
+import java.util.Set;
+import java.util.concurrent.CompletableFuture;
+import java.util.concurrent.ExecutionException;
+import java.util.concurrent.Future;
+import java.util.concurrent.TimeUnit;
+import java.util.concurrent.TimeoutException;
+import java.util.logging.Logger;
+
+import static org.hamcrest.core.Is.is;
+import static org.hamcrest.core.IsCollectionContaining.hasItem;
+import static org.junit.Assert.assertThat;
+import static org.junit.Assert.fail;
+
+public class ZookeeperStatusServiceTest {
+ private TestingServer testingServer;
+ private ZookeeperStatusService zookeeperStatusService;
+ private CuratorFramework curatorFramework;
+
+ @Before
+ public void setUp() throws Exception {
+ Logger.getLogger("").setLevel(LogLevel.WARNING);
+
+ testingServer = new TestingServer();
+ curatorFramework = createConnectedCuratorFramework(testingServer);
+ zookeeperStatusService = new ZookeeperStatusService(curatorFramework);
+ }
+
+ private static CuratorFramework createConnectedCuratorFramework(TestingServer server) throws InterruptedException {
+ CuratorFramework curatorFramework = CuratorFrameworkFactory.builder()
+ .connectString(server.getConnectString())
+ .retryPolicy(new ExponentialBackoffRetry(1000, 3))
+ .build();
+
+ curatorFramework.start();
+ curatorFramework.blockUntilConnected(1, TimeUnit.MINUTES);
+ return curatorFramework;
+ }
+
+ @After
+ public void tearDown() throws Exception {
+ if (curatorFramework != null) { //teardown is called even if setUp fails.
+ curatorFramework.close();
+ }
+ if (testingServer != null) {
+ testingServer.close();
+ }
+ }
+
+ @Test
+ public void host_state_for_unknown_hosts_is_no_remarks() {
+ assertThat(
+ zookeeperStatusService.forApplicationInstance(TestIds.APPLICATION_INSTANCE_REFERENCE)
+ .getHostStatus(TestIds.HOST_NAME1),
+ is(HostStatus.NO_REMARKS));
+ }
+
+ @Test
+ public void setting_host_state_is_idempotent() {
+ try (MutableStatusRegistry statusRegistry = zookeeperStatusService.lockApplicationInstance_forCurrentThreadOnly(
+ TestIds.APPLICATION_INSTANCE_REFERENCE)) {
+
+ //shuffling to catch "clean database" failures for all cases.
+ for (HostStatus hostStatus: shuffledList(HostStatus.values())) {
+ doTimes(2, () -> {
+ statusRegistry.setHostState(
+ TestIds.HOST_NAME1,
+ hostStatus);
+
+ assertThat(statusRegistry.getHostStatus(
+ TestIds.HOST_NAME1),
+ is(hostStatus));
+ });
+ }
+ }
+ }
+
+ @Test
+ public void locks_are_exclusive() throws Exception {
+ try (CuratorFramework curatorFramework2 = createConnectedCuratorFramework(testingServer)) {
+ ZookeeperStatusService zookeeperStatusService2 = new ZookeeperStatusService(curatorFramework2);
+
+ final CompletableFuture<Void> lockedSuccessfullyFuture;
+ try (MutableStatusRegistry statusRegistry = zookeeperStatusService.lockApplicationInstance_forCurrentThreadOnly(
+ TestIds.APPLICATION_INSTANCE_REFERENCE)) {
+
+ lockedSuccessfullyFuture = CompletableFuture.runAsync(() -> {
+ try (MutableStatusRegistry statusRegistry2 = zookeeperStatusService2
+ .lockApplicationInstance_forCurrentThreadOnly(TestIds.APPLICATION_INSTANCE_REFERENCE))
+ {
+ }
+ });
+
+ try {
+ lockedSuccessfullyFuture.get(3, TimeUnit.SECONDS);
+ fail("Both zookeeper host status services locked simultaneously for the same application instance");
+ } catch (TimeoutException ignored) {
+ }
+ }
+
+ lockedSuccessfullyFuture.get(1, TimeUnit.MINUTES);
+ }
+ }
+
+ @Test
+ public void session_expiry_when_holding_lock_causes_operations_to_fail() throws Exception {
+ try (MutableStatusRegistry statusRegistry = zookeeperStatusService.lockApplicationInstance_forCurrentThreadOnly(
+ TestIds.APPLICATION_INSTANCE_REFERENCE)) {
+
+ KillSession.kill(curatorFramework.getZookeeperClient().getZooKeeper(), testingServer.getConnectString());
+
+ assertSessionFailed(() ->
+ statusRegistry.setHostState(
+ TestIds.HOST_NAME1,
+ HostStatus.ALLOWED_TO_BE_DOWN));
+
+
+ assertSessionFailed(() ->
+ statusRegistry.getHostStatus(
+ TestIds.HOST_NAME1));
+
+ }
+ }
+
+ @Test
+ public void failing_to_get_lock_closes_SessionFailRetryLoop() throws Exception {
+ try (CuratorFramework curatorFramework2 = createConnectedCuratorFramework(testingServer)) {
+ ZookeeperStatusService zookeeperStatusService2 = new ZookeeperStatusService(curatorFramework2);
+
+ try (MutableStatusRegistry statusRegistry = zookeeperStatusService.lockApplicationInstance_forCurrentThreadOnly(
+ TestIds.APPLICATION_INSTANCE_REFERENCE)) {
+
+ //must run in separate thread, since having 2 locks in the same thread fails
+ CompletableFuture<Void> resultOfZkOperationAfterLockFailure = CompletableFuture.runAsync(() -> {
+ try {
+ zookeeperStatusService2.lockApplicationInstance_forCurrentThreadOnly(
+ TestIds.APPLICATION_INSTANCE_REFERENCE,
+ 1, TimeUnit.SECONDS);
+ fail("Both zookeeper host status services locked simultaneously for the same application instance");
+ } catch (RuntimeException e) {
+ }
+
+ killSession(curatorFramework2, testingServer);
+
+ //Throws SessionFailedException if the SessionFailRetryLoop has not been closed.
+ zookeeperStatusService2.forApplicationInstance(TestIds.APPLICATION_INSTANCE_REFERENCE)
+ .getHostStatus(TestIds.HOST_NAME1);
+ });
+
+ assertThat(resultOfZkOperationAfterLockFailure, notHoldsException());
+ }
+ }
+ }
+
+ //IsNot does not delegate to matcher.describeMismatch. See the related issue
+ //https://code.google.com/p/hamcrest/issues/detail?id=107 Confusing failure description when using negation
+ //Creating not(holdsException) directly instead.
+ private Matcher<Future<?>> notHoldsException() {
+ return new TypeSafeMatcher<Future<?>>() {
+ @Override
+ protected boolean matchesSafely(Future<?> item) {
+ return !getException(item).isPresent();
+ }
+
+ private Optional<Throwable> getException(Future<?> item) {
+ try {
+ item.get();
+ return Optional.empty();
+ } catch (ExecutionException e) {
+ return Optional.of(e.getCause());
+ } catch (InterruptedException e) {
+ return Optional.of(e);
+ }
+ }
+
+ @Override
+ public void describeTo(Description description) {
+ description.appendText("notHoldsException()");
+ }
+
+ @Override
+ protected void describeMismatchSafely(Future<?> item, Description mismatchDescription) {
+ getException(item).ifPresent( throwable ->
+ mismatchDescription
+ .appendText("Got exception: ")
+ .appendText(ExceptionUtils.getMessage(throwable))
+ .appendText(ExceptionUtils.getFullStackTrace(throwable)));
+ }
+ };
+ }
+
+ private static void killSession(CuratorFramework curatorFramework, TestingServer testingServer) {
+ try {
+ KillSession.kill(curatorFramework.getZookeeperClient().getZooKeeper(), testingServer.getConnectString());
+ } catch (Exception e) {
+ throw new RuntimeException("Failed killing session. ", e);
+ }
+ }
+
+ /**
+ * This requirement is due to limitations in SessionFailRetryLoop
+ */
+ @Test(expected = AssertionError.class)
+ public void multiple_locks_in_a_single_thread_gives_error() throws InterruptedException {
+ try (CuratorFramework curatorFramework2 = createConnectedCuratorFramework(testingServer)) {
+ ZookeeperStatusService zookeeperStatusService2 = new ZookeeperStatusService(curatorFramework2);
+
+ try (MutableStatusRegistry statusRegistry1 = zookeeperStatusService
+ .lockApplicationInstance_forCurrentThreadOnly(TestIds.APPLICATION_INSTANCE_REFERENCE);
+ MutableStatusRegistry statusRegistry2 = zookeeperStatusService2
+ .lockApplicationInstance_forCurrentThreadOnly(TestIds.APPLICATION_INSTANCE_REFERENCE2))
+ {
+ }
+ }
+ }
+
+ @Test
+ public void suspend_and_resume_application_works_and_is_symmetric() {
+
+ // Initial state is NO_REMARK
+ assertThat(
+ zookeeperStatusService
+ .forApplicationInstance(TestIds.APPLICATION_INSTANCE_REFERENCE)
+ .getApplicationInstanceStatus(),
+ is(ApplicationInstanceStatus.NO_REMARKS));
+
+ // Suspend
+ try (MutableStatusRegistry statusRegistry = zookeeperStatusService.lockApplicationInstance_forCurrentThreadOnly(
+ TestIds.APPLICATION_INSTANCE_REFERENCE)) {
+ statusRegistry.setApplicationInstanceStatus(ApplicationInstanceStatus.ALLOWED_TO_BE_DOWN);
+ }
+
+ assertThat(
+ zookeeperStatusService
+ .forApplicationInstance(TestIds.APPLICATION_INSTANCE_REFERENCE)
+ .getApplicationInstanceStatus(),
+ is(ApplicationInstanceStatus.ALLOWED_TO_BE_DOWN));
+
+ // Resume
+ try (MutableStatusRegistry statusRegistry = zookeeperStatusService.lockApplicationInstance_forCurrentThreadOnly(
+ TestIds.APPLICATION_INSTANCE_REFERENCE)) {
+ statusRegistry.setApplicationInstanceStatus(ApplicationInstanceStatus.NO_REMARKS);
+ }
+
+ assertThat(
+ zookeeperStatusService
+ .forApplicationInstance(TestIds.APPLICATION_INSTANCE_REFERENCE)
+ .getApplicationInstanceStatus(),
+ is(ApplicationInstanceStatus.NO_REMARKS));
+ }
+
+ @Test
+ public void suspending_two_applications_returns_two_applications() {
+ Set<ApplicationInstanceReference> suspendedApps
+ = zookeeperStatusService.getAllSuspendedApplications();
+ assertThat(suspendedApps.size(), is(0));
+
+ try (MutableStatusRegistry statusRegistry = zookeeperStatusService.lockApplicationInstance_forCurrentThreadOnly(
+ TestIds.APPLICATION_INSTANCE_REFERENCE)) {
+ statusRegistry.setApplicationInstanceStatus(ApplicationInstanceStatus.ALLOWED_TO_BE_DOWN);
+ }
+
+ try (MutableStatusRegistry statusRegistry = zookeeperStatusService.lockApplicationInstance_forCurrentThreadOnly(
+ TestIds.APPLICATION_INSTANCE_REFERENCE2)) {
+ statusRegistry.setApplicationInstanceStatus(ApplicationInstanceStatus.ALLOWED_TO_BE_DOWN);
+ }
+
+ suspendedApps = zookeeperStatusService.getAllSuspendedApplications();
+ assertThat(suspendedApps.size(), is(2));
+ assertThat(suspendedApps, hasItem(TestIds.APPLICATION_INSTANCE_REFERENCE));
+ assertThat(suspendedApps, hasItem(TestIds.APPLICATION_INSTANCE_REFERENCE2));
+ }
+
+ private static void assertSessionFailed(Runnable statusServiceOperations) {
+ try {
+ statusServiceOperations.run();
+ fail("Expected session expired exception");
+ } catch (RuntimeException e) {
+ if (!(e.getCause() instanceof SessionFailedException)) {
+ throw e;
+ }
+ }
+ }
+
+ //TODO: move to vespajlib
+ private static <T> List<T> shuffledList(T[] values) {
+ //new ArrayList necessary to avoid "write through" behaviour
+ List<T> list = new ArrayList<>(Arrays.asList(values));
+ Collections.shuffle(list);
+ return list;
+ }
+
+ //TODO: move to vespajlib
+ private static void doTimes(int numberOfIterations, Runnable runnable) {
+ for (int i = 0; i < numberOfIterations; i++) {
+ runnable.run();
+ }
+ }
+}