Diffstat:
 node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/orchestrator/OrchestratorImplTest.java | 4
 node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRepositoryMaintenance.java | 6
 node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/RetiredEarlyExpirer.java | 98
 node-repository/src/main/java/com/yahoo/vespa/hosted/provision/testutils/OrchestratorMock.java | 4
 node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/RetiredExpirerTest.java | 69
 node-repository/src/test/java/com/yahoo/vespa/hosted/provision/restapi/v2/responses/maintenance.json | 3
 orchestrator-restapi/src/main/java/com/yahoo/vespa/orchestrator/restapi/wire/HostStateChangeDenialReason.java | 12
 orchestrator/src/main/java/com/yahoo/vespa/orchestrator/Orchestrator.java | 6
 orchestrator/src/main/java/com/yahoo/vespa/orchestrator/OrchestratorImpl.java | 21
 orchestrator/src/main/java/com/yahoo/vespa/orchestrator/controller/ClusterControllerClient.java | 4
 orchestrator/src/main/java/com/yahoo/vespa/orchestrator/controller/ClusterControllerClientImpl.java | 4
 orchestrator/src/main/java/com/yahoo/vespa/orchestrator/controller/ClusterControllerNodeState.java (renamed from orchestrator/src/main/java/com/yahoo/vespa/orchestrator/controller/ClusterControllerState.java) | 7
 orchestrator/src/main/java/com/yahoo/vespa/orchestrator/controller/ClusterControllerStateRequest.java | 4
 orchestrator/src/main/java/com/yahoo/vespa/orchestrator/model/ApplicationApi.java | 9
 orchestrator/src/main/java/com/yahoo/vespa/orchestrator/model/ApplicationApiImpl.java | 21
 orchestrator/src/main/java/com/yahoo/vespa/orchestrator/model/StorageNode.java | 4
 orchestrator/src/main/java/com/yahoo/vespa/orchestrator/model/StorageNodeImpl.java | 8
 orchestrator/src/main/java/com/yahoo/vespa/orchestrator/policy/ClusterPolicy.java | 10
 orchestrator/src/main/java/com/yahoo/vespa/orchestrator/policy/HostStateChangeDeniedException.java | 34
 orchestrator/src/main/java/com/yahoo/vespa/orchestrator/policy/HostedVespaClusterPolicy.java | 35
 orchestrator/src/main/java/com/yahoo/vespa/orchestrator/policy/HostedVespaPolicy.java | 38
 orchestrator/src/main/java/com/yahoo/vespa/orchestrator/policy/Policy.java | 7
 orchestrator/src/main/java/com/yahoo/vespa/orchestrator/resources/HostResource.java | 15
 orchestrator/src/test/java/com/yahoo/vespa/orchestrator/OrchestratorImplTest.java | 10
 orchestrator/src/test/java/com/yahoo/vespa/orchestrator/controller/ClusterControllerClientFactoryMock.java | 10
 orchestrator/src/test/java/com/yahoo/vespa/orchestrator/controller/ClusterControllerClientTest.java | 2
 orchestrator/src/test/java/com/yahoo/vespa/orchestrator/controller/SingleInstanceClusterControllerClientFactoryTest.java | 4
 orchestrator/src/test/java/com/yahoo/vespa/orchestrator/policy/HostedVespaClusterPolicyTest.java | 113
 orchestrator/src/test/java/com/yahoo/vespa/orchestrator/policy/HostedVespaPolicyTest.java | 744
 orchestrator/src/test/java/com/yahoo/vespa/orchestrator/resources/HostResourceTest.java | 11
 30 files changed, 529 insertions(+), 788 deletions(-)
diff --git a/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/orchestrator/OrchestratorImplTest.java b/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/orchestrator/OrchestratorImplTest.java
index c618b5c4648..660b86fb4ed 100644
--- a/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/orchestrator/OrchestratorImplTest.java
+++ b/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/orchestrator/OrchestratorImplTest.java
@@ -48,7 +48,7 @@ public class OrchestratorImplTest {
OrchestratorImpl.WEB_SERVICE_PORT,
Optional.empty(),
UpdateHostResponse.class
- )).thenReturn(new UpdateHostResponse(hostName, new HostStateChangeDenialReason("hostname", "service", "fail")));
+ )).thenReturn(new UpdateHostResponse(hostName, new HostStateChangeDenialReason("hostname", "fail")));
orchestrator.suspend(hostName);
}
@@ -95,7 +95,7 @@ public class OrchestratorImplTest {
OrchestratorImpl.ORCHESTRATOR_PATH_PREFIX_HOST_API + "/" + hostName+ "/suspended",
OrchestratorImpl.WEB_SERVICE_PORT,
UpdateHostResponse.class
- )).thenReturn(new UpdateHostResponse(hostName, new HostStateChangeDenialReason("hostname", "service", "fail")));
+ )).thenReturn(new UpdateHostResponse(hostName, new HostStateChangeDenialReason("hostname", "fail")));
orchestrator.resume(hostName);
}
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRepositoryMaintenance.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRepositoryMaintenance.java
index c5aa78fefad..d35a9c158b7 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRepositoryMaintenance.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRepositoryMaintenance.java
@@ -40,6 +40,7 @@ public class NodeRepositoryMaintenance extends AbstractComponent {
private final ReservationExpirer reservationExpirer;
private final InactiveExpirer inactiveExpirer;
private final RetiredExpirer retiredExpirer;
+ private final RetiredEarlyExpirer retiredEarlyExpirer;
private final FailedExpirer failedExpirer;
private final DirtyExpirer dirtyExpirer;
private final NodeRebooter nodeRebooter;
@@ -66,6 +67,7 @@ public class NodeRepositoryMaintenance extends AbstractComponent {
zooKeeperAccessMaintainer = new ZooKeeperAccessMaintainer(nodeRepository, curator, durationFromEnv("zookeeper_access_maintenance_interval").orElse(defaults.zooKeeperAccessMaintenanceInterval), jobControl);
reservationExpirer = new ReservationExpirer(nodeRepository, clock, durationFromEnv("reservation_expiry").orElse(defaults.reservationExpiry), jobControl);
retiredExpirer = new RetiredExpirer(nodeRepository, deployer, clock, durationFromEnv("retired_expiry").orElse(defaults.retiredExpiry), jobControl);
+ retiredEarlyExpirer = new RetiredEarlyExpirer(nodeRepository, zone, durationFromEnv("retired_early_interval").orElse(defaults.retiredEarlyInterval), jobControl, deployer, orchestrator);
inactiveExpirer = new InactiveExpirer(nodeRepository, clock, durationFromEnv("inactive_expiry").orElse(defaults.inactiveExpiry), jobControl);
failedExpirer = new FailedExpirer(nodeRepository, zone, clock, durationFromEnv("failed_expiry").orElse(defaults.failedExpiry), jobControl);
dirtyExpirer = new DirtyExpirer(nodeRepository, clock, durationFromEnv("dirty_expiry").orElse(defaults.dirtyExpiry), jobControl);
@@ -91,6 +93,7 @@ public class NodeRepositoryMaintenance extends AbstractComponent {
reservationExpirer.deconstruct();
inactiveExpirer.deconstruct();
retiredExpirer.deconstruct();
+ retiredEarlyExpirer.deconstruct();
failedExpirer.deconstruct();
dirtyExpirer.deconstruct();
nodeRebooter.deconstruct();
@@ -135,6 +138,7 @@ public class NodeRepositoryMaintenance extends AbstractComponent {
private final Duration rebootInterval;
private final Duration nodeRetirerInterval;
private final Duration metricsInterval;
+ private final Duration retiredEarlyInterval;
private final NodeFailer.ThrottlePolicy throttlePolicy;
@@ -149,6 +153,7 @@ public class NodeRepositoryMaintenance extends AbstractComponent {
reservationExpiry = Duration.ofMinutes(20); // same as deployment timeout
inactiveExpiry = Duration.ofHours(4); // enough time for the application owner to discover and redeploy
retiredExpiry = Duration.ofDays(4); // enough time to migrate data
+ retiredEarlyInterval = Duration.ofMinutes(29);
failedExpiry = Duration.ofDays(4); // enough time to recover data even if it happens friday night
dirtyExpiry = Duration.ofHours(2); // enough time to clean the node
rebootInterval = Duration.ofDays(30);
@@ -165,6 +170,7 @@ public class NodeRepositoryMaintenance extends AbstractComponent {
reservationExpiry = Duration.ofMinutes(10); // Need to be long enough for deployment to be finished for all config model versions
inactiveExpiry = Duration.ofSeconds(2); // support interactive wipe start over
retiredExpiry = Duration.ofMinutes(1);
+ retiredEarlyInterval = Duration.ofMinutes(5);
failedExpiry = Duration.ofMinutes(10);
dirtyExpiry = Duration.ofMinutes(30);
rebootInterval = Duration.ofDays(30);
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/RetiredEarlyExpirer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/RetiredEarlyExpirer.java
new file mode 100644
index 00000000000..f936c1e06ba
--- /dev/null
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/RetiredEarlyExpirer.java
@@ -0,0 +1,98 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.hosted.provision.maintenance;
+
+import com.yahoo.collections.ListMap;
+import com.yahoo.config.provision.ApplicationId;
+import com.yahoo.config.provision.Deployer;
+import com.yahoo.config.provision.Deployment;
+import com.yahoo.config.provision.Environment;
+import com.yahoo.config.provision.RegionName;
+import com.yahoo.config.provision.SystemName;
+import com.yahoo.config.provision.Zone;
+import com.yahoo.vespa.applicationmodel.HostName;
+import com.yahoo.vespa.hosted.provision.Node;
+import com.yahoo.vespa.hosted.provision.NodeRepository;
+import com.yahoo.vespa.orchestrator.OrchestrationException;
+import com.yahoo.vespa.orchestrator.Orchestrator;
+
+import java.time.Duration;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+import java.util.Map;
+import java.util.Optional;
+import java.util.logging.Level;
+import java.util.stream.Collectors;
+
+public class RetiredEarlyExpirer extends Maintainer {
+ private final Deployer deployer;
+ private final Orchestrator orchestrator;
+
+ public RetiredEarlyExpirer(NodeRepository nodeRepository,
+ Zone zone,
+ Duration interval,
+ JobControl jobControl,
+ Deployer deployer,
+ Orchestrator orchestrator) {
+ super(nodeRepository, interval, jobControl);
+ this.deployer = deployer;
+ this.orchestrator = orchestrator;
+
+ List<Zone> applies = Arrays.asList(new Zone(SystemName.cd, Environment.dev, RegionName.from("cd-us-central-1")));
+ if (!applies.contains(zone)) {
+ String targetZones = applies.stream().map(Zone::toString).collect(Collectors.joining(", "));
+ log.info(RetiredEarlyExpirer.class.getName() + " only runs in " + targetZones + ", stopping.");
+ deconstruct();
+ }
+ }
+
+ @Override
+ protected void maintain() {
+ List<Node> activeNodes = nodeRepository().getNodes(Node.State.active);
+
+ ListMap<ApplicationId, Node> retiredNodesByApplication = new ListMap<>();
+ for (Node node : activeNodes) {
+ if (node.allocation().isPresent() && node.allocation().get().membership().retired()) {
+ retiredNodesByApplication.put(node.allocation().get().owner(), node);
+ }
+ }
+
+ for (Map.Entry<ApplicationId, List<Node>> entry : retiredNodesByApplication.entrySet()) {
+ ApplicationId application = entry.getKey();
+ List<Node> retiredNodes = entry.getValue();
+
+ try {
+ Optional<Deployment> deployment = deployer.deployFromLocalActive(application, Duration.ofMinutes(30));
+ if ( ! deployment.isPresent()) continue; // this will be done at another config server
+
+ List<Node> nodesToRemove = new ArrayList<>();
+ for (Node node : retiredNodes) {
+ if (nodeCanBeRemoved(node)) {
+ nodesToRemove.add(node);
+ }
+ }
+
+ nodeRepository().setRemovable(application, nodesToRemove);
+
+ deployment.get().activate();
+
+ String nodeList = nodesToRemove.stream().map(Node::hostname).collect(Collectors.joining(", "));
+ log.info("Redeployed " + application + " to deactivate retired nodes: " + nodeList);
+ } catch (RuntimeException e) {
+ String nodeList = retiredNodes.stream().map(Node::hostname).collect(Collectors.joining(", "));
+ log.log(Level.WARNING, "Exception trying to deactivate retired nodes from " + application
+ + ": " + nodeList, e);
+ }
+ }
+ }
+
+ boolean nodeCanBeRemoved(Node node) {
+ try {
+ orchestrator.acquirePermissionToRemove(new HostName(node.hostname()));
+ return true;
+ } catch (OrchestrationException e) {
+ log.info("Did not get permission to remove retired node " + node + ": " + e.getMessage());
+ return false;
+ }
+ }
+}
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/testutils/OrchestratorMock.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/testutils/OrchestratorMock.java
index b29b5ee813e..538feeb042d 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/testutils/OrchestratorMock.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/testutils/OrchestratorMock.java
@@ -7,6 +7,7 @@ import com.yahoo.vespa.orchestrator.ApplicationStateChangeDeniedException;
import com.yahoo.vespa.orchestrator.BatchHostNameNotFoundException;
import com.yahoo.vespa.orchestrator.BatchInternalErrorException;
import com.yahoo.vespa.orchestrator.HostNameNotFoundException;
+import com.yahoo.vespa.orchestrator.OrchestrationException;
import com.yahoo.vespa.orchestrator.Orchestrator;
import com.yahoo.vespa.orchestrator.model.NodeGroup;
import com.yahoo.vespa.orchestrator.policy.BatchHostStateChangeDeniedException;
@@ -62,6 +63,9 @@ public class OrchestratorMock implements Orchestrator {
}
@Override
+ public void acquirePermissionToRemove(HostName hostName) throws OrchestrationException {}
+
+ @Override
public void suspendAll(HostName parentHostname, List<HostName> hostNames) throws BatchInternalErrorException, BatchHostStateChangeDeniedException, BatchHostNameNotFoundException {
throw new UnsupportedOperationException("Not implemented");
}
diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/RetiredExpirerTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/RetiredExpirerTest.java
index f034490b3f7..9bfeccb9a5d 100644
--- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/RetiredExpirerTest.java
+++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/RetiredExpirerTest.java
@@ -9,6 +9,7 @@ import com.yahoo.config.provision.ClusterSpec;
import com.yahoo.config.provision.Environment;
import com.yahoo.config.provision.HostSpec;
import com.yahoo.config.provision.InstanceName;
+import com.yahoo.config.provision.NodeFlavors;
import com.yahoo.config.provision.NodeType;
import com.yahoo.config.provision.RegionName;
import com.yahoo.config.provision.TenantName;
@@ -20,11 +21,12 @@ import com.yahoo.vespa.curator.mock.MockCurator;
import com.yahoo.vespa.curator.transaction.CuratorTransaction;
import com.yahoo.vespa.hosted.provision.Node;
import com.yahoo.vespa.hosted.provision.NodeRepository;
-import com.yahoo.config.provision.NodeFlavors;
import com.yahoo.vespa.hosted.provision.provisioning.NodeRepositoryProvisioner;
import com.yahoo.vespa.hosted.provision.testutils.FlavorConfigBuilder;
import com.yahoo.vespa.hosted.provision.testutils.MockDeployer;
import com.yahoo.vespa.hosted.provision.testutils.MockNameResolver;
+import com.yahoo.vespa.orchestrator.OrchestrationException;
+import com.yahoo.vespa.orchestrator.Orchestrator;
import org.junit.Test;
import java.time.Duration;
@@ -35,6 +37,11 @@ import java.util.Optional;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
+import static org.mockito.Matchers.any;
+import static org.mockito.Mockito.doNothing;
+import static org.mockito.Mockito.mock;
+import static org.mockito.Mockito.times;
+import static org.mockito.Mockito.verify;
/**
* @author bratseth
@@ -58,7 +65,7 @@ public class RetiredExpirerTest {
ApplicationId applicationId = ApplicationId.from(TenantName.from("foo"), ApplicationName.from("bar"), InstanceName.from("fuz"));
// Allocate content cluster of sizes 7 -> 2 -> 3:
- // Should end up with 3 nodes in the cluster (one previously retired), and 3 retired
+ // Should end up with 3 nodes in the cluster (one previously retired), and 4 retired
ClusterSpec cluster = ClusterSpec.request(ClusterSpec.Type.content, ClusterSpec.Id.from("test"), Version.fromString("6.42"));
int wantedNodes;
activate(applicationId, cluster, wantedNodes=7, 1, provisioner);
@@ -117,6 +124,64 @@ public class RetiredExpirerTest {
assertFalse(node.allocation().get().membership().retired());
}
+ @Test
+ public void ensure_early_inactivation() throws OrchestrationException {
+ ManualClock clock = new ManualClock();
+ Zone zone = new Zone(Environment.prod, RegionName.from("us-east"));
+ NodeFlavors nodeFlavors = FlavorConfigBuilder.createDummies("default");
+ NodeRepository nodeRepository = new NodeRepository(nodeFlavors, curator, clock, zone,
+ new MockNameResolver().mockAnyLookup());
+ NodeRepositoryProvisioner provisioner = new NodeRepositoryProvisioner(nodeRepository, nodeFlavors, zone);
+
+ createReadyNodes(7, nodeRepository, nodeFlavors);
+ createHostNodes(4, nodeRepository, nodeFlavors);
+
+ ApplicationId applicationId = ApplicationId.from(TenantName.from("foo"), ApplicationName.from("bar"), InstanceName.from("fuz"));
+
+ // Allocate content cluster of sizes 7 -> 2 -> 3:
+ // Should end up with 3 nodes in the cluster (one previously retired), and 4 retired
+ ClusterSpec cluster = ClusterSpec.request(ClusterSpec.Type.content, ClusterSpec.Id.from("test"), Version.fromString("6.42"));
+ int wantedNodes;
+ activate(applicationId, cluster, wantedNodes=7, 1, provisioner);
+ activate(applicationId, cluster, wantedNodes=2, 1, provisioner);
+ activate(applicationId, cluster, wantedNodes=3, 1, provisioner);
+ assertEquals(7, nodeRepository.getNodes(applicationId, Node.State.active).size());
+ assertEquals(0, nodeRepository.getNodes(applicationId, Node.State.inactive).size());
+
+ // Cause inactivation of retired nodes
+ clock.advance(Duration.ofHours(30)); // Retire period spent
+ MockDeployer deployer =
+ new MockDeployer(provisioner,
+ Collections.singletonMap(
+ applicationId,
+ new MockDeployer.ApplicationContext(applicationId, cluster, Capacity.fromNodeCount(wantedNodes, Optional.of("default")), 1)));
+
+ Orchestrator orchestrator = mock(Orchestrator.class);
+ // Allow the 1st and 3rd retired nodes permission to inactivate
+ doNothing()
+ .doThrow(new OrchestrationException("Permission not granted 1"))
+ .doNothing()
+ .doThrow(new OrchestrationException("Permission not granted 2"))
+ .when(orchestrator).acquirePermissionToRemove(any());
+
+ new RetiredEarlyExpirer(
+ nodeRepository,
+ zone,
+ Duration.ofDays(30),
+ new JobControl(nodeRepository.database()),
+ deployer,
+ orchestrator).run();
+ assertEquals(5, nodeRepository.getNodes(applicationId, Node.State.active).size());
+ assertEquals(2, nodeRepository.getNodes(applicationId, Node.State.inactive).size());
+ assertEquals(1, deployer.redeployments);
+
+ verify(orchestrator, times(4)).acquirePermissionToRemove(any());
+
+ // inactivated nodes are not retired
+ for (Node node : nodeRepository.getNodes(applicationId, Node.State.inactive))
+ assertFalse(node.allocation().get().membership().retired());
+ }
+
private void activate(ApplicationId applicationId, ClusterSpec cluster, int nodes, int groups, NodeRepositoryProvisioner provisioner) {
List<HostSpec> hosts = provisioner.prepare(applicationId, cluster, Capacity.fromNodeCount(nodes), groups, null);
NestedTransaction transaction = new NestedTransaction().add(new CuratorTransaction(curator));
diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/restapi/v2/responses/maintenance.json b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/restapi/v2/responses/maintenance.json
index a7a2fe6b677..fea4fb8d4d2 100644
--- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/restapi/v2/responses/maintenance.json
+++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/restapi/v2/responses/maintenance.json
@@ -28,6 +28,9 @@
"name":"OperatorChangeApplicationMaintainer"
},
{
+ "name":"RetiredEarlyExpirer"
+ },
+ {
"name":"MetricsReporter"
},
{
diff --git a/orchestrator-restapi/src/main/java/com/yahoo/vespa/orchestrator/restapi/wire/HostStateChangeDenialReason.java b/orchestrator-restapi/src/main/java/com/yahoo/vespa/orchestrator/restapi/wire/HostStateChangeDenialReason.java
index 37366e6efd3..8345455d68a 100644
--- a/orchestrator-restapi/src/main/java/com/yahoo/vespa/orchestrator/restapi/wire/HostStateChangeDenialReason.java
+++ b/orchestrator-restapi/src/main/java/com/yahoo/vespa/orchestrator/restapi/wire/HostStateChangeDenialReason.java
@@ -13,20 +13,16 @@ import java.util.Objects;
public class HostStateChangeDenialReason {
public static final String FIELD_NAME_CONSTRAINT = "constraint";
- public static final String FIELD_NAME_SERVICE_TYPE = "service-type";
public static final String FIELD_NAME_MESSAGE = "message";
private final String constraintName;
- private final String serviceType;
private final String message;
@JsonCreator
public HostStateChangeDenialReason(
@JsonProperty(FIELD_NAME_CONSTRAINT) final String constraintName,
- @JsonProperty(FIELD_NAME_SERVICE_TYPE) final String serviceType,
@JsonProperty(FIELD_NAME_MESSAGE) final String message) {
this.constraintName = constraintName;
- this.serviceType = serviceType;
this.message = message;
}
@@ -35,11 +31,6 @@ public class HostStateChangeDenialReason {
return constraintName;
}
- @JsonProperty(FIELD_NAME_SERVICE_TYPE)
- public String serviceType() {
- return serviceType;
- }
-
@JsonProperty(FIELD_NAME_MESSAGE)
public String message() {
return message;
@@ -53,7 +44,6 @@ public class HostStateChangeDenialReason {
final HostStateChangeDenialReason other = (HostStateChangeDenialReason) o;
if (!Objects.equals(this.constraintName, other.constraintName)) return false;
- if (!Objects.equals(this.serviceType, other.serviceType)) return false;
if (!Objects.equals(this.message, other.message)) return false;
return true;
@@ -61,6 +51,6 @@ public class HostStateChangeDenialReason {
@Override
public int hashCode() {
- return Objects.hash(constraintName, serviceType, message);
+ return Objects.hash(constraintName, message);
}
}
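For reference, a minimal sketch (not part of this change; the class name and message value below are made up) of the reduced wire format: with the service-type field removed, a serialized denial reason carries only the "constraint" and "message" fields declared above. The constraint name reuses HostedVespaPolicy.ENOUGH_SERVICES_UP_CONSTRAINT from later in this diff.

    import com.fasterxml.jackson.databind.ObjectMapper;
    import com.yahoo.vespa.orchestrator.restapi.wire.HostStateChangeDenialReason;

    public class DenialReasonWireSketch {
        public static void main(String[] args) throws Exception {
            // Hypothetical denial reason; message text is illustrative only.
            HostStateChangeDenialReason reason = new HostStateChangeDenialReason(
                    "enough-services-up", "suspend failed: too many services already down");
            // Prints: {"constraint":"enough-services-up","message":"suspend failed: too many services already down"}
            System.out.println(new ObjectMapper().writeValueAsString(reason));
        }
    }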
diff --git a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/Orchestrator.java b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/Orchestrator.java
index e3451ad9576..1a1ffe13b37 100644
--- a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/Orchestrator.java
+++ b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/Orchestrator.java
@@ -60,6 +60,12 @@ public interface Orchestrator {
void suspend(HostName hostName) throws HostStateChangeDeniedException, HostNameNotFoundException;
/**
+ * Acquire permission to remove a node permanently from the application, or otherwise throw
+ * {@link OrchestrationException}.
+ */
+ void acquirePermissionToRemove(HostName hostName) throws OrchestrationException;
+
+ /**
* Suspend normal operations for a group of nodes in the same application.
*
* @param nodeGroup The group of nodes in an application.
diff --git a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/OrchestratorImpl.java b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/OrchestratorImpl.java
index 72b644be464..5cd5f0ab638 100644
--- a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/OrchestratorImpl.java
+++ b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/OrchestratorImpl.java
@@ -11,7 +11,7 @@ import com.yahoo.vespa.applicationmodel.HostName;
import com.yahoo.vespa.applicationmodel.ServiceCluster;
import com.yahoo.vespa.orchestrator.controller.ClusterControllerClient;
import com.yahoo.vespa.orchestrator.controller.ClusterControllerClientFactory;
-import com.yahoo.vespa.orchestrator.controller.ClusterControllerState;
+import com.yahoo.vespa.orchestrator.controller.ClusterControllerNodeState;
import com.yahoo.vespa.orchestrator.controller.ClusterControllerStateResponse;
import com.yahoo.vespa.orchestrator.model.ApplicationApi;
import com.yahoo.vespa.orchestrator.model.ApplicationApiImpl;
@@ -124,6 +124,21 @@ public class OrchestratorImpl implements Orchestrator {
suspendGroup(nodeGroup);
}
+ @Override
+ public void acquirePermissionToRemove(HostName hostName) throws OrchestrationException {
+ ApplicationInstance<ServiceMonitorStatus> appInstance = getApplicationInstance(hostName);
+ NodeGroup nodeGroup = new NodeGroup(appInstance, hostName);
+
+ try (MutableStatusRegistry statusRegistry = statusService.lockApplicationInstance_forCurrentThreadOnly(appInstance.reference())) {
+ ApplicationApi applicationApi = new ApplicationApiImpl(
+ nodeGroup,
+ statusRegistry,
+ clusterControllerClientFactory);
+
+ policy.acquirePermissionToRemove(applicationApi);
+ }
+ }
+
// Public for testing purposes
@Override
public void suspendGroup(NodeGroup nodeGroup) throws HostStateChangeDeniedException, HostNameNotFoundException {
@@ -291,7 +306,7 @@ public class OrchestratorImpl implements Orchestrator {
// If the clustercontroller throws an error the nodes will be marked as allowed to be down
// and be set back up on next resume invocation.
- setClusterStateInController(application, ClusterControllerState.MAINTENANCE);
+ setClusterStateInController(application, ClusterControllerNodeState.MAINTENANCE);
}
statusRegistry.setApplicationInstanceStatus(status);
@@ -299,7 +314,7 @@ public class OrchestratorImpl implements Orchestrator {
}
private void setClusterStateInController(ApplicationInstance<ServiceMonitorStatus> application,
- ClusterControllerState state)
+ ClusterControllerNodeState state)
throws ApplicationStateChangeDeniedException, ApplicationIdNotFoundException {
// Get all content clusters for this application
Set<ClusterId> contentClusterIds = application.serviceClusters().stream()
diff --git a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/controller/ClusterControllerClient.java b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/controller/ClusterControllerClient.java
index 9de7bfab627..b51ecfd1331 100644
--- a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/controller/ClusterControllerClient.java
+++ b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/controller/ClusterControllerClient.java
@@ -13,13 +13,13 @@ public interface ClusterControllerClient {
*
* @throws IOException if there was a problem communicating with the cluster controller
*/
- ClusterControllerStateResponse setNodeState(int storageNodeIndex, ClusterControllerState wantedState) throws IOException;
+ ClusterControllerStateResponse setNodeState(int storageNodeIndex, ClusterControllerNodeState wantedState) throws IOException;
/**
* Requests that a cluster controller sets all nodes in the cluster to the requested state.
*
* @throws IOException if there was a problem communicating with the cluster controller
*/
- ClusterControllerStateResponse setApplicationState(ClusterControllerState wantedState) throws IOException;
+ ClusterControllerStateResponse setApplicationState(ClusterControllerNodeState wantedState) throws IOException;
}
diff --git a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/controller/ClusterControllerClientImpl.java b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/controller/ClusterControllerClientImpl.java
index 01ae6dadfc7..87fb22b882c 100644
--- a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/controller/ClusterControllerClientImpl.java
+++ b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/controller/ClusterControllerClientImpl.java
@@ -29,7 +29,7 @@ public class ClusterControllerClientImpl implements ClusterControllerClient{
* @throws IOException if there was a problem communicating with the cluster controller
*/
@Override
- public ClusterControllerStateResponse setNodeState(int storageNodeIndex, ClusterControllerState wantedState) throws IOException {
+ public ClusterControllerStateResponse setNodeState(int storageNodeIndex, ClusterControllerNodeState wantedState) throws IOException {
ClusterControllerStateRequest.State state = new ClusterControllerStateRequest.State(wantedState, REQUEST_REASON);
ClusterControllerStateRequest stateRequest = new ClusterControllerStateRequest(state, ClusterControllerStateRequest.Condition.SAFE);
@@ -52,7 +52,7 @@ public class ClusterControllerClientImpl implements ClusterControllerClient{
* @throws IOException if there was a problem communicating with the cluster controller
*/
@Override
- public ClusterControllerStateResponse setApplicationState(final ClusterControllerState wantedState) throws IOException {
+ public ClusterControllerStateResponse setApplicationState(final ClusterControllerNodeState wantedState) throws IOException {
final ClusterControllerStateRequest.State state = new ClusterControllerStateRequest.State(wantedState, REQUEST_REASON);
final ClusterControllerStateRequest stateRequest = new ClusterControllerStateRequest(state, ClusterControllerStateRequest.Condition.FORCE);
diff --git a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/controller/ClusterControllerState.java b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/controller/ClusterControllerNodeState.java
index e5db0ec9795..5886a44af02 100644
--- a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/controller/ClusterControllerState.java
+++ b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/controller/ClusterControllerNodeState.java
@@ -11,13 +11,14 @@ import com.fasterxml.jackson.annotation.JsonValue;
*
* @author hakonhall
*/
-public enum ClusterControllerState {
+public enum ClusterControllerNodeState {
MAINTENANCE("maintenance"),
- UP("up");
+ UP("up"),
+ DOWN("down");
private final String wireName;
- ClusterControllerState(final String wireName) {
+ ClusterControllerNodeState(final String wireName) {
this.wireName = wireName;
}
diff --git a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/controller/ClusterControllerStateRequest.java b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/controller/ClusterControllerStateRequest.java
index 2ea489337d8..c3b4498a52d 100644
--- a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/controller/ClusterControllerStateRequest.java
+++ b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/controller/ClusterControllerStateRequest.java
@@ -50,7 +50,7 @@ public class ClusterControllerStateRequest {
public static class State {
@JsonProperty("state")
- public final ClusterControllerState state;
+ public final ClusterControllerNodeState state;
/**
* The reason the client is making the request to set the node state.
@@ -59,7 +59,7 @@ public class ClusterControllerStateRequest {
@JsonProperty("reason")
public final String reason;
- public State(ClusterControllerState state, String reason) {
+ public State(ClusterControllerNodeState state, String reason) {
this.state = state;
this.reason = reason;
}
diff --git a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/model/ApplicationApi.java b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/model/ApplicationApi.java
index 0ac602b106e..62c2bb372e3 100644
--- a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/model/ApplicationApi.java
+++ b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/model/ApplicationApi.java
@@ -2,6 +2,7 @@
package com.yahoo.vespa.orchestrator.model;
import com.yahoo.vespa.applicationmodel.HostName;
+import com.yahoo.vespa.orchestrator.status.ApplicationInstanceStatus;
import com.yahoo.vespa.orchestrator.status.HostStatus;
import java.util.List;
@@ -12,11 +13,19 @@ import java.util.List;
public interface ApplicationApi {
String applicationInfo();
+ /**
+ * The policy acts on some subset of nodes in the application.
+ */
+ NodeGroup getNodeGroup();
+
List<ClusterApi> getClusters();
+ ApplicationInstanceStatus getApplicationStatus();
+
void setHostState(HostName hostName, HostStatus status);
List<HostName> getNodesInGroupWithStatus(HostStatus status);
+ List<StorageNode> getStorageNodesInGroupInClusterOrder();
List<StorageNode> getUpStorageNodesInGroupInClusterOrder();
List<StorageNode> getStorageNodesAllowedToBeDownInGroupInReverseClusterOrder();
}
diff --git a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/model/ApplicationApiImpl.java b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/model/ApplicationApiImpl.java
index 5f84c80a051..889455044b1 100644
--- a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/model/ApplicationApiImpl.java
+++ b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/model/ApplicationApiImpl.java
@@ -6,6 +6,7 @@ import com.yahoo.vespa.applicationmodel.HostName;
import com.yahoo.vespa.applicationmodel.ServiceCluster;
import com.yahoo.vespa.applicationmodel.ServiceInstance;
import com.yahoo.vespa.orchestrator.controller.ClusterControllerClientFactory;
+import com.yahoo.vespa.orchestrator.status.ApplicationInstanceStatus;
import com.yahoo.vespa.orchestrator.status.HostStatus;
import com.yahoo.vespa.orchestrator.status.MutableStatusRegistry;
import com.yahoo.vespa.orchestrator.status.ReadOnlyStatusRegistry;
@@ -67,12 +68,23 @@ public class ApplicationApiImpl implements ApplicationApi {
@Override
public List<StorageNode> getStorageNodesAllowedToBeDownInGroupInReverseClusterOrder() {
+ return getStorageNodesInGroupInClusterOrder().stream()
+ .filter(storageNode -> getHostStatus(storageNode.hostName()) == HostStatus.ALLOWED_TO_BE_DOWN)
+ .sorted(Comparator.reverseOrder())
+ .collect(Collectors.toList());
+ }
+
+ @Override
+ public NodeGroup getNodeGroup() {
+ return nodeGroup;
+ }
+
+ @Override
+ public List<StorageNode> getStorageNodesInGroupInClusterOrder() {
return clusterInOrder.stream()
.map(ClusterApi::storageNodeInGroup)
.filter(Optional::isPresent)
.map(Optional::get)
- .filter(storageNode -> getHostStatus(storageNode.hostName()) == HostStatus.ALLOWED_TO_BE_DOWN)
- .sorted(Comparator.reverseOrder())
.collect(Collectors.toList());
}
@@ -86,6 +98,11 @@ public class ApplicationApiImpl implements ApplicationApi {
}
@Override
+ public ApplicationInstanceStatus getApplicationStatus() {
+ return hostStatusService.getApplicationInstanceStatus();
+ }
+
+ @Override
public void setHostState(HostName hostName, HostStatus status) {
hostStatusService.setHostState(hostName, status);
}
diff --git a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/model/StorageNode.java b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/model/StorageNode.java
index 1698770904f..ffd3cf5b39e 100644
--- a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/model/StorageNode.java
+++ b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/model/StorageNode.java
@@ -2,10 +2,10 @@
package com.yahoo.vespa.orchestrator.model;
import com.yahoo.vespa.applicationmodel.HostName;
-import com.yahoo.vespa.orchestrator.controller.ClusterControllerState;
+import com.yahoo.vespa.orchestrator.controller.ClusterControllerNodeState;
import com.yahoo.vespa.orchestrator.policy.HostStateChangeDeniedException;
public interface StorageNode extends Comparable<StorageNode> {
HostName hostName();
- void setNodeState(ClusterControllerState wantedState) throws HostStateChangeDeniedException;
+ void setNodeState(ClusterControllerNodeState wantedState) throws HostStateChangeDeniedException;
}
diff --git a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/model/StorageNodeImpl.java b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/model/StorageNodeImpl.java
index 1984c5cca40..879c7b8f0e8 100644
--- a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/model/StorageNodeImpl.java
+++ b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/model/StorageNodeImpl.java
@@ -9,7 +9,7 @@ import com.yahoo.vespa.applicationmodel.HostName;
import com.yahoo.vespa.applicationmodel.ServiceInstance;
import com.yahoo.vespa.orchestrator.controller.ClusterControllerClient;
import com.yahoo.vespa.orchestrator.controller.ClusterControllerClientFactory;
-import com.yahoo.vespa.orchestrator.controller.ClusterControllerState;
+import com.yahoo.vespa.orchestrator.controller.ClusterControllerNodeState;
import com.yahoo.vespa.orchestrator.controller.ClusterControllerStateResponse;
import com.yahoo.vespa.orchestrator.policy.HostStateChangeDeniedException;
import com.yahoo.vespa.orchestrator.policy.HostedVespaPolicy;
@@ -44,7 +44,7 @@ public class StorageNodeImpl implements StorageNode {
}
@Override
- public void setNodeState(ClusterControllerState wantedNodeState)
+ public void setNodeState(ClusterControllerNodeState wantedNodeState)
throws HostStateChangeDeniedException {
// The "cluster name" used by the Cluster Controller IS the cluster ID.
String clusterId = this.clusterId.s();
@@ -72,7 +72,6 @@ public class StorageNodeImpl implements StorageNode {
throw new HostStateChangeDeniedException(
hostName(),
HostedVespaPolicy.CLUSTER_CONTROLLER_AVAILABLE_CONSTRAINT,
- VespaModelUtil.CLUSTER_CONTROLLER_SERVICE_TYPE,
"Failed to communicate with cluster controllers " + clusterControllers + ": " + e,
e);
}
@@ -81,8 +80,7 @@ public class StorageNodeImpl implements StorageNode {
throw new HostStateChangeDeniedException(
hostName(),
HostedVespaPolicy.SET_NODE_STATE_CONSTRAINT,
- VespaModelUtil.CLUSTER_CONTROLLER_SERVICE_TYPE,
- "Failed to set state to " + wantedNodeState + " in controller: " + response.reason);
+ "Failed to set state to " + wantedNodeState + " in cluster controller: " + response.reason);
}
}
diff --git a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/policy/ClusterPolicy.java b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/policy/ClusterPolicy.java
index d0d7cd83e60..98062640ab0 100644
--- a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/policy/ClusterPolicy.java
+++ b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/policy/ClusterPolicy.java
@@ -7,8 +7,14 @@ public interface ClusterPolicy {
/**
* There's an implicit group of nodes known to clusterApi. This method answers whether
* it would be fine, just looking at this cluster (and disregarding Cluster Controller/storage
- * which is handled separately), whether it would be fine to allow all services on all the nodes
- * in the group to go down.
+ * which is handled separately), to allow all services on all the nodes in the group to go down.
*/
void verifyGroupGoingDownIsFine(ClusterApi clusterApi) throws HostStateChangeDeniedException;
+
+ /**
+ * There's an implicit group of nodes known to clusterApi. This method answers whether
+ * it would be fine, just looking at this cluster, to allow all services on all the nodes
+ * in the group to go down PERMANENTLY (removed from application).
+ */
+ void verifyGroupGoingDownPermanentlyIsFine(ClusterApi clusterApi) throws HostStateChangeDeniedException;
}
diff --git a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/policy/HostStateChangeDeniedException.java b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/policy/HostStateChangeDeniedException.java
index 9bd584d82af..6062fef7189 100644
--- a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/policy/HostStateChangeDeniedException.java
+++ b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/policy/HostStateChangeDeniedException.java
@@ -3,37 +3,33 @@ package com.yahoo.vespa.orchestrator.policy;
import com.yahoo.vespa.applicationmodel.HostName;
import com.yahoo.vespa.applicationmodel.ServiceType;
-import com.yahoo.vespa.orchestrator.model.NodeGroup;
import com.yahoo.vespa.orchestrator.OrchestrationException;
+import com.yahoo.vespa.orchestrator.model.NodeGroup;
+
+import java.util.Optional;
/**
* @author bakksjo
*/
public class HostStateChangeDeniedException extends OrchestrationException {
-
private final String constraintName;
- private final ServiceType serviceType;
+ private final Optional<ServiceType> serviceType;
- public HostStateChangeDeniedException(HostName hostName, String constraintName,
- ServiceType serviceType, String message) {
- this(hostName, constraintName, serviceType, message, null);
+ public HostStateChangeDeniedException(HostName hostName, String constraintName, String message) {
+ this(hostName, constraintName, message, null);
}
- public HostStateChangeDeniedException(HostName hostName, String constraintName,
- ServiceType serviceType, String message, Throwable cause) {
- this(hostName.toString(), constraintName, serviceType, message, cause);
+ public HostStateChangeDeniedException(HostName hostName, String constraintName, String message, Exception e) {
+ this(hostName.s(), constraintName, Optional.empty(), message, e);
}
- public HostStateChangeDeniedException(NodeGroup nodeGroup,
- String constraintName,
- ServiceType serviceType,
- String message) {
- this(nodeGroup.toCommaSeparatedString(), constraintName, serviceType, message, null);
+ public HostStateChangeDeniedException(NodeGroup nodeGroup, String constraintName, String message) {
+ this(nodeGroup.toCommaSeparatedString(), constraintName, Optional.empty(), message, null);
}
private HostStateChangeDeniedException(String nodes,
String constraintName,
- ServiceType serviceType,
+ Optional<ServiceType> serviceType,
String message,
Throwable cause) {
super(createMessage(nodes, constraintName, serviceType, message), cause);
@@ -43,18 +39,18 @@ public class HostStateChangeDeniedException extends OrchestrationException {
private static String createMessage(String nodes,
String constraintName,
- ServiceType serviceType,
+ Optional<ServiceType> serviceType,
String message) {
return "Changing the state of " + nodes + " would violate " + constraintName
- + " for service type " + serviceType + ": " + message;
+ + (serviceType.isPresent() ? " for service type " + serviceType.get() : "")
+ + ": " + message;
}
public String getConstraintName() {
return constraintName;
}
- public ServiceType getServiceType() {
+ public Optional<ServiceType> getServiceType() {
return serviceType;
}
-
}
diff --git a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/policy/HostedVespaClusterPolicy.java b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/policy/HostedVespaClusterPolicy.java
index eb2fb9dc1b9..a081236f4bc 100644
--- a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/policy/HostedVespaClusterPolicy.java
+++ b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/policy/HostedVespaClusterPolicy.java
@@ -7,6 +7,7 @@ import com.yahoo.vespa.orchestrator.model.ClusterApi;
import static com.yahoo.vespa.orchestrator.policy.HostedVespaPolicy.ENOUGH_SERVICES_UP_CONSTRAINT;
public class HostedVespaClusterPolicy implements ClusterPolicy {
+ @Override
public void verifyGroupGoingDownIsFine(ClusterApi clusterApi)
throws HostStateChangeDeniedException {
if (clusterApi.noServicesOutsideGroupIsDown()) {
@@ -25,8 +26,8 @@ public class HostedVespaClusterPolicy implements ClusterPolicy {
throw new HostStateChangeDeniedException(
clusterApi.getNodeGroup(),
ENOUGH_SERVICES_UP_CONSTRAINT,
- clusterApi.serviceType(),
- "Suspension percentage would increase from " + clusterApi.percentageOfServicesDown()
+ "Suspension percentage for service type " + clusterApi.serviceType()
+ + " would increase from " + clusterApi.percentageOfServicesDown()
+ "% to " + clusterApi.percentageOfServicesDownIfGroupIsAllowedToBeDown()
+ "%, over the limit of " + percentageOfServicesAllowedToBeDown + "%."
+ " These instances may be down: " + clusterApi.servicesDownAndNotInGroupDescription()
@@ -34,7 +35,35 @@ public class HostedVespaClusterPolicy implements ClusterPolicy {
+ clusterApi.nodesAllowedToBeDownNotInGroupDescription());
}
- static ConcurrentSuspensionLimitForCluster getConcurrentSuspensionLimit(ClusterApi clusterApi) {
+ @Override
+ public void verifyGroupGoingDownPermanentlyIsFine(ClusterApi clusterApi)
+ throws HostStateChangeDeniedException {
+ // This policy is similar to verifyGroupGoingDownIsFine, except that services being down in the group
+ // is no excuse to allow suspension (like it is for verifyGroupGoingDownIsFine), since if we grant
+ // suspension in this case they will permanently be down/removed.
+
+ if (clusterApi.noServicesOutsideGroupIsDown()) {
+ return;
+ }
+
+ int percentageOfServicesAllowedToBeDown = getConcurrentSuspensionLimit(clusterApi).asPercentage();
+ if (clusterApi.percentageOfServicesDownIfGroupIsAllowedToBeDown() <= percentageOfServicesAllowedToBeDown) {
+ return;
+ }
+
+ throw new HostStateChangeDeniedException(
+ clusterApi.getNodeGroup(),
+ ENOUGH_SERVICES_UP_CONSTRAINT,
+ "Down percentage for service type " + clusterApi.serviceType()
+ + " would increase to " + clusterApi.percentageOfServicesDownIfGroupIsAllowedToBeDown()
+ + "%, over the limit of " + percentageOfServicesAllowedToBeDown + "%."
+ + " These instances may be down: " + clusterApi.servicesDownAndNotInGroupDescription()
+ + " and these hosts are allowed to be down: "
+ + clusterApi.nodesAllowedToBeDownNotInGroupDescription());
+ }
+
+ // Non-private for testing purposes
+ ConcurrentSuspensionLimitForCluster getConcurrentSuspensionLimit(ClusterApi clusterApi) {
if (VespaModelUtil.ADMIN_CLUSTER_ID.equals(clusterApi.clusterId())) {
if (VespaModelUtil.SLOBROK_SERVICE_TYPE.equals(clusterApi.serviceType())) {
return ConcurrentSuspensionLimitForCluster.ONE_NODE;
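A hypothetical numeric illustration (numbers made up, accessors taken from the code above) of the permanent-removal check: unlike verifyGroupGoingDownIsFine, services already down inside the group are not an excuse, so only the projected down-percentage against the cluster's limit decides the outcome.

    // Suppose the cluster's concurrent-suspension limit is 10% and letting this
    // group go down permanently would raise the down-percentage to 20%.
    int percentageOfServicesAllowedToBeDown = 10; // getConcurrentSuspensionLimit(clusterApi).asPercentage()
    int downIfGroupAllowedToBeDown = 20;          // clusterApi.percentageOfServicesDownIfGroupIsAllowedToBeDown()
    boolean denied = downIfGroupAllowedToBeDown > percentageOfServicesAllowedToBeDown; // true -> HostStateChangeDeniedException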
diff --git a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/policy/HostedVespaPolicy.java b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/policy/HostedVespaPolicy.java
index 515a46cb1da..ac1587ce7de 100644
--- a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/policy/HostedVespaPolicy.java
+++ b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/policy/HostedVespaPolicy.java
@@ -5,12 +5,13 @@ import com.yahoo.log.LogLevel;
import com.yahoo.vespa.applicationmodel.ApplicationInstance;
import com.yahoo.vespa.applicationmodel.HostName;
import com.yahoo.vespa.orchestrator.controller.ClusterControllerClientFactory;
-import com.yahoo.vespa.orchestrator.controller.ClusterControllerState;
+import com.yahoo.vespa.orchestrator.controller.ClusterControllerNodeState;
import com.yahoo.vespa.orchestrator.model.ApplicationApi;
import com.yahoo.vespa.orchestrator.model.ApplicationApiImpl;
import com.yahoo.vespa.orchestrator.model.ClusterApi;
import com.yahoo.vespa.orchestrator.model.NodeGroup;
import com.yahoo.vespa.orchestrator.model.StorageNode;
+import com.yahoo.vespa.orchestrator.status.ApplicationInstanceStatus;
import com.yahoo.vespa.orchestrator.status.HostStatus;
import com.yahoo.vespa.orchestrator.status.MutableStatusRegistry;
import com.yahoo.vespa.service.monitor.ServiceMonitorStatus;
@@ -23,6 +24,7 @@ import java.util.logging.Logger;
public class HostedVespaPolicy implements Policy {
+ public static final String APPLICATION_SUSPENDED_CONSTRAINT = "application-suspended";
public static final String ENOUGH_SERVICES_UP_CONSTRAINT = "enough-services-up";
public static final String SET_NODE_STATE_CONSTRAINT = "controller-set-node-state";
public static final String CLUSTER_CONTROLLER_AVAILABLE_CONSTRAINT = "controller-available";
@@ -48,7 +50,7 @@ public class HostedVespaPolicy implements Policy {
// Ask Cluster Controller to set UP storage nodes in maintenance.
// These storage nodes are guaranteed to be NO_REMARKS
for (StorageNode storageNode : application.getUpStorageNodesInGroupInClusterOrder()) {
- storageNode.setNodeState(ClusterControllerState.MAINTENANCE);
+ storageNode.setNodeState(ClusterControllerNodeState.MAINTENANCE);
log.log(LogLevel.INFO, "The storage node on " + storageNode.hostName() + " has been set to MAINTENANCE");
}
@@ -63,7 +65,7 @@ public class HostedVespaPolicy implements Policy {
public void releaseSuspensionGrant(ApplicationApi application) throws HostStateChangeDeniedException {
// Always defer to Cluster Controller whether it's OK to resume storage node
for (StorageNode storageNode : application.getStorageNodesAllowedToBeDownInGroupInReverseClusterOrder()) {
- storageNode.setNodeState(ClusterControllerState.UP);
+ storageNode.setNodeState(ClusterControllerNodeState.UP);
log.log(LogLevel.INFO, "The storage node on " + storageNode.hostName() + " has been set to UP");
}
@@ -73,6 +75,36 @@ public class HostedVespaPolicy implements Policy {
}
}
+ @Override
+ public void acquirePermissionToRemove(ApplicationApi applicationApi) throws HostStateChangeDeniedException {
+ ApplicationInstanceStatus applicationStatus = applicationApi.getApplicationStatus();
+ if (applicationStatus == ApplicationInstanceStatus.ALLOWED_TO_BE_DOWN) {
+ throw new HostStateChangeDeniedException(
+ applicationApi.getNodeGroup(),
+ HostedVespaPolicy.APPLICATION_SUSPENDED_CONSTRAINT,
+ "Unable to test availability constraints as the application " + applicationApi.applicationInfo()
+ + " is allowed to be down");
+ }
+
+ // Apply per-cluster policy
+ for (ClusterApi cluster : applicationApi.getClusters()) {
+ clusterPolicy.verifyGroupGoingDownPermanentlyIsFine(cluster);
+ }
+
+ // Ask Cluster Controller to set storage nodes to DOWN.
+ // These storage nodes are guaranteed to be NO_REMARKS
+ for (StorageNode storageNode : applicationApi.getStorageNodesInGroupInClusterOrder()) {
+ storageNode.setNodeState(ClusterControllerNodeState.DOWN);
+ log.log(LogLevel.INFO, "The storage node on " + storageNode.hostName() + " has been set DOWN");
+ }
+
+ // Ensure all nodes in the group are marked as allowed to be down
+ for (HostName hostName : applicationApi.getNodesInGroupWithStatus(HostStatus.NO_REMARKS)) {
+ applicationApi.setHostState(hostName, HostStatus.ALLOWED_TO_BE_DOWN);
+ log.log(LogLevel.INFO, hostName + " is now allowed to be down (suspended)");
+ }
+ }
+
// TODO: Remove later - currently used for backward compatibility testing
@Override
public void grantSuspensionRequest(ApplicationInstance<ServiceMonitorStatus> applicationInstance,
diff --git a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/policy/Policy.java b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/policy/Policy.java
index fee2940efda..2db842274b3 100644
--- a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/policy/Policy.java
+++ b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/policy/Policy.java
@@ -30,6 +30,13 @@ public interface Policy {
void releaseSuspensionGrant(ApplicationApi application) throws HostStateChangeDeniedException;
/**
+ * Give all hosts in a group permission to be removed from the application.
+ *
+ * @param applicationApi
+ */
+ void acquirePermissionToRemove(ApplicationApi applicationApi) throws HostStateChangeDeniedException;
+
+ /**
* Release an earlier grant for suspension.
*
* @throws HostStateChangeDeniedException if the release failed.
diff --git a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/resources/HostResource.java b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/resources/HostResource.java
index b6dac92206c..c6e9e5415ce 100644
--- a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/resources/HostResource.java
+++ b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/resources/HostResource.java
@@ -57,7 +57,7 @@ public class HostResource implements HostApi {
throw new NotFoundException(e);
} catch (HostStateChangeDeniedException e) {
log.log(LogLevel.INFO, "Failed to suspend " + hostName + ": " + e.getMessage());
- throw webExceptionWithDenialReason(hostName, e);
+ throw webExceptionWithDenialReason("suspend", hostName, e);
}
return new UpdateHostResponse(hostName.s(), null);
}
@@ -72,14 +72,19 @@ public class HostResource implements HostApi {
throw new NotFoundException(e);
} catch (HostStateChangeDeniedException e) {
log.log(LogLevel.INFO, "Failed to resume " + hostName + ": " + e.getMessage());
- throw webExceptionWithDenialReason(hostName, e);
+ throw webExceptionWithDenialReason("resume", hostName, e);
}
return new UpdateHostResponse(hostName.s(), null);
}
- private static WebApplicationException webExceptionWithDenialReason(HostName hostName, HostStateChangeDeniedException e) {
- HostStateChangeDenialReason hostStateChangeDenialReason = new HostStateChangeDenialReason(
- e.getConstraintName(), e.getServiceType().s(), e.getMessage());
+ private static WebApplicationException webExceptionWithDenialReason(
+ String operationDescription,
+ HostName hostName,
+ HostStateChangeDeniedException e) {
+ HostStateChangeDenialReason hostStateChangeDenialReason =
+ new HostStateChangeDenialReason(
+ e.getConstraintName(),
+ operationDescription + " failed: " + e.getMessage());
UpdateHostResponse response = new UpdateHostResponse(hostName.s(), hostStateChangeDenialReason);
return new WebApplicationException(
hostStateChangeDenialReason.toString(),
diff --git a/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/OrchestratorImplTest.java b/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/OrchestratorImplTest.java
index 09aa37f9610..ccee70630af 100644
--- a/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/OrchestratorImplTest.java
+++ b/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/OrchestratorImplTest.java
@@ -5,7 +5,6 @@ import com.yahoo.config.provision.ApplicationId;
import com.yahoo.vespa.applicationmodel.ApplicationInstance;
import com.yahoo.vespa.applicationmodel.ApplicationInstanceReference;
import com.yahoo.vespa.applicationmodel.HostName;
-import com.yahoo.vespa.applicationmodel.ServiceType;
import com.yahoo.vespa.orchestrator.controller.ClusterControllerClientFactoryMock;
import com.yahoo.vespa.orchestrator.policy.BatchHostStateChangeDeniedException;
import com.yahoo.vespa.orchestrator.policy.HostStateChangeDeniedException;
@@ -228,11 +227,10 @@ public class OrchestratorImplTest {
Throwable supensionFailure = new HostStateChangeDeniedException(
DummyInstanceLookupService.TEST6_HOST_NAME,
"some-constraint",
- new ServiceType("foo"),
"error message");
doThrow(supensionFailure).when(orchestrator).suspendGroup(DummyInstanceLookupService.TEST6_NODE_GROUP);
- doThrow(new HostStateChangeDeniedException(DummyInstanceLookupService.TEST1_HOST_NAME, "foo1-constraint", new ServiceType("foo1-service"), "foo1-message"))
+ doThrow(new HostStateChangeDeniedException(DummyInstanceLookupService.TEST1_HOST_NAME, "foo1-constraint", "foo1-message"))
.when(orchestrator).resume(DummyInstanceLookupService.TEST1_HOST_NAME);
doNothing().when(orchestrator).resume(DummyInstanceLookupService.TEST6_HOST_NAME);
doNothing().when(orchestrator).resume(DummyInstanceLookupService.TEST3_HOST_NAME);
@@ -251,10 +249,10 @@ public class OrchestratorImplTest {
assertEquals("Failed to suspend NodeGroup{application=tenant-id-3:application-instance-3:prod:utopia-1:default, " +
"hostNames=[test6.prod.us-east-1.vespahosted.ne1.yahoo.com]} with parent host parentHostname: " +
"Changing the state of test6.prod.us-east-1.vespahosted.ne1.yahoo.com would violate " +
- "some-constraint for service type foo: error message; With suppressed throwable " +
+ "some-constraint: error message; With suppressed throwable " +
"com.yahoo.vespa.orchestrator.policy.HostStateChangeDeniedException: " +
- "Changing the state of test1.prod.utpoia-1.vespahosted.ut1.yahoo.com would violate " +
- "foo1-constraint for service type foo1-service: foo1-message",
+ "Changing the state of test1.prod.utpoia-1.vespahosted.ut1.yahoo.com " +
+ "would violate foo1-constraint: foo1-message",
e.getMessage());
}
diff --git a/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/controller/ClusterControllerClientFactoryMock.java b/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/controller/ClusterControllerClientFactoryMock.java
index 93dcb5815c6..a36a468a345 100644
--- a/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/controller/ClusterControllerClientFactoryMock.java
+++ b/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/controller/ClusterControllerClientFactoryMock.java
@@ -22,14 +22,14 @@ import java.util.Set;
* @author smorgrav
*/
public class ClusterControllerClientFactoryMock implements ClusterControllerClientFactory {
- Map<String, ClusterControllerState> nodes = new HashMap<>();
+ Map<String, ClusterControllerNodeState> nodes = new HashMap<>();
public boolean isInMaintenance(ApplicationInstance<ServiceMonitorStatus> appInstance, HostName hostName) {
try {
ClusterId clusterName = VespaModelUtil.getContentClusterName(appInstance, hostName);
int storageNodeIndex = VespaModelUtil.getStorageNodeIndex(appInstance, hostName);
String globalMapKey = clusterName.s() + storageNodeIndex;
- return nodes.getOrDefault(globalMapKey, ClusterControllerState.UP) == ClusterControllerState.MAINTENANCE;
+ return nodes.getOrDefault(globalMapKey, ClusterControllerNodeState.UP) == ClusterControllerNodeState.MAINTENANCE;
} catch (Exception e) {
//Catch all - meant to catch cases where the node is not part of a storage cluster
return false;
@@ -43,7 +43,7 @@ public class ClusterControllerClientFactoryMock implements ClusterControllerClie
ClusterId clusterName = VespaModelUtil.getContentClusterName(app, host);
int storageNodeIndex = VespaModelUtil.getStorageNodeIndex(app, host);
String globalMapKey = clusterName.s() + storageNodeIndex;
- nodes.put(globalMapKey, ClusterControllerState.UP);
+ nodes.put(globalMapKey, ClusterControllerNodeState.UP);
}
}
}
@@ -53,13 +53,13 @@ public class ClusterControllerClientFactoryMock implements ClusterControllerClie
return new ClusterControllerClient() {
@Override
- public ClusterControllerStateResponse setNodeState(int storageNodeIndex, ClusterControllerState wantedState) throws IOException {
+ public ClusterControllerStateResponse setNodeState(int storageNodeIndex, ClusterControllerNodeState wantedState) throws IOException {
nodes.put(clusterName + storageNodeIndex, wantedState);
return new ClusterControllerStateResponse(true, "Yes");
}
@Override
- public ClusterControllerStateResponse setApplicationState(ClusterControllerState wantedState) throws IOException {
+ public ClusterControllerStateResponse setApplicationState(ClusterControllerNodeState wantedState) throws IOException {
Set<String> keyCopy = new HashSet<>(nodes.keySet());
for (String s : keyCopy) {
if (s.startsWith(clusterName)) {
diff --git a/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/controller/ClusterControllerClientTest.java b/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/controller/ClusterControllerClientTest.java
index 68df4dce393..2841677ec04 100644
--- a/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/controller/ClusterControllerClientTest.java
+++ b/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/controller/ClusterControllerClientTest.java
@@ -22,7 +22,7 @@ public class ClusterControllerClientTest {
strategyMock,
CLUSTER_NAME);
- final ClusterControllerState wantedState = ClusterControllerState.MAINTENANCE;
+ final ClusterControllerNodeState wantedState = ClusterControllerNodeState.MAINTENANCE;
clusterControllerClient.setNodeState(STORAGE_NODE_INDEX, wantedState);
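
The rename from ClusterControllerState to ClusterControllerNodeState is mechanical across these tests. A hedged sketch of the two client methods as the mocks above use them (client stands for any ClusterControllerClient; the response fields are not shown because they are not exercised here):

    // Sketch of call sites after the rename; UP and MAINTENANCE are the constants used in these tests.
    ClusterControllerNodeState wanted = ClusterControllerNodeState.MAINTENANCE;
    ClusterControllerStateResponse perNode = client.setNodeState(storageNodeIndex, wanted);
    ClusterControllerStateResponse perApp = client.setApplicationState(ClusterControllerNodeState.UP);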
diff --git a/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/controller/SingleInstanceClusterControllerClientFactoryTest.java b/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/controller/SingleInstanceClusterControllerClientFactoryTest.java
index cde644e7e8b..1081858812b 100644
--- a/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/controller/SingleInstanceClusterControllerClientFactoryTest.java
+++ b/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/controller/SingleInstanceClusterControllerClientFactoryTest.java
@@ -64,7 +64,7 @@ public class SingleInstanceClusterControllerClientFactoryTest {
final List<HostName> clusterControllers = Arrays.asList(HOST_NAME_1);
clientFactory.createClient(clusterControllers, "clusterName")
- .setNodeState(0, ClusterControllerState.MAINTENANCE);
+ .setNodeState(0, ClusterControllerNodeState.MAINTENANCE);
verify(jaxRsClientFactory).createClient(
ClusterControllerJaxRsApi.class,
@@ -90,7 +90,7 @@ public class SingleInstanceClusterControllerClientFactoryTest {
final List<HostName> clusterControllers = Arrays.asList(HOST_NAME_1, HOST_NAME_2, HOST_NAME_3);
clientFactory.createClient(clusterControllers, "clusterName")
- .setNodeState(0, ClusterControllerState.MAINTENANCE);
+ .setNodeState(0, ClusterControllerNodeState.MAINTENANCE);
verify(jaxRsClientFactory).createClient(
eq(ClusterControllerJaxRsApi.class),
diff --git a/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/policy/HostedVespaClusterPolicyTest.java b/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/policy/HostedVespaClusterPolicyTest.java
new file mode 100644
index 00000000000..e6e60900906
--- /dev/null
+++ b/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/policy/HostedVespaClusterPolicyTest.java
@@ -0,0 +1,113 @@
+package com.yahoo.vespa.orchestrator.policy;
+
+import com.yahoo.vespa.applicationmodel.ApplicationInstanceId;
+import com.yahoo.vespa.applicationmodel.ClusterId;
+import com.yahoo.vespa.applicationmodel.HostName;
+import com.yahoo.vespa.applicationmodel.ServiceType;
+import com.yahoo.vespa.applicationmodel.TenantId;
+import com.yahoo.vespa.orchestrator.model.ClusterApi;
+import com.yahoo.vespa.orchestrator.model.NodeGroup;
+import com.yahoo.vespa.orchestrator.model.VespaModelUtil;
+import com.yahoo.vespa.orchestrator.status.MutableStatusRegistry;
+import org.junit.Test;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.fail;
+import static org.mockito.Mockito.doReturn;
+import static org.mockito.Mockito.mock;
+import static org.mockito.Mockito.spy;
+import static org.mockito.Mockito.when;
+
+public class HostedVespaClusterPolicyTest {
+ private ClusterApi clusterApi = mock(ClusterApi.class);
+ private HostedVespaClusterPolicy policy = spy(new HostedVespaClusterPolicy());
+
+ @Test
+ public void testSlobrokSuspensionLimit() {
+ when(clusterApi.clusterId()).thenReturn(VespaModelUtil.ADMIN_CLUSTER_ID);
+ when(clusterApi.serviceType()).thenReturn(VespaModelUtil.SLOBROK_SERVICE_TYPE);
+ assertEquals(ConcurrentSuspensionLimitForCluster.ONE_NODE,
+ policy.getConcurrentSuspensionLimit(clusterApi));
+ }
+
+ @Test
+ public void testAdminSuspensionLimit() {
+ when(clusterApi.clusterId()).thenReturn(VespaModelUtil.ADMIN_CLUSTER_ID);
+ when(clusterApi.serviceType()).thenReturn(new ServiceType("non-slobrok-service-type"));
+ assertEquals(ConcurrentSuspensionLimitForCluster.ALL_NODES,
+ policy.getConcurrentSuspensionLimit(clusterApi));
+ }
+
+ @Test
+ public void testStorageSuspensionLimit() {
+ when(clusterApi.clusterId()).thenReturn(new ClusterId("some-cluster-id"));
+ when(clusterApi.isStorageCluster()).thenReturn(true);
+ assertEquals(ConcurrentSuspensionLimitForCluster.ONE_NODE,
+ policy.getConcurrentSuspensionLimit(clusterApi));
+ }
+
+ @Test
+ public void testDefaultSuspensionLimit() {
+ when(clusterApi.clusterId()).thenReturn(new ClusterId("some-cluster-id"));
+ when(clusterApi.isStorageCluster()).thenReturn(false);
+ assertEquals(ConcurrentSuspensionLimitForCluster.TEN_PERCENT,
+ policy.getConcurrentSuspensionLimit(clusterApi));
+ }
+
+ @Test
+ public void verifyGroupGoingDownIsFine_noServicesOutsideGroupIsDownIsFine() {
+ verifyGroupGoingDownIsFine(true, false, 13, true);
+ }
+
+ @Test
+ public void verifyGroupGoingDownIsFine_noServicesInGroupIsUp() {
+ verifyGroupGoingDownIsFine(false, true, 13, true);
+ }
+
+ @Test
+ public void verifyGroupGoingDownIsFine_percentageIsFine() {
+ verifyGroupGoingDownIsFine(false, false, 9, true);
+ }
+
+ @Test
+ public void verifyGroupGoingDownIsFine_fails() {
+ verifyGroupGoingDownIsFine(false, false, 13, false);
+ }
+
+ private void verifyGroupGoingDownIsFine(boolean noServicesOutsideGroupIsDown,
+ boolean noServicesInGroupIsUp,
+ int percentageOfServicesDownIfGroupIsAllowedToBeDown,
+ boolean expectSuccess) {
+ when(clusterApi.noServicesOutsideGroupIsDown()).thenReturn(noServicesOutsideGroupIsDown);
+ when(clusterApi.noServicesInGroupIsUp()).thenReturn(noServicesInGroupIsUp);
+ when(clusterApi.percentageOfServicesDownIfGroupIsAllowedToBeDown()).thenReturn(20);
+ doReturn(ConcurrentSuspensionLimitForCluster.TEN_PERCENT).when(policy).getConcurrentSuspensionLimit(clusterApi);
+
+ when(clusterApi.serviceType()).thenReturn(new ServiceType("service-type"));
+ when(clusterApi.percentageOfServicesDown()).thenReturn(5);
+ when(clusterApi.percentageOfServicesDownIfGroupIsAllowedToBeDown()).thenReturn(percentageOfServicesDownIfGroupIsAllowedToBeDown);
+ when(clusterApi.servicesDownAndNotInGroupDescription()).thenReturn("services-down-and-not-in-group");
+ when(clusterApi.nodesAllowedToBeDownNotInGroupDescription()).thenReturn("allowed-to-be-down");
+
+ NodeGroup nodeGroup = mock(NodeGroup.class);
+ when(clusterApi.getNodeGroup()).thenReturn(nodeGroup);
+ when(nodeGroup.toCommaSeparatedString()).thenReturn("node-group");
+
+ when(clusterApi.noServicesInGroupIsUp()).thenReturn(false);
+ try {
+ policy.verifyGroupGoingDownIsFine(clusterApi);
+ if (!expectSuccess) {
+ fail();
+ }
+ } catch (HostStateChangeDeniedException e) {
+ if (!expectSuccess) {
+ assertEquals("Changing the state of node-group would violate enough-services-up: "
+ + "Suspension percentage for service type service-type would increase from "
+ + "5% to 13%, over the limit of 10%. These instances may be down: "
+ + "services-down-and-not-in-group and these hosts are allowed to be down: "
+ + "allowed-to-be-down", e.getMessage());
+ assertEquals("enough-services-up", e.getConstraintName());
+ }
+ }
+ }
+}
\ No newline at end of file
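
Read together, the four suspension-limit tests above pin down the selection behaviour of HostedVespaClusterPolicy.getConcurrentSuspensionLimit. The following is a sketch inferred from those assertions only, not the actual implementation; limitFor is a hypothetical helper name:

    // Inferred behaviour: admin/slobrok and storage clusters allow one node down at a time,
    // other admin services allow all nodes down, and everything else allows ten percent.
    ConcurrentSuspensionLimitForCluster limitFor(ClusterApi cluster) {
        if (VespaModelUtil.ADMIN_CLUSTER_ID.equals(cluster.clusterId())) {
            return VespaModelUtil.SLOBROK_SERVICE_TYPE.equals(cluster.serviceType())
                    ? ConcurrentSuspensionLimitForCluster.ONE_NODE
                    : ConcurrentSuspensionLimitForCluster.ALL_NODES;
        }
        return cluster.isStorageCluster()
                ? ConcurrentSuspensionLimitForCluster.ONE_NODE
                : ConcurrentSuspensionLimitForCluster.TEN_PERCENT;
    }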
diff --git a/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/policy/HostedVespaPolicyTest.java b/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/policy/HostedVespaPolicyTest.java
index 5cba6375717..fe2d9a60acb 100644
--- a/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/policy/HostedVespaPolicyTest.java
+++ b/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/policy/HostedVespaPolicyTest.java
@@ -2,53 +2,25 @@
package com.yahoo.vespa.orchestrator.policy;
-import com.yahoo.vespa.applicationmodel.ApplicationInstance;
-import com.yahoo.vespa.applicationmodel.ApplicationInstanceId;
-import com.yahoo.vespa.applicationmodel.ClusterId;
-import com.yahoo.vespa.applicationmodel.ConfigId;
import com.yahoo.vespa.applicationmodel.HostName;
-import com.yahoo.vespa.applicationmodel.ServiceCluster;
-import com.yahoo.vespa.applicationmodel.ServiceInstance;
-import com.yahoo.vespa.applicationmodel.ServiceType;
-import com.yahoo.vespa.applicationmodel.TenantId;
-import com.yahoo.vespa.orchestrator.TestUtil;
+import com.yahoo.vespa.orchestrator.OrchestrationException;
import com.yahoo.vespa.orchestrator.controller.ClusterControllerClient;
import com.yahoo.vespa.orchestrator.controller.ClusterControllerClientFactory;
-import com.yahoo.vespa.orchestrator.controller.ClusterControllerState;
-import com.yahoo.vespa.orchestrator.controller.ClusterControllerStateResponse;
+import com.yahoo.vespa.orchestrator.controller.ClusterControllerNodeState;
import com.yahoo.vespa.orchestrator.model.ApplicationApi;
import com.yahoo.vespa.orchestrator.model.ClusterApi;
import com.yahoo.vespa.orchestrator.model.StorageNode;
-import com.yahoo.vespa.orchestrator.model.VespaModelUtil;
import com.yahoo.vespa.orchestrator.status.HostStatus;
-import com.yahoo.vespa.orchestrator.status.MutableStatusRegistry;
-import com.yahoo.vespa.service.monitor.ServiceMonitorStatus;
import org.junit.Before;
import org.junit.Test;
import org.mockito.InOrder;
import java.util.Arrays;
-import java.util.HashSet;
import java.util.List;
-import java.util.Optional;
-import java.util.Set;
-import java.util.stream.Collectors;
-
-import static com.yahoo.vespa.orchestrator.TestUtil.makeServiceClusterSet;
-import static com.yahoo.vespa.orchestrator.TestUtil.makeServiceInstanceSet;
-import static com.yahoo.vespa.service.monitor.ServiceMonitorStatus.DOWN;
-import static com.yahoo.vespa.service.monitor.ServiceMonitorStatus.NOT_CHECKED;
-import static com.yahoo.vespa.service.monitor.ServiceMonitorStatus.UP;
-import static org.fest.assertions.Assertions.assertThat;
-import static org.junit.Assert.fail;
+
import static org.mockito.Matchers.any;
-import static org.mockito.Matchers.anyInt;
-import static org.mockito.Matchers.eq;
import static org.mockito.Mockito.inOrder;
import static org.mockito.Mockito.mock;
-import static org.mockito.Mockito.never;
-import static org.mockito.Mockito.times;
-import static org.mockito.Mockito.verify;
import static org.mockito.Mockito.when;
/**
@@ -105,8 +77,8 @@ public class HostedVespaPolicyTest {
order.verify(clusterPolicy).verifyGroupGoingDownIsFine(clusterApi3);
order.verify(applicationApi).getUpStorageNodesInGroupInClusterOrder();
- order.verify(storageNode1).setNodeState(ClusterControllerState.MAINTENANCE);
- order.verify(storageNode3).setNodeState(ClusterControllerState.MAINTENANCE);
+ order.verify(storageNode1).setNodeState(ClusterControllerNodeState.MAINTENANCE);
+ order.verify(storageNode3).setNodeState(ClusterControllerNodeState.MAINTENANCE);
order.verify(applicationApi).getNodesInGroupWithStatus(HostStatus.NO_REMARKS);
order.verify(applicationApi).setHostState(hostName1, HostStatus.ALLOWED_TO_BE_DOWN);
@@ -116,689 +88,53 @@ public class HostedVespaPolicyTest {
order.verifyNoMoreInteractions();
}
- private static final TenantId TENANT_ID = new TenantId("tenantId");
- private static final ApplicationInstanceId APPLICATION_INSTANCE_ID = new ApplicationInstanceId("applicationId");
- private static final HostName HOST_NAME_1 = new HostName("host-1");
- private static final HostName HOST_NAME_2 = new HostName("host-2");
- private static final HostName HOST_NAME_3 = new HostName("host-3");
- private static final HostName HOST_NAME_4 = new HostName("host-4");
- private static final HostName HOST_NAME_5 = new HostName("host-5");
- private static final ServiceType SERVICE_TYPE_1 = new ServiceType("service-1");
- private static final ServiceType SERVICE_TYPE_2 = new ServiceType("service-2");
-
- // TODO: Replace HostedVespaClusterPolicy with ClusterPolicy mock
- private final HostedVespaPolicy policy =
- new HostedVespaPolicy(new HostedVespaClusterPolicy(), clientFactory);
-
- private final MutableStatusRegistry mutablestatusRegistry = mock(MutableStatusRegistry.class);
- {
- when(mutablestatusRegistry.getHostStatus(any())).thenReturn(HostStatus.NO_REMARKS);
- }
-
- @Test
- public void test_policy_everyone_agrees_everything_is_up() throws Exception {
- final ApplicationInstance<ServiceMonitorStatus> applicationInstance = new AppBuilder()
- .addCluster(new ClusterBuilder(SERVICE_TYPE_1)
- .instance(HOST_NAME_1, UP)
- .instance(HOST_NAME_2, UP)
- .instance(HOST_NAME_3, UP))
- .build();
-
- policy.grantSuspensionRequest(
- applicationInstance,
- HOST_NAME_1,
- mutablestatusRegistry
- );
-
- verify(mutablestatusRegistry, times(1)).setHostState(HOST_NAME_1, HostStatus.ALLOWED_TO_BE_DOWN);
- }
-
- private void grantWithAdminCluster(
- ServiceMonitorStatus statusParallelInstanceOtherHost,
- ServiceMonitorStatus statusInstanceOtherHost,
- ServiceType serviceType,
- boolean expectGranted) throws HostStateChangeDeniedException {
- final ApplicationInstance<ServiceMonitorStatus> applicationInstance = new AppBuilder()
- .addCluster(new ClusterBuilder(SERVICE_TYPE_1)
- .instance(HOST_NAME_1, UP)
- .instance(HOST_NAME_2, statusInstanceOtherHost)
- .instance(HOST_NAME_3, UP))
- .addCluster(new ClusterBuilder(VespaModelUtil.ADMIN_CLUSTER_ID, serviceType)
- .instance(HOST_NAME_1, UP)
- .instance(HOST_NAME_2, statusParallelInstanceOtherHost))
- .build();
-
- if (expectGranted) {
- policy.grantSuspensionRequest(
- applicationInstance,
- HOST_NAME_1,
- mutablestatusRegistry
- );
-
- verify(mutablestatusRegistry, times(1)).setHostState(HOST_NAME_1, HostStatus.ALLOWED_TO_BE_DOWN);
- } else {
- try {
- policy.grantSuspensionRequest(
- applicationInstance,
- HOST_NAME_1,
- mutablestatusRegistry);
- fail();
- } catch (HostStateChangeDeniedException e) {
- // As expected.
- assertThat(e.getConstraintName()).isEqualTo(HostedVespaPolicy.ENOUGH_SERVICES_UP_CONSTRAINT);
- }
-
- verify(mutablestatusRegistry, never()).setHostState(any(), any());
- }
- }
-
- @Test
- public void test_parallel_cluster_down_is_ok() throws Exception {
- grantWithAdminCluster(DOWN, UP, new ServiceType("some-service-type"), true);
- }
-
- @Test
- public void test_slobrok_cluster_down_is_not_ok() throws Exception {
- grantWithAdminCluster(DOWN, UP, VespaModelUtil.SLOBROK_SERVICE_TYPE, false);
- }
-
- @Test
- public void test_other_cluster_instance_down_is_not_ok() throws Exception {
- grantWithAdminCluster(DOWN, DOWN, new ServiceType("some-service-type"), false);
- }
-
- @Test
- public void test_all_up_is_ok() throws Exception {
- grantWithAdminCluster(UP, UP, new ServiceType("some-service-type"), true);
- }
-
- @Test
- public void test_policy_other_host_allowed_to_be_down() {
- final ApplicationInstance<ServiceMonitorStatus> applicationInstance = new AppBuilder()
- .addCluster(new ClusterBuilder(SERVICE_TYPE_1)
- .instance(HOST_NAME_1, UP)
- .instance(HOST_NAME_2, UP)
- .instance(HOST_NAME_3, UP)
- .instance(HOST_NAME_4, UP)
- .instance(HOST_NAME_5, UP))
- .build();
-
- when(mutablestatusRegistry.getHostStatus(eq(HOST_NAME_2))).thenReturn(HostStatus.ALLOWED_TO_BE_DOWN);
-
- try {
- policy.grantSuspensionRequest(
- applicationInstance,
- HOST_NAME_3,
- mutablestatusRegistry);
- fail();
- } catch (HostStateChangeDeniedException e) {
- // As expected.
- assertThat(e.getConstraintName()).isEqualTo(HostedVespaPolicy.ENOUGH_SERVICES_UP_CONSTRAINT);
- assertThat(e.getServiceType()).isEqualTo(SERVICE_TYPE_1);
- }
-
- verify(mutablestatusRegistry, never()).setHostState(any(), any());
- }
-
- @Test
- public void test_policy_this_host_allowed_to_be_down() throws Exception {
- final ApplicationInstance<ServiceMonitorStatus> applicationInstance = new AppBuilder()
- .addCluster(new ClusterBuilder(SERVICE_TYPE_1)
- .instance(HOST_NAME_1, UP)
- .instance(HOST_NAME_2, UP)
- .instance(HOST_NAME_3, UP)
- .instance(HOST_NAME_4, UP)
- .instance(HOST_NAME_5, UP))
- .build();
-
- when(mutablestatusRegistry.getHostStatus(eq(HOST_NAME_3))).thenReturn(HostStatus.ALLOWED_TO_BE_DOWN);
-
- policy.grantSuspensionRequest(applicationInstance, HOST_NAME_3, mutablestatusRegistry);
-
- verify(mutablestatusRegistry, times(0)).setHostState(HOST_NAME_3, HostStatus.ALLOWED_TO_BE_DOWN);
- }
-
- @Test
- public void from_five_to_ten_percent_suspension() throws Exception {
- final ApplicationInstance<ServiceMonitorStatus> applicationInstance = new AppBuilder()
- .addCluster(new ClusterBuilder(SERVICE_TYPE_1)
- .instance(HOST_NAME_1, UP)
- .instance(HOST_NAME_1, UP)
- .instance(HOST_NAME_1, UP)
- .instance(HOST_NAME_1, UP)
- .instance(HOST_NAME_1, UP)
- .instance(HOST_NAME_1, UP)
- .instance(HOST_NAME_2, UP)
- .instance(HOST_NAME_3, UP)
- .instance(HOST_NAME_4, UP)
- .instance(HOST_NAME_4, UP)
- .instance(HOST_NAME_4, UP)
- .instance(HOST_NAME_4, UP)
- .instance(HOST_NAME_4, UP)
- .instance(HOST_NAME_4, UP)
- .instance(HOST_NAME_5, UP)
- .instance(HOST_NAME_5, UP)
- .instance(HOST_NAME_5, UP)
- .instance(HOST_NAME_5, UP)
- .instance(HOST_NAME_5, UP)
- .instance(HOST_NAME_5, UP))
- .build();
-
- when(mutablestatusRegistry.getHostStatus(eq(HOST_NAME_2))).thenReturn(HostStatus.ALLOWED_TO_BE_DOWN);
-
- policy.grantSuspensionRequest(
- applicationInstance,
- HOST_NAME_3,
- mutablestatusRegistry);
-
- verify(mutablestatusRegistry, times(1)).setHostState(HOST_NAME_3, HostStatus.ALLOWED_TO_BE_DOWN);
- }
-
- @Test
- public void from_ten_to_fifteen_percent_suspension() {
- final ApplicationInstance<ServiceMonitorStatus> applicationInstance = new AppBuilder()
- .addCluster(new ClusterBuilder(SERVICE_TYPE_1)
- .instance(HOST_NAME_1, UP)
- .instance(HOST_NAME_1, UP)
- .instance(HOST_NAME_1, UP)
- .instance(HOST_NAME_1, UP)
- .instance(HOST_NAME_1, UP)
- .instance(HOST_NAME_1, UP)
- .instance(HOST_NAME_2, UP)
- .instance(HOST_NAME_2, UP)
- .instance(HOST_NAME_3, UP)
- .instance(HOST_NAME_4, UP)
- .instance(HOST_NAME_4, UP)
- .instance(HOST_NAME_4, UP)
- .instance(HOST_NAME_4, UP)
- .instance(HOST_NAME_4, UP)
- .instance(HOST_NAME_5, UP)
- .instance(HOST_NAME_5, UP)
- .instance(HOST_NAME_5, UP)
- .instance(HOST_NAME_5, UP)
- .instance(HOST_NAME_5, UP)
- .instance(HOST_NAME_5, UP))
- .build();
-
- when(mutablestatusRegistry.getHostStatus(eq(HOST_NAME_2))).thenReturn(HostStatus.ALLOWED_TO_BE_DOWN);
-
- try {
- policy.grantSuspensionRequest(
- applicationInstance,
- HOST_NAME_3,
- mutablestatusRegistry);
- fail();
- } catch (HostStateChangeDeniedException e) {
- // As expected.
- assertThat(e.getConstraintName()).isEqualTo(HostedVespaPolicy.ENOUGH_SERVICES_UP_CONSTRAINT);
- assertThat(e.getServiceType()).isEqualTo(SERVICE_TYPE_1);
- }
-
- verify(mutablestatusRegistry, never()).setHostState(any(), any());
- }
-
- @Test
- public void from_five_to_fifteen_percent_suspension() {
- final ApplicationInstance<ServiceMonitorStatus> applicationInstance = new AppBuilder()
- .addCluster(new ClusterBuilder(SERVICE_TYPE_1)
- .instance(HOST_NAME_1, UP)
- .instance(HOST_NAME_1, UP)
- .instance(HOST_NAME_1, UP)
- .instance(HOST_NAME_1, UP)
- .instance(HOST_NAME_1, UP)
- .instance(HOST_NAME_1, UP)
- .instance(HOST_NAME_2, UP)
- .instance(HOST_NAME_3, UP)
- .instance(HOST_NAME_3, UP)
- .instance(HOST_NAME_4, UP)
- .instance(HOST_NAME_4, UP)
- .instance(HOST_NAME_4, UP)
- .instance(HOST_NAME_4, UP)
- .instance(HOST_NAME_4, UP)
- .instance(HOST_NAME_5, UP)
- .instance(HOST_NAME_5, UP)
- .instance(HOST_NAME_5, UP)
- .instance(HOST_NAME_5, UP)
- .instance(HOST_NAME_5, UP)
- .instance(HOST_NAME_5, UP))
- .build();
-
- when(mutablestatusRegistry.getHostStatus(eq(HOST_NAME_2))).thenReturn(HostStatus.ALLOWED_TO_BE_DOWN);
-
- try {
- policy.grantSuspensionRequest(
- applicationInstance,
- HOST_NAME_3,
- mutablestatusRegistry);
- fail();
- } catch (HostStateChangeDeniedException e) {
- // As expected.
- assertThat(e.getConstraintName()).isEqualTo(HostedVespaPolicy.ENOUGH_SERVICES_UP_CONSTRAINT);
- assertThat(e.getServiceType()).isEqualTo(SERVICE_TYPE_1);
- }
-
- verify(mutablestatusRegistry, never()).setHostState(any(), any());
- }
-
@Test
- public void test_policy_no_services() throws Exception {
- final ApplicationInstance<ServiceMonitorStatus> applicationInstance = new AppBuilder().build();
-
- HostName hostName = new HostName("test-hostname");
- policy.grantSuspensionRequest(
- applicationInstance,
- hostName,
- mutablestatusRegistry
- );
-
- verify(mutablestatusRegistry, times(1)).setHostState(hostName, HostStatus.ALLOWED_TO_BE_DOWN);
- }
-
- // The cluster name happens to be the cluster id of any of the content service clusters.
- private static final String CONTENT_CLUSTER_NAME = "content-cluster-id";
- private static final HostName CLUSTER_CONTROLLER_HOST = new HostName("controller-0");
- private static final HostName STORAGE_NODE_HOST = new HostName("storage-2");
- private static final int STORAGE_NODE_INDEX = 2;
-
- private static final ServiceCluster<ServiceMonitorStatus> CLUSTER_CONTROLLER_SERVICE_CLUSTER = new ServiceCluster<>(
- new ClusterId("cluster-0"),
- VespaModelUtil.CLUSTER_CONTROLLER_SERVICE_TYPE,
- makeServiceInstanceSet(
- new ServiceInstance<>(
- TestUtil.clusterControllerConfigId(CONTENT_CLUSTER_NAME, 0),
- CLUSTER_CONTROLLER_HOST,
- UP)));
-
- private static final ServiceCluster<ServiceMonitorStatus> DISTRIBUTOR_SERVICE_CLUSTER = new ServiceCluster<>(
- new ClusterId(CONTENT_CLUSTER_NAME),
- VespaModelUtil.DISTRIBUTOR_SERVICE_TYPE,
- makeServiceInstanceSet(
- new ServiceInstance<>(
- new ConfigId("distributor-id-1"),
- new HostName("distributor-1"),
- UP)));
-
- private static final ServiceCluster<ServiceMonitorStatus> STORAGE_SERVICE_CLUSTER = new ServiceCluster<>(
- new ClusterId(CONTENT_CLUSTER_NAME),
- VespaModelUtil.STORAGENODE_SERVICE_TYPE,
- makeServiceInstanceSet(
- new ServiceInstance<>(
- TestUtil.storageNodeConfigId(CONTENT_CLUSTER_NAME, STORAGE_NODE_INDEX),
- STORAGE_NODE_HOST,
- UP)));
-
- private static final ApplicationInstance<ServiceMonitorStatus> APPLICATION_INSTANCE =
- new ApplicationInstance<>(
- TENANT_ID,
- APPLICATION_INSTANCE_ID,
- makeServiceClusterSet(
- CLUSTER_CONTROLLER_SERVICE_CLUSTER,
- DISTRIBUTOR_SERVICE_CLUSTER,
- STORAGE_SERVICE_CLUSTER));
-
- // The grantSuspensionRequest() and releaseSuspensionGrant() functions happen to have similar signature,
- // which allows us to reuse test code for testing both functions. The actual call to one of these two functions
- // is encapsulated into the following functional interface.
- interface PolicyFunction {
- void grant(
- final HostedVespaPolicy policy,
- final ApplicationInstance<ServiceMonitorStatus> applicationInstance,
- final HostName hostName,
- final MutableStatusRegistry hostStatusRegistry) throws HostStateChangeDeniedException;
- }
-
- /**
- * Since grantSuspensionRequest and releaseSuspensionGrant is quite similar, this test util contains the bulk
- * of the test code used to test their common functionality.
- *
- * @param grantFunction Encapsulates the grant function to call
- * @param currentHostStatus The current HostStatus of the host
- * @param expectedNodeStateSentToClusterController The NodeState the test expects to be sent to the controller,
- * or null if no CC request is expected to be sent.
- * @param expectedHostStateSetOnHostStatusService The HostState the test expects to be set on the host service.
- */
- private void testCommonGrantFunctionality(
- PolicyFunction grantFunction,
- ApplicationInstance<ServiceMonitorStatus> application,
- HostStatus currentHostStatus,
- Optional<ClusterControllerState> expectedNodeStateSentToClusterController,
- HostStatus expectedHostStateSetOnHostStatusService) throws Exception {
- // There is only one service running on the host, which is a storage node.
- // Therefore, the corresponding cluster controller will have to be contacted
- // to ask for permission.
-
- ClusterControllerStateResponse response = new ClusterControllerStateResponse(true, "ok");
- // MOTD: anyInt() MUST be used for an int field, otherwise a NullPointerException is thrown!
- // In general, special anyX() must be used for primitive fields.
- when(client.setNodeState(anyInt(), any())).thenReturn(response);
-
- when(mutablestatusRegistry.getHostStatus(any())).thenReturn(currentHostStatus);
-
- // Execution phase.
- grantFunction.grant(policy, application, STORAGE_NODE_HOST, mutablestatusRegistry);
-
- // Verification phase.
-
- if (expectedNodeStateSentToClusterController.isPresent()) {
- List<HostName> hostNames = CLUSTER_CONTROLLER_SERVICE_CLUSTER.serviceInstances().stream()
- .map(service -> service.hostName())
- .collect(Collectors.toList());
- verify(clientFactory, times(1))
- .createClient(
- hostNames,
- CONTENT_CLUSTER_NAME);
- verify(client, times(1))
- .setNodeState(
- STORAGE_NODE_INDEX,
- expectedNodeStateSentToClusterController.get());
- } else {
- verify(client, never()).setNodeState(anyInt(), any());
- }
-
- verify(mutablestatusRegistry, times(1))
- .setHostState(
- STORAGE_NODE_HOST,
- expectedHostStateSetOnHostStatusService);
- }
-
- @Test
- public void test_defer_to_controller() throws Exception {
- HostStatus currentHostStatus = HostStatus.NO_REMARKS;
- ClusterControllerState expectedNodeStateSentToClusterController = ClusterControllerState.MAINTENANCE;
- HostStatus expectedHostStateSetOnHostStatusService = HostStatus.ALLOWED_TO_BE_DOWN;
- testCommonGrantFunctionality(
- HostedVespaPolicy::grantSuspensionRequest,
- APPLICATION_INSTANCE,
- currentHostStatus,
- Optional.of(expectedNodeStateSentToClusterController),
- expectedHostStateSetOnHostStatusService);
- }
-
- @Test
- public void test_release_suspension_grant_gives_no_remarks() throws Exception {
- HostStatus currentHostStatus = HostStatus.ALLOWED_TO_BE_DOWN;
- ClusterControllerState expectedNodeStateSentToClusterController = ClusterControllerState.UP;
- HostStatus expectedHostStateSetOnHostStatusService = HostStatus.NO_REMARKS;
- testCommonGrantFunctionality(
- HostedVespaPolicy::releaseSuspensionGrant,
- APPLICATION_INSTANCE,
- currentHostStatus,
- Optional.of(expectedNodeStateSentToClusterController),
- expectedHostStateSetOnHostStatusService);
- }
-
- @Test
- public void okToSuspendHostWithNoConfiguredServices() throws Exception {
- final ApplicationInstance<ServiceMonitorStatus> applicationInstance = new AppBuilder()
- .addCluster(new ClusterBuilder(SERVICE_TYPE_1)
- .instance(HOST_NAME_1, DOWN)
- .instance(HOST_NAME_2, DOWN))
- .addCluster(new ClusterBuilder(SERVICE_TYPE_2)
- .instance(HOST_NAME_4, DOWN)
- .instance(HOST_NAME_5, DOWN))
- .build();
-
- policy.grantSuspensionRequest(applicationInstance, HOST_NAME_3, mutablestatusRegistry);
- }
-
- @Test
- public void okToSuspendHostWithAllItsServicesDownEvenIfOthersAreDownToo() throws Exception {
- final ApplicationInstance<ServiceMonitorStatus> applicationInstance = new AppBuilder()
- .addCluster(new ClusterBuilder(SERVICE_TYPE_1)
- .instance(HOST_NAME_1, UP)
- .instance(HOST_NAME_2, DOWN)
- .instance(HOST_NAME_3, DOWN))
- .addCluster(new ClusterBuilder(SERVICE_TYPE_2)
- .instance(HOST_NAME_3, DOWN)
- .instance(HOST_NAME_4, DOWN)
- .instance(HOST_NAME_5, UP))
- .build();
-
- policy.grantSuspensionRequest(applicationInstance, HOST_NAME_3, mutablestatusRegistry);
- }
-
- @Test
- public void okToSuspendStorageNodeWhenStorageIsDown() throws Exception {
- ServiceMonitorStatus storageNodeStatus = DOWN;
- final ApplicationInstance<ServiceMonitorStatus> applicationInstance = new AppBuilder()
- .addCluster(new StorageClusterBuilder()
- // DOWN storage service => ok to suspend and no cluster controller call
- .instance(STORAGE_NODE_HOST, DOWN, STORAGE_NODE_INDEX)
- .instance(HOST_NAME_2, DOWN, STORAGE_NODE_INDEX + 1)
- .instance(HOST_NAME_3, DOWN, STORAGE_NODE_INDEX + 2))
- .addCluster(CLUSTER_CONTROLLER_SERVICE_CLUSTER)
- .addCluster(DISTRIBUTOR_SERVICE_CLUSTER)
- // This service has one down service on another host, which should
- // not block us from suspension because STORAGE_NODE_HOST down too.
- .addCluster(new ClusterBuilder(SERVICE_TYPE_2)
- .instance(STORAGE_NODE_HOST, DOWN)
- .instance(HOST_NAME_4, DOWN)
- .instance(HOST_NAME_5, UP))
- .build();
-
- HostStatus currentHostStatus = HostStatus.NO_REMARKS;
- Optional<ClusterControllerState> dontExpectAnyCallsToClusterController = Optional.empty();
- testCommonGrantFunctionality(
- HostedVespaPolicy::grantSuspensionRequest,
- applicationInstance,
- currentHostStatus,
- dontExpectAnyCallsToClusterController,
- HostStatus.ALLOWED_TO_BE_DOWN);
- }
-
- @Test
- public void denySuspendOfStorageIfOthersAreDown() throws Exception {
- // If the storage service is up, but other hosts' storage services are down,
- // we should be denied permission to suspend. This behavior is common for
- // storage service and non-storage service (they differ when it comes to
- // the cluster controller).
- ServiceMonitorStatus storageNodeStatus = UP;
-
- final ApplicationInstance<ServiceMonitorStatus> applicationInstance = new AppBuilder()
- .addCluster(new StorageClusterBuilder()
- .instance(STORAGE_NODE_HOST, storageNodeStatus, STORAGE_NODE_INDEX)
- .instance(HOST_NAME_2, DOWN, STORAGE_NODE_INDEX + 1)
- .instance(HOST_NAME_3, DOWN, STORAGE_NODE_INDEX + 2))
- .addCluster(CLUSTER_CONTROLLER_SERVICE_CLUSTER)
- .addCluster(DISTRIBUTOR_SERVICE_CLUSTER)
- .addCluster(new ClusterBuilder(SERVICE_TYPE_2)
- .instance(STORAGE_NODE_HOST, DOWN)
- .instance(HOST_NAME_4, DOWN)
- .instance(HOST_NAME_5, UP))
- .build();
-
- when(mutablestatusRegistry.getHostStatus(any())).thenReturn(HostStatus.NO_REMARKS);
-
- try {
- policy.grantSuspensionRequest(applicationInstance, STORAGE_NODE_HOST, mutablestatusRegistry);
- fail();
- } catch (HostStateChangeDeniedException e) {
- // As expected.
- assertThat(e.getConstraintName()).isEqualTo(HostedVespaPolicy.ENOUGH_SERVICES_UP_CONSTRAINT);
- assertThat(e.getServiceType()).isEqualTo(VespaModelUtil.STORAGENODE_SERVICE_TYPE);
- }
- }
-
- // In this test we verify the storage service cluster suspend policy of allowing at most 1
- // storage service to be effectively down. The normal policy (and the one used previously for storage)
- // is to allow 10%. Therefore, the test verifies we disallow suspending 2 hosts = 5% (some random number <10%).
- //
- // Since the Orchestrator doesn't allow suspending the host, the Orchestrator doesn't even bother calling the
- // Cluster Controller. The CC also has a policy of max 1, so it's just an optimization and safety guard.
- @Test
- public void dontBotherCallingClusterControllerIfOtherStorageNodesAreDown() throws Exception {
- StorageClusterBuilder clusterBuilder = new StorageClusterBuilder();
- for (int i = 0; i < 40; ++i) {
- clusterBuilder.instance(new HostName("host-" + i), UP, i);
- }
- ApplicationInstance<ServiceMonitorStatus> applicationInstance =
- new AppBuilder().addCluster(clusterBuilder).build();
-
- HostName host_1 = new HostName("host-1");
- when(mutablestatusRegistry.getHostStatus(eq(host_1))).thenReturn(HostStatus.NO_REMARKS);
-
- HostName host_2 = new HostName("host-2");
- when(mutablestatusRegistry.getHostStatus(eq(host_2))).thenReturn(HostStatus.ALLOWED_TO_BE_DOWN);
-
- try {
- policy.grantSuspensionRequest(applicationInstance, host_1, mutablestatusRegistry);
- fail();
- } catch (HostStateChangeDeniedException e) {
- // As expected.
- assertThat(e.getConstraintName())
- .isEqualTo(HostedVespaPolicy.ENOUGH_SERVICES_UP_CONSTRAINT);
- assertThat(e.getServiceType()).isEqualTo(VespaModelUtil.STORAGENODE_SERVICE_TYPE);
- }
-
- verify(mutablestatusRegistry, never()).setHostState(any(), any());
- }
-
- @Test
- public void ownServiceInstanceDown() throws Exception {
- final ApplicationInstance<ServiceMonitorStatus> applicationInstance = new AppBuilder()
- .addCluster(new ClusterBuilder(SERVICE_TYPE_1)
- .instance(HOST_NAME_1, UP)
- .instance(HOST_NAME_2, DOWN)
- .instance(HOST_NAME_3, DOWN))
- .build();
-
- policy.grantSuspensionRequest(applicationInstance, HOST_NAME_3, mutablestatusRegistry);
- }
-
- @Test
- public void ownServiceInstanceDown_otherServiceIsAllNotChecked() throws Exception {
- final ApplicationInstance<ServiceMonitorStatus> applicationInstance = new AppBuilder()
- .addCluster(new ClusterBuilder(SERVICE_TYPE_1)
- .instance(HOST_NAME_1, UP)
- .instance(HOST_NAME_2, DOWN)
- .instance(HOST_NAME_3, DOWN))
- .addCluster(new ClusterBuilder(SERVICE_TYPE_2)
- .instance(HOST_NAME_3, NOT_CHECKED)
- .instance(HOST_NAME_4, NOT_CHECKED)
- .instance(HOST_NAME_5, NOT_CHECKED))
- .build();
-
- policy.grantSuspensionRequest(applicationInstance, HOST_NAME_3, mutablestatusRegistry);
- }
-
- @Test
- public void ownServiceInstanceDown_otherServiceIsAllNotChecked_oneHostDown() throws Exception {
- final ApplicationInstance<ServiceMonitorStatus> applicationInstance = new AppBuilder()
- .addCluster(new ClusterBuilder(SERVICE_TYPE_1)
- .instance(HOST_NAME_1, UP)
- .instance(HOST_NAME_2, DOWN)
- .instance(HOST_NAME_3, DOWN))
- .addCluster(new ClusterBuilder(SERVICE_TYPE_2)
- .instance(HOST_NAME_3, NOT_CHECKED)
- .instance(HOST_NAME_4, NOT_CHECKED)
- .instance(HOST_NAME_5, NOT_CHECKED))
- .build();
-
- when(mutablestatusRegistry.getHostStatus(eq(HOST_NAME_4))).thenReturn(HostStatus.ALLOWED_TO_BE_DOWN);
- try {
- policy.grantSuspensionRequest(applicationInstance, HOST_NAME_3, mutablestatusRegistry);
- fail("Should not be allowed to set " + HOST_NAME_3 + " down when " + HOST_NAME_4 + " is already down.");
- } catch (HostStateChangeDeniedException e) {
- // As expected.
- assertThat(e.getConstraintName()).isEqualTo(HostedVespaPolicy.ENOUGH_SERVICES_UP_CONSTRAINT);
- assertThat(e.getServiceType()).isEqualTo(SERVICE_TYPE_2);
- }
- }
+ public void testAcquirePermissionToRemove() throws OrchestrationException {
+ final HostedVespaClusterPolicy clusterPolicy = mock(HostedVespaClusterPolicy.class);
+ final HostedVespaPolicy policy = new HostedVespaPolicy(clusterPolicy, clientFactory);
+ final ApplicationApi applicationApi = mock(ApplicationApi.class);
+ when(applicationApi.applicationInfo()).thenReturn("tenant:app");
- @Test
- public void ownServiceInstanceDown_otherServiceIsAllUp() throws Exception {
- final ApplicationInstance<ServiceMonitorStatus> applicationInstance = new AppBuilder()
- .addCluster(new ClusterBuilder(SERVICE_TYPE_1)
- .instance(HOST_NAME_1, UP)
- .instance(HOST_NAME_2, DOWN)
- .instance(HOST_NAME_3, DOWN))
- .addCluster(new ClusterBuilder(SERVICE_TYPE_2)
- .instance(HOST_NAME_3, UP)
- .instance(HOST_NAME_4, UP)
- .instance(HOST_NAME_5, UP))
- .build();
-
- policy.grantSuspensionRequest(applicationInstance, HOST_NAME_3, mutablestatusRegistry);
- }
+ ClusterApi clusterApi1 = mock(ClusterApi.class);
+ ClusterApi clusterApi2 = mock(ClusterApi.class);
+ ClusterApi clusterApi3 = mock(ClusterApi.class);
+ List<ClusterApi> clusterApis = Arrays.asList(clusterApi1, clusterApi2, clusterApi3);
+ when(applicationApi.getClusters()).thenReturn(clusterApis);
- @Test
- public void hostHasTwoInstances_oneDownOneUp() throws Exception {
- final ApplicationInstance<ServiceMonitorStatus> applicationInstance = new AppBuilder()
- .addCluster(new ClusterBuilder(SERVICE_TYPE_1)
- .instance(HOST_NAME_1, UP)
- .instance(HOST_NAME_2, UP)
- .instance(HOST_NAME_3, UP)
- .instance(HOST_NAME_3, DOWN))
- .build();
-
- policy.grantSuspensionRequest(applicationInstance, HOST_NAME_3, mutablestatusRegistry);
- }
+ StorageNode storageNode1 = mock(StorageNode.class);
+ HostName hostName1 = new HostName("storage-1");
+ when(storageNode1.hostName()).thenReturn(hostName1);
- // Helper classes for terseness.
+ HostName hostName2 = new HostName("host-2");
- private static class AppBuilder {
- private final Set<ServiceCluster<ServiceMonitorStatus>> serviceClusters = new HashSet<>();
+ StorageNode storageNode3 = mock(StorageNode.class);
+ HostName hostName3 = new HostName("storage-3");
+ when(storageNode3.hostName()).thenReturn(hostName3);
- public AppBuilder addCluster(final ServiceCluster<ServiceMonitorStatus> cluster) {
- serviceClusters.add(cluster);
- return this;
- }
+ List<StorageNode> upStorageNodes = Arrays.asList(storageNode1, storageNode3);
+ when(applicationApi.getStorageNodesInGroupInClusterOrder()).thenReturn(upStorageNodes);
- public AppBuilder addCluster(final ClusterBuilder clusterBuilder) {
- serviceClusters.add(clusterBuilder.build());
- return this;
- }
+ List<HostName> noRemarksHostNames = Arrays.asList(hostName1, hostName2, hostName3);
+ when(applicationApi.getNodesInGroupWithStatus(HostStatus.NO_REMARKS)).thenReturn(noRemarksHostNames);
- public AppBuilder addCluster(final StorageClusterBuilder clusterBuilder) {
- serviceClusters.add(clusterBuilder.build());
- return this;
- }
+ InOrder order = inOrder(applicationApi, clusterPolicy, storageNode1, storageNode3);
- public ApplicationInstance<ServiceMonitorStatus> build() {
- return new ApplicationInstance<>(
- TENANT_ID,
- APPLICATION_INSTANCE_ID,
- serviceClusters);
- }
- }
+ policy.acquirePermissionToRemove(applicationApi);
- private static class ClusterBuilder {
- private final ServiceType serviceType;
- private final Set<ServiceInstance<ServiceMonitorStatus>> instances = new HashSet<>();
- private final ClusterId clusterId;
- private int instanceIndex = 0;
-
- public ClusterBuilder(final ClusterId clusterId, final ServiceType serviceType) {
- this.clusterId = clusterId;
- this.serviceType = serviceType;
- }
-
- public ClusterBuilder(final ServiceType serviceType) {
- this.clusterId = new ClusterId("clusterId");
- this.serviceType = serviceType;
- }
-
- public ClusterBuilder instance(final HostName hostName, final ServiceMonitorStatus status) {
- instances.add(new ServiceInstance<>(new ConfigId("configId-" + instanceIndex), hostName, status));
- ++instanceIndex;
- return this;
- }
-
- public ServiceCluster<ServiceMonitorStatus> build() {
- return new ServiceCluster<>(clusterId, serviceType, instances);
- }
- }
+ order.verify(applicationApi).getClusters();
+ order.verify(clusterPolicy).verifyGroupGoingDownPermanentlyIsFine(clusterApi1);
+ order.verify(clusterPolicy).verifyGroupGoingDownPermanentlyIsFine(clusterApi2);
+ order.verify(clusterPolicy).verifyGroupGoingDownPermanentlyIsFine(clusterApi3);
- private static class StorageClusterBuilder {
- private final Set<ServiceInstance<ServiceMonitorStatus>> instances = new HashSet<>();
+ order.verify(applicationApi).getStorageNodesInGroupInClusterOrder();
+ order.verify(storageNode1).setNodeState(ClusterControllerNodeState.DOWN);
+ order.verify(storageNode3).setNodeState(ClusterControllerNodeState.DOWN);
- public StorageClusterBuilder instance(final HostName hostName, final ServiceMonitorStatus status, int index) {
- instances.add(new ServiceInstance<>(TestUtil.storageNodeConfigId("content", index), hostName, status));
- return this;
- }
+ order.verify(applicationApi).getNodesInGroupWithStatus(HostStatus.NO_REMARKS);
+ order.verify(applicationApi).setHostState(hostName1, HostStatus.ALLOWED_TO_BE_DOWN);
+ order.verify(applicationApi).setHostState(hostName2, HostStatus.ALLOWED_TO_BE_DOWN);
+ order.verify(applicationApi).setHostState(hostName3, HostStatus.ALLOWED_TO_BE_DOWN);
- public ServiceCluster<ServiceMonitorStatus> build() {
- return new ServiceCluster<>(new ClusterId(CONTENT_CLUSTER_NAME), VespaModelUtil.STORAGENODE_SERVICE_TYPE, instances);
- }
+ order.verifyNoMoreInteractions();
}
}
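
The in-order verifications in testAcquirePermissionToRemove outline the sequence the policy is expected to follow when a group is removed permanently. A hedged sketch of that flow, reconstructed from the test only (not the HostedVespaPolicy source; exception handling is elided):

    // Flow implied by the verifications: per-cluster permanent-down check,
    // storage nodes set to DOWN (not MAINTENANCE), then hosts marked allowed-to-be-down.
    for (ClusterApi cluster : applicationApi.getClusters()) {
        clusterPolicy.verifyGroupGoingDownPermanentlyIsFine(cluster);
    }
    for (StorageNode storageNode : applicationApi.getStorageNodesInGroupInClusterOrder()) {
        storageNode.setNodeState(ClusterControllerNodeState.DOWN);
    }
    for (HostName host : applicationApi.getNodesInGroupWithStatus(HostStatus.NO_REMARKS)) {
        applicationApi.setHostState(host, HostStatus.ALLOWED_TO_BE_DOWN);
    }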
diff --git a/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/resources/HostResourceTest.java b/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/resources/HostResourceTest.java
index 793b73ec5d8..3d3117f9e07 100644
--- a/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/resources/HostResourceTest.java
+++ b/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/resources/HostResourceTest.java
@@ -5,7 +5,6 @@ import com.yahoo.vespa.applicationmodel.ApplicationInstance;
import com.yahoo.vespa.applicationmodel.ApplicationInstanceId;
import com.yahoo.vespa.applicationmodel.ApplicationInstanceReference;
import com.yahoo.vespa.applicationmodel.HostName;
-import com.yahoo.vespa.applicationmodel.ServiceType;
import com.yahoo.vespa.applicationmodel.TenantId;
import com.yahoo.vespa.orchestrator.InstanceLookupService;
import com.yahoo.vespa.orchestrator.OrchestratorImpl;
@@ -104,6 +103,10 @@ public class HostResourceTest {
}
@Override
+ public void acquirePermissionToRemove(ApplicationApi applicationApi) throws HostStateChangeDeniedException {
+ }
+
+ @Override
public void releaseSuspensionGrant(
ApplicationInstance<ServiceMonitorStatus> applicationInstance,
HostName hostName,
@@ -203,6 +206,11 @@ public class HostResourceTest {
}
@Override
+ public void acquirePermissionToRemove(ApplicationApi applicationApi) throws HostStateChangeDeniedException {
+ doThrow();
+ }
+
+ @Override
public void releaseSuspensionGrant(
ApplicationInstance<ServiceMonitorStatus> applicationInstance,
HostName hostName,
@@ -214,7 +222,6 @@ public class HostResourceTest {
throw new HostStateChangeDeniedException(
new HostName("some-host"),
"impossible-policy",
- new ServiceType("silly-service"),
"This policy rejects all requests");
}
}