diff options
30 files changed, 529 insertions, 788 deletions
diff --git a/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/orchestrator/OrchestratorImplTest.java b/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/orchestrator/OrchestratorImplTest.java index c618b5c4648..660b86fb4ed 100644 --- a/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/orchestrator/OrchestratorImplTest.java +++ b/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/orchestrator/OrchestratorImplTest.java @@ -48,7 +48,7 @@ public class OrchestratorImplTest { OrchestratorImpl.WEB_SERVICE_PORT, Optional.empty(), UpdateHostResponse.class - )).thenReturn(new UpdateHostResponse(hostName, new HostStateChangeDenialReason("hostname", "service", "fail"))); + )).thenReturn(new UpdateHostResponse(hostName, new HostStateChangeDenialReason("hostname", "fail"))); orchestrator.suspend(hostName); } @@ -95,7 +95,7 @@ public class OrchestratorImplTest { OrchestratorImpl.ORCHESTRATOR_PATH_PREFIX_HOST_API + "/" + hostName+ "/suspended", OrchestratorImpl.WEB_SERVICE_PORT, UpdateHostResponse.class - )).thenReturn(new UpdateHostResponse(hostName, new HostStateChangeDenialReason("hostname", "service", "fail"))); + )).thenReturn(new UpdateHostResponse(hostName, new HostStateChangeDenialReason("hostname", "fail"))); orchestrator.resume(hostName); } diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRepositoryMaintenance.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRepositoryMaintenance.java index c5aa78fefad..d35a9c158b7 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRepositoryMaintenance.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRepositoryMaintenance.java @@ -40,6 +40,7 @@ public class NodeRepositoryMaintenance extends AbstractComponent { private final ReservationExpirer reservationExpirer; private final InactiveExpirer inactiveExpirer; private final RetiredExpirer retiredExpirer; + private final RetiredEarlyExpirer retiredEarlyExpirer; private final FailedExpirer failedExpirer; private final DirtyExpirer dirtyExpirer; private final NodeRebooter nodeRebooter; @@ -66,6 +67,7 @@ public class NodeRepositoryMaintenance extends AbstractComponent { zooKeeperAccessMaintainer = new ZooKeeperAccessMaintainer(nodeRepository, curator, durationFromEnv("zookeeper_access_maintenance_interval").orElse(defaults.zooKeeperAccessMaintenanceInterval), jobControl); reservationExpirer = new ReservationExpirer(nodeRepository, clock, durationFromEnv("reservation_expiry").orElse(defaults.reservationExpiry), jobControl); retiredExpirer = new RetiredExpirer(nodeRepository, deployer, clock, durationFromEnv("retired_expiry").orElse(defaults.retiredExpiry), jobControl); + retiredEarlyExpirer = new RetiredEarlyExpirer(nodeRepository, zone, durationFromEnv("retired_early_interval").orElse(defaults.retiredEarlyInterval), jobControl, deployer, orchestrator); inactiveExpirer = new InactiveExpirer(nodeRepository, clock, durationFromEnv("inactive_expiry").orElse(defaults.inactiveExpiry), jobControl); failedExpirer = new FailedExpirer(nodeRepository, zone, clock, durationFromEnv("failed_expiry").orElse(defaults.failedExpiry), jobControl); dirtyExpirer = new DirtyExpirer(nodeRepository, clock, durationFromEnv("dirty_expiry").orElse(defaults.dirtyExpiry), jobControl); @@ -91,6 +93,7 @@ public class NodeRepositoryMaintenance extends AbstractComponent { reservationExpirer.deconstruct(); inactiveExpirer.deconstruct(); retiredExpirer.deconstruct(); + retiredEarlyExpirer.deconstruct(); failedExpirer.deconstruct(); dirtyExpirer.deconstruct(); nodeRebooter.deconstruct(); @@ -135,6 +138,7 @@ public class NodeRepositoryMaintenance extends AbstractComponent { private final Duration rebootInterval; private final Duration nodeRetirerInterval; private final Duration metricsInterval; + private final Duration retiredEarlyInterval; private final NodeFailer.ThrottlePolicy throttlePolicy; @@ -149,6 +153,7 @@ public class NodeRepositoryMaintenance extends AbstractComponent { reservationExpiry = Duration.ofMinutes(20); // same as deployment timeout inactiveExpiry = Duration.ofHours(4); // enough time for the application owner to discover and redeploy retiredExpiry = Duration.ofDays(4); // enough time to migrate data + retiredEarlyInterval = Duration.ofMinutes(29); failedExpiry = Duration.ofDays(4); // enough time to recover data even if it happens friday night dirtyExpiry = Duration.ofHours(2); // enough time to clean the node rebootInterval = Duration.ofDays(30); @@ -165,6 +170,7 @@ public class NodeRepositoryMaintenance extends AbstractComponent { reservationExpiry = Duration.ofMinutes(10); // Need to be long enough for deployment to be finished for all config model versions inactiveExpiry = Duration.ofSeconds(2); // support interactive wipe start over retiredExpiry = Duration.ofMinutes(1); + retiredEarlyInterval = Duration.ofMinutes(5); failedExpiry = Duration.ofMinutes(10); dirtyExpiry = Duration.ofMinutes(30); rebootInterval = Duration.ofDays(30); diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/RetiredEarlyExpirer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/RetiredEarlyExpirer.java new file mode 100644 index 00000000000..f936c1e06ba --- /dev/null +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/RetiredEarlyExpirer.java @@ -0,0 +1,98 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.vespa.hosted.provision.maintenance; + +import com.yahoo.collections.ListMap; +import com.yahoo.config.provision.ApplicationId; +import com.yahoo.config.provision.Deployer; +import com.yahoo.config.provision.Deployment; +import com.yahoo.config.provision.Environment; +import com.yahoo.config.provision.RegionName; +import com.yahoo.config.provision.SystemName; +import com.yahoo.config.provision.Zone; +import com.yahoo.vespa.applicationmodel.HostName; +import com.yahoo.vespa.hosted.provision.Node; +import com.yahoo.vespa.hosted.provision.NodeRepository; +import com.yahoo.vespa.orchestrator.OrchestrationException; +import com.yahoo.vespa.orchestrator.Orchestrator; + +import java.time.Duration; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; +import java.util.Map; +import java.util.Optional; +import java.util.logging.Level; +import java.util.stream.Collectors; + +public class RetiredEarlyExpirer extends Maintainer { + private final Deployer deployer; + private final Orchestrator orchestrator; + + public RetiredEarlyExpirer(NodeRepository nodeRepository, + Zone zone, + Duration interval, + JobControl jobControl, + Deployer deployer, + Orchestrator orchestrator) { + super(nodeRepository, interval, jobControl); + this.deployer = deployer; + this.orchestrator = orchestrator; + + List<Zone> applies = Arrays.asList(new Zone(SystemName.cd, Environment.dev, RegionName.from("cd-us-central-1"))); + if (!applies.contains(zone)) { + String targetZones = applies.stream().map(Zone::toString).collect(Collectors.joining(", ")); + log.info(RetiredEarlyExpirer.class.getName() + " only runs in " + targetZones + ", stopping."); + deconstruct(); + } + } + + @Override + protected void maintain() { + List<Node> activeNodes = nodeRepository().getNodes(Node.State.active); + + ListMap<ApplicationId, Node> retiredNodesByApplication = new ListMap<>(); + for (Node node : activeNodes) { + if (node.allocation().isPresent() && node.allocation().get().membership().retired()) { + retiredNodesByApplication.put(node.allocation().get().owner(), node); + } + } + + for (Map.Entry<ApplicationId, List<Node>> entry : retiredNodesByApplication.entrySet()) { + ApplicationId application = entry.getKey(); + List<Node> retiredNodes = entry.getValue(); + + try { + Optional<Deployment> deployment = deployer.deployFromLocalActive(application, Duration.ofMinutes(30)); + if ( ! deployment.isPresent()) continue; // this will be done at another config server + + List<Node> nodesToRemove = new ArrayList<>(); + for (Node node : retiredNodes) { + if (nodeCanBeRemoved(node)) { + nodesToRemove.add(node); + } + } + + nodeRepository().setRemovable(application, nodesToRemove); + + deployment.get().activate(); + + String nodeList = nodesToRemove.stream().map(Node::hostname).collect(Collectors.joining(", ")); + log.info("Redeployed " + application + " to deactivate retired nodes: " + nodeList); + } catch (RuntimeException e) { + String nodeList = retiredNodes.stream().map(Node::hostname).collect(Collectors.joining(", ")); + log.log(Level.WARNING, "Exception trying to deactivate retired nodes from " + application + + ": " + nodeList, e); + } + } + } + + boolean nodeCanBeRemoved(Node node) { + try { + orchestrator.acquirePermissionToRemove(new HostName(node.hostname())); + return true; + } catch (OrchestrationException e) { + log.info("Did not get permission to remove retired node " + node + ": " + e.getMessage()); + return false; + } + } +} diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/testutils/OrchestratorMock.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/testutils/OrchestratorMock.java index b29b5ee813e..538feeb042d 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/testutils/OrchestratorMock.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/testutils/OrchestratorMock.java @@ -7,6 +7,7 @@ import com.yahoo.vespa.orchestrator.ApplicationStateChangeDeniedException; import com.yahoo.vespa.orchestrator.BatchHostNameNotFoundException; import com.yahoo.vespa.orchestrator.BatchInternalErrorException; import com.yahoo.vespa.orchestrator.HostNameNotFoundException; +import com.yahoo.vespa.orchestrator.OrchestrationException; import com.yahoo.vespa.orchestrator.Orchestrator; import com.yahoo.vespa.orchestrator.model.NodeGroup; import com.yahoo.vespa.orchestrator.policy.BatchHostStateChangeDeniedException; @@ -62,6 +63,9 @@ public class OrchestratorMock implements Orchestrator { } @Override + public void acquirePermissionToRemove(HostName hostName) throws OrchestrationException {} + + @Override public void suspendAll(HostName parentHostname, List<HostName> hostNames) throws BatchInternalErrorException, BatchHostStateChangeDeniedException, BatchHostNameNotFoundException { throw new UnsupportedOperationException("Not implemented"); } diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/RetiredExpirerTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/RetiredExpirerTest.java index f034490b3f7..9bfeccb9a5d 100644 --- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/RetiredExpirerTest.java +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/RetiredExpirerTest.java @@ -9,6 +9,7 @@ import com.yahoo.config.provision.ClusterSpec; import com.yahoo.config.provision.Environment; import com.yahoo.config.provision.HostSpec; import com.yahoo.config.provision.InstanceName; +import com.yahoo.config.provision.NodeFlavors; import com.yahoo.config.provision.NodeType; import com.yahoo.config.provision.RegionName; import com.yahoo.config.provision.TenantName; @@ -20,11 +21,12 @@ import com.yahoo.vespa.curator.mock.MockCurator; import com.yahoo.vespa.curator.transaction.CuratorTransaction; import com.yahoo.vespa.hosted.provision.Node; import com.yahoo.vespa.hosted.provision.NodeRepository; -import com.yahoo.config.provision.NodeFlavors; import com.yahoo.vespa.hosted.provision.provisioning.NodeRepositoryProvisioner; import com.yahoo.vespa.hosted.provision.testutils.FlavorConfigBuilder; import com.yahoo.vespa.hosted.provision.testutils.MockDeployer; import com.yahoo.vespa.hosted.provision.testutils.MockNameResolver; +import com.yahoo.vespa.orchestrator.OrchestrationException; +import com.yahoo.vespa.orchestrator.Orchestrator; import org.junit.Test; import java.time.Duration; @@ -35,6 +37,11 @@ import java.util.Optional; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertFalse; +import static org.mockito.Matchers.any; +import static org.mockito.Mockito.doNothing; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.times; +import static org.mockito.Mockito.verify; /** * @author bratseth @@ -58,7 +65,7 @@ public class RetiredExpirerTest { ApplicationId applicationId = ApplicationId.from(TenantName.from("foo"), ApplicationName.from("bar"), InstanceName.from("fuz")); // Allocate content cluster of sizes 7 -> 2 -> 3: - // Should end up with 3 nodes in the cluster (one previously retired), and 3 retired + // Should end up with 3 nodes in the cluster (one previously retired), and 4 retired ClusterSpec cluster = ClusterSpec.request(ClusterSpec.Type.content, ClusterSpec.Id.from("test"), Version.fromString("6.42")); int wantedNodes; activate(applicationId, cluster, wantedNodes=7, 1, provisioner); @@ -117,6 +124,64 @@ public class RetiredExpirerTest { assertFalse(node.allocation().get().membership().retired()); } + @Test + public void ensure_early_inactivation() throws OrchestrationException { + ManualClock clock = new ManualClock(); + Zone zone = new Zone(Environment.prod, RegionName.from("us-east")); + NodeFlavors nodeFlavors = FlavorConfigBuilder.createDummies("default"); + NodeRepository nodeRepository = new NodeRepository(nodeFlavors, curator, clock, zone, + new MockNameResolver().mockAnyLookup()); + NodeRepositoryProvisioner provisioner = new NodeRepositoryProvisioner(nodeRepository, nodeFlavors, zone); + + createReadyNodes(7, nodeRepository, nodeFlavors); + createHostNodes(4, nodeRepository, nodeFlavors); + + ApplicationId applicationId = ApplicationId.from(TenantName.from("foo"), ApplicationName.from("bar"), InstanceName.from("fuz")); + + // Allocate content cluster of sizes 7 -> 2 -> 3: + // Should end up with 3 nodes in the cluster (one previously retired), and 4 retired + ClusterSpec cluster = ClusterSpec.request(ClusterSpec.Type.content, ClusterSpec.Id.from("test"), Version.fromString("6.42")); + int wantedNodes; + activate(applicationId, cluster, wantedNodes=7, 1, provisioner); + activate(applicationId, cluster, wantedNodes=2, 1, provisioner); + activate(applicationId, cluster, wantedNodes=3, 1, provisioner); + assertEquals(7, nodeRepository.getNodes(applicationId, Node.State.active).size()); + assertEquals(0, nodeRepository.getNodes(applicationId, Node.State.inactive).size()); + + // Cause inactivation of retired nodes + clock.advance(Duration.ofHours(30)); // Retire period spent + MockDeployer deployer = + new MockDeployer(provisioner, + Collections.singletonMap( + applicationId, + new MockDeployer.ApplicationContext(applicationId, cluster, Capacity.fromNodeCount(wantedNodes, Optional.of("default")), 1))); + + Orchestrator orchestrator = mock(Orchestrator.class); + // Allow the 1st and 3rd retired nodes permission to inactivate + doNothing() + .doThrow(new OrchestrationException("Permission not granted 1")) + .doNothing() + .doThrow(new OrchestrationException("Permission not granted 2")) + .when(orchestrator).acquirePermissionToRemove(any()); + + new RetiredEarlyExpirer( + nodeRepository, + zone, + Duration.ofDays(30), + new JobControl(nodeRepository.database()), + deployer, + orchestrator).run(); + assertEquals(5, nodeRepository.getNodes(applicationId, Node.State.active).size()); + assertEquals(2, nodeRepository.getNodes(applicationId, Node.State.inactive).size()); + assertEquals(1, deployer.redeployments); + + verify(orchestrator, times(4)).acquirePermissionToRemove(any()); + + // inactivated nodes are not retired + for (Node node : nodeRepository.getNodes(applicationId, Node.State.inactive)) + assertFalse(node.allocation().get().membership().retired()); + } + private void activate(ApplicationId applicationId, ClusterSpec cluster, int nodes, int groups, NodeRepositoryProvisioner provisioner) { List<HostSpec> hosts = provisioner.prepare(applicationId, cluster, Capacity.fromNodeCount(nodes), groups, null); NestedTransaction transaction = new NestedTransaction().add(new CuratorTransaction(curator)); diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/restapi/v2/responses/maintenance.json b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/restapi/v2/responses/maintenance.json index a7a2fe6b677..fea4fb8d4d2 100644 --- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/restapi/v2/responses/maintenance.json +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/restapi/v2/responses/maintenance.json @@ -28,6 +28,9 @@ "name":"OperatorChangeApplicationMaintainer" }, { + "name":"RetiredEarlyExpirer" + }, + { "name":"MetricsReporter" }, { diff --git a/orchestrator-restapi/src/main/java/com/yahoo/vespa/orchestrator/restapi/wire/HostStateChangeDenialReason.java b/orchestrator-restapi/src/main/java/com/yahoo/vespa/orchestrator/restapi/wire/HostStateChangeDenialReason.java index 37366e6efd3..8345455d68a 100644 --- a/orchestrator-restapi/src/main/java/com/yahoo/vespa/orchestrator/restapi/wire/HostStateChangeDenialReason.java +++ b/orchestrator-restapi/src/main/java/com/yahoo/vespa/orchestrator/restapi/wire/HostStateChangeDenialReason.java @@ -13,20 +13,16 @@ import java.util.Objects; public class HostStateChangeDenialReason { public static final String FIELD_NAME_CONSTRAINT = "constraint"; - public static final String FIELD_NAME_SERVICE_TYPE = "service-type"; public static final String FIELD_NAME_MESSAGE = "message"; private final String constraintName; - private final String serviceType; private final String message; @JsonCreator public HostStateChangeDenialReason( @JsonProperty(FIELD_NAME_CONSTRAINT) final String constraintName, - @JsonProperty(FIELD_NAME_SERVICE_TYPE) final String serviceType, @JsonProperty(FIELD_NAME_MESSAGE) final String message) { this.constraintName = constraintName; - this.serviceType = serviceType; this.message = message; } @@ -35,11 +31,6 @@ public class HostStateChangeDenialReason { return constraintName; } - @JsonProperty(FIELD_NAME_SERVICE_TYPE) - public String serviceType() { - return serviceType; - } - @JsonProperty(FIELD_NAME_MESSAGE) public String message() { return message; @@ -53,7 +44,6 @@ public class HostStateChangeDenialReason { final HostStateChangeDenialReason other = (HostStateChangeDenialReason) o; if (!Objects.equals(this.constraintName, other.constraintName)) return false; - if (!Objects.equals(this.serviceType, other.serviceType)) return false; if (!Objects.equals(this.message, other.message)) return false; return true; @@ -61,6 +51,6 @@ public class HostStateChangeDenialReason { @Override public int hashCode() { - return Objects.hash(constraintName, serviceType, message); + return Objects.hash(constraintName, message); } } diff --git a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/Orchestrator.java b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/Orchestrator.java index e3451ad9576..1a1ffe13b37 100644 --- a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/Orchestrator.java +++ b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/Orchestrator.java @@ -60,6 +60,12 @@ public interface Orchestrator { void suspend(HostName hostName) throws HostStateChangeDeniedException, HostNameNotFoundException; /** + * Acquire permission to remove a node permanently from the application, or otherwise throw + * {@link OrchestrationException}. + */ + void acquirePermissionToRemove(HostName hostName) throws OrchestrationException; + + /** * Suspend normal operations for a group of nodes in the same application. * * @param nodeGroup The group of nodes in an application. diff --git a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/OrchestratorImpl.java b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/OrchestratorImpl.java index 72b644be464..5cd5f0ab638 100644 --- a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/OrchestratorImpl.java +++ b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/OrchestratorImpl.java @@ -11,7 +11,7 @@ import com.yahoo.vespa.applicationmodel.HostName; import com.yahoo.vespa.applicationmodel.ServiceCluster; import com.yahoo.vespa.orchestrator.controller.ClusterControllerClient; import com.yahoo.vespa.orchestrator.controller.ClusterControllerClientFactory; -import com.yahoo.vespa.orchestrator.controller.ClusterControllerState; +import com.yahoo.vespa.orchestrator.controller.ClusterControllerNodeState; import com.yahoo.vespa.orchestrator.controller.ClusterControllerStateResponse; import com.yahoo.vespa.orchestrator.model.ApplicationApi; import com.yahoo.vespa.orchestrator.model.ApplicationApiImpl; @@ -124,6 +124,21 @@ public class OrchestratorImpl implements Orchestrator { suspendGroup(nodeGroup); } + @Override + public void acquirePermissionToRemove(HostName hostName) throws OrchestrationException { + ApplicationInstance<ServiceMonitorStatus> appInstance = getApplicationInstance(hostName); + NodeGroup nodeGroup = new NodeGroup(appInstance, hostName); + + try (MutableStatusRegistry statusRegistry = statusService.lockApplicationInstance_forCurrentThreadOnly(appInstance.reference())) { + ApplicationApi applicationApi = new ApplicationApiImpl( + nodeGroup, + statusRegistry, + clusterControllerClientFactory); + + policy.acquirePermissionToRemove(applicationApi); + } + } + // Public for testing purposes @Override public void suspendGroup(NodeGroup nodeGroup) throws HostStateChangeDeniedException, HostNameNotFoundException { @@ -291,7 +306,7 @@ public class OrchestratorImpl implements Orchestrator { // If the clustercontroller throws an error the nodes will be marked as allowed to be down // and be set back up on next resume invocation. - setClusterStateInController(application, ClusterControllerState.MAINTENANCE); + setClusterStateInController(application, ClusterControllerNodeState.MAINTENANCE); } statusRegistry.setApplicationInstanceStatus(status); @@ -299,7 +314,7 @@ public class OrchestratorImpl implements Orchestrator { } private void setClusterStateInController(ApplicationInstance<ServiceMonitorStatus> application, - ClusterControllerState state) + ClusterControllerNodeState state) throws ApplicationStateChangeDeniedException, ApplicationIdNotFoundException { // Get all content clusters for this application Set<ClusterId> contentClusterIds = application.serviceClusters().stream() diff --git a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/controller/ClusterControllerClient.java b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/controller/ClusterControllerClient.java index 9de7bfab627..b51ecfd1331 100644 --- a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/controller/ClusterControllerClient.java +++ b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/controller/ClusterControllerClient.java @@ -13,13 +13,13 @@ public interface ClusterControllerClient { * * @throws IOException if there was a problem communicating with the cluster controller */ - ClusterControllerStateResponse setNodeState(int storageNodeIndex, ClusterControllerState wantedState) throws IOException; + ClusterControllerStateResponse setNodeState(int storageNodeIndex, ClusterControllerNodeState wantedState) throws IOException; /** * Requests that a cluster controller sets all nodes in the cluster to the requested state. * * @throws IOException if there was a problem communicating with the cluster controller */ - ClusterControllerStateResponse setApplicationState(ClusterControllerState wantedState) throws IOException; + ClusterControllerStateResponse setApplicationState(ClusterControllerNodeState wantedState) throws IOException; } diff --git a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/controller/ClusterControllerClientImpl.java b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/controller/ClusterControllerClientImpl.java index 01ae6dadfc7..87fb22b882c 100644 --- a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/controller/ClusterControllerClientImpl.java +++ b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/controller/ClusterControllerClientImpl.java @@ -29,7 +29,7 @@ public class ClusterControllerClientImpl implements ClusterControllerClient{ * @throws IOException if there was a problem communicating with the cluster controller */ @Override - public ClusterControllerStateResponse setNodeState(int storageNodeIndex, ClusterControllerState wantedState) throws IOException { + public ClusterControllerStateResponse setNodeState(int storageNodeIndex, ClusterControllerNodeState wantedState) throws IOException { ClusterControllerStateRequest.State state = new ClusterControllerStateRequest.State(wantedState, REQUEST_REASON); ClusterControllerStateRequest stateRequest = new ClusterControllerStateRequest(state, ClusterControllerStateRequest.Condition.SAFE); @@ -52,7 +52,7 @@ public class ClusterControllerClientImpl implements ClusterControllerClient{ * @throws IOException if there was a problem communicating with the cluster controller */ @Override - public ClusterControllerStateResponse setApplicationState(final ClusterControllerState wantedState) throws IOException { + public ClusterControllerStateResponse setApplicationState(final ClusterControllerNodeState wantedState) throws IOException { final ClusterControllerStateRequest.State state = new ClusterControllerStateRequest.State(wantedState, REQUEST_REASON); final ClusterControllerStateRequest stateRequest = new ClusterControllerStateRequest(state, ClusterControllerStateRequest.Condition.FORCE); diff --git a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/controller/ClusterControllerState.java b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/controller/ClusterControllerNodeState.java index e5db0ec9795..5886a44af02 100644 --- a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/controller/ClusterControllerState.java +++ b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/controller/ClusterControllerNodeState.java @@ -11,13 +11,14 @@ import com.fasterxml.jackson.annotation.JsonValue; * * @author hakonhall */ -public enum ClusterControllerState { +public enum ClusterControllerNodeState { MAINTENANCE("maintenance"), - UP("up"); + UP("up"), + DOWN("down"); private final String wireName; - ClusterControllerState(final String wireName) { + ClusterControllerNodeState(final String wireName) { this.wireName = wireName; } diff --git a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/controller/ClusterControllerStateRequest.java b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/controller/ClusterControllerStateRequest.java index 2ea489337d8..c3b4498a52d 100644 --- a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/controller/ClusterControllerStateRequest.java +++ b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/controller/ClusterControllerStateRequest.java @@ -50,7 +50,7 @@ public class ClusterControllerStateRequest { public static class State { @JsonProperty("state") - public final ClusterControllerState state; + public final ClusterControllerNodeState state; /** * The reason the client is making the request to set the node state. @@ -59,7 +59,7 @@ public class ClusterControllerStateRequest { @JsonProperty("reason") public final String reason; - public State(ClusterControllerState state, String reason) { + public State(ClusterControllerNodeState state, String reason) { this.state = state; this.reason = reason; } diff --git a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/model/ApplicationApi.java b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/model/ApplicationApi.java index 0ac602b106e..62c2bb372e3 100644 --- a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/model/ApplicationApi.java +++ b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/model/ApplicationApi.java @@ -2,6 +2,7 @@ package com.yahoo.vespa.orchestrator.model; import com.yahoo.vespa.applicationmodel.HostName; +import com.yahoo.vespa.orchestrator.status.ApplicationInstanceStatus; import com.yahoo.vespa.orchestrator.status.HostStatus; import java.util.List; @@ -12,11 +13,19 @@ import java.util.List; public interface ApplicationApi { String applicationInfo(); + /** + * The policy acts on some subset of nodes in the application. + */ + NodeGroup getNodeGroup(); + List<ClusterApi> getClusters(); + ApplicationInstanceStatus getApplicationStatus(); + void setHostState(HostName hostName, HostStatus status); List<HostName> getNodesInGroupWithStatus(HostStatus status); + List<StorageNode> getStorageNodesInGroupInClusterOrder(); List<StorageNode> getUpStorageNodesInGroupInClusterOrder(); List<StorageNode> getStorageNodesAllowedToBeDownInGroupInReverseClusterOrder(); } diff --git a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/model/ApplicationApiImpl.java b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/model/ApplicationApiImpl.java index 5f84c80a051..889455044b1 100644 --- a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/model/ApplicationApiImpl.java +++ b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/model/ApplicationApiImpl.java @@ -6,6 +6,7 @@ import com.yahoo.vespa.applicationmodel.HostName; import com.yahoo.vespa.applicationmodel.ServiceCluster; import com.yahoo.vespa.applicationmodel.ServiceInstance; import com.yahoo.vespa.orchestrator.controller.ClusterControllerClientFactory; +import com.yahoo.vespa.orchestrator.status.ApplicationInstanceStatus; import com.yahoo.vespa.orchestrator.status.HostStatus; import com.yahoo.vespa.orchestrator.status.MutableStatusRegistry; import com.yahoo.vespa.orchestrator.status.ReadOnlyStatusRegistry; @@ -67,12 +68,23 @@ public class ApplicationApiImpl implements ApplicationApi { @Override public List<StorageNode> getStorageNodesAllowedToBeDownInGroupInReverseClusterOrder() { + return getStorageNodesInGroupInClusterOrder().stream() + .filter(storageNode -> getHostStatus(storageNode.hostName()) == HostStatus.ALLOWED_TO_BE_DOWN) + .sorted(Comparator.reverseOrder()) + .collect(Collectors.toList()); + } + + @Override + public NodeGroup getNodeGroup() { + return nodeGroup; + } + + @Override + public List<StorageNode> getStorageNodesInGroupInClusterOrder() { return clusterInOrder.stream() .map(ClusterApi::storageNodeInGroup) .filter(Optional::isPresent) .map(Optional::get) - .filter(storageNode -> getHostStatus(storageNode.hostName()) == HostStatus.ALLOWED_TO_BE_DOWN) - .sorted(Comparator.reverseOrder()) .collect(Collectors.toList()); } @@ -86,6 +98,11 @@ public class ApplicationApiImpl implements ApplicationApi { } @Override + public ApplicationInstanceStatus getApplicationStatus() { + return hostStatusService.getApplicationInstanceStatus(); + } + + @Override public void setHostState(HostName hostName, HostStatus status) { hostStatusService.setHostState(hostName, status); } diff --git a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/model/StorageNode.java b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/model/StorageNode.java index 1698770904f..ffd3cf5b39e 100644 --- a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/model/StorageNode.java +++ b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/model/StorageNode.java @@ -2,10 +2,10 @@ package com.yahoo.vespa.orchestrator.model; import com.yahoo.vespa.applicationmodel.HostName; -import com.yahoo.vespa.orchestrator.controller.ClusterControllerState; +import com.yahoo.vespa.orchestrator.controller.ClusterControllerNodeState; import com.yahoo.vespa.orchestrator.policy.HostStateChangeDeniedException; public interface StorageNode extends Comparable<StorageNode> { HostName hostName(); - void setNodeState(ClusterControllerState wantedState) throws HostStateChangeDeniedException; + void setNodeState(ClusterControllerNodeState wantedState) throws HostStateChangeDeniedException; } diff --git a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/model/StorageNodeImpl.java b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/model/StorageNodeImpl.java index 1984c5cca40..879c7b8f0e8 100644 --- a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/model/StorageNodeImpl.java +++ b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/model/StorageNodeImpl.java @@ -9,7 +9,7 @@ import com.yahoo.vespa.applicationmodel.HostName; import com.yahoo.vespa.applicationmodel.ServiceInstance; import com.yahoo.vespa.orchestrator.controller.ClusterControllerClient; import com.yahoo.vespa.orchestrator.controller.ClusterControllerClientFactory; -import com.yahoo.vespa.orchestrator.controller.ClusterControllerState; +import com.yahoo.vespa.orchestrator.controller.ClusterControllerNodeState; import com.yahoo.vespa.orchestrator.controller.ClusterControllerStateResponse; import com.yahoo.vespa.orchestrator.policy.HostStateChangeDeniedException; import com.yahoo.vespa.orchestrator.policy.HostedVespaPolicy; @@ -44,7 +44,7 @@ public class StorageNodeImpl implements StorageNode { } @Override - public void setNodeState(ClusterControllerState wantedNodeState) + public void setNodeState(ClusterControllerNodeState wantedNodeState) throws HostStateChangeDeniedException { // The "cluster name" used by the Cluster Controller IS the cluster ID. String clusterId = this.clusterId.s(); @@ -72,7 +72,6 @@ public class StorageNodeImpl implements StorageNode { throw new HostStateChangeDeniedException( hostName(), HostedVespaPolicy.CLUSTER_CONTROLLER_AVAILABLE_CONSTRAINT, - VespaModelUtil.CLUSTER_CONTROLLER_SERVICE_TYPE, "Failed to communicate with cluster controllers " + clusterControllers + ": " + e, e); } @@ -81,8 +80,7 @@ public class StorageNodeImpl implements StorageNode { throw new HostStateChangeDeniedException( hostName(), HostedVespaPolicy.SET_NODE_STATE_CONSTRAINT, - VespaModelUtil.CLUSTER_CONTROLLER_SERVICE_TYPE, - "Failed to set state to " + wantedNodeState + " in controller: " + response.reason); + "Failed to set state to " + wantedNodeState + " in cluster controller: " + response.reason); } } diff --git a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/policy/ClusterPolicy.java b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/policy/ClusterPolicy.java index d0d7cd83e60..98062640ab0 100644 --- a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/policy/ClusterPolicy.java +++ b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/policy/ClusterPolicy.java @@ -7,8 +7,14 @@ public interface ClusterPolicy { /** * There's an implicit group of nodes known to clusterApi. This method answers whether * it would be fine, just looking at this cluster (and disregarding Cluster Controller/storage - * which is handled separately), whether it would be fine to allow all services on all the nodes - * in the group to go down. + * which is handled separately), to allow all services on all the nodes in the group to go down. */ void verifyGroupGoingDownIsFine(ClusterApi clusterApi) throws HostStateChangeDeniedException; + + /** + * There's an implicit group of nodes known to clusterApi. This method answers whether + * it would be fine, just looking at this cluster, to allow all services on all the nodes + * in the group to go down PERMANENTLY (removed from application). + */ + void verifyGroupGoingDownPermanentlyIsFine(ClusterApi clusterApi) throws HostStateChangeDeniedException; } diff --git a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/policy/HostStateChangeDeniedException.java b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/policy/HostStateChangeDeniedException.java index 9bd584d82af..6062fef7189 100644 --- a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/policy/HostStateChangeDeniedException.java +++ b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/policy/HostStateChangeDeniedException.java @@ -3,37 +3,33 @@ package com.yahoo.vespa.orchestrator.policy; import com.yahoo.vespa.applicationmodel.HostName; import com.yahoo.vespa.applicationmodel.ServiceType; -import com.yahoo.vespa.orchestrator.model.NodeGroup; import com.yahoo.vespa.orchestrator.OrchestrationException; +import com.yahoo.vespa.orchestrator.model.NodeGroup; + +import java.util.Optional; /** * @author bakksjo */ public class HostStateChangeDeniedException extends OrchestrationException { - private final String constraintName; - private final ServiceType serviceType; + private final Optional<ServiceType> serviceType; - public HostStateChangeDeniedException(HostName hostName, String constraintName, - ServiceType serviceType, String message) { - this(hostName, constraintName, serviceType, message, null); + public HostStateChangeDeniedException(HostName hostName, String constraintName, String message) { + this(hostName, constraintName, message, null); } - public HostStateChangeDeniedException(HostName hostName, String constraintName, - ServiceType serviceType, String message, Throwable cause) { - this(hostName.toString(), constraintName, serviceType, message, cause); + public HostStateChangeDeniedException(HostName hostName, String constraintName, String message, Exception e) { + this(hostName.s(), constraintName, Optional.empty(), message, e); } - public HostStateChangeDeniedException(NodeGroup nodeGroup, - String constraintName, - ServiceType serviceType, - String message) { - this(nodeGroup.toCommaSeparatedString(), constraintName, serviceType, message, null); + public HostStateChangeDeniedException(NodeGroup nodeGroup, String constraintName, String message) { + this(nodeGroup.toCommaSeparatedString(), constraintName, Optional.empty(), message, null); } private HostStateChangeDeniedException(String nodes, String constraintName, - ServiceType serviceType, + Optional<ServiceType> serviceType, String message, Throwable cause) { super(createMessage(nodes, constraintName, serviceType, message), cause); @@ -43,18 +39,18 @@ public class HostStateChangeDeniedException extends OrchestrationException { private static String createMessage(String nodes, String constraintName, - ServiceType serviceType, + Optional<ServiceType> serviceType, String message) { return "Changing the state of " + nodes + " would violate " + constraintName - + " for service type " + serviceType + ": " + message; + + (serviceType.isPresent() ? " for service type " + serviceType.get() : "") + + ": " + message; } public String getConstraintName() { return constraintName; } - public ServiceType getServiceType() { + public Optional<ServiceType> getServiceType() { return serviceType; } - } diff --git a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/policy/HostedVespaClusterPolicy.java b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/policy/HostedVespaClusterPolicy.java index eb2fb9dc1b9..a081236f4bc 100644 --- a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/policy/HostedVespaClusterPolicy.java +++ b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/policy/HostedVespaClusterPolicy.java @@ -7,6 +7,7 @@ import com.yahoo.vespa.orchestrator.model.ClusterApi; import static com.yahoo.vespa.orchestrator.policy.HostedVespaPolicy.ENOUGH_SERVICES_UP_CONSTRAINT; public class HostedVespaClusterPolicy implements ClusterPolicy { + @Override public void verifyGroupGoingDownIsFine(ClusterApi clusterApi) throws HostStateChangeDeniedException { if (clusterApi.noServicesOutsideGroupIsDown()) { @@ -25,8 +26,8 @@ public class HostedVespaClusterPolicy implements ClusterPolicy { throw new HostStateChangeDeniedException( clusterApi.getNodeGroup(), ENOUGH_SERVICES_UP_CONSTRAINT, - clusterApi.serviceType(), - "Suspension percentage would increase from " + clusterApi.percentageOfServicesDown() + "Suspension percentage for service type " + clusterApi.serviceType() + + " would increase from " + clusterApi.percentageOfServicesDown() + "% to " + clusterApi.percentageOfServicesDownIfGroupIsAllowedToBeDown() + "%, over the limit of " + percentageOfServicesAllowedToBeDown + "%." + " These instances may be down: " + clusterApi.servicesDownAndNotInGroupDescription() @@ -34,7 +35,35 @@ public class HostedVespaClusterPolicy implements ClusterPolicy { + clusterApi.nodesAllowedToBeDownNotInGroupDescription()); } - static ConcurrentSuspensionLimitForCluster getConcurrentSuspensionLimit(ClusterApi clusterApi) { + @Override + public void verifyGroupGoingDownPermanentlyIsFine(ClusterApi clusterApi) + throws HostStateChangeDeniedException { + // This policy is similar to verifyGroupGoingDownIsFine, except that services being down in the group + // is no excuse to allow suspension (like it is for verifyGroupGoingDownIsFine), since if we grant + // suspension in this case they will permanently be down/removed. + + if (clusterApi.noServicesOutsideGroupIsDown()) { + return; + } + + int percentageOfServicesAllowedToBeDown = getConcurrentSuspensionLimit(clusterApi).asPercentage(); + if (clusterApi.percentageOfServicesDownIfGroupIsAllowedToBeDown() <= percentageOfServicesAllowedToBeDown) { + return; + } + + throw new HostStateChangeDeniedException( + clusterApi.getNodeGroup(), + ENOUGH_SERVICES_UP_CONSTRAINT, + "Down percentage for service type " + clusterApi.serviceType() + + " would increase to " + clusterApi.percentageOfServicesDownIfGroupIsAllowedToBeDown() + + "%, over the limit of " + percentageOfServicesAllowedToBeDown + "%." + + " These instances may be down: " + clusterApi.servicesDownAndNotInGroupDescription() + + " and these hosts are allowed to be down: " + + clusterApi.nodesAllowedToBeDownNotInGroupDescription()); + } + + // Non-private for testing purposes + ConcurrentSuspensionLimitForCluster getConcurrentSuspensionLimit(ClusterApi clusterApi) { if (VespaModelUtil.ADMIN_CLUSTER_ID.equals(clusterApi.clusterId())) { if (VespaModelUtil.SLOBROK_SERVICE_TYPE.equals(clusterApi.serviceType())) { return ConcurrentSuspensionLimitForCluster.ONE_NODE; diff --git a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/policy/HostedVespaPolicy.java b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/policy/HostedVespaPolicy.java index 515a46cb1da..ac1587ce7de 100644 --- a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/policy/HostedVespaPolicy.java +++ b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/policy/HostedVespaPolicy.java @@ -5,12 +5,13 @@ import com.yahoo.log.LogLevel; import com.yahoo.vespa.applicationmodel.ApplicationInstance; import com.yahoo.vespa.applicationmodel.HostName; import com.yahoo.vespa.orchestrator.controller.ClusterControllerClientFactory; -import com.yahoo.vespa.orchestrator.controller.ClusterControllerState; +import com.yahoo.vespa.orchestrator.controller.ClusterControllerNodeState; import com.yahoo.vespa.orchestrator.model.ApplicationApi; import com.yahoo.vespa.orchestrator.model.ApplicationApiImpl; import com.yahoo.vespa.orchestrator.model.ClusterApi; import com.yahoo.vespa.orchestrator.model.NodeGroup; import com.yahoo.vespa.orchestrator.model.StorageNode; +import com.yahoo.vespa.orchestrator.status.ApplicationInstanceStatus; import com.yahoo.vespa.orchestrator.status.HostStatus; import com.yahoo.vespa.orchestrator.status.MutableStatusRegistry; import com.yahoo.vespa.service.monitor.ServiceMonitorStatus; @@ -23,6 +24,7 @@ import java.util.logging.Logger; public class HostedVespaPolicy implements Policy { + public static final String APPLICATION_SUSPENDED_CONSTRAINT = "application-suspended"; public static final String ENOUGH_SERVICES_UP_CONSTRAINT = "enough-services-up"; public static final String SET_NODE_STATE_CONSTRAINT = "controller-set-node-state"; public static final String CLUSTER_CONTROLLER_AVAILABLE_CONSTRAINT = "controller-available"; @@ -48,7 +50,7 @@ public class HostedVespaPolicy implements Policy { // Ask Cluster Controller to set UP storage nodes in maintenance. // These storage nodes are guaranteed to be NO_REMARKS for (StorageNode storageNode : application.getUpStorageNodesInGroupInClusterOrder()) { - storageNode.setNodeState(ClusterControllerState.MAINTENANCE); + storageNode.setNodeState(ClusterControllerNodeState.MAINTENANCE); log.log(LogLevel.INFO, "The storage node on " + storageNode.hostName() + " has been set to MAINTENANCE"); } @@ -63,7 +65,7 @@ public class HostedVespaPolicy implements Policy { public void releaseSuspensionGrant(ApplicationApi application) throws HostStateChangeDeniedException { // Always defer to Cluster Controller whether it's OK to resume storage node for (StorageNode storageNode : application.getStorageNodesAllowedToBeDownInGroupInReverseClusterOrder()) { - storageNode.setNodeState(ClusterControllerState.UP); + storageNode.setNodeState(ClusterControllerNodeState.UP); log.log(LogLevel.INFO, "The storage node on " + storageNode.hostName() + " has been set to UP"); } @@ -73,6 +75,36 @@ public class HostedVespaPolicy implements Policy { } } + @Override + public void acquirePermissionToRemove(ApplicationApi applicationApi) throws HostStateChangeDeniedException { + ApplicationInstanceStatus applicationStatus = applicationApi.getApplicationStatus(); + if (applicationStatus == ApplicationInstanceStatus.ALLOWED_TO_BE_DOWN) { + throw new HostStateChangeDeniedException( + applicationApi.getNodeGroup(), + HostedVespaPolicy.APPLICATION_SUSPENDED_CONSTRAINT, + "Unable to test availability constraints as the application " + applicationApi.applicationInfo() + + " is allowed to be down"); + } + + // Apply per-cluster policy + for (ClusterApi cluster : applicationApi.getClusters()) { + clusterPolicy.verifyGroupGoingDownPermanentlyIsFine(cluster); + } + + // Ask Cluster Controller to set storage nodes to DOWN. + // These storage nodes are guaranteed to be NO_REMARKS + for (StorageNode storageNode : applicationApi.getStorageNodesInGroupInClusterOrder()) { + storageNode.setNodeState(ClusterControllerNodeState.DOWN); + log.log(LogLevel.INFO, "The storage node on " + storageNode.hostName() + " has been set DOWN"); + } + + // Ensure all nodes in the group are marked as allowed to be down + for (HostName hostName : applicationApi.getNodesInGroupWithStatus(HostStatus.NO_REMARKS)) { + applicationApi.setHostState(hostName, HostStatus.ALLOWED_TO_BE_DOWN); + log.log(LogLevel.INFO, hostName + " is now allowed to be down (suspended)"); + } + } + // TODO: Remove later - currently used for backward compatibility testing @Override public void grantSuspensionRequest(ApplicationInstance<ServiceMonitorStatus> applicationInstance, diff --git a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/policy/Policy.java b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/policy/Policy.java index fee2940efda..2db842274b3 100644 --- a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/policy/Policy.java +++ b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/policy/Policy.java @@ -30,6 +30,13 @@ public interface Policy { void releaseSuspensionGrant(ApplicationApi application) throws HostStateChangeDeniedException; /** + * Give all hosts in a group permission to be removed from the application. + * + * @param applicationApi + */ + void acquirePermissionToRemove(ApplicationApi applicationApi) throws HostStateChangeDeniedException; + + /** * Release an earlier grant for suspension. * * @throws HostStateChangeDeniedException if the release failed. diff --git a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/resources/HostResource.java b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/resources/HostResource.java index b6dac92206c..c6e9e5415ce 100644 --- a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/resources/HostResource.java +++ b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/resources/HostResource.java @@ -57,7 +57,7 @@ public class HostResource implements HostApi { throw new NotFoundException(e); } catch (HostStateChangeDeniedException e) { log.log(LogLevel.INFO, "Failed to suspend " + hostName + ": " + e.getMessage()); - throw webExceptionWithDenialReason(hostName, e); + throw webExceptionWithDenialReason("suspend", hostName, e); } return new UpdateHostResponse(hostName.s(), null); } @@ -72,14 +72,19 @@ public class HostResource implements HostApi { throw new NotFoundException(e); } catch (HostStateChangeDeniedException e) { log.log(LogLevel.INFO, "Failed to resume " + hostName + ": " + e.getMessage()); - throw webExceptionWithDenialReason(hostName, e); + throw webExceptionWithDenialReason("resume", hostName, e); } return new UpdateHostResponse(hostName.s(), null); } - private static WebApplicationException webExceptionWithDenialReason(HostName hostName, HostStateChangeDeniedException e) { - HostStateChangeDenialReason hostStateChangeDenialReason = new HostStateChangeDenialReason( - e.getConstraintName(), e.getServiceType().s(), e.getMessage()); + private static WebApplicationException webExceptionWithDenialReason( + String operationDescription, + HostName hostName, + HostStateChangeDeniedException e) { + HostStateChangeDenialReason hostStateChangeDenialReason = + new HostStateChangeDenialReason( + e.getConstraintName(), + operationDescription + " failed: " + e.getMessage()); UpdateHostResponse response = new UpdateHostResponse(hostName.s(), hostStateChangeDenialReason); return new WebApplicationException( hostStateChangeDenialReason.toString(), diff --git a/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/OrchestratorImplTest.java b/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/OrchestratorImplTest.java index 09aa37f9610..ccee70630af 100644 --- a/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/OrchestratorImplTest.java +++ b/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/OrchestratorImplTest.java @@ -5,7 +5,6 @@ import com.yahoo.config.provision.ApplicationId; import com.yahoo.vespa.applicationmodel.ApplicationInstance; import com.yahoo.vespa.applicationmodel.ApplicationInstanceReference; import com.yahoo.vespa.applicationmodel.HostName; -import com.yahoo.vespa.applicationmodel.ServiceType; import com.yahoo.vespa.orchestrator.controller.ClusterControllerClientFactoryMock; import com.yahoo.vespa.orchestrator.policy.BatchHostStateChangeDeniedException; import com.yahoo.vespa.orchestrator.policy.HostStateChangeDeniedException; @@ -228,11 +227,10 @@ public class OrchestratorImplTest { Throwable supensionFailure = new HostStateChangeDeniedException( DummyInstanceLookupService.TEST6_HOST_NAME, "some-constraint", - new ServiceType("foo"), "error message"); doThrow(supensionFailure).when(orchestrator).suspendGroup(DummyInstanceLookupService.TEST6_NODE_GROUP); - doThrow(new HostStateChangeDeniedException(DummyInstanceLookupService.TEST1_HOST_NAME, "foo1-constraint", new ServiceType("foo1-service"), "foo1-message")) + doThrow(new HostStateChangeDeniedException(DummyInstanceLookupService.TEST1_HOST_NAME, "foo1-constraint", "foo1-message")) .when(orchestrator).resume(DummyInstanceLookupService.TEST1_HOST_NAME); doNothing().when(orchestrator).resume(DummyInstanceLookupService.TEST6_HOST_NAME); doNothing().when(orchestrator).resume(DummyInstanceLookupService.TEST3_HOST_NAME); @@ -251,10 +249,10 @@ public class OrchestratorImplTest { assertEquals("Failed to suspend NodeGroup{application=tenant-id-3:application-instance-3:prod:utopia-1:default, " + "hostNames=[test6.prod.us-east-1.vespahosted.ne1.yahoo.com]} with parent host parentHostname: " + "Changing the state of test6.prod.us-east-1.vespahosted.ne1.yahoo.com would violate " + - "some-constraint for service type foo: error message; With suppressed throwable " + + "some-constraint: error message; With suppressed throwable " + "com.yahoo.vespa.orchestrator.policy.HostStateChangeDeniedException: " + - "Changing the state of test1.prod.utpoia-1.vespahosted.ut1.yahoo.com would violate " + - "foo1-constraint for service type foo1-service: foo1-message", + "Changing the state of test1.prod.utpoia-1.vespahosted.ut1.yahoo.com " + + "would violate foo1-constraint: foo1-message", e.getMessage()); } diff --git a/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/controller/ClusterControllerClientFactoryMock.java b/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/controller/ClusterControllerClientFactoryMock.java index 93dcb5815c6..a36a468a345 100644 --- a/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/controller/ClusterControllerClientFactoryMock.java +++ b/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/controller/ClusterControllerClientFactoryMock.java @@ -22,14 +22,14 @@ import java.util.Set; * @author smorgrav */ public class ClusterControllerClientFactoryMock implements ClusterControllerClientFactory { - Map<String, ClusterControllerState> nodes = new HashMap<>(); + Map<String, ClusterControllerNodeState> nodes = new HashMap<>(); public boolean isInMaintenance(ApplicationInstance<ServiceMonitorStatus> appInstance, HostName hostName) { try { ClusterId clusterName = VespaModelUtil.getContentClusterName(appInstance, hostName); int storageNodeIndex = VespaModelUtil.getStorageNodeIndex(appInstance, hostName); String globalMapKey = clusterName.s() + storageNodeIndex; - return nodes.getOrDefault(globalMapKey, ClusterControllerState.UP) == ClusterControllerState.MAINTENANCE; + return nodes.getOrDefault(globalMapKey, ClusterControllerNodeState.UP) == ClusterControllerNodeState.MAINTENANCE; } catch (Exception e) { //Catch all - meant to catch cases where the node is not part of a storage cluster return false; @@ -43,7 +43,7 @@ public class ClusterControllerClientFactoryMock implements ClusterControllerClie ClusterId clusterName = VespaModelUtil.getContentClusterName(app, host); int storageNodeIndex = VespaModelUtil.getStorageNodeIndex(app, host); String globalMapKey = clusterName.s() + storageNodeIndex; - nodes.put(globalMapKey, ClusterControllerState.UP); + nodes.put(globalMapKey, ClusterControllerNodeState.UP); } } } @@ -53,13 +53,13 @@ public class ClusterControllerClientFactoryMock implements ClusterControllerClie return new ClusterControllerClient() { @Override - public ClusterControllerStateResponse setNodeState(int storageNodeIndex, ClusterControllerState wantedState) throws IOException { + public ClusterControllerStateResponse setNodeState(int storageNodeIndex, ClusterControllerNodeState wantedState) throws IOException { nodes.put(clusterName + storageNodeIndex, wantedState); return new ClusterControllerStateResponse(true, "Yes"); } @Override - public ClusterControllerStateResponse setApplicationState(ClusterControllerState wantedState) throws IOException { + public ClusterControllerStateResponse setApplicationState(ClusterControllerNodeState wantedState) throws IOException { Set<String> keyCopy = new HashSet<>(nodes.keySet()); for (String s : keyCopy) { if (s.startsWith(clusterName)) { diff --git a/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/controller/ClusterControllerClientTest.java b/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/controller/ClusterControllerClientTest.java index 68df4dce393..2841677ec04 100644 --- a/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/controller/ClusterControllerClientTest.java +++ b/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/controller/ClusterControllerClientTest.java @@ -22,7 +22,7 @@ public class ClusterControllerClientTest { strategyMock, CLUSTER_NAME); - final ClusterControllerState wantedState = ClusterControllerState.MAINTENANCE; + final ClusterControllerNodeState wantedState = ClusterControllerNodeState.MAINTENANCE; clusterControllerClient.setNodeState(STORAGE_NODE_INDEX, wantedState); diff --git a/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/controller/SingleInstanceClusterControllerClientFactoryTest.java b/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/controller/SingleInstanceClusterControllerClientFactoryTest.java index cde644e7e8b..1081858812b 100644 --- a/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/controller/SingleInstanceClusterControllerClientFactoryTest.java +++ b/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/controller/SingleInstanceClusterControllerClientFactoryTest.java @@ -64,7 +64,7 @@ public class SingleInstanceClusterControllerClientFactoryTest { final List<HostName> clusterControllers = Arrays.asList(HOST_NAME_1); clientFactory.createClient(clusterControllers, "clusterName") - .setNodeState(0, ClusterControllerState.MAINTENANCE); + .setNodeState(0, ClusterControllerNodeState.MAINTENANCE); verify(jaxRsClientFactory).createClient( ClusterControllerJaxRsApi.class, @@ -90,7 +90,7 @@ public class SingleInstanceClusterControllerClientFactoryTest { final List<HostName> clusterControllers = Arrays.asList(HOST_NAME_1, HOST_NAME_2, HOST_NAME_3); clientFactory.createClient(clusterControllers, "clusterName") - .setNodeState(0, ClusterControllerState.MAINTENANCE); + .setNodeState(0, ClusterControllerNodeState.MAINTENANCE); verify(jaxRsClientFactory).createClient( eq(ClusterControllerJaxRsApi.class), diff --git a/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/policy/HostedVespaClusterPolicyTest.java b/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/policy/HostedVespaClusterPolicyTest.java new file mode 100644 index 00000000000..e6e60900906 --- /dev/null +++ b/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/policy/HostedVespaClusterPolicyTest.java @@ -0,0 +1,113 @@ +package com.yahoo.vespa.orchestrator.policy; + +import com.yahoo.vespa.applicationmodel.ApplicationInstanceId; +import com.yahoo.vespa.applicationmodel.ClusterId; +import com.yahoo.vespa.applicationmodel.HostName; +import com.yahoo.vespa.applicationmodel.ServiceType; +import com.yahoo.vespa.applicationmodel.TenantId; +import com.yahoo.vespa.orchestrator.model.ClusterApi; +import com.yahoo.vespa.orchestrator.model.NodeGroup; +import com.yahoo.vespa.orchestrator.model.VespaModelUtil; +import com.yahoo.vespa.orchestrator.status.MutableStatusRegistry; +import org.junit.Test; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.fail; +import static org.mockito.Mockito.doReturn; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.spy; +import static org.mockito.Mockito.when; + +public class HostedVespaClusterPolicyTest { + private ClusterApi clusterApi = mock(ClusterApi.class); + private HostedVespaClusterPolicy policy = spy(new HostedVespaClusterPolicy()); + + @Test + public void testSlobrokSuspensionLimit() { + when(clusterApi.clusterId()).thenReturn(VespaModelUtil.ADMIN_CLUSTER_ID); + when(clusterApi.serviceType()).thenReturn(VespaModelUtil.SLOBROK_SERVICE_TYPE); + assertEquals(ConcurrentSuspensionLimitForCluster.ONE_NODE, + policy.getConcurrentSuspensionLimit(clusterApi)); + } + + @Test + public void testAdminSuspensionLimit() { + when(clusterApi.clusterId()).thenReturn(VespaModelUtil.ADMIN_CLUSTER_ID); + when(clusterApi.serviceType()).thenReturn(new ServiceType("non-slobrok-service-type")); + assertEquals(ConcurrentSuspensionLimitForCluster.ALL_NODES, + policy.getConcurrentSuspensionLimit(clusterApi)); + } + + @Test + public void testStorageSuspensionLimit() { + when(clusterApi.clusterId()).thenReturn(new ClusterId("some-cluster-id")); + when(clusterApi.isStorageCluster()).thenReturn(true); + assertEquals(ConcurrentSuspensionLimitForCluster.ONE_NODE, + policy.getConcurrentSuspensionLimit(clusterApi)); + } + + @Test + public void testDefaultSuspensionLimit() { + when(clusterApi.clusterId()).thenReturn(new ClusterId("some-cluster-id")); + when(clusterApi.isStorageCluster()).thenReturn(false); + assertEquals(ConcurrentSuspensionLimitForCluster.TEN_PERCENT, + policy.getConcurrentSuspensionLimit(clusterApi)); + } + + @Test + public void verifyGroupGoingDownIsFine_noServicesOutsideGroupIsDownIsFine() { + verifyGroupGoingDownIsFine(true, false, 13, true); + } + + @Test + public void verifyGroupGoingDownIsFine_noServicesInGroupIsUp() { + verifyGroupGoingDownIsFine(false, true, 13, true); + } + + @Test + public void verifyGroupGoingDownIsFine_percentageIsFine() { + verifyGroupGoingDownIsFine(false, false, 9, true); + } + + @Test + public void verifyGroupGoingDownIsFine_fails() { + verifyGroupGoingDownIsFine(false, false, 13, false); + } + + private void verifyGroupGoingDownIsFine(boolean noServicesOutsideGroupIsDown, + boolean noServicesInGroupIsUp, + int percentageOfServicesDownIfGroupIsAllowedToBeDown, + boolean expectSuccess) { + when(clusterApi.noServicesOutsideGroupIsDown()).thenReturn(noServicesOutsideGroupIsDown); + when(clusterApi.noServicesInGroupIsUp()).thenReturn(noServicesInGroupIsUp); + when(clusterApi.percentageOfServicesDownIfGroupIsAllowedToBeDown()).thenReturn(20); + doReturn(ConcurrentSuspensionLimitForCluster.TEN_PERCENT).when(policy).getConcurrentSuspensionLimit(clusterApi); + + when(clusterApi.serviceType()).thenReturn(new ServiceType("service-type")); + when(clusterApi.percentageOfServicesDown()).thenReturn(5); + when(clusterApi.percentageOfServicesDownIfGroupIsAllowedToBeDown()).thenReturn(percentageOfServicesDownIfGroupIsAllowedToBeDown); + when(clusterApi.servicesDownAndNotInGroupDescription()).thenReturn("services-down-and-not-in-group"); + when(clusterApi.nodesAllowedToBeDownNotInGroupDescription()).thenReturn("allowed-to-be-down"); + + NodeGroup nodeGroup = mock(NodeGroup.class); + when(clusterApi.getNodeGroup()).thenReturn(nodeGroup); + when(nodeGroup.toCommaSeparatedString()).thenReturn("node-group"); + + when(clusterApi.noServicesInGroupIsUp()).thenReturn(false); + try { + policy.verifyGroupGoingDownIsFine(clusterApi); + if (!expectSuccess) { + fail(); + } + } catch (HostStateChangeDeniedException e) { + if (!expectSuccess) { + assertEquals("Changing the state of node-group would violate enough-services-up: " + + "Suspension percentage for service type service-type would increase from " + + "5% to 13%, over the limit of 10%. These instances may be down: " + + "services-down-and-not-in-group and these hosts are allowed to be down: " + + "allowed-to-be-down", e.getMessage()); + assertEquals("enough-services-up", e.getConstraintName()); + } + } + } +}
\ No newline at end of file diff --git a/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/policy/HostedVespaPolicyTest.java b/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/policy/HostedVespaPolicyTest.java index 5cba6375717..fe2d9a60acb 100644 --- a/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/policy/HostedVespaPolicyTest.java +++ b/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/policy/HostedVespaPolicyTest.java @@ -2,53 +2,25 @@ package com.yahoo.vespa.orchestrator.policy; -import com.yahoo.vespa.applicationmodel.ApplicationInstance; -import com.yahoo.vespa.applicationmodel.ApplicationInstanceId; -import com.yahoo.vespa.applicationmodel.ClusterId; -import com.yahoo.vespa.applicationmodel.ConfigId; import com.yahoo.vespa.applicationmodel.HostName; -import com.yahoo.vespa.applicationmodel.ServiceCluster; -import com.yahoo.vespa.applicationmodel.ServiceInstance; -import com.yahoo.vespa.applicationmodel.ServiceType; -import com.yahoo.vespa.applicationmodel.TenantId; -import com.yahoo.vespa.orchestrator.TestUtil; +import com.yahoo.vespa.orchestrator.OrchestrationException; import com.yahoo.vespa.orchestrator.controller.ClusterControllerClient; import com.yahoo.vespa.orchestrator.controller.ClusterControllerClientFactory; -import com.yahoo.vespa.orchestrator.controller.ClusterControllerState; -import com.yahoo.vespa.orchestrator.controller.ClusterControllerStateResponse; +import com.yahoo.vespa.orchestrator.controller.ClusterControllerNodeState; import com.yahoo.vespa.orchestrator.model.ApplicationApi; import com.yahoo.vespa.orchestrator.model.ClusterApi; import com.yahoo.vespa.orchestrator.model.StorageNode; -import com.yahoo.vespa.orchestrator.model.VespaModelUtil; import com.yahoo.vespa.orchestrator.status.HostStatus; -import com.yahoo.vespa.orchestrator.status.MutableStatusRegistry; -import com.yahoo.vespa.service.monitor.ServiceMonitorStatus; import org.junit.Before; import org.junit.Test; import org.mockito.InOrder; import java.util.Arrays; -import java.util.HashSet; import java.util.List; -import java.util.Optional; -import java.util.Set; -import java.util.stream.Collectors; - -import static com.yahoo.vespa.orchestrator.TestUtil.makeServiceClusterSet; -import static com.yahoo.vespa.orchestrator.TestUtil.makeServiceInstanceSet; -import static com.yahoo.vespa.service.monitor.ServiceMonitorStatus.DOWN; -import static com.yahoo.vespa.service.monitor.ServiceMonitorStatus.NOT_CHECKED; -import static com.yahoo.vespa.service.monitor.ServiceMonitorStatus.UP; -import static org.fest.assertions.Assertions.assertThat; -import static org.junit.Assert.fail; + import static org.mockito.Matchers.any; -import static org.mockito.Matchers.anyInt; -import static org.mockito.Matchers.eq; import static org.mockito.Mockito.inOrder; import static org.mockito.Mockito.mock; -import static org.mockito.Mockito.never; -import static org.mockito.Mockito.times; -import static org.mockito.Mockito.verify; import static org.mockito.Mockito.when; /** @@ -105,8 +77,8 @@ public class HostedVespaPolicyTest { order.verify(clusterPolicy).verifyGroupGoingDownIsFine(clusterApi3); order.verify(applicationApi).getUpStorageNodesInGroupInClusterOrder(); - order.verify(storageNode1).setNodeState(ClusterControllerState.MAINTENANCE); - order.verify(storageNode3).setNodeState(ClusterControllerState.MAINTENANCE); + order.verify(storageNode1).setNodeState(ClusterControllerNodeState.MAINTENANCE); + order.verify(storageNode3).setNodeState(ClusterControllerNodeState.MAINTENANCE); order.verify(applicationApi).getNodesInGroupWithStatus(HostStatus.NO_REMARKS); order.verify(applicationApi).setHostState(hostName1, HostStatus.ALLOWED_TO_BE_DOWN); @@ -116,689 +88,53 @@ public class HostedVespaPolicyTest { order.verifyNoMoreInteractions(); } - private static final TenantId TENANT_ID = new TenantId("tenantId"); - private static final ApplicationInstanceId APPLICATION_INSTANCE_ID = new ApplicationInstanceId("applicationId"); - private static final HostName HOST_NAME_1 = new HostName("host-1"); - private static final HostName HOST_NAME_2 = new HostName("host-2"); - private static final HostName HOST_NAME_3 = new HostName("host-3"); - private static final HostName HOST_NAME_4 = new HostName("host-4"); - private static final HostName HOST_NAME_5 = new HostName("host-5"); - private static final ServiceType SERVICE_TYPE_1 = new ServiceType("service-1"); - private static final ServiceType SERVICE_TYPE_2 = new ServiceType("service-2"); - - // TODO: Replace HostedVespaClusterPolicy with ClusterPolicy mock - private final HostedVespaPolicy policy = - new HostedVespaPolicy(new HostedVespaClusterPolicy(), clientFactory); - - private final MutableStatusRegistry mutablestatusRegistry = mock(MutableStatusRegistry.class); - { - when(mutablestatusRegistry.getHostStatus(any())).thenReturn(HostStatus.NO_REMARKS); - } - - @Test - public void test_policy_everyone_agrees_everything_is_up() throws Exception { - final ApplicationInstance<ServiceMonitorStatus> applicationInstance = new AppBuilder() - .addCluster(new ClusterBuilder(SERVICE_TYPE_1) - .instance(HOST_NAME_1, UP) - .instance(HOST_NAME_2, UP) - .instance(HOST_NAME_3, UP)) - .build(); - - policy.grantSuspensionRequest( - applicationInstance, - HOST_NAME_1, - mutablestatusRegistry - ); - - verify(mutablestatusRegistry, times(1)).setHostState(HOST_NAME_1, HostStatus.ALLOWED_TO_BE_DOWN); - } - - private void grantWithAdminCluster( - ServiceMonitorStatus statusParallelInstanceOtherHost, - ServiceMonitorStatus statusInstanceOtherHost, - ServiceType serviceType, - boolean expectGranted) throws HostStateChangeDeniedException { - final ApplicationInstance<ServiceMonitorStatus> applicationInstance = new AppBuilder() - .addCluster(new ClusterBuilder(SERVICE_TYPE_1) - .instance(HOST_NAME_1, UP) - .instance(HOST_NAME_2, statusInstanceOtherHost) - .instance(HOST_NAME_3, UP)) - .addCluster(new ClusterBuilder(VespaModelUtil.ADMIN_CLUSTER_ID, serviceType) - .instance(HOST_NAME_1, UP) - .instance(HOST_NAME_2, statusParallelInstanceOtherHost)) - .build(); - - if (expectGranted) { - policy.grantSuspensionRequest( - applicationInstance, - HOST_NAME_1, - mutablestatusRegistry - ); - - verify(mutablestatusRegistry, times(1)).setHostState(HOST_NAME_1, HostStatus.ALLOWED_TO_BE_DOWN); - } else { - try { - policy.grantSuspensionRequest( - applicationInstance, - HOST_NAME_1, - mutablestatusRegistry); - fail(); - } catch (HostStateChangeDeniedException e) { - // As expected. - assertThat(e.getConstraintName()).isEqualTo(HostedVespaPolicy.ENOUGH_SERVICES_UP_CONSTRAINT); - } - - verify(mutablestatusRegistry, never()).setHostState(any(), any()); - } - } - - @Test - public void test_parallel_cluster_down_is_ok() throws Exception { - grantWithAdminCluster(DOWN, UP, new ServiceType("some-service-type"), true); - } - - @Test - public void test_slobrok_cluster_down_is_not_ok() throws Exception { - grantWithAdminCluster(DOWN, UP, VespaModelUtil.SLOBROK_SERVICE_TYPE, false); - } - - @Test - public void test_other_cluster_instance_down_is_not_ok() throws Exception { - grantWithAdminCluster(DOWN, DOWN, new ServiceType("some-service-type"), false); - } - - @Test - public void test_all_up_is_ok() throws Exception { - grantWithAdminCluster(UP, UP, new ServiceType("some-service-type"), true); - } - - @Test - public void test_policy_other_host_allowed_to_be_down() { - final ApplicationInstance<ServiceMonitorStatus> applicationInstance = new AppBuilder() - .addCluster(new ClusterBuilder(SERVICE_TYPE_1) - .instance(HOST_NAME_1, UP) - .instance(HOST_NAME_2, UP) - .instance(HOST_NAME_3, UP) - .instance(HOST_NAME_4, UP) - .instance(HOST_NAME_5, UP)) - .build(); - - when(mutablestatusRegistry.getHostStatus(eq(HOST_NAME_2))).thenReturn(HostStatus.ALLOWED_TO_BE_DOWN); - - try { - policy.grantSuspensionRequest( - applicationInstance, - HOST_NAME_3, - mutablestatusRegistry); - fail(); - } catch (HostStateChangeDeniedException e) { - // As expected. - assertThat(e.getConstraintName()).isEqualTo(HostedVespaPolicy.ENOUGH_SERVICES_UP_CONSTRAINT); - assertThat(e.getServiceType()).isEqualTo(SERVICE_TYPE_1); - } - - verify(mutablestatusRegistry, never()).setHostState(any(), any()); - } - - @Test - public void test_policy_this_host_allowed_to_be_down() throws Exception { - final ApplicationInstance<ServiceMonitorStatus> applicationInstance = new AppBuilder() - .addCluster(new ClusterBuilder(SERVICE_TYPE_1) - .instance(HOST_NAME_1, UP) - .instance(HOST_NAME_2, UP) - .instance(HOST_NAME_3, UP) - .instance(HOST_NAME_4, UP) - .instance(HOST_NAME_5, UP)) - .build(); - - when(mutablestatusRegistry.getHostStatus(eq(HOST_NAME_3))).thenReturn(HostStatus.ALLOWED_TO_BE_DOWN); - - policy.grantSuspensionRequest(applicationInstance, HOST_NAME_3, mutablestatusRegistry); - - verify(mutablestatusRegistry, times(0)).setHostState(HOST_NAME_3, HostStatus.ALLOWED_TO_BE_DOWN); - } - - @Test - public void from_five_to_ten_percent_suspension() throws Exception { - final ApplicationInstance<ServiceMonitorStatus> applicationInstance = new AppBuilder() - .addCluster(new ClusterBuilder(SERVICE_TYPE_1) - .instance(HOST_NAME_1, UP) - .instance(HOST_NAME_1, UP) - .instance(HOST_NAME_1, UP) - .instance(HOST_NAME_1, UP) - .instance(HOST_NAME_1, UP) - .instance(HOST_NAME_1, UP) - .instance(HOST_NAME_2, UP) - .instance(HOST_NAME_3, UP) - .instance(HOST_NAME_4, UP) - .instance(HOST_NAME_4, UP) - .instance(HOST_NAME_4, UP) - .instance(HOST_NAME_4, UP) - .instance(HOST_NAME_4, UP) - .instance(HOST_NAME_4, UP) - .instance(HOST_NAME_5, UP) - .instance(HOST_NAME_5, UP) - .instance(HOST_NAME_5, UP) - .instance(HOST_NAME_5, UP) - .instance(HOST_NAME_5, UP) - .instance(HOST_NAME_5, UP)) - .build(); - - when(mutablestatusRegistry.getHostStatus(eq(HOST_NAME_2))).thenReturn(HostStatus.ALLOWED_TO_BE_DOWN); - - policy.grantSuspensionRequest( - applicationInstance, - HOST_NAME_3, - mutablestatusRegistry); - - verify(mutablestatusRegistry, times(1)).setHostState(HOST_NAME_3, HostStatus.ALLOWED_TO_BE_DOWN); - } - - @Test - public void from_ten_to_fifteen_percent_suspension() { - final ApplicationInstance<ServiceMonitorStatus> applicationInstance = new AppBuilder() - .addCluster(new ClusterBuilder(SERVICE_TYPE_1) - .instance(HOST_NAME_1, UP) - .instance(HOST_NAME_1, UP) - .instance(HOST_NAME_1, UP) - .instance(HOST_NAME_1, UP) - .instance(HOST_NAME_1, UP) - .instance(HOST_NAME_1, UP) - .instance(HOST_NAME_2, UP) - .instance(HOST_NAME_2, UP) - .instance(HOST_NAME_3, UP) - .instance(HOST_NAME_4, UP) - .instance(HOST_NAME_4, UP) - .instance(HOST_NAME_4, UP) - .instance(HOST_NAME_4, UP) - .instance(HOST_NAME_4, UP) - .instance(HOST_NAME_5, UP) - .instance(HOST_NAME_5, UP) - .instance(HOST_NAME_5, UP) - .instance(HOST_NAME_5, UP) - .instance(HOST_NAME_5, UP) - .instance(HOST_NAME_5, UP)) - .build(); - - when(mutablestatusRegistry.getHostStatus(eq(HOST_NAME_2))).thenReturn(HostStatus.ALLOWED_TO_BE_DOWN); - - try { - policy.grantSuspensionRequest( - applicationInstance, - HOST_NAME_3, - mutablestatusRegistry); - fail(); - } catch (HostStateChangeDeniedException e) { - // As expected. - assertThat(e.getConstraintName()).isEqualTo(HostedVespaPolicy.ENOUGH_SERVICES_UP_CONSTRAINT); - assertThat(e.getServiceType()).isEqualTo(SERVICE_TYPE_1); - } - - verify(mutablestatusRegistry, never()).setHostState(any(), any()); - } - - @Test - public void from_five_to_fifteen_percent_suspension() { - final ApplicationInstance<ServiceMonitorStatus> applicationInstance = new AppBuilder() - .addCluster(new ClusterBuilder(SERVICE_TYPE_1) - .instance(HOST_NAME_1, UP) - .instance(HOST_NAME_1, UP) - .instance(HOST_NAME_1, UP) - .instance(HOST_NAME_1, UP) - .instance(HOST_NAME_1, UP) - .instance(HOST_NAME_1, UP) - .instance(HOST_NAME_2, UP) - .instance(HOST_NAME_3, UP) - .instance(HOST_NAME_3, UP) - .instance(HOST_NAME_4, UP) - .instance(HOST_NAME_4, UP) - .instance(HOST_NAME_4, UP) - .instance(HOST_NAME_4, UP) - .instance(HOST_NAME_4, UP) - .instance(HOST_NAME_5, UP) - .instance(HOST_NAME_5, UP) - .instance(HOST_NAME_5, UP) - .instance(HOST_NAME_5, UP) - .instance(HOST_NAME_5, UP) - .instance(HOST_NAME_5, UP)) - .build(); - - when(mutablestatusRegistry.getHostStatus(eq(HOST_NAME_2))).thenReturn(HostStatus.ALLOWED_TO_BE_DOWN); - - try { - policy.grantSuspensionRequest( - applicationInstance, - HOST_NAME_3, - mutablestatusRegistry); - fail(); - } catch (HostStateChangeDeniedException e) { - // As expected. - assertThat(e.getConstraintName()).isEqualTo(HostedVespaPolicy.ENOUGH_SERVICES_UP_CONSTRAINT); - assertThat(e.getServiceType()).isEqualTo(SERVICE_TYPE_1); - } - - verify(mutablestatusRegistry, never()).setHostState(any(), any()); - } - @Test - public void test_policy_no_services() throws Exception { - final ApplicationInstance<ServiceMonitorStatus> applicationInstance = new AppBuilder().build(); - - HostName hostName = new HostName("test-hostname"); - policy.grantSuspensionRequest( - applicationInstance, - hostName, - mutablestatusRegistry - ); - - verify(mutablestatusRegistry, times(1)).setHostState(hostName, HostStatus.ALLOWED_TO_BE_DOWN); - } - - // The cluster name happens to be the cluster id of any of the content service clusters. - private static final String CONTENT_CLUSTER_NAME = "content-cluster-id"; - private static final HostName CLUSTER_CONTROLLER_HOST = new HostName("controller-0"); - private static final HostName STORAGE_NODE_HOST = new HostName("storage-2"); - private static final int STORAGE_NODE_INDEX = 2; - - private static final ServiceCluster<ServiceMonitorStatus> CLUSTER_CONTROLLER_SERVICE_CLUSTER = new ServiceCluster<>( - new ClusterId("cluster-0"), - VespaModelUtil.CLUSTER_CONTROLLER_SERVICE_TYPE, - makeServiceInstanceSet( - new ServiceInstance<>( - TestUtil.clusterControllerConfigId(CONTENT_CLUSTER_NAME, 0), - CLUSTER_CONTROLLER_HOST, - UP))); - - private static final ServiceCluster<ServiceMonitorStatus> DISTRIBUTOR_SERVICE_CLUSTER = new ServiceCluster<>( - new ClusterId(CONTENT_CLUSTER_NAME), - VespaModelUtil.DISTRIBUTOR_SERVICE_TYPE, - makeServiceInstanceSet( - new ServiceInstance<>( - new ConfigId("distributor-id-1"), - new HostName("distributor-1"), - UP))); - - private static final ServiceCluster<ServiceMonitorStatus> STORAGE_SERVICE_CLUSTER = new ServiceCluster<>( - new ClusterId(CONTENT_CLUSTER_NAME), - VespaModelUtil.STORAGENODE_SERVICE_TYPE, - makeServiceInstanceSet( - new ServiceInstance<>( - TestUtil.storageNodeConfigId(CONTENT_CLUSTER_NAME, STORAGE_NODE_INDEX), - STORAGE_NODE_HOST, - UP))); - - private static final ApplicationInstance<ServiceMonitorStatus> APPLICATION_INSTANCE = - new ApplicationInstance<>( - TENANT_ID, - APPLICATION_INSTANCE_ID, - makeServiceClusterSet( - CLUSTER_CONTROLLER_SERVICE_CLUSTER, - DISTRIBUTOR_SERVICE_CLUSTER, - STORAGE_SERVICE_CLUSTER)); - - // The grantSuspensionRequest() and releaseSuspensionGrant() functions happen to have similar signature, - // which allows us to reuse test code for testing both functions. The actual call to one of these two functions - // is encapsulated into the following functional interface. - interface PolicyFunction { - void grant( - final HostedVespaPolicy policy, - final ApplicationInstance<ServiceMonitorStatus> applicationInstance, - final HostName hostName, - final MutableStatusRegistry hostStatusRegistry) throws HostStateChangeDeniedException; - } - - /** - * Since grantSuspensionRequest and releaseSuspensionGrant is quite similar, this test util contains the bulk - * of the test code used to test their common functionality. - * - * @param grantFunction Encapsulates the grant function to call - * @param currentHostStatus The current HostStatus of the host - * @param expectedNodeStateSentToClusterController The NodeState the test expects to be sent to the controller, - * or null if no CC request is expected to be sent. - * @param expectedHostStateSetOnHostStatusService The HostState the test expects to be set on the host service. - */ - private void testCommonGrantFunctionality( - PolicyFunction grantFunction, - ApplicationInstance<ServiceMonitorStatus> application, - HostStatus currentHostStatus, - Optional<ClusterControllerState> expectedNodeStateSentToClusterController, - HostStatus expectedHostStateSetOnHostStatusService) throws Exception { - // There is only one service running on the host, which is a storage node. - // Therefore, the corresponding cluster controller will have to be contacted - // to ask for permission. - - ClusterControllerStateResponse response = new ClusterControllerStateResponse(true, "ok"); - // MOTD: anyInt() MUST be used for an int field, otherwise a NullPointerException is thrown! - // In general, special anyX() must be used for primitive fields. - when(client.setNodeState(anyInt(), any())).thenReturn(response); - - when(mutablestatusRegistry.getHostStatus(any())).thenReturn(currentHostStatus); - - // Execution phase. - grantFunction.grant(policy, application, STORAGE_NODE_HOST, mutablestatusRegistry); - - // Verification phase. - - if (expectedNodeStateSentToClusterController.isPresent()) { - List<HostName> hostNames = CLUSTER_CONTROLLER_SERVICE_CLUSTER.serviceInstances().stream() - .map(service -> service.hostName()) - .collect(Collectors.toList()); - verify(clientFactory, times(1)) - .createClient( - hostNames, - CONTENT_CLUSTER_NAME); - verify(client, times(1)) - .setNodeState( - STORAGE_NODE_INDEX, - expectedNodeStateSentToClusterController.get()); - } else { - verify(client, never()).setNodeState(anyInt(), any()); - } - - verify(mutablestatusRegistry, times(1)) - .setHostState( - STORAGE_NODE_HOST, - expectedHostStateSetOnHostStatusService); - } - - @Test - public void test_defer_to_controller() throws Exception { - HostStatus currentHostStatus = HostStatus.NO_REMARKS; - ClusterControllerState expectedNodeStateSentToClusterController = ClusterControllerState.MAINTENANCE; - HostStatus expectedHostStateSetOnHostStatusService = HostStatus.ALLOWED_TO_BE_DOWN; - testCommonGrantFunctionality( - HostedVespaPolicy::grantSuspensionRequest, - APPLICATION_INSTANCE, - currentHostStatus, - Optional.of(expectedNodeStateSentToClusterController), - expectedHostStateSetOnHostStatusService); - } - - @Test - public void test_release_suspension_grant_gives_no_remarks() throws Exception { - HostStatus currentHostStatus = HostStatus.ALLOWED_TO_BE_DOWN; - ClusterControllerState expectedNodeStateSentToClusterController = ClusterControllerState.UP; - HostStatus expectedHostStateSetOnHostStatusService = HostStatus.NO_REMARKS; - testCommonGrantFunctionality( - HostedVespaPolicy::releaseSuspensionGrant, - APPLICATION_INSTANCE, - currentHostStatus, - Optional.of(expectedNodeStateSentToClusterController), - expectedHostStateSetOnHostStatusService); - } - - @Test - public void okToSuspendHostWithNoConfiguredServices() throws Exception { - final ApplicationInstance<ServiceMonitorStatus> applicationInstance = new AppBuilder() - .addCluster(new ClusterBuilder(SERVICE_TYPE_1) - .instance(HOST_NAME_1, DOWN) - .instance(HOST_NAME_2, DOWN)) - .addCluster(new ClusterBuilder(SERVICE_TYPE_2) - .instance(HOST_NAME_4, DOWN) - .instance(HOST_NAME_5, DOWN)) - .build(); - - policy.grantSuspensionRequest(applicationInstance, HOST_NAME_3, mutablestatusRegistry); - } - - @Test - public void okToSuspendHostWithAllItsServicesDownEvenIfOthersAreDownToo() throws Exception { - final ApplicationInstance<ServiceMonitorStatus> applicationInstance = new AppBuilder() - .addCluster(new ClusterBuilder(SERVICE_TYPE_1) - .instance(HOST_NAME_1, UP) - .instance(HOST_NAME_2, DOWN) - .instance(HOST_NAME_3, DOWN)) - .addCluster(new ClusterBuilder(SERVICE_TYPE_2) - .instance(HOST_NAME_3, DOWN) - .instance(HOST_NAME_4, DOWN) - .instance(HOST_NAME_5, UP)) - .build(); - - policy.grantSuspensionRequest(applicationInstance, HOST_NAME_3, mutablestatusRegistry); - } - - @Test - public void okToSuspendStorageNodeWhenStorageIsDown() throws Exception { - ServiceMonitorStatus storageNodeStatus = DOWN; - final ApplicationInstance<ServiceMonitorStatus> applicationInstance = new AppBuilder() - .addCluster(new StorageClusterBuilder() - // DOWN storage service => ok to suspend and no cluster controller call - .instance(STORAGE_NODE_HOST, DOWN, STORAGE_NODE_INDEX) - .instance(HOST_NAME_2, DOWN, STORAGE_NODE_INDEX + 1) - .instance(HOST_NAME_3, DOWN, STORAGE_NODE_INDEX + 2)) - .addCluster(CLUSTER_CONTROLLER_SERVICE_CLUSTER) - .addCluster(DISTRIBUTOR_SERVICE_CLUSTER) - // This service has one down service on another host, which should - // not block us from suspension because STORAGE_NODE_HOST down too. - .addCluster(new ClusterBuilder(SERVICE_TYPE_2) - .instance(STORAGE_NODE_HOST, DOWN) - .instance(HOST_NAME_4, DOWN) - .instance(HOST_NAME_5, UP)) - .build(); - - HostStatus currentHostStatus = HostStatus.NO_REMARKS; - Optional<ClusterControllerState> dontExpectAnyCallsToClusterController = Optional.empty(); - testCommonGrantFunctionality( - HostedVespaPolicy::grantSuspensionRequest, - applicationInstance, - currentHostStatus, - dontExpectAnyCallsToClusterController, - HostStatus.ALLOWED_TO_BE_DOWN); - } - - @Test - public void denySuspendOfStorageIfOthersAreDown() throws Exception { - // If the storage service is up, but other hosts' storage services are down, - // we should be denied permission to suspend. This behavior is common for - // storage service and non-storage service (they differ when it comes to - // the cluster controller). - ServiceMonitorStatus storageNodeStatus = UP; - - final ApplicationInstance<ServiceMonitorStatus> applicationInstance = new AppBuilder() - .addCluster(new StorageClusterBuilder() - .instance(STORAGE_NODE_HOST, storageNodeStatus, STORAGE_NODE_INDEX) - .instance(HOST_NAME_2, DOWN, STORAGE_NODE_INDEX + 1) - .instance(HOST_NAME_3, DOWN, STORAGE_NODE_INDEX + 2)) - .addCluster(CLUSTER_CONTROLLER_SERVICE_CLUSTER) - .addCluster(DISTRIBUTOR_SERVICE_CLUSTER) - .addCluster(new ClusterBuilder(SERVICE_TYPE_2) - .instance(STORAGE_NODE_HOST, DOWN) - .instance(HOST_NAME_4, DOWN) - .instance(HOST_NAME_5, UP)) - .build(); - - when(mutablestatusRegistry.getHostStatus(any())).thenReturn(HostStatus.NO_REMARKS); - - try { - policy.grantSuspensionRequest(applicationInstance, STORAGE_NODE_HOST, mutablestatusRegistry); - fail(); - } catch (HostStateChangeDeniedException e) { - // As expected. - assertThat(e.getConstraintName()).isEqualTo(HostedVespaPolicy.ENOUGH_SERVICES_UP_CONSTRAINT); - assertThat(e.getServiceType()).isEqualTo(VespaModelUtil.STORAGENODE_SERVICE_TYPE); - } - } - - // In this test we verify the storage service cluster suspend policy of allowing at most 1 - // storage service to be effectively down. The normal policy (and the one used previously for storage) - // is to allow 10%. Therefore, the test verifies we disallow suspending 2 hosts = 5% (some random number <10%). - // - // Since the Orchestrator doesn't allow suspending the host, the Orchestrator doesn't even bother calling the - // Cluster Controller. The CC also has a policy of max 1, so it's just an optimization and safety guard. - @Test - public void dontBotherCallingClusterControllerIfOtherStorageNodesAreDown() throws Exception { - StorageClusterBuilder clusterBuilder = new StorageClusterBuilder(); - for (int i = 0; i < 40; ++i) { - clusterBuilder.instance(new HostName("host-" + i), UP, i); - } - ApplicationInstance<ServiceMonitorStatus> applicationInstance = - new AppBuilder().addCluster(clusterBuilder).build(); - - HostName host_1 = new HostName("host-1"); - when(mutablestatusRegistry.getHostStatus(eq(host_1))).thenReturn(HostStatus.NO_REMARKS); - - HostName host_2 = new HostName("host-2"); - when(mutablestatusRegistry.getHostStatus(eq(host_2))).thenReturn(HostStatus.ALLOWED_TO_BE_DOWN); - - try { - policy.grantSuspensionRequest(applicationInstance, host_1, mutablestatusRegistry); - fail(); - } catch (HostStateChangeDeniedException e) { - // As expected. - assertThat(e.getConstraintName()) - .isEqualTo(HostedVespaPolicy.ENOUGH_SERVICES_UP_CONSTRAINT); - assertThat(e.getServiceType()).isEqualTo(VespaModelUtil.STORAGENODE_SERVICE_TYPE); - } - - verify(mutablestatusRegistry, never()).setHostState(any(), any()); - } - - @Test - public void ownServiceInstanceDown() throws Exception { - final ApplicationInstance<ServiceMonitorStatus> applicationInstance = new AppBuilder() - .addCluster(new ClusterBuilder(SERVICE_TYPE_1) - .instance(HOST_NAME_1, UP) - .instance(HOST_NAME_2, DOWN) - .instance(HOST_NAME_3, DOWN)) - .build(); - - policy.grantSuspensionRequest(applicationInstance, HOST_NAME_3, mutablestatusRegistry); - } - - @Test - public void ownServiceInstanceDown_otherServiceIsAllNotChecked() throws Exception { - final ApplicationInstance<ServiceMonitorStatus> applicationInstance = new AppBuilder() - .addCluster(new ClusterBuilder(SERVICE_TYPE_1) - .instance(HOST_NAME_1, UP) - .instance(HOST_NAME_2, DOWN) - .instance(HOST_NAME_3, DOWN)) - .addCluster(new ClusterBuilder(SERVICE_TYPE_2) - .instance(HOST_NAME_3, NOT_CHECKED) - .instance(HOST_NAME_4, NOT_CHECKED) - .instance(HOST_NAME_5, NOT_CHECKED)) - .build(); - - policy.grantSuspensionRequest(applicationInstance, HOST_NAME_3, mutablestatusRegistry); - } - - @Test - public void ownServiceInstanceDown_otherServiceIsAllNotChecked_oneHostDown() throws Exception { - final ApplicationInstance<ServiceMonitorStatus> applicationInstance = new AppBuilder() - .addCluster(new ClusterBuilder(SERVICE_TYPE_1) - .instance(HOST_NAME_1, UP) - .instance(HOST_NAME_2, DOWN) - .instance(HOST_NAME_3, DOWN)) - .addCluster(new ClusterBuilder(SERVICE_TYPE_2) - .instance(HOST_NAME_3, NOT_CHECKED) - .instance(HOST_NAME_4, NOT_CHECKED) - .instance(HOST_NAME_5, NOT_CHECKED)) - .build(); - - when(mutablestatusRegistry.getHostStatus(eq(HOST_NAME_4))).thenReturn(HostStatus.ALLOWED_TO_BE_DOWN); - try { - policy.grantSuspensionRequest(applicationInstance, HOST_NAME_3, mutablestatusRegistry); - fail("Should not be allowed to set " + HOST_NAME_3 + " down when " + HOST_NAME_4 + " is already down."); - } catch (HostStateChangeDeniedException e) { - // As expected. - assertThat(e.getConstraintName()).isEqualTo(HostedVespaPolicy.ENOUGH_SERVICES_UP_CONSTRAINT); - assertThat(e.getServiceType()).isEqualTo(SERVICE_TYPE_2); - } - } + public void testAcquirePermissionToRemove() throws OrchestrationException { + final HostedVespaClusterPolicy clusterPolicy = mock(HostedVespaClusterPolicy.class); + final HostedVespaPolicy policy = new HostedVespaPolicy(clusterPolicy, clientFactory); + final ApplicationApi applicationApi = mock(ApplicationApi.class); + when(applicationApi.applicationInfo()).thenReturn("tenant:app"); - @Test - public void ownServiceInstanceDown_otherServiceIsAllUp() throws Exception { - final ApplicationInstance<ServiceMonitorStatus> applicationInstance = new AppBuilder() - .addCluster(new ClusterBuilder(SERVICE_TYPE_1) - .instance(HOST_NAME_1, UP) - .instance(HOST_NAME_2, DOWN) - .instance(HOST_NAME_3, DOWN)) - .addCluster(new ClusterBuilder(SERVICE_TYPE_2) - .instance(HOST_NAME_3, UP) - .instance(HOST_NAME_4, UP) - .instance(HOST_NAME_5, UP)) - .build(); - - policy.grantSuspensionRequest(applicationInstance, HOST_NAME_3, mutablestatusRegistry); - } + ClusterApi clusterApi1 = mock(ClusterApi.class); + ClusterApi clusterApi2 = mock(ClusterApi.class); + ClusterApi clusterApi3 = mock(ClusterApi.class); + List<ClusterApi> clusterApis = Arrays.asList(clusterApi1, clusterApi2, clusterApi3); + when(applicationApi.getClusters()).thenReturn(clusterApis); - @Test - public void hostHasTwoInstances_oneDownOneUp() throws Exception { - final ApplicationInstance<ServiceMonitorStatus> applicationInstance = new AppBuilder() - .addCluster(new ClusterBuilder(SERVICE_TYPE_1) - .instance(HOST_NAME_1, UP) - .instance(HOST_NAME_2, UP) - .instance(HOST_NAME_3, UP) - .instance(HOST_NAME_3, DOWN)) - .build(); - - policy.grantSuspensionRequest(applicationInstance, HOST_NAME_3, mutablestatusRegistry); - } + StorageNode storageNode1 = mock(StorageNode.class); + HostName hostName1 = new HostName("storage-1"); + when(storageNode1.hostName()).thenReturn(hostName1); - // Helper classes for terseness. + HostName hostName2 = new HostName("host-2"); - private static class AppBuilder { - private final Set<ServiceCluster<ServiceMonitorStatus>> serviceClusters = new HashSet<>(); + StorageNode storageNode3 = mock(StorageNode.class); + HostName hostName3 = new HostName("storage-3"); + when(storageNode1.hostName()).thenReturn(hostName3); - public AppBuilder addCluster(final ServiceCluster<ServiceMonitorStatus> cluster) { - serviceClusters.add(cluster); - return this; - } + List<StorageNode> upStorageNodes = Arrays.asList(storageNode1, storageNode3); + when(applicationApi.getStorageNodesInGroupInClusterOrder()).thenReturn(upStorageNodes); - public AppBuilder addCluster(final ClusterBuilder clusterBuilder) { - serviceClusters.add(clusterBuilder.build()); - return this; - } + List<HostName> noRemarksHostNames = Arrays.asList(hostName1, hostName2, hostName3); + when(applicationApi.getNodesInGroupWithStatus(HostStatus.NO_REMARKS)).thenReturn(noRemarksHostNames); - public AppBuilder addCluster(final StorageClusterBuilder clusterBuilder) { - serviceClusters.add(clusterBuilder.build()); - return this; - } + InOrder order = inOrder(applicationApi, clusterPolicy, storageNode1, storageNode3); - public ApplicationInstance<ServiceMonitorStatus> build() { - return new ApplicationInstance<>( - TENANT_ID, - APPLICATION_INSTANCE_ID, - serviceClusters); - } - } + policy.acquirePermissionToRemove(applicationApi); - private static class ClusterBuilder { - private final ServiceType serviceType; - private final Set<ServiceInstance<ServiceMonitorStatus>> instances = new HashSet<>(); - private final ClusterId clusterId; - private int instanceIndex = 0; - - public ClusterBuilder(final ClusterId clusterId, final ServiceType serviceType) { - this.clusterId = clusterId; - this.serviceType = serviceType; - } - - public ClusterBuilder(final ServiceType serviceType) { - this.clusterId = new ClusterId("clusterId"); - this.serviceType = serviceType; - } - - public ClusterBuilder instance(final HostName hostName, final ServiceMonitorStatus status) { - instances.add(new ServiceInstance<>(new ConfigId("configId-" + instanceIndex), hostName, status)); - ++instanceIndex; - return this; - } - - public ServiceCluster<ServiceMonitorStatus> build() { - return new ServiceCluster<>(clusterId, serviceType, instances); - } - } + order.verify(applicationApi).getClusters(); + order.verify(clusterPolicy).verifyGroupGoingDownPermanentlyIsFine(clusterApi1); + order.verify(clusterPolicy).verifyGroupGoingDownPermanentlyIsFine(clusterApi2); + order.verify(clusterPolicy).verifyGroupGoingDownPermanentlyIsFine(clusterApi3); - private static class StorageClusterBuilder { - private final Set<ServiceInstance<ServiceMonitorStatus>> instances = new HashSet<>(); + order.verify(applicationApi).getStorageNodesInGroupInClusterOrder(); + order.verify(storageNode1).setNodeState(ClusterControllerNodeState.DOWN); + order.verify(storageNode3).setNodeState(ClusterControllerNodeState.DOWN); - public StorageClusterBuilder instance(final HostName hostName, final ServiceMonitorStatus status, int index) { - instances.add(new ServiceInstance<>(TestUtil.storageNodeConfigId("content", index), hostName, status)); - return this; - } + order.verify(applicationApi).getNodesInGroupWithStatus(HostStatus.NO_REMARKS); + order.verify(applicationApi).setHostState(hostName1, HostStatus.ALLOWED_TO_BE_DOWN); + order.verify(applicationApi).setHostState(hostName2, HostStatus.ALLOWED_TO_BE_DOWN); + order.verify(applicationApi).setHostState(hostName3, HostStatus.ALLOWED_TO_BE_DOWN); - public ServiceCluster<ServiceMonitorStatus> build() { - return new ServiceCluster<>(new ClusterId(CONTENT_CLUSTER_NAME), VespaModelUtil.STORAGENODE_SERVICE_TYPE, instances); - } + order.verifyNoMoreInteractions(); } } diff --git a/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/resources/HostResourceTest.java b/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/resources/HostResourceTest.java index 793b73ec5d8..3d3117f9e07 100644 --- a/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/resources/HostResourceTest.java +++ b/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/resources/HostResourceTest.java @@ -5,7 +5,6 @@ import com.yahoo.vespa.applicationmodel.ApplicationInstance; import com.yahoo.vespa.applicationmodel.ApplicationInstanceId; import com.yahoo.vespa.applicationmodel.ApplicationInstanceReference; import com.yahoo.vespa.applicationmodel.HostName; -import com.yahoo.vespa.applicationmodel.ServiceType; import com.yahoo.vespa.applicationmodel.TenantId; import com.yahoo.vespa.orchestrator.InstanceLookupService; import com.yahoo.vespa.orchestrator.OrchestratorImpl; @@ -104,6 +103,10 @@ public class HostResourceTest { } @Override + public void acquirePermissionToRemove(ApplicationApi applicationApi) throws HostStateChangeDeniedException { + } + + @Override public void releaseSuspensionGrant( ApplicationInstance<ServiceMonitorStatus> applicationInstance, HostName hostName, @@ -203,6 +206,11 @@ public class HostResourceTest { } @Override + public void acquirePermissionToRemove(ApplicationApi applicationApi) throws HostStateChangeDeniedException { + doThrow(); + } + + @Override public void releaseSuspensionGrant( ApplicationInstance<ServiceMonitorStatus> applicationInstance, HostName hostName, @@ -214,7 +222,6 @@ public class HostResourceTest { throw new HostStateChangeDeniedException( new HostName("some-host"), "impossible-policy", - new ServiceType("silly-service"), "This policy rejects all requests"); } } |