summaryrefslogtreecommitdiffstats
path: root/controller-server
diff options
context:
space:
mode:
authorOla Aunrønning <olaa@verizonmedia.com>2021-04-16 11:26:01 +0200
committerOla Aunrønning <olaa@verizonmedia.com>2021-04-16 11:38:53 +0200
commit46a45826fea31ca587f65fb00c5681c9e2455c62 (patch)
tree48c7718febb7c1d5a1c51a10467b5a31aea881a9 /controller-server
parentdc8cf2848a7bd3e535b6296128cb28064e66bf6f (diff)
Default action is no-op. Add tests
Diffstat (limited to 'controller-server')
-rw-r--r--controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ControllerMaintenance.java3
-rw-r--r--controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/VCMRMaintainer.java50
-rw-r--r--controller-server/src/test/java/com/yahoo/vespa/hosted/controller/integration/NodeRepositoryMock.java32
-rw-r--r--controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/VCMRMaintainerTest.java171
-rw-r--r--controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/controller/responses/maintenance.json3
5 files changed, 236 insertions, 23 deletions
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ControllerMaintenance.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ControllerMaintenance.java
index 4a4bee03ee5..9513348303f 100644
--- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ControllerMaintenance.java
+++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ControllerMaintenance.java
@@ -69,6 +69,7 @@ public class ControllerMaintenance extends AbstractComponent {
maintainers.add(new ArchiveAccessMaintainer(controller, metric, intervals.archiveAccessMaintainer));
maintainers.add(new TenantRoleMaintainer(controller, intervals.tenantRoleMaintainer));
maintainers.add(new ChangeRequestMaintainer(controller, intervals.changeRequestMaintainer));
+ maintainers.add(new VCMRMaintainer(controller, intervals.vcmrMaintainer));
}
public Upgrader upgrader() { return upgrader; }
@@ -123,6 +124,7 @@ public class ControllerMaintenance extends AbstractComponent {
private final Duration archiveAccessMaintainer;
private final Duration tenantRoleMaintainer;
private final Duration changeRequestMaintainer;
+ private final Duration vcmrMaintainer;
public Intervals(SystemName system) {
this.system = Objects.requireNonNull(system);
@@ -154,6 +156,7 @@ public class ControllerMaintenance extends AbstractComponent {
this.archiveAccessMaintainer = duration(10, MINUTES);
this.tenantRoleMaintainer = duration(5, MINUTES);
this.changeRequestMaintainer = duration(12, HOURS);
+ this.vcmrMaintainer = duration(12, HOURS);
}
private Duration duration(long amount, TemporalUnit unit) {
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/VCMRMaintainer.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/VCMRMaintainer.java
index 994f0ec603f..e5d99e38be5 100644
--- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/VCMRMaintainer.java
+++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/VCMRMaintainer.java
@@ -10,7 +10,7 @@ import com.yahoo.vespa.hosted.controller.api.integration.configserver.Node;
import com.yahoo.vespa.hosted.controller.api.integration.configserver.NodeRepository;
import com.yahoo.vespa.hosted.controller.api.integration.noderepository.NodeRepositoryNode;
import com.yahoo.vespa.hosted.controller.api.integration.noderepository.NodeState;
-import com.yahoo.vespa.hosted.controller.api.integration.vcmr.ChangeRequest;
+import com.yahoo.vespa.hosted.controller.api.integration.vcmr.ChangeRequest.Impact;
import com.yahoo.vespa.hosted.controller.api.integration.vcmr.HostAction;
import com.yahoo.vespa.hosted.controller.api.integration.vcmr.HostAction.State;
import com.yahoo.vespa.hosted.controller.api.integration.vcmr.VespaChangeRequest;
@@ -32,7 +32,7 @@ import java.util.stream.Collectors;
* @author olaa
*
* Maintains status and execution of VCMRs
- * For now only executes
+ * For now only retires all affected tenant hosts if zone capacity allows it
*/
public class VCMRMaintainer extends ControllerMaintainer {
@@ -46,7 +46,6 @@ public class VCMRMaintainer extends ControllerMaintainer {
this.nodeRepository = controller.serviceRegistry().configServer().nodeRepository();
}
-
@Override
protected boolean maintain() {
var changeRequests = curator.readChangeRequests()
@@ -120,20 +119,18 @@ public class VCMRMaintainer extends ControllerMaintainer {
private HostAction nextAction(Node node, VespaChangeRequest changeRequest, boolean spareCapacity) {
var hostAction = getPreviousAction(node, changeRequest)
- .orElse(new HostAction(node.hostname().value(), State.PENDING_RETIREMENT, Instant.now()));
-
- if (node.type() != NodeType.host || !spareCapacity) {
- return hostAction.withState(State.REQUIRES_OPERATOR_ACTION);
- }
+ .orElse(new HostAction(node.hostname().value(), State.NONE, Instant.now()));
if (changeRequest.getChangeRequestSource().isClosed()) {
- recycleNode(changeRequest.getZoneId(), node);
+ recycleNode(changeRequest.getZoneId(), node, hostAction);
return hostAction.withState(State.COMPLETE);
}
+ if (node.type() != NodeType.host || !spareCapacity) {
+ return hostAction.withState(State.REQUIRES_OPERATOR_ACTION);
+ }
+
if (shouldRetire(changeRequest, hostAction)) {
- if (node.state() != Node.State.active)
- return hostAction.withState(State.RETIRED);
if (!node.wantToRetire()) {
logger.info(String.format("Retiring %s due to %s", node.hostname().value(), changeRequest.getChangeRequestSource().getId()));
setWantToRetire(changeRequest.getZoneId(), node, true);
@@ -141,15 +138,25 @@ public class VCMRMaintainer extends ControllerMaintainer {
return hostAction.withState(State.RETIRING);
}
+ if (hasRetired(node, hostAction)) {
+ return hostAction.withState(State.RETIRED);
+ }
+
+ if (pendingRetirement(node)) {
+ return hostAction.withState(State.PENDING_RETIREMENT);
+ }
+
return hostAction;
}
- private void recycleNode(ZoneId zoneId, Node node) {
- if (node.state() == Node.State.parked) {
+ // Dirty host iff the parked host was retired by this maintainer
+ private void recycleNode(ZoneId zoneId, Node node, HostAction hostAction) {
+ if (hostAction.getState() == State.RETIRED &&
+ node.state() == Node.State.parked) {
logger.info("Setting " + node.hostname() + " to dirty");
nodeRepository.setState(zoneId, NodeState.dirty, node.hostname().value());
}
- if (node.wantToRetire())
+ if (hostAction.getState() == State.RETIRING && node.wantToRetire())
setWantToRetire(zoneId, node, false);
}
@@ -160,6 +167,18 @@ public class VCMRMaintainer extends ControllerMaintainer {
.isBefore(ZonedDateTime.now());
}
+ private boolean hasRetired(Node node, HostAction hostAction) {
+ return hostAction.getState() == State.RETIRING &&
+ node.state() == Node.State.parked;
+ }
+
+ /**
+ * TODO: For now, we choose to retire any active host
+ */
+ private boolean pendingRetirement(Node node) {
+ return node.state() == Node.State.active;
+ }
+
private Map<ZoneId, List<Node>> nodesByZone() {
return controller().zoneRegistry()
.zones()
@@ -181,7 +200,8 @@ public class VCMRMaintainer extends ControllerMaintainer {
}
private Predicate<VespaChangeRequest> shouldUpdate() {
return changeRequest -> changeRequest.getStatus() != Status.COMPLETED &&
- List.of(ChangeRequest.Impact.HIGH, ChangeRequest.Impact.VERY_HIGH).contains(changeRequest.getImpact());
+ List.of(Impact.HIGH, Impact.VERY_HIGH)
+ .contains(changeRequest.getImpact());
}
private boolean hasSpareCapacity(ZoneId zoneId, List<Node> nodes) {
diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/integration/NodeRepositoryMock.java b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/integration/NodeRepositoryMock.java
index bb1ff5c0189..fe241976d13 100644
--- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/integration/NodeRepositoryMock.java
+++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/integration/NodeRepositoryMock.java
@@ -55,6 +55,7 @@ public class NodeRepositoryMock implements NodeRepository {
private final Map<ZoneId, List<NodeRepositoryNode>> nodeRepoNodes = new HashMap<>();
private boolean allowPatching = false;
+ private boolean hasSpareCapacity = false;
/** Add or update given nodes in zone */
public void putNodes(ZoneId zone, List<Node> nodes) {
@@ -161,8 +162,14 @@ public class NodeRepositoryMock implements NodeRepository {
}
@Override
- public void setState(ZoneId zone, NodeState nodeState, String hostname) {
- throw new UnsupportedOperationException();
+ public void setState(ZoneId zone, NodeState nodeState, String hostName) {
+ var existing = list(zone, List.of(HostName.from(hostName)));
+ if (existing.size() != 1) throw new IllegalArgumentException("Node " + hostName + " not found in " + zone);
+
+ var node = new Node.Builder(existing.get(0))
+ .state(Node.State.valueOf(nodeState.name()))
+ .build();
+ putNodes(zone, node);
}
@Override
@@ -277,11 +284,16 @@ public class NodeRepositoryMock implements NodeRepository {
List<Node> existing = list(zoneId, List.of(HostName.from(hostName)));
if (existing.size() != 1) throw new IllegalArgumentException("Node " + hostName + " not found in " + zoneId);
- // Note: Only supports switchHostname and modelName
- Node newNode = new Node.Builder(existing.get(0)).switchHostname(node.getSwitchHostname())
- .modelName(node.getModelName())
- .build();
- putNodes(zoneId, newNode);
+ // Note: Only supports switchHostname, modelName and wantToRetire
+ Node.Builder newNode = new Node.Builder(existing.get(0));
+ if (node.getSwitchHostname() != null)
+ newNode.switchHostname(node.getSwitchHostname());
+ if (node.getModelName() != null)
+ newNode.modelName(node.getModelName());
+ if (node.getWantToRetire() != null)
+ newNode.wantToRetire(node.getWantToRetire());
+
+ putNodes(zoneId, newNode.build());
}
@Override
@@ -291,7 +303,7 @@ public class NodeRepositoryMock implements NodeRepository {
@Override
public boolean isReplaceable(ZoneId zoneId, List<HostName> hostNames) {
- return false;
+ return hasSpareCapacity;
}
public Optional<Duration> osUpgradeBudget(ZoneId zone, NodeType type, Version version) {
@@ -341,4 +353,8 @@ public class NodeRepositoryMock implements NodeRepository {
return this;
}
+ public void hasSpareCapacity(boolean hasSpareCapacity) {
+ this.hasSpareCapacity = hasSpareCapacity;
+ }
+
}
diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/VCMRMaintainerTest.java b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/VCMRMaintainerTest.java
new file mode 100644
index 00000000000..f5cea0eb391
--- /dev/null
+++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/VCMRMaintainerTest.java
@@ -0,0 +1,171 @@
+package com.yahoo.vespa.hosted.controller.maintenance;
+
+import com.yahoo.config.provision.HostName;
+import com.yahoo.config.provision.NodeType;
+import com.yahoo.config.provision.zone.ZoneId;
+import com.yahoo.vespa.hosted.controller.ControllerTester;
+import com.yahoo.vespa.hosted.controller.api.integration.configserver.Node;
+import com.yahoo.vespa.hosted.controller.api.integration.vcmr.ChangeRequest;
+import com.yahoo.vespa.hosted.controller.api.integration.vcmr.ChangeRequestSource;
+import com.yahoo.vespa.hosted.controller.api.integration.vcmr.HostAction;
+import com.yahoo.vespa.hosted.controller.api.integration.vcmr.HostAction.State;
+import com.yahoo.vespa.hosted.controller.api.integration.vcmr.VespaChangeRequest;
+import com.yahoo.vespa.hosted.controller.api.integration.vcmr.VespaChangeRequest.Status;
+import com.yahoo.vespa.hosted.controller.integration.NodeRepositoryMock;
+import org.junit.Test;
+
+import java.time.Duration;
+import java.time.Instant;
+import java.time.ZonedDateTime;
+import java.util.List;
+
+import static org.junit.Assert.*;
+
+/**
+ * @author olaa
+ */
+public class VCMRMaintainerTest {
+
+ private final ControllerTester tester = new ControllerTester();
+ private final VCMRMaintainer maintainer = new VCMRMaintainer(tester.controller(), Duration.ofMinutes(1));
+ private final NodeRepositoryMock nodeRepo = tester.serviceRegistry().configServer().nodeRepository();
+ private final ZoneId zoneId = ZoneId.from("prod.us-east-3");
+ private final HostName host1 = HostName.from("host1");
+ private final HostName host2 = HostName.from("host2");
+ private final String changeRequestId = "id123";
+
+ @Test
+ public void recycle_hosts_after_completion() {
+ var parkedNode = createNode(host1, NodeType.host, Node.State.parked, true);
+ var failedNode = createNode(host2, NodeType.host, Node.State.failed, false);
+ nodeRepo.putNodes(zoneId, List.of(parkedNode, failedNode));
+
+ tester.curator().writeChangeRequest(canceledChangeRequest());
+ maintainer.maintain();
+
+ // Only the parked node is recycled
+ var nodeList = nodeRepo.list(zoneId, List.of(host1, host2));
+ assertEquals(Node.State.dirty, nodeList.get(0).state());
+ assertEquals(Node.State.failed, nodeList.get(1).state());
+ var writtenChangeRequest = tester.curator().readChangeRequest(changeRequestId).get();
+ assertEquals(Status.COMPLETED, writtenChangeRequest.getStatus());
+ }
+
+ @Test
+ public void infrastructure_hosts_require_maunal_intervention() {
+ var configNode = createNode(host1, NodeType.config, Node.State.active, false);
+ var activeNode = createNode(host2, NodeType.host, Node.State.active, false);
+ nodeRepo.putNodes(zoneId, List.of(configNode, activeNode));
+ nodeRepo.hasSpareCapacity(true);
+
+ tester.curator().writeChangeRequest(openChangeRequest());
+ maintainer.maintain();
+
+ var writtenChangeRequest = tester.curator().readChangeRequest(changeRequestId).get();
+ var configAction = writtenChangeRequest.getHostActionPlan().get(0);
+ var tenantHostAction = writtenChangeRequest.getHostActionPlan().get(1);
+ assertEquals(State.REQUIRES_OPERATOR_ACTION, configAction.getState());
+ assertEquals(State.PENDING_RETIREMENT, tenantHostAction.getState());
+ assertEquals(Status.REQUIRES_OPERATOR_ACTION, writtenChangeRequest.getStatus());
+ }
+
+ @Test
+ public void retires_hosts_when_near_vcmr() {
+ var activeNode = createNode(host1, NodeType.host, Node.State.active, false);
+ var failedNode = createNode(host2, NodeType.host, Node.State.failed, false);
+ nodeRepo.putNodes(zoneId, List.of(activeNode, failedNode));
+ nodeRepo.allowPatching(true).hasSpareCapacity(true);
+
+ tester.curator().writeChangeRequest(startingChangeRequest());
+ maintainer.maintain();
+
+ var writtenChangeRequest = tester.curator().readChangeRequest(changeRequestId).orElseThrow();
+ var parkedNodeAction = writtenChangeRequest.getHostActionPlan().get(0);
+ var failedNodeAction = writtenChangeRequest.getHostActionPlan().get(1);
+ assertEquals(State.RETIRING, parkedNodeAction.getState());
+ assertEquals(State.NONE, failedNodeAction.getState());
+ assertEquals(Status.IN_PROGRESS, writtenChangeRequest.getStatus());
+
+ activeNode = nodeRepo.list(zoneId, List.of(activeNode.hostname())).get(0);
+ assertTrue(activeNode.wantToRetire());
+
+ }
+
+ @Test
+ public void no_spare_capacity_requires_operator_action() {
+ var activeNode = createNode(host1, NodeType.host, Node.State.active, false);
+ var failedNode = createNode(host2, NodeType.host, Node.State.failed, false);
+ nodeRepo.putNodes(zoneId, List.of(activeNode, failedNode));
+ nodeRepo.hasSpareCapacity(false);
+
+ tester.curator().writeChangeRequest(startingChangeRequest());
+ maintainer.maintain();
+
+ var writtenChangeRequest = tester.curator().readChangeRequest(changeRequestId).orElseThrow();
+ var parkedNodeAction = writtenChangeRequest.getHostActionPlan().get(0);
+ var failedNodeAction = writtenChangeRequest.getHostActionPlan().get(1);
+ assertEquals(State.REQUIRES_OPERATOR_ACTION, parkedNodeAction.getState());
+ assertEquals(State.REQUIRES_OPERATOR_ACTION, failedNodeAction.getState());
+ assertEquals(Status.REQUIRES_OPERATOR_ACTION, writtenChangeRequest.getStatus());
+ }
+
+ @Test
+ public void updates_status_when_retiring_host_is_parked() {
+ var parkedNode = createNode(host1, NodeType.host, Node.State.parked, true);
+ nodeRepo.putNodes(zoneId, parkedNode);
+ nodeRepo.hasSpareCapacity(true);
+
+ tester.curator().writeChangeRequest(inProgressChangeRequest());
+ maintainer.maintain();
+
+ var writtenChangeRequest = tester.curator().readChangeRequest(changeRequestId).orElseThrow();
+ var parkedNodeAction = writtenChangeRequest.getHostActionPlan().get(0);
+ assertEquals(State.RETIRED, parkedNodeAction.getState());
+ assertEquals(Status.IN_PROGRESS, writtenChangeRequest.getStatus());
+ }
+
+ private VespaChangeRequest canceledChangeRequest() {
+ return newChangeRequest(ChangeRequestSource.Status.CANCELED, State.RETIRED, State.RETIRING, ZonedDateTime.now());
+ }
+
+ private VespaChangeRequest openChangeRequest() {
+ return newChangeRequest(ChangeRequestSource.Status.WAITING_FOR_APPROVAL, State.NONE, State.NONE, ZonedDateTime.now().plus(Duration.ofDays(5L)));
+ }
+
+ private VespaChangeRequest startingChangeRequest() {
+ return newChangeRequest(ChangeRequestSource.Status.STARTED, State.PENDING_RETIREMENT, State.NONE, ZonedDateTime.now());
+ }
+
+ private VespaChangeRequest inProgressChangeRequest() {
+ return newChangeRequest(ChangeRequestSource.Status.STARTED, State.RETIRING, State.RETIRING, ZonedDateTime.now());
+ }
+
+
+ private VespaChangeRequest newChangeRequest(ChangeRequestSource.Status sourceStatus, State state1, State state2, ZonedDateTime startTime) {
+ var source = new ChangeRequestSource("aws", changeRequestId, "url", sourceStatus , startTime, ZonedDateTime.now());
+ var actionPlan = List.of(
+ new HostAction(host1.value(), state1, Instant.now()),
+ new HostAction(host2.value(), state2, Instant.now())
+ );
+ return new VespaChangeRequest(
+ changeRequestId,
+ source,
+ List.of("switch1"),
+ List.of("host1", "host2"),
+ ChangeRequest.Approval.APPROVED,
+ ChangeRequest.Impact.VERY_HIGH,
+ VespaChangeRequest.Status.IN_PROGRESS,
+ actionPlan,
+ ZoneId.from("prod.us-east-3")
+ );
+ }
+
+ private Node createNode(HostName hostname, NodeType nodeType, Node.State state, boolean wantToRetire) {
+ return new Node.Builder()
+ .hostname(hostname)
+ .type(nodeType)
+ .state(state)
+ .wantToRetire(wantToRetire)
+ .build();
+ }
+} \ No newline at end of file
diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/controller/responses/maintenance.json b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/controller/responses/maintenance.json
index 44c464cc5e7..3cf79977fb8 100644
--- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/controller/responses/maintenance.json
+++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/controller/responses/maintenance.json
@@ -91,6 +91,9 @@
"name": "Upgrader"
},
{
+ "name": "VCMRMaintainer"
+ },
+ {
"name": "VersionStatusUpdater"
}
],