diff options
author | Ola Aunrønning <olaa@verizonmedia.com> | 2020-11-06 19:25:00 +0100 |
---|---|---|
committer | GitHub <noreply@github.com> | 2020-11-06 19:25:00 +0100 |
commit | 8905185c86ea99afb2d05066d1b093a4a6081893 (patch) | |
tree | 627c5f8500bebe7ac11725915f1ac07bda4dbf9d | |
parent | 846feef93e16dd971e15a4c98e391948d50b5d27 (diff) | |
parent | 6dc92b5b36bec70f143453a6e7a924a663b42cff (diff) |
Merge pull request #15210 from vespa-engine/olaa/maintain-all-breakfixed-nodes
Maintain all nodes in breakfixed state
7 files changed, 43 insertions, 47 deletions
diff --git a/controller-api/src/main/java/com/yahoo/vespa/hosted/controller/api/integration/configserver/Node.java b/controller-api/src/main/java/com/yahoo/vespa/hosted/controller/api/integration/configserver/Node.java index 1130ee706b4..8fd294f64f8 100644 --- a/controller-api/src/main/java/com/yahoo/vespa/hosted/controller/api/integration/configserver/Node.java +++ b/controller-api/src/main/java/com/yahoo/vespa/hosted/controller/api/integration/configserver/Node.java @@ -9,9 +9,12 @@ import com.yahoo.config.provision.HostName; import com.yahoo.config.provision.NodeResources; import com.yahoo.config.provision.NodeType; import com.yahoo.config.provision.TenantName; +import com.yahoo.vespa.hosted.controller.api.integration.noderepository.NodeHistory; import java.time.Instant; +import java.util.ArrayList; import java.util.HashMap; +import java.util.List; import java.util.Map; import java.util.Objects; import java.util.Optional; @@ -53,6 +56,7 @@ public class Node { private final Optional<TenantName> reservedTo; private final Optional<ApplicationId> exclusiveTo; private final Map<String, JsonNode> reports; + private final List<NodeHistory> history; public Node(HostName hostname, Optional<HostName> parentHostname, State state, NodeType type, NodeResources resources, Optional<ApplicationId> owner, Version currentVersion, Version wantedVersion, Version currentOsVersion, Version wantedOsVersion, @@ -60,7 +64,7 @@ public class Node { Optional<Instant> suspendedSince, long restartGeneration, long wantedRestartGeneration, long rebootGeneration, long wantedRebootGeneration, int cost, String flavor, String clusterId, ClusterType clusterType, boolean wantToRetire, boolean wantToDeprovision, Optional<TenantName> reservedTo, Optional<ApplicationId> exclusiveTo, - DockerImage wantedDockerImage, DockerImage currentDockerImage, Map<String, JsonNode> reports) { + DockerImage wantedDockerImage, DockerImage currentDockerImage, Map<String, JsonNode> reports, List<NodeHistory> history) { this.hostname = hostname; this.parentHostname = parentHostname; this.state = state; @@ -90,6 +94,7 @@ public class Node { this.wantedDockerImage = wantedDockerImage; this.currentDockerImage = currentDockerImage; this.reports = reports; + this.history = history; } public HostName hostname() { @@ -202,6 +207,10 @@ public class Node { return reports; } + public List<NodeHistory> history() { + return history; + } + @Override public boolean equals(Object o) { if (this == o) return true; @@ -275,6 +284,7 @@ public class Node { private Optional<TenantName> reservedTo = Optional.empty(); private Optional<ApplicationId> exclusiveTo = Optional.empty(); private Map<String, JsonNode> reports = new HashMap<>(); + private List<NodeHistory> history = new ArrayList<>(); public Builder() { } @@ -308,6 +318,7 @@ public class Node { this.reservedTo = node.reservedTo; this.exclusiveTo = node.exclusiveTo; this.reports = node.reports; + this.history = node.history; } public Builder hostname(HostName hostname) { @@ -450,12 +461,17 @@ public class Node { return this; } + public Builder history(List<NodeHistory> history) { + this.history = history; + return this; + } + public Node build() { return new Node(hostname, parentHostname, state, type, resources, owner, currentVersion, wantedVersion, currentOsVersion, wantedOsVersion, currentFirmwareCheck, wantedFirmwareCheck, serviceState, suspendedSince, restartGeneration, wantedRestartGeneration, rebootGeneration, wantedRebootGeneration, cost, flavor, clusterId, clusterType, wantToRetire, wantToDeprovision, reservedTo, exclusiveTo, - wantedDockerImage, currentDockerImage, reports); + wantedDockerImage, currentDockerImage, reports, history); } } diff --git a/controller-api/src/main/java/com/yahoo/vespa/hosted/controller/api/integration/configserver/NodeRepository.java b/controller-api/src/main/java/com/yahoo/vespa/hosted/controller/api/integration/configserver/NodeRepository.java index df6a6d5070d..ca8af48e4fd 100644 --- a/controller-api/src/main/java/com/yahoo/vespa/hosted/controller/api/integration/configserver/NodeRepository.java +++ b/controller-api/src/main/java/com/yahoo/vespa/hosted/controller/api/integration/configserver/NodeRepository.java @@ -134,7 +134,8 @@ public interface NodeRepository { Optional.ofNullable(node.getExclusiveTo()).map(ApplicationId::fromSerializedForm), dockerImageFrom(node.getWantedDockerImage()), dockerImageFrom(node.getCurrentDockerImage()), - node.getReports()); + node.getReports(), + node.getHistory()); } private static String clusterIdOf(NodeMembership nodeMembership) { diff --git a/controller-api/src/main/java/com/yahoo/vespa/hosted/controller/api/integration/noderepository/NodeRepositoryNode.java b/controller-api/src/main/java/com/yahoo/vespa/hosted/controller/api/integration/noderepository/NodeRepositoryNode.java index a21e7a3b3fd..7bb47185751 100644 --- a/controller-api/src/main/java/com/yahoo/vespa/hosted/controller/api/integration/noderepository/NodeRepositoryNode.java +++ b/controller-api/src/main/java/com/yahoo/vespa/hosted/controller/api/integration/noderepository/NodeRepositoryNode.java @@ -9,6 +9,7 @@ import com.fasterxml.jackson.annotation.JsonProperty; import com.fasterxml.jackson.databind.JsonNode; import java.util.Arrays; +import java.util.List; import java.util.Map; import java.util.Set; @@ -79,7 +80,7 @@ public class NodeRepositoryNode { @JsonProperty("cost") private Integer cost; @JsonProperty("history") - private NodeHistory[] history; + private List<NodeHistory> history; @JsonProperty("allowedToBeDown") private Boolean allowedToBeDown; @JsonProperty("suspendedSinceMillis") @@ -307,11 +308,11 @@ public class NodeRepositoryNode { this.cost = cost; } - public NodeHistory[] getHistory() { + public List<NodeHistory> getHistory() { return history; } - public void setHistory(NodeHistory[] history) { + public void setHistory(List<NodeHistory> history) { this.history = history; } @@ -419,7 +420,7 @@ public class NodeRepositoryNode { ", wantToRetire=" + wantToRetire + ", wantToDeprovision=" + wantToDeprovision + ", cost=" + cost + - ", history=" + Arrays.toString(history) + + ", history=" + history + ", allowedToBeDown=" + allowedToBeDown + ", reports=" + reports + ", modelName=" + modelName + diff --git a/controller-api/src/main/java/com/yahoo/vespa/hosted/controller/api/integration/repair/HostRepairClient.java b/controller-api/src/main/java/com/yahoo/vespa/hosted/controller/api/integration/repair/HostRepairClient.java index a4a5a773cb9..c3fa0890cbb 100644 --- a/controller-api/src/main/java/com/yahoo/vespa/hosted/controller/api/integration/repair/HostRepairClient.java +++ b/controller-api/src/main/java/com/yahoo/vespa/hosted/controller/api/integration/repair/HostRepairClient.java @@ -15,7 +15,7 @@ import java.util.Map; public interface HostRepairClient { /* Checks current ticket status and takes appropriate action */ - void updateRepairStatus(ZoneApi zone, Map<Node, RepairTicketReport> nodes); + void updateRepairStatus(ZoneApi zone, List<Node> nodes); /* Creates reparation ticket for given host. Returns ticket number */ String createTicket(HostName hostname, String colo, ZoneId zoneId, String description, String category); diff --git a/controller-api/src/main/java/com/yahoo/vespa/hosted/controller/api/integration/repair/MockRepairClient.java b/controller-api/src/main/java/com/yahoo/vespa/hosted/controller/api/integration/repair/MockRepairClient.java index 6ceceda5712..7a4398d69bb 100644 --- a/controller-api/src/main/java/com/yahoo/vespa/hosted/controller/api/integration/repair/MockRepairClient.java +++ b/controller-api/src/main/java/com/yahoo/vespa/hosted/controller/api/integration/repair/MockRepairClient.java @@ -18,8 +18,8 @@ public class MockRepairClient implements HostRepairClient { List<Node> updatedNodes = new ArrayList<>(); @Override - public void updateRepairStatus(ZoneApi zone, Map<Node, RepairTicketReport> nodes) { - updatedNodes.addAll(nodes.keySet()); + public void updateRepairStatus(ZoneApi zone, List<Node> nodes) { + updatedNodes.addAll(nodes); } @Override diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/HostRepairMaintainer.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/HostRepairMaintainer.java index e3c6862384f..57727e64e30 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/HostRepairMaintainer.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/HostRepairMaintainer.java @@ -47,15 +47,12 @@ public class HostRepairMaintainer extends ControllerMaintainer { controller().zoneRegistry().zones() .reachable().zones().stream() .forEach(zoneApi -> { - var nodeTicketMap = nodeRepository.list((zoneApi).getId()) + var breakfixedNodes = nodeRepository.list((zoneApi).getId()) .stream() - .filter(this::hasOpenTicket) - .collect(Collectors.toMap( - node -> node, - this::getTicketReport) - ); + .filter(node -> node.state() == Node.State.breakfixed) + .collect(Collectors.toList()); try { - repairClient.updateRepairStatus(zoneApi, nodeTicketMap); + repairClient.updateRepairStatus(zoneApi, breakfixedNodes); } catch (Exception e) { log.warning("Failed to update repair status; " + Exceptions.toMessageString(e)); exceptions.incrementAndGet(); @@ -66,16 +63,4 @@ public class HostRepairMaintainer extends ControllerMaintainer { return exceptions.get() == 0; } - - private boolean hasOpenTicket(Node node) { - var reports = node.reports(); - if (!reports.containsKey(RepairTicketReport.getReportId())) { - return false; - } - return "OPEN".equals(getTicketReport(node).getStatus()); - } - - private RepairTicketReport getTicketReport(Node node) { - return uncheck(() -> RepairTicketReport.fromJsonNode(node.reports().get(RepairTicketReport.getReportId()))); - } } diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/HostRepairMaintainerTest.java b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/HostRepairMaintainerTest.java index 556755581fe..ab6e13bb5a2 100644 --- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/HostRepairMaintainerTest.java +++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/HostRepairMaintainerTest.java @@ -3,10 +3,7 @@ package com.yahoo.vespa.hosted.controller.maintenance; import com.yahoo.config.provision.HostName; import com.yahoo.config.provision.zone.ZoneId; import com.yahoo.vespa.hosted.controller.ControllerTester; -import com.yahoo.vespa.hosted.controller.api.integration.noderepository.NodeRepositoryNode; -import com.yahoo.vespa.hosted.controller.api.integration.repair.HostRepairClient; -import com.yahoo.vespa.hosted.controller.api.integration.repair.MockRepairClient; -import com.yahoo.vespa.hosted.controller.api.integration.repair.RepairTicketReport; +import com.yahoo.vespa.hosted.controller.api.integration.configserver.Node; import org.junit.Test; import java.time.Duration; @@ -29,23 +26,19 @@ public class HostRepairMaintainerTest { var hostname1 = HostName.from("node-1-tenant-host-dev.us-east-1"); var hostname2 = HostName.from("node-2-tenant-host-dev.us-east-1"); var timestamp = Instant.now().toEpochMilli(); - var openTicket = new RepairTicketReport("OPEN", "ticket-1", timestamp, timestamp); - var closedTicket = new RepairTicketReport("CLOSED", "ticket-2", timestamp, timestamp); - - tester.configServer().nodeRepository().addReport( - zoneId, - hostname1, - RepairTicketReport.getReportId(), - openTicket.toJsonNode()); - tester.configServer().nodeRepository().addReport( - zoneId, - hostname2, - RepairTicketReport.getReportId(), - closedTicket.toJsonNode()); + var node1 = new Node.Builder() + .state(Node.State.active) + .hostname(hostname1) + .build(); + var node2 = new Node.Builder() + .state(Node.State.breakfixed) + .hostname(hostname2) + .build(); + tester.configServer().nodeRepository().putNodes(zoneId, List.of(node1, node2)); maintainer.maintain(); var updatedNodes = tester.serviceRegistry().hostRepairClient().getUpdatedNodes(); assertEquals(1, updatedNodes.size()); - assertEquals(hostname1, updatedNodes.get(0).hostname()); + assertEquals(hostname2, updatedNodes.get(0).hostname()); } }
\ No newline at end of file |