diff options
7 files changed, 268 insertions, 24 deletions
diff --git a/controller-api/src/main/java/com/yahoo/vespa/hosted/controller/api/integration/aws/CloudEvent.java b/controller-api/src/main/java/com/yahoo/vespa/hosted/controller/api/integration/aws/CloudEvent.java index a7c8a680b73..defcda28a0f 100644 --- a/controller-api/src/main/java/com/yahoo/vespa/hosted/controller/api/integration/aws/CloudEvent.java +++ b/controller-api/src/main/java/com/yahoo/vespa/hosted/controller/api/integration/aws/CloudEvent.java @@ -14,10 +14,10 @@ public final class CloudEvent { public final Optional<Date> notAfter; public String awsRegionName; - public Set<String> affectedHostnames; + public Set<String> affectedInstances; public CloudEvent(String instanceEventId, String code, String description, Date notAfter, Date notBefore, Date notBeforeDeadline, - String awsRegionName, Set<String> affectedHostnames) { + String awsRegionName, Set<String> affectedInstances) { this.instanceEventId = instanceEventId; this.code = code; this.description = description; @@ -26,6 +26,6 @@ public final class CloudEvent { this.notAfter = Optional.ofNullable(notAfter); this.awsRegionName = awsRegionName; - this.affectedHostnames = affectedHostnames; + this.affectedInstances = affectedInstances; } } diff --git a/controller-api/src/main/java/com/yahoo/vespa/hosted/controller/api/integration/aws/MockAwsEventFetcher.java b/controller-api/src/main/java/com/yahoo/vespa/hosted/controller/api/integration/aws/MockAwsEventFetcher.java index 79b332c093a..baf248fc31c 100644 --- a/controller-api/src/main/java/com/yahoo/vespa/hosted/controller/api/integration/aws/MockAwsEventFetcher.java +++ b/controller-api/src/main/java/com/yahoo/vespa/hosted/controller/api/integration/aws/MockAwsEventFetcher.java @@ -2,18 +2,29 @@ package com.yahoo.vespa.hosted.controller.api.integration.aws; import com.yahoo.vespa.hosted.controller.api.integration.organization.Issue; +import com.yahoo.vespa.hosted.controller.api.integration.organization.User; import java.util.List; +import java.util.Map; +import java.util.HashMap; +import java.util.ArrayList; import java.util.Optional; public class MockAwsEventFetcher implements AwsEventFetcher { + + private Map<String, List<CloudEvent>> mockedEvents = new HashMap<>(); + @Override public List<CloudEvent> getEvents(String awsRegionName) { - return List.of(); + return mockedEvents.getOrDefault(awsRegionName, new ArrayList<>()); } @Override public Issue createIssue(CloudEvent event) { - return new Issue("summary", "description", "VESPA", Optional.empty()); + return new Issue("summary", event.affectedInstances.toString(), "VESPA", Optional.empty()).with(User.from(event.awsRegionName)); + } + + public void addEvent(String awsRegionName, CloudEvent cloudEvent) { + mockedEvents.computeIfAbsent(awsRegionName, i -> new ArrayList<>()).add(cloudEvent); } } diff --git a/controller-api/src/main/java/com/yahoo/vespa/hosted/controller/api/integration/configserver/Node.java b/controller-api/src/main/java/com/yahoo/vespa/hosted/controller/api/integration/configserver/Node.java index 43fea2b76fd..8bb5775566a 100644 --- a/controller-api/src/main/java/com/yahoo/vespa/hosted/controller/api/integration/configserver/Node.java +++ b/controller-api/src/main/java/com/yahoo/vespa/hosted/controller/api/integration/configserver/Node.java @@ -37,11 +37,13 @@ public class Node { private final String flavor; private final String clusterId; private final ClusterType clusterType; + private final boolean wantToRetire; + private final boolean wantToDeprovision; public Node(HostName hostname, Optional<HostName> parentHostname, State state, NodeType type, NodeResources resources, Optional<ApplicationId> owner, Version currentVersion, Version wantedVersion, Version currentOsVersion, Version wantedOsVersion, ServiceState serviceState, long restartGeneration, long wantedRestartGeneration, long rebootGeneration, long wantedRebootGeneration, - int cost, String flavor, String clusterId, ClusterType clusterType) { + int cost, String flavor, String clusterId, ClusterType clusterType, boolean wantToRetire, boolean wantToDeprovision) { this.hostname = hostname; this.parentHostname = parentHostname; this.state = state; @@ -61,6 +63,8 @@ public class Node { this.flavor = flavor; this.clusterId = clusterId; this.clusterType = clusterType; + this.wantToRetire = wantToRetire; + this.wantToDeprovision = wantToDeprovision; } public HostName hostname() { @@ -137,6 +141,14 @@ public class Node { return clusterType; } + public boolean wantToRetire() { + return wantToRetire; + } + + public boolean wantToDeprovision() { + return wantToDeprovision; + } + @Override public boolean equals(Object o) { if (this == o) return true; @@ -198,6 +210,8 @@ public class Node { private String flavor; private String clusterId; private ClusterType clusterType; + private boolean wantToRetire; + private boolean wantToDeprovision; public Builder() { } @@ -221,6 +235,8 @@ public class Node { this.flavor = node.flavor; this.clusterId = node.clusterId; this.clusterType = node.clusterType; + this.wantToRetire = node.wantToRetire; + this.wantToDeprovision = node.wantToDeprovision; } public Builder hostname(HostName hostname) { @@ -318,10 +334,20 @@ public class Node { return this; } + public Builder wantToRetire(boolean wantToRetire) { + this.wantToRetire = wantToRetire; + return this; + } + + public Builder wantToDeprovision(boolean wantToDeprovision) { + this.wantToDeprovision = wantToDeprovision; + return this; + } + public Node build() { return new Node(hostname, parentHostname, state, type, resources, owner, currentVersion, wantedVersion, currentOsVersion, wantedOsVersion, serviceState, restartGeneration, wantedRestartGeneration, rebootGeneration, wantedRebootGeneration, - cost, flavor, clusterId, clusterType); + cost, flavor, clusterId, clusterType, wantToRetire, wantToDeprovision); } } } diff --git a/controller-api/src/main/java/com/yahoo/vespa/hosted/controller/api/integration/configserver/NodeRepository.java b/controller-api/src/main/java/com/yahoo/vespa/hosted/controller/api/integration/configserver/NodeRepository.java index 94616fd27b2..dd99bef5ee2 100644 --- a/controller-api/src/main/java/com/yahoo/vespa/hosted/controller/api/integration/configserver/NodeRepository.java +++ b/controller-api/src/main/java/com/yahoo/vespa/hosted/controller/api/integration/configserver/NodeRepository.java @@ -73,6 +73,8 @@ public interface NodeRepository { /** Cancels firmware checks on all hosts in the given zone. */ void cancelFirmwareCheck(ZoneId zone); + void retireAndDeprovision(ZoneId zoneId, String hostName); + private static Node toNode(NodeRepositoryNode node) { var application = Optional.ofNullable(node.getOwner()) .map(owner -> ApplicationId.from(owner.getTenant(), owner.getApplication(), @@ -103,7 +105,9 @@ public interface NodeRepository { toInt(node.getCost()), node.getFlavor(), clusterIdOf(node.getMembership()), - clusterTypeOf(node.getMembership())); + clusterTypeOf(node.getMembership()), + node.getWantToRetire(), + node.getWantToDeprovision()); } private static String clusterIdOf(NodeMembership nodeMembership) { diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/CloudEventReporter.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/CloudEventReporter.java index eda3d0fc571..bd8faaed2e2 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/CloudEventReporter.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/CloudEventReporter.java @@ -2,23 +2,28 @@ package com.yahoo.vespa.hosted.controller.maintenance; import com.yahoo.config.provision.CloudName; +import com.yahoo.config.provision.NodeType; import com.yahoo.config.provision.zone.ZoneApi; import com.yahoo.vespa.hosted.controller.Controller; import com.yahoo.vespa.hosted.controller.api.integration.aws.AwsEventFetcher; import com.yahoo.vespa.hosted.controller.api.integration.aws.CloudEvent; +import com.yahoo.vespa.hosted.controller.api.integration.configserver.Node; +import com.yahoo.vespa.hosted.controller.api.integration.configserver.NodeRepository; import com.yahoo.vespa.hosted.controller.api.integration.organization.Issue; import com.yahoo.vespa.hosted.controller.api.integration.organization.IssueHandler; import java.time.Duration; import java.util.List; -import java.util.Set; +import java.util.Map; +import java.util.function.Predicate; import java.util.logging.Level; import java.util.logging.Logger; import java.util.stream.Collectors; /** - * Automatically fetches scheduled events from AWS and submits issues detailing them. - * + * Automatically fetches and handles scheduled events from AWS: + * 1. Deprovisions the affected hosts if applicable + * 2. Submits an issue detailing the event if some hosts are not processed by 1. * @author mgimle */ public class CloudEventReporter extends Maintainer { @@ -27,33 +32,70 @@ public class CloudEventReporter extends Maintainer { private final IssueHandler issueHandler; private final AwsEventFetcher eventFetcher; - private final Set<String> awsRegions; + private final Map<String, List<ZoneApi>> zonesByCloudNativeRegion; + private final NodeRepository nodeRepository; CloudEventReporter(Controller controller, Duration interval, JobControl jobControl) { super(controller, interval, jobControl); this.issueHandler = controller.serviceRegistry().issueHandler(); this.eventFetcher = controller.serviceRegistry().eventFetcherService(); - this.awsRegions = controller.zoneRegistry().zones() - .ofCloud(CloudName.from("aws")) - .reachable() - .zones().stream() - .map(ZoneApi::getCloudNativeRegionName) - .collect(Collectors.toSet()); + this.nodeRepository = controller.serviceRegistry().configServer().nodeRepository(); + this.zonesByCloudNativeRegion = getZonesByCloudNativeRegion(); } @Override protected void maintain() { log.log(Level.INFO, "Fetching events for cloud hosts."); - for (var awsRegion : awsRegions) { + for (var awsRegion : zonesByCloudNativeRegion.keySet()) { List<CloudEvent> events = eventFetcher.getEvents(awsRegion); for (var event : events) { - Issue issue = eventFetcher.createIssue(event); - if (!issueHandler.issueExists(issue)) { - issueHandler.file(issue); - log.log(Level.INFO, String.format("Filed an issue with the title '%s'", issue.summary())); - } + List<String> deprovisionedHosts = deprovisionHosts(awsRegion, event); + submitIssue(event, deprovisionedHosts); } } } + private List<String> deprovisionHosts(String awsRegion, CloudEvent event) { + return zonesByCloudNativeRegion.get(awsRegion) + .stream() + .flatMap(zone -> + nodeRepository.list(zone.getId()) + .stream() + .filter(shouldDeprovisionHost(event)) + .map(node -> { + if (!node.wantToDeprovision() || !node.wantToRetire()) + log.info(String.format("Setting host %s to wantToRetire and wantToDeprovision", node.hostname().value())); + nodeRepository.retireAndDeprovision(zone.getId(), node.hostname().value()); + return node.hostname().value(); + }) + ) + .collect(Collectors.toList()); + } + + private void submitIssue(CloudEvent event, List<String> deprovisionedHosts) { + if (event.affectedInstances.size() == deprovisionedHosts.size()) + return; + Issue issue = eventFetcher.createIssue(event); + if (!issueHandler.issueExists(issue)) { + issueHandler.file(issue); + log.log(Level.INFO, String.format("Filed an issue with the title '%s'", issue.summary())); + } + } + + private Predicate<Node> shouldDeprovisionHost(CloudEvent event) { + return node -> + node.type() == NodeType.host && + event.affectedInstances.stream() + .anyMatch(instance -> node.hostname().value().contains(instance)); + } + + private Map<String, List<ZoneApi>> getZonesByCloudNativeRegion() { + return controller().zoneRegistry().zones() + .ofCloud(CloudName.from("aws")) + .reachable() + .zones().stream() + .collect(Collectors.groupingBy( + ZoneApi::getCloudNativeRegionName + )); + } } diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/integration/NodeRepositoryMock.java b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/integration/NodeRepositoryMock.java index b05fef7c2ba..f99396b3b02 100644 --- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/integration/NodeRepositoryMock.java +++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/integration/NodeRepositoryMock.java @@ -188,6 +188,11 @@ public class NodeRepositoryMock implements NodeRepository { public void cancelFirmwareCheck(ZoneId zone) { } + @Override + public void retireAndDeprovision(ZoneId zoneId, String hostName) { + nodeRepository.get(zoneId).remove(HostName.from(hostName)); + } + public void doUpgrade(DeploymentId deployment, Optional<HostName> hostName, Version version) { modifyNodes(deployment, hostName, node -> { assert node.wantedVersion().equals(version); diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/CloudEventReporterTest.java b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/CloudEventReporterTest.java new file mode 100644 index 00000000000..cd2a4fd8453 --- /dev/null +++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/CloudEventReporterTest.java @@ -0,0 +1,156 @@ +package com.yahoo.vespa.hosted.controller.maintenance; + +import com.yahoo.config.provision.HostName; +import com.yahoo.config.provision.NodeType; +import com.yahoo.config.provision.zone.ZoneId; +import com.yahoo.vespa.hosted.controller.ControllerTester; +import com.yahoo.vespa.hosted.controller.api.integration.aws.CloudEvent; +import com.yahoo.vespa.hosted.controller.api.integration.aws.MockAwsEventFetcher; +import com.yahoo.vespa.hosted.controller.api.integration.configserver.Node; +import com.yahoo.vespa.hosted.controller.api.integration.organization.IssueId; +import com.yahoo.vespa.hosted.controller.api.integration.organization.MockIssueHandler; +import com.yahoo.vespa.hosted.controller.integration.ZoneApiMock; +import org.junit.Test; + +import java.time.Duration; +import java.util.Arrays; +import java.util.Date; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.stream.Collectors; + +import static org.junit.Assert.*; + +/** + * @author olaa + */ +public class CloudEventReporterTest { + + private ControllerTester tester = new ControllerTester(); + private ZoneApiMock nonAwsZone = createZone("prod.zone3", "region-1", "other"); + private ZoneApiMock awsZone1 = createZone("prod.zone1", "region-1", "aws"); + private ZoneApiMock awsZone2 = createZone("prod.zone2", "region-2", "aws"); + + + /** + * Test scenario: + * Consider three zones, two of which are based in AWS + * We want to test the following: + * 1. Non-AWS zone is completely ignored + * 2. Tenant hosts affected by cloud event are deprovisioned + * 3. Infrastructure hosts affected by cloud event are reported by IssueHandler + */ + @Test + public void maintain() { + setUpZones(); + CloudEventReporter cloudEventReporter = new CloudEventReporter(tester.controller(), Duration.ofMinutes(15), new JobControl(tester.curator())); + + assertEquals(Set.of("host1.com", "host2.com", "host3.com"), getHostnames(nonAwsZone.getId())); + assertEquals(Set.of("host1.com", "host2.com", "host3.com"), getHostnames(awsZone1.getId())); + assertEquals(Set.of("host4.com", "host5.com", "confighost.com"), getHostnames(awsZone2.getId())); + + mockEvents(); + cloudEventReporter.maintain(); + + assertEquals(Set.of("host1.com", "host2.com", "host3.com"), getHostnames(nonAwsZone.getId())); + assertEquals(Set.of("host3.com"), getHostnames(awsZone1.getId())); + assertEquals(Set.of("host4.com", "confighost.com"), getHostnames(awsZone2.getId())); + + Map<IssueId, MockIssueHandler.MockIssue> createdIssues = tester.serviceRegistry().issueHandler().issues(); + assertEquals(1, createdIssues.size()); + String description = createdIssues.get(IssueId.from("1")).issue().description(); + assertTrue(description.contains("confighost")); + + } + + private void mockEvents() { + MockAwsEventFetcher mockAwsEventFetcher = (MockAwsEventFetcher)tester.controller().serviceRegistry().eventFetcherService(); + + Date date = new Date(); + CloudEvent event1 = new CloudEvent("event 1", + "instance code", + "description", + date, + date, + date, + "region-1", + Set.of("host1", "host2")); + + CloudEvent event2 = new CloudEvent("event 2", + "instance code", + "description", + date, + date, + date, + "region-2", + Set.of("host5", "confighost")); + + mockAwsEventFetcher.addEvent("region-1", event1); + mockAwsEventFetcher.addEvent("region-2", event2); + } + + private void setUpZones() { + + tester.zoneRegistry().setZones( + nonAwsZone, + awsZone1, + awsZone2); + + tester.configServer().nodeRepository().putByHostname( + nonAwsZone.getId(), + createNodesWithHostnames( + "host1.com", + "host2.com", + "host3.com" + ) + ); + tester.configServer().nodeRepository().putByHostname( + awsZone1.getId(), + createNodesWithHostnames( + "host1.com", + "host2.com", + "host3.com" + ) + ); + tester.configServer().nodeRepository().putByHostname( + awsZone2.getId(), + createNodesWithHostnames( + "host4.com", + "host5.com" + ) + ); + tester.configServer().nodeRepository().putByHostname( + awsZone2.getId(), + List.of(createNode("confighost.com", NodeType.confighost)) + ); + } + + private List<Node> createNodesWithHostnames(String... hostnames) { + return Arrays.stream(hostnames) + .map(hostname -> createNode(hostname, NodeType.host)) + .collect(Collectors.toUnmodifiableList()); + } + + private Node createNode(String hostname, NodeType nodeType) { + return new Node.Builder() + .hostname(HostName.from(hostname)) + .type(nodeType) + .build(); + } + + private Set<String> getHostnames(ZoneId zoneId) { + return tester.configServer().nodeRepository().list(zoneId) + .stream() + .map(node -> node.hostname().value()) + .collect(Collectors.toSet()); + } + + private ZoneApiMock createZone(String zoneId, String cloudNativeRegionName, String cloud) { + return ZoneApiMock.newBuilder().withId(zoneId) + .withCloudNativeRegionName(cloudNativeRegionName) + .withCloud(cloud) + .build(); + } + +}
\ No newline at end of file |