diff options
author | Ola Aunrønning <olaa@verizonmedia.com> | 2020-01-09 09:55:00 +0100 |
---|---|---|
committer | GitHub <noreply@github.com> | 2020-01-09 09:55:00 +0100 |
commit | 461569468279dbe9012f263e26a1098b325fbe25 (patch) | |
tree | 2299808aa8e49aa4cea6246c5c925e7fd739f301 /controller-server | |
parent | 99c1d19f37cedd20901f06bf304ef3f69418eb69 (diff) | |
parent | 922b993d409c74543953dcff4b91297906b6fb05 (diff) |
Merge pull request #11683 from vespa-engine/olaa/retire-nodes-with-event
Deprovision tenant hosts affected by cloud event
Diffstat (limited to 'controller-server')
3 files changed, 219 insertions, 16 deletions
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/CloudEventReporter.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/CloudEventReporter.java index eda3d0fc571..bd8faaed2e2 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/CloudEventReporter.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/CloudEventReporter.java @@ -2,23 +2,28 @@ package com.yahoo.vespa.hosted.controller.maintenance; import com.yahoo.config.provision.CloudName; +import com.yahoo.config.provision.NodeType; import com.yahoo.config.provision.zone.ZoneApi; import com.yahoo.vespa.hosted.controller.Controller; import com.yahoo.vespa.hosted.controller.api.integration.aws.AwsEventFetcher; import com.yahoo.vespa.hosted.controller.api.integration.aws.CloudEvent; +import com.yahoo.vespa.hosted.controller.api.integration.configserver.Node; +import com.yahoo.vespa.hosted.controller.api.integration.configserver.NodeRepository; import com.yahoo.vespa.hosted.controller.api.integration.organization.Issue; import com.yahoo.vespa.hosted.controller.api.integration.organization.IssueHandler; import java.time.Duration; import java.util.List; -import java.util.Set; +import java.util.Map; +import java.util.function.Predicate; import java.util.logging.Level; import java.util.logging.Logger; import java.util.stream.Collectors; /** - * Automatically fetches scheduled events from AWS and submits issues detailing them. - * + * Automatically fetches and handles scheduled events from AWS: + * 1. Deprovisions the affected hosts if applicable + * 2. Submits an issue detailing the event if some hosts are not processed by 1. 
* @author mgimle */ public class CloudEventReporter extends Maintainer { @@ -27,33 +32,70 @@ public class CloudEventReporter extends Maintainer { private final IssueHandler issueHandler; private final AwsEventFetcher eventFetcher; - private final Set<String> awsRegions; + private final Map<String, List<ZoneApi>> zonesByCloudNativeRegion; + private final NodeRepository nodeRepository; CloudEventReporter(Controller controller, Duration interval, JobControl jobControl) { super(controller, interval, jobControl); this.issueHandler = controller.serviceRegistry().issueHandler(); this.eventFetcher = controller.serviceRegistry().eventFetcherService(); - this.awsRegions = controller.zoneRegistry().zones() - .ofCloud(CloudName.from("aws")) - .reachable() - .zones().stream() - .map(ZoneApi::getCloudNativeRegionName) - .collect(Collectors.toSet()); + this.nodeRepository = controller.serviceRegistry().configServer().nodeRepository(); + this.zonesByCloudNativeRegion = getZonesByCloudNativeRegion(); } @Override protected void maintain() { log.log(Level.INFO, "Fetching events for cloud hosts."); - for (var awsRegion : awsRegions) { + for (var awsRegion : zonesByCloudNativeRegion.keySet()) { List<CloudEvent> events = eventFetcher.getEvents(awsRegion); for (var event : events) { - Issue issue = eventFetcher.createIssue(event); - if (!issueHandler.issueExists(issue)) { - issueHandler.file(issue); - log.log(Level.INFO, String.format("Filed an issue with the title '%s'", issue.summary())); - } + List<String> deprovisionedHosts = deprovisionHosts(awsRegion, event); + submitIssue(event, deprovisionedHosts); } } } + private List<String> deprovisionHosts(String awsRegion, CloudEvent event) { + return zonesByCloudNativeRegion.get(awsRegion) + .stream() + .flatMap(zone -> + nodeRepository.list(zone.getId()) + .stream() + .filter(shouldDeprovisionHost(event)) + .map(node -> { + if (!node.wantToDeprovision() || !node.wantToRetire()) + log.info(String.format("Setting host %s to wantToRetire and 
wantToDeprovision", node.hostname().value())); + nodeRepository.retireAndDeprovision(zone.getId(), node.hostname().value()); + return node.hostname().value(); + }) + ) + .collect(Collectors.toList()); + } + + private void submitIssue(CloudEvent event, List<String> deprovisionedHosts) { + if (event.affectedInstances.size() == deprovisionedHosts.size()) + return; + Issue issue = eventFetcher.createIssue(event); + if (!issueHandler.issueExists(issue)) { + issueHandler.file(issue); + log.log(Level.INFO, String.format("Filed an issue with the title '%s'", issue.summary())); + } + } + + private Predicate<Node> shouldDeprovisionHost(CloudEvent event) { + return node -> + node.type() == NodeType.host && + event.affectedInstances.stream() + .anyMatch(instance -> node.hostname().value().contains(instance)); + } + + private Map<String, List<ZoneApi>> getZonesByCloudNativeRegion() { + return controller().zoneRegistry().zones() + .ofCloud(CloudName.from("aws")) + .reachable() + .zones().stream() + .collect(Collectors.groupingBy( + ZoneApi::getCloudNativeRegionName + )); + } } diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/integration/NodeRepositoryMock.java b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/integration/NodeRepositoryMock.java index b05fef7c2ba..f99396b3b02 100644 --- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/integration/NodeRepositoryMock.java +++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/integration/NodeRepositoryMock.java @@ -188,6 +188,11 @@ public class NodeRepositoryMock implements NodeRepository { public void cancelFirmwareCheck(ZoneId zone) { } + @Override + public void retireAndDeprovision(ZoneId zoneId, String hostName) { + nodeRepository.get(zoneId).remove(HostName.from(hostName)); + } + public void doUpgrade(DeploymentId deployment, Optional<HostName> hostName, Version version) { modifyNodes(deployment, hostName, node -> { assert 
node.wantedVersion().equals(version); diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/CloudEventReporterTest.java b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/CloudEventReporterTest.java new file mode 100644 index 00000000000..cd2a4fd8453 --- /dev/null +++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/CloudEventReporterTest.java @@ -0,0 +1,156 @@ +package com.yahoo.vespa.hosted.controller.maintenance; + +import com.yahoo.config.provision.HostName; +import com.yahoo.config.provision.NodeType; +import com.yahoo.config.provision.zone.ZoneId; +import com.yahoo.vespa.hosted.controller.ControllerTester; +import com.yahoo.vespa.hosted.controller.api.integration.aws.CloudEvent; +import com.yahoo.vespa.hosted.controller.api.integration.aws.MockAwsEventFetcher; +import com.yahoo.vespa.hosted.controller.api.integration.configserver.Node; +import com.yahoo.vespa.hosted.controller.api.integration.organization.IssueId; +import com.yahoo.vespa.hosted.controller.api.integration.organization.MockIssueHandler; +import com.yahoo.vespa.hosted.controller.integration.ZoneApiMock; +import org.junit.Test; + +import java.time.Duration; +import java.util.Arrays; +import java.util.Date; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.stream.Collectors; + +import static org.junit.Assert.*; + +/** + * @author olaa + */ +public class CloudEventReporterTest { + + private ControllerTester tester = new ControllerTester(); + private ZoneApiMock nonAwsZone = createZone("prod.zone3", "region-1", "other"); + private ZoneApiMock awsZone1 = createZone("prod.zone1", "region-1", "aws"); + private ZoneApiMock awsZone2 = createZone("prod.zone2", "region-2", "aws"); + + + /** + * Test scenario: + * Consider three zones, two of which are based in AWS + * We want to test the following: + * 1. Non-AWS zone is completely ignored + * 2. 
Tenant hosts affected by cloud event are deprovisioned + * 3. Infrastructure hosts affected by cloud event are reported by IssueHandler + */ + @Test + public void maintain() { + setUpZones(); + CloudEventReporter cloudEventReporter = new CloudEventReporter(tester.controller(), Duration.ofMinutes(15), new JobControl(tester.curator())); + + assertEquals(Set.of("host1.com", "host2.com", "host3.com"), getHostnames(nonAwsZone.getId())); + assertEquals(Set.of("host1.com", "host2.com", "host3.com"), getHostnames(awsZone1.getId())); + assertEquals(Set.of("host4.com", "host5.com", "confighost.com"), getHostnames(awsZone2.getId())); + + mockEvents(); + cloudEventReporter.maintain(); + + assertEquals(Set.of("host1.com", "host2.com", "host3.com"), getHostnames(nonAwsZone.getId())); + assertEquals(Set.of("host3.com"), getHostnames(awsZone1.getId())); + assertEquals(Set.of("host4.com", "confighost.com"), getHostnames(awsZone2.getId())); + + Map<IssueId, MockIssueHandler.MockIssue> createdIssues = tester.serviceRegistry().issueHandler().issues(); + assertEquals(1, createdIssues.size()); + String description = createdIssues.get(IssueId.from("1")).issue().description(); + assertTrue(description.contains("confighost")); + + } + + private void mockEvents() { + MockAwsEventFetcher mockAwsEventFetcher = (MockAwsEventFetcher)tester.controller().serviceRegistry().eventFetcherService(); + + Date date = new Date(); + CloudEvent event1 = new CloudEvent("event 1", + "instance code", + "description", + date, + date, + date, + "region-1", + Set.of("host1", "host2")); + + CloudEvent event2 = new CloudEvent("event 2", + "instance code", + "description", + date, + date, + date, + "region-2", + Set.of("host5", "confighost")); + + mockAwsEventFetcher.addEvent("region-1", event1); + mockAwsEventFetcher.addEvent("region-2", event2); + } + + private void setUpZones() { + + tester.zoneRegistry().setZones( + nonAwsZone, + awsZone1, + awsZone2); + + tester.configServer().nodeRepository().putByHostname( + 
nonAwsZone.getId(), + createNodesWithHostnames( + "host1.com", + "host2.com", + "host3.com" + ) + ); + tester.configServer().nodeRepository().putByHostname( + awsZone1.getId(), + createNodesWithHostnames( + "host1.com", + "host2.com", + "host3.com" + ) + ); + tester.configServer().nodeRepository().putByHostname( + awsZone2.getId(), + createNodesWithHostnames( + "host4.com", + "host5.com" + ) + ); + tester.configServer().nodeRepository().putByHostname( + awsZone2.getId(), + List.of(createNode("confighost.com", NodeType.confighost)) + ); + } + + private List<Node> createNodesWithHostnames(String... hostnames) { + return Arrays.stream(hostnames) + .map(hostname -> createNode(hostname, NodeType.host)) + .collect(Collectors.toUnmodifiableList()); + } + + private Node createNode(String hostname, NodeType nodeType) { + return new Node.Builder() + .hostname(HostName.from(hostname)) + .type(nodeType) + .build(); + } + + private Set<String> getHostnames(ZoneId zoneId) { + return tester.configServer().nodeRepository().list(zoneId) + .stream() + .map(node -> node.hostname().value()) + .collect(Collectors.toSet()); + } + + private ZoneApiMock createZone(String zoneId, String cloudNativeRegionName, String cloud) { + return ZoneApiMock.newBuilder().withId(zoneId) + .withCloudNativeRegionName(cloudNativeRegionName) + .withCloud(cloud) + .build(); + } + +}
\ No newline at end of file