summaryrefslogtreecommitdiffstats
path: root/controller-server
diff options
context:
space:
mode:
author Ola Aunrønning <olaa@verizonmedia.com> 2020-01-09 09:55:00 +0100
committer GitHub <noreply@github.com> 2020-01-09 09:55:00 +0100
commit 461569468279dbe9012f263e26a1098b325fbe25 (patch)
tree 2299808aa8e49aa4cea6246c5c925e7fd739f301 /controller-server
parent 99c1d19f37cedd20901f06bf304ef3f69418eb69 (diff)
parent 922b993d409c74543953dcff4b91297906b6fb05 (diff)
Merge pull request #11683 from vespa-engine/olaa/retire-nodes-with-event
Deprovision tenant hosts affected by cloud event
Diffstat (limited to 'controller-server')
-rw-r--r-- controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/CloudEventReporter.java 74
-rw-r--r-- controller-server/src/test/java/com/yahoo/vespa/hosted/controller/integration/NodeRepositoryMock.java 5
-rw-r--r-- controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/CloudEventReporterTest.java 156
3 files changed, 219 insertions, 16 deletions
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/CloudEventReporter.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/CloudEventReporter.java
index eda3d0fc571..bd8faaed2e2 100644
--- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/CloudEventReporter.java
+++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/CloudEventReporter.java
@@ -2,23 +2,28 @@
package com.yahoo.vespa.hosted.controller.maintenance;
import com.yahoo.config.provision.CloudName;
+import com.yahoo.config.provision.NodeType;
import com.yahoo.config.provision.zone.ZoneApi;
import com.yahoo.vespa.hosted.controller.Controller;
import com.yahoo.vespa.hosted.controller.api.integration.aws.AwsEventFetcher;
import com.yahoo.vespa.hosted.controller.api.integration.aws.CloudEvent;
+import com.yahoo.vespa.hosted.controller.api.integration.configserver.Node;
+import com.yahoo.vespa.hosted.controller.api.integration.configserver.NodeRepository;
import com.yahoo.vespa.hosted.controller.api.integration.organization.Issue;
import com.yahoo.vespa.hosted.controller.api.integration.organization.IssueHandler;
import java.time.Duration;
import java.util.List;
-import java.util.Set;
+import java.util.Map;
+import java.util.function.Predicate;
import java.util.logging.Level;
import java.util.logging.Logger;
import java.util.stream.Collectors;
/**
- * Automatically fetches scheduled events from AWS and submits issues detailing them.
- *
+ * Automatically fetches and handles scheduled events from AWS:
+ * 1. Deprovisions the affected hosts if applicable
+ * 2. Submits an issue detailing the event if some hosts are not processed by 1.
* @author mgimle
*/
public class CloudEventReporter extends Maintainer {
@@ -27,33 +32,70 @@ public class CloudEventReporter extends Maintainer {
private final IssueHandler issueHandler;
private final AwsEventFetcher eventFetcher;
- private final Set<String> awsRegions;
+ private final Map<String, List<ZoneApi>> zonesByCloudNativeRegion;
+ private final NodeRepository nodeRepository;
CloudEventReporter(Controller controller, Duration interval, JobControl jobControl) {
super(controller, interval, jobControl);
this.issueHandler = controller.serviceRegistry().issueHandler();
this.eventFetcher = controller.serviceRegistry().eventFetcherService();
- this.awsRegions = controller.zoneRegistry().zones()
- .ofCloud(CloudName.from("aws"))
- .reachable()
- .zones().stream()
- .map(ZoneApi::getCloudNativeRegionName)
- .collect(Collectors.toSet());
+ this.nodeRepository = controller.serviceRegistry().configServer().nodeRepository();
+ this.zonesByCloudNativeRegion = getZonesByCloudNativeRegion();
}
@Override
protected void maintain() {
log.log(Level.INFO, "Fetching events for cloud hosts.");
- for (var awsRegion : awsRegions) {
+ for (var awsRegion : zonesByCloudNativeRegion.keySet()) {
List<CloudEvent> events = eventFetcher.getEvents(awsRegion);
for (var event : events) {
- Issue issue = eventFetcher.createIssue(event);
- if (!issueHandler.issueExists(issue)) {
- issueHandler.file(issue);
- log.log(Level.INFO, String.format("Filed an issue with the title '%s'", issue.summary()));
- }
+ List<String> deprovisionedHosts = deprovisionHosts(awsRegion, event);
+ submitIssue(event, deprovisionedHosts);
}
}
}
+    /**
+     * Requests retirement and deprovisioning of every node of type {@code host} that is affected
+     * by the given event, across all zones registered for the given cloud-native region.
+     *
+     * The request is only sent (and logged) for hosts that are not already flagged with both
+     * wantToRetire and wantToDeprovision, so repeated maintenance runs over the same event do
+     * not re-send it. Already-flagged hosts are still included in the result, so callers can
+     * compare the result size against the number of affected instances.
+     *
+     * @param awsRegion cloud-native region name; an unknown region yields an empty result
+     * @param event the cloud event whose affected instances are matched against hostnames
+     * @return hostnames of all matching hosts, whether flagged now or on an earlier run
+     */
+    private List<String> deprovisionHosts(String awsRegion, CloudEvent event) {
+        return zonesByCloudNativeRegion.getOrDefault(awsRegion, List.of())
+                .stream()
+                .flatMap(zone ->
+                        nodeRepository.list(zone.getId())
+                                .stream()
+                                .filter(shouldDeprovisionHost(event))
+                                .map(node -> {
+                                    String hostname = node.hostname().value();
+                                    // Braces are required here: without them only the log statement
+                                    // is conditional and the request is sent on every run.
+                                    if (!node.wantToDeprovision() || !node.wantToRetire()) {
+                                        log.info(String.format("Setting host %s to wantToRetire and wantToDeprovision", hostname));
+                                        nodeRepository.retireAndDeprovision(zone.getId(), hostname);
+                                    }
+                                    return hostname;
+                                })
+                )
+                .collect(Collectors.toList());
+    }
+
+    /**
+     * Files an issue for the given event unless every affected instance was matched by a
+     * deprovisioned host, or an equivalent issue already exists.
+     *
+     * @param event the cloud event to report
+     * @param deprovisionedHosts hostnames handled by deprovisioning for this event
+     */
+    private void submitIssue(CloudEvent event, List<String> deprovisionedHosts) {
+        boolean allInstancesHandled = event.affectedInstances.size() == deprovisionedHosts.size();
+        if (allInstancesHandled) {
+            return;
+        }
+
+        Issue candidate = eventFetcher.createIssue(event);
+        if (issueHandler.issueExists(candidate)) {
+            return;
+        }
+
+        issueHandler.file(candidate);
+        log.log(Level.INFO, String.format("Filed an issue with the title '%s'", candidate.summary()));
+    }
+
+    /**
+     * Builds a predicate accepting nodes of type {@code host} whose hostname contains at least
+     * one of the event's affected instance identifiers as a substring.
+     *
+     * @param event the cloud event supplying the affected instances
+     * @return predicate deciding whether a node should be deprovisioned because of the event
+     */
+    private Predicate<Node> shouldDeprovisionHost(CloudEvent event) {
+        return node -> {
+            if (node.type() != NodeType.host) {
+                return false;
+            }
+            for (var instance : event.affectedInstances) {
+                if (node.hostname().value().contains(instance)) {
+                    return true;
+                }
+            }
+            return false;
+        };
+    }
+
+    /**
+     * Groups the reachable zones of the "aws" cloud by their cloud-native region name.
+     *
+     * @return map from cloud-native region name to the zones located in that region
+     */
+    private Map<String, List<ZoneApi>> getZonesByCloudNativeRegion() {
+        var reachableAwsZones = controller().zoneRegistry().zones()
+                .ofCloud(CloudName.from("aws"))
+                .reachable()
+                .zones();
+        return reachableAwsZones.stream()
+                .collect(Collectors.groupingBy(ZoneApi::getCloudNativeRegionName));
+    }
}
diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/integration/NodeRepositoryMock.java b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/integration/NodeRepositoryMock.java
index b05fef7c2ba..f99396b3b02 100644
--- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/integration/NodeRepositoryMock.java
+++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/integration/NodeRepositoryMock.java
@@ -188,6 +188,11 @@ public class NodeRepositoryMock implements NodeRepository {
public void cancelFirmwareCheck(ZoneId zone) {
}
+ @Override
+ public void retireAndDeprovision(ZoneId zoneId, String hostName) {
+ nodeRepository.get(zoneId).remove(HostName.from(hostName));
+ }
+
public void doUpgrade(DeploymentId deployment, Optional<HostName> hostName, Version version) {
modifyNodes(deployment, hostName, node -> {
assert node.wantedVersion().equals(version);
diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/CloudEventReporterTest.java b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/CloudEventReporterTest.java
new file mode 100644
index 00000000000..cd2a4fd8453
--- /dev/null
+++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/CloudEventReporterTest.java
@@ -0,0 +1,156 @@
+package com.yahoo.vespa.hosted.controller.maintenance;
+
+import com.yahoo.config.provision.HostName;
+import com.yahoo.config.provision.NodeType;
+import com.yahoo.config.provision.zone.ZoneId;
+import com.yahoo.vespa.hosted.controller.ControllerTester;
+import com.yahoo.vespa.hosted.controller.api.integration.aws.CloudEvent;
+import com.yahoo.vespa.hosted.controller.api.integration.aws.MockAwsEventFetcher;
+import com.yahoo.vespa.hosted.controller.api.integration.configserver.Node;
+import com.yahoo.vespa.hosted.controller.api.integration.organization.IssueId;
+import com.yahoo.vespa.hosted.controller.api.integration.organization.MockIssueHandler;
+import com.yahoo.vespa.hosted.controller.integration.ZoneApiMock;
+import org.junit.Test;
+
+import java.time.Duration;
+import java.util.Arrays;
+import java.util.Date;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+import java.util.stream.Collectors;
+
+import static org.junit.Assert.*;
+
+/**
+ * Tests that {@link CloudEventReporter} deprovisions hosts affected by cloud events and
+ * reports the affected hosts it does not deprovision through the issue handler.
+ *
+ * @author olaa
+ */
+public class CloudEventReporterTest {
+
+    private final ControllerTester tester = new ControllerTester();
+    private final ZoneApiMock nonAwsZone = createZone("prod.zone3", "region-1", "other");
+    private final ZoneApiMock awsZone1 = createZone("prod.zone1", "region-1", "aws");
+    private final ZoneApiMock awsZone2 = createZone("prod.zone2", "region-2", "aws");
+
+    /**
+     * Scenario: three zones, two of which are in AWS. Verifies that
+     * 1. the non-AWS zone is completely ignored,
+     * 2. tenant hosts affected by a cloud event are deprovisioned,
+     * 3. infrastructure hosts affected by a cloud event are reported by the issue handler.
+     */
+    @Test
+    public void maintain() {
+        setUpZones();
+        CloudEventReporter reporter = new CloudEventReporter(tester.controller(), Duration.ofMinutes(15), new JobControl(tester.curator()));
+
+        // State before any event is processed
+        Set<String> initialRegion1Hosts = Set.of("host1.com", "host2.com", "host3.com");
+        assertEquals(initialRegion1Hosts, getHostnames(nonAwsZone.getId()));
+        assertEquals(initialRegion1Hosts, getHostnames(awsZone1.getId()));
+        assertEquals(Set.of("host4.com", "host5.com", "confighost.com"), getHostnames(awsZone2.getId()));
+
+        mockEvents();
+        reporter.maintain();
+
+        // Only tenant hosts in AWS zones named by an event are gone
+        assertEquals(initialRegion1Hosts, getHostnames(nonAwsZone.getId()));
+        assertEquals(Set.of("host3.com"), getHostnames(awsZone1.getId()));
+        assertEquals(Set.of("host4.com", "confighost.com"), getHostnames(awsZone2.getId()));
+
+        // The config host is not deprovisioned, so it must be reported instead
+        Map<IssueId, MockIssueHandler.MockIssue> filedIssues = tester.serviceRegistry().issueHandler().issues();
+        assertEquals(1, filedIssues.size());
+        MockIssueHandler.MockIssue onlyIssue = filedIssues.get(IssueId.from("1"));
+        assertTrue(onlyIssue.issue().description().contains("confighost"));
+    }
+
+    /** Registers one event per AWS region with the mock event fetcher. */
+    private void mockEvents() {
+        MockAwsEventFetcher eventFetcher = (MockAwsEventFetcher) tester.controller().serviceRegistry().eventFetcherService();
+        Date now = new Date();
+
+        eventFetcher.addEvent("region-1", createEvent("event 1", now, "region-1", Set.of("host1", "host2")));
+        eventFetcher.addEvent("region-2", createEvent("event 2", now, "region-2", Set.of("host5", "confighost")));
+    }
+
+    /** Creates an event with fixed instance code and description, using the same date for all three date arguments. */
+    private CloudEvent createEvent(String id, Date date, String region, Set<String> affectedInstances) {
+        return new CloudEvent(id,
+                              "instance code",
+                              "description",
+                              date,
+                              date,
+                              date,
+                              region,
+                              affectedInstances);
+    }
+
+    /** Registers the three zones and populates the node repository mock for each of them. */
+    private void setUpZones() {
+        tester.zoneRegistry().setZones(nonAwsZone, awsZone1, awsZone2);
+
+        var nodeRepository = tester.configServer().nodeRepository();
+        nodeRepository.putByHostname(nonAwsZone.getId(),
+                                     createNodesWithHostnames("host1.com", "host2.com", "host3.com"));
+        nodeRepository.putByHostname(awsZone1.getId(),
+                                     createNodesWithHostnames("host1.com", "host2.com", "host3.com"));
+        nodeRepository.putByHostname(awsZone2.getId(),
+                                     createNodesWithHostnames("host4.com", "host5.com"));
+        nodeRepository.putByHostname(awsZone2.getId(),
+                                     List.of(createNode("confighost.com", NodeType.confighost)));
+    }
+
+    /** Creates one node of type {@code host} per given hostname. */
+    private List<Node> createNodesWithHostnames(String... hostnames) {
+        return Arrays.stream(hostnames)
+                     .map(name -> createNode(name, NodeType.host))
+                     .collect(Collectors.toUnmodifiableList());
+    }
+
+    private Node createNode(String hostname, NodeType nodeType) {
+        return new Node.Builder()
+                .hostname(HostName.from(hostname))
+                .type(nodeType)
+                .build();
+    }
+
+    /** Returns the hostnames of all nodes currently listed for the given zone. */
+    private Set<String> getHostnames(ZoneId zoneId) {
+        List<Node> nodesInZone = tester.configServer().nodeRepository().list(zoneId);
+        return nodesInZone.stream()
+                          .map(node -> node.hostname().value())
+                          .collect(Collectors.toSet());
+    }
+
+    private ZoneApiMock createZone(String zoneId, String cloudNativeRegionName, String cloud) {
+        return ZoneApiMock.newBuilder()
+                          .withId(zoneId)
+                          .withCloudNativeRegionName(cloudNativeRegionName)
+                          .withCloud(cloud)
+                          .build();
+    }
+
+}
\ No newline at end of file