summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
-rw-r--r-- controller-api/src/main/java/com/yahoo/vespa/hosted/controller/api/integration/aws/CloudEvent.java | 6
-rw-r--r-- controller-api/src/main/java/com/yahoo/vespa/hosted/controller/api/integration/aws/MockAwsEventFetcher.java | 15
-rw-r--r-- controller-api/src/main/java/com/yahoo/vespa/hosted/controller/api/integration/configserver/Node.java | 30
-rw-r--r-- controller-api/src/main/java/com/yahoo/vespa/hosted/controller/api/integration/configserver/NodeRepository.java | 6
-rw-r--r-- controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/CloudEventReporter.java | 74
-rw-r--r-- controller-server/src/test/java/com/yahoo/vespa/hosted/controller/integration/NodeRepositoryMock.java | 5
-rw-r--r-- controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/CloudEventReporterTest.java | 156
7 files changed, 268 insertions, 24 deletions
diff --git a/controller-api/src/main/java/com/yahoo/vespa/hosted/controller/api/integration/aws/CloudEvent.java b/controller-api/src/main/java/com/yahoo/vespa/hosted/controller/api/integration/aws/CloudEvent.java
index a7c8a680b73..defcda28a0f 100644
--- a/controller-api/src/main/java/com/yahoo/vespa/hosted/controller/api/integration/aws/CloudEvent.java
+++ b/controller-api/src/main/java/com/yahoo/vespa/hosted/controller/api/integration/aws/CloudEvent.java
@@ -14,10 +14,10 @@ public final class CloudEvent {
public final Optional<Date> notAfter;
public String awsRegionName;
- public Set<String> affectedHostnames;
+ public Set<String> affectedInstances;
public CloudEvent(String instanceEventId, String code, String description, Date notAfter, Date notBefore, Date notBeforeDeadline,
- String awsRegionName, Set<String> affectedHostnames) {
+ String awsRegionName, Set<String> affectedInstances) {
this.instanceEventId = instanceEventId;
this.code = code;
this.description = description;
@@ -26,6 +26,6 @@ public final class CloudEvent {
this.notAfter = Optional.ofNullable(notAfter);
this.awsRegionName = awsRegionName;
- this.affectedHostnames = affectedHostnames;
+ this.affectedInstances = affectedInstances;
}
}
diff --git a/controller-api/src/main/java/com/yahoo/vespa/hosted/controller/api/integration/aws/MockAwsEventFetcher.java b/controller-api/src/main/java/com/yahoo/vespa/hosted/controller/api/integration/aws/MockAwsEventFetcher.java
index 79b332c093a..baf248fc31c 100644
--- a/controller-api/src/main/java/com/yahoo/vespa/hosted/controller/api/integration/aws/MockAwsEventFetcher.java
+++ b/controller-api/src/main/java/com/yahoo/vespa/hosted/controller/api/integration/aws/MockAwsEventFetcher.java
@@ -2,18 +2,29 @@
package com.yahoo.vespa.hosted.controller.api.integration.aws;
import com.yahoo.vespa.hosted.controller.api.integration.organization.Issue;
+import com.yahoo.vespa.hosted.controller.api.integration.organization.User;
import java.util.List;
+import java.util.Map;
+import java.util.HashMap;
+import java.util.ArrayList;
import java.util.Optional;
public class MockAwsEventFetcher implements AwsEventFetcher {
+
+ private Map<String, List<CloudEvent>> mockedEvents = new HashMap<>();
+
@Override
public List<CloudEvent> getEvents(String awsRegionName) {
- return List.of();
+ return mockedEvents.getOrDefault(awsRegionName, new ArrayList<>());
}
@Override
public Issue createIssue(CloudEvent event) {
- return new Issue("summary", "description", "VESPA", Optional.empty());
+ return new Issue("summary", event.affectedInstances.toString(), "VESPA", Optional.empty()).with(User.from(event.awsRegionName));
+ }
+
+ public void addEvent(String awsRegionName, CloudEvent cloudEvent) {
+ mockedEvents.computeIfAbsent(awsRegionName, i -> new ArrayList<>()).add(cloudEvent);
}
}
diff --git a/controller-api/src/main/java/com/yahoo/vespa/hosted/controller/api/integration/configserver/Node.java b/controller-api/src/main/java/com/yahoo/vespa/hosted/controller/api/integration/configserver/Node.java
index 43fea2b76fd..8bb5775566a 100644
--- a/controller-api/src/main/java/com/yahoo/vespa/hosted/controller/api/integration/configserver/Node.java
+++ b/controller-api/src/main/java/com/yahoo/vespa/hosted/controller/api/integration/configserver/Node.java
@@ -37,11 +37,13 @@ public class Node {
private final String flavor;
private final String clusterId;
private final ClusterType clusterType;
+ private final boolean wantToRetire;
+ private final boolean wantToDeprovision;
public Node(HostName hostname, Optional<HostName> parentHostname, State state, NodeType type, NodeResources resources, Optional<ApplicationId> owner,
Version currentVersion, Version wantedVersion, Version currentOsVersion, Version wantedOsVersion, ServiceState serviceState,
long restartGeneration, long wantedRestartGeneration, long rebootGeneration, long wantedRebootGeneration,
- int cost, String flavor, String clusterId, ClusterType clusterType) {
+ int cost, String flavor, String clusterId, ClusterType clusterType, boolean wantToRetire, boolean wantToDeprovision) {
this.hostname = hostname;
this.parentHostname = parentHostname;
this.state = state;
@@ -61,6 +63,8 @@ public class Node {
this.flavor = flavor;
this.clusterId = clusterId;
this.clusterType = clusterType;
+ this.wantToRetire = wantToRetire;
+ this.wantToDeprovision = wantToDeprovision;
}
public HostName hostname() {
@@ -137,6 +141,14 @@ public class Node {
return clusterType;
}
+    /** Returns the wantToRetire flag this node was constructed with. */
+    public boolean wantToRetire() {
+        return this.wantToRetire;
+    }
+
+    /** Returns the wantToDeprovision flag this node was constructed with. */
+    public boolean wantToDeprovision() {
+        return this.wantToDeprovision;
+    }
+
@Override
public boolean equals(Object o) {
if (this == o) return true;
@@ -198,6 +210,8 @@ public class Node {
private String flavor;
private String clusterId;
private ClusterType clusterType;
+ private boolean wantToRetire;
+ private boolean wantToDeprovision;
public Builder() { }
@@ -221,6 +235,8 @@ public class Node {
this.flavor = node.flavor;
this.clusterId = node.clusterId;
this.clusterType = node.clusterType;
+ this.wantToRetire = node.wantToRetire;
+ this.wantToDeprovision = node.wantToDeprovision;
}
public Builder hostname(HostName hostname) {
@@ -318,10 +334,20 @@ public class Node {
return this;
}
+ public Builder wantToRetire(boolean wantToRetire) { // Records the wantToRetire flag for the node to be built.
+ this.wantToRetire = wantToRetire;
+ return this; // Returns this builder so calls can be chained.
+ }
+
+ public Builder wantToDeprovision(boolean wantToDeprovision) { // Records the wantToDeprovision flag for the node to be built.
+ this.wantToDeprovision = wantToDeprovision;
+ return this; // Returns this builder so calls can be chained.
+ }
+
public Node build() { // Creates a Node from the values currently held by this builder.
return new Node(hostname, parentHostname, state, type, resources, owner, currentVersion, wantedVersion, currentOsVersion,
wantedOsVersion, serviceState, restartGeneration, wantedRestartGeneration, rebootGeneration, wantedRebootGeneration,
- cost, flavor, clusterId, clusterType);
+ cost, flavor, clusterId, clusterType, wantToRetire, wantToDeprovision); // Both flags are false unless set on this builder or copied from an existing node.
}
}
}
diff --git a/controller-api/src/main/java/com/yahoo/vespa/hosted/controller/api/integration/configserver/NodeRepository.java b/controller-api/src/main/java/com/yahoo/vespa/hosted/controller/api/integration/configserver/NodeRepository.java
index 94616fd27b2..dd99bef5ee2 100644
--- a/controller-api/src/main/java/com/yahoo/vespa/hosted/controller/api/integration/configserver/NodeRepository.java
+++ b/controller-api/src/main/java/com/yahoo/vespa/hosted/controller/api/integration/configserver/NodeRepository.java
@@ -73,6 +73,8 @@ public interface NodeRepository {
/** Cancels firmware checks on all hosts in the given zone. */
void cancelFirmwareCheck(ZoneId zone);
+ void retireAndDeprovision(ZoneId zoneId, String hostName); // Presumably flags the named host in the given zone as wantToRetire and wantToDeprovision; no real implementation is visible here - confirm.
+
private static Node toNode(NodeRepositoryNode node) {
var application = Optional.ofNullable(node.getOwner())
.map(owner -> ApplicationId.from(owner.getTenant(), owner.getApplication(),
@@ -103,7 +105,9 @@ public interface NodeRepository {
toInt(node.getCost()),
node.getFlavor(),
clusterIdOf(node.getMembership()),
- clusterTypeOf(node.getMembership()));
+ clusterTypeOf(node.getMembership()),
+ node.getWantToRetire(),
+ node.getWantToDeprovision());
}
private static String clusterIdOf(NodeMembership nodeMembership) {
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/CloudEventReporter.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/CloudEventReporter.java
index eda3d0fc571..bd8faaed2e2 100644
--- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/CloudEventReporter.java
+++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/CloudEventReporter.java
@@ -2,23 +2,28 @@
package com.yahoo.vespa.hosted.controller.maintenance;
import com.yahoo.config.provision.CloudName;
+import com.yahoo.config.provision.NodeType;
import com.yahoo.config.provision.zone.ZoneApi;
import com.yahoo.vespa.hosted.controller.Controller;
import com.yahoo.vespa.hosted.controller.api.integration.aws.AwsEventFetcher;
import com.yahoo.vespa.hosted.controller.api.integration.aws.CloudEvent;
+import com.yahoo.vespa.hosted.controller.api.integration.configserver.Node;
+import com.yahoo.vespa.hosted.controller.api.integration.configserver.NodeRepository;
import com.yahoo.vespa.hosted.controller.api.integration.organization.Issue;
import com.yahoo.vespa.hosted.controller.api.integration.organization.IssueHandler;
import java.time.Duration;
import java.util.List;
-import java.util.Set;
+import java.util.Map;
+import java.util.function.Predicate;
import java.util.logging.Level;
import java.util.logging.Logger;
import java.util.stream.Collectors;
/**
- * Automatically fetches scheduled events from AWS and submits issues detailing them.
- *
+ * Automatically fetches and handles scheduled events from AWS:
+ * 1. Deprovisions the affected hosts if applicable
+ * 2. Submits an issue detailing the event if some hosts are not processed by 1.
* @author mgimle
*/
public class CloudEventReporter extends Maintainer {
@@ -27,33 +32,70 @@ public class CloudEventReporter extends Maintainer {
private final IssueHandler issueHandler;
private final AwsEventFetcher eventFetcher;
- private final Set<String> awsRegions;
+ private final Map<String, List<ZoneApi>> zonesByCloudNativeRegion;
+ private final NodeRepository nodeRepository;
CloudEventReporter(Controller controller, Duration interval, JobControl jobControl) { // Wires up collaborators from the controller's service registry.
super(controller, interval, jobControl);
this.issueHandler = controller.serviceRegistry().issueHandler();
this.eventFetcher = controller.serviceRegistry().eventFetcherService();
- this.awsRegions = controller.zoneRegistry().zones()
- .ofCloud(CloudName.from("aws"))
- .reachable()
- .zones().stream()
- .map(ZoneApi::getCloudNativeRegionName)
- .collect(Collectors.toSet());
+ this.nodeRepository = controller.serviceRegistry().configServer().nodeRepository();
+ this.zonesByCloudNativeRegion = getZonesByCloudNativeRegion(); // Computed once here and stored in a final field, so later zone registry changes are not seen by this instance.
}
@Override
protected void maintain() { // For every known AWS region: fetch its events, deprovision affected hosts, then file an issue where needed.
log.log(Level.INFO, "Fetching events for cloud hosts.");
- for (var awsRegion : awsRegions) {
+ for (var awsRegion : zonesByCloudNativeRegion.keySet()) { // Only regions that had a reachable AWS zone at construction time are visited.
List<CloudEvent> events = eventFetcher.getEvents(awsRegion);
for (var event : events) {
- Issue issue = eventFetcher.createIssue(event);
- if (!issueHandler.issueExists(issue)) {
- issueHandler.file(issue);
- log.log(Level.INFO, String.format("Filed an issue with the title '%s'", issue.summary()));
- }
+ List<String> deprovisionedHosts = deprovisionHosts(awsRegion, event);
+ submitIssue(event, deprovisionedHosts); // Files nothing when the number of deprovisioned hosts equals the number of affected instances.
}
}
}
+    /**
+     * Requests retirement and deprovisioning of every host, in the zones of the given AWS region,
+     * that is matched by the event (see shouldDeprovisionHost).
+     *
+     * @param awsRegion cloud-native region name; expected to be a key of zonesByCloudNativeRegion
+     * @param event     the event whose affected instances are matched against hostnames
+     * @return hostnames of all matched hosts, including hosts that already had both flags set
+     */
+    private List<String> deprovisionHosts(String awsRegion, CloudEvent event) {
+        Predicate<Node> isAffectedHost = shouldDeprovisionHost(event);
+        return zonesByCloudNativeRegion.get(awsRegion).stream()
+                .flatMap(zone -> nodeRepository.list(zone.getId()).stream()
+                        .filter(isAffectedHost)
+                        .map(host -> {
+                            String hostname = host.hostname().value();
+                            boolean flagsAlreadySet = host.wantToDeprovision() && host.wantToRetire();
+                            if (!flagsAlreadySet) {
+                                log.info(String.format("Setting host %s to wantToRetire and wantToDeprovision", hostname));
+                            }
+                            // Only the log line is conditional; the request is sent for every matched host.
+                            nodeRepository.retireAndDeprovision(zone.getId(), hostname);
+                            return hostname;
+                        }))
+                .collect(Collectors.toList());
+    }
+
+    /**
+     * Files an issue for the event unless it is considered fully handled or the issue handler
+     * reports that the issue already exists.
+     *
+     * "Fully handled" is decided purely by count: the number of affected instances equals the
+     * number of deprovisioned hosts.
+     *
+     * @param event              the cloud event to report
+     * @param deprovisionedHosts hostnames deprovisioned for this event
+     */
+    private void submitIssue(CloudEvent event, List<String> deprovisionedHosts) {
+        int affectedCount = event.affectedInstances.size();
+        boolean everyInstanceHandled = affectedCount == deprovisionedHosts.size();
+        if (everyInstanceHandled) {
+            return;
+        }
+
+        Issue issue = eventFetcher.createIssue(event);
+        if (issueHandler.issueExists(issue)) {
+            return;
+        }
+        issueHandler.file(issue);
+        log.log(Level.INFO, String.format("Filed an issue with the title '%s'", issue.summary()));
+    }
+
+    /**
+     * Builds the filter selecting nodes to deprovision for the given event: nodes of type
+     * {@code NodeType.host} whose hostname contains one of the event's affected instances.
+     *
+     * NOTE(review): the match is a plain substring test, so an instance "host1" would also match
+     * a hostname such as "host10.com" - confirm that real instance ids cannot collide this way.
+     */
+    private Predicate<Node> shouldDeprovisionHost(CloudEvent event) {
+        return candidate -> {
+            if (candidate.type() != NodeType.host) {
+                return false;
+            }
+            return event.affectedInstances.stream()
+                    .anyMatch(instanceId -> candidate.hostname().value().contains(instanceId));
+        };
+    }
+
+    /** Groups the reachable zones of the "aws" cloud by their cloud-native region name. */
+    private Map<String, List<ZoneApi>> getZonesByCloudNativeRegion() {
+        var reachableAwsZones = controller().zoneRegistry().zones()
+                .ofCloud(CloudName.from("aws"))
+                .reachable()
+                .zones();
+        return reachableAwsZones.stream()
+                .collect(Collectors.groupingBy(ZoneApi::getCloudNativeRegionName));
+    }
}
diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/integration/NodeRepositoryMock.java b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/integration/NodeRepositoryMock.java
index b05fef7c2ba..f99396b3b02 100644
--- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/integration/NodeRepositoryMock.java
+++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/integration/NodeRepositoryMock.java
@@ -188,6 +188,11 @@ public class NodeRepositoryMock implements NodeRepository {
public void cancelFirmwareCheck(ZoneId zone) {
}
+ @Override
+ public void retireAndDeprovision(ZoneId zoneId, String hostName) {
+ nodeRepository.get(zoneId).remove(HostName.from(hostName));
+ }
+
public void doUpgrade(DeploymentId deployment, Optional<HostName> hostName, Version version) {
modifyNodes(deployment, hostName, node -> {
assert node.wantedVersion().equals(version);
diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/CloudEventReporterTest.java b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/CloudEventReporterTest.java
new file mode 100644
index 00000000000..cd2a4fd8453
--- /dev/null
+++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/CloudEventReporterTest.java
@@ -0,0 +1,156 @@
+package com.yahoo.vespa.hosted.controller.maintenance;
+
+import com.yahoo.config.provision.HostName;
+import com.yahoo.config.provision.NodeType;
+import com.yahoo.config.provision.zone.ZoneId;
+import com.yahoo.vespa.hosted.controller.ControllerTester;
+import com.yahoo.vespa.hosted.controller.api.integration.aws.CloudEvent;
+import com.yahoo.vespa.hosted.controller.api.integration.aws.MockAwsEventFetcher;
+import com.yahoo.vespa.hosted.controller.api.integration.configserver.Node;
+import com.yahoo.vespa.hosted.controller.api.integration.organization.IssueId;
+import com.yahoo.vespa.hosted.controller.api.integration.organization.MockIssueHandler;
+import com.yahoo.vespa.hosted.controller.integration.ZoneApiMock;
+import org.junit.Test;
+
+import java.time.Duration;
+import java.util.Arrays;
+import java.util.Date;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+import java.util.stream.Collectors;
+
+import static org.junit.Assert.*;
+
+/**
+ * Exercises {@link CloudEventReporter} against three zones, two of which are in AWS.
+ *
+ * @author olaa
+ */
+public class CloudEventReporterTest {
+
+    private final ControllerTester tester = new ControllerTester();
+    private final ZoneApiMock nonAwsZone = createZone("prod.zone3", "region-1", "other");
+    private final ZoneApiMock awsZone1 = createZone("prod.zone1", "region-1", "aws");
+    private final ZoneApiMock awsZone2 = createZone("prod.zone2", "region-2", "aws");
+
+    /**
+     * Scenario: three zones, two of them in AWS. Expectations:
+     * 1. The non-AWS zone is left untouched
+     * 2. Tenant hosts affected by a cloud event are deprovisioned
+     * 3. Affected infrastructure hosts are reported through the issue handler instead
+     */
+    @Test
+    public void maintain() {
+        setUpZones();
+        CloudEventReporter reporter = new CloudEventReporter(tester.controller(), Duration.ofMinutes(15), new JobControl(tester.curator()));
+
+        // Before any event is processed, every zone still holds all of its nodes
+        assertEquals(Set.of("host1.com", "host2.com", "host3.com"), getHostnames(nonAwsZone.getId()));
+        assertEquals(Set.of("host1.com", "host2.com", "host3.com"), getHostnames(awsZone1.getId()));
+        assertEquals(Set.of("host4.com", "host5.com", "confighost.com"), getHostnames(awsZone2.getId()));
+
+        mockEvents();
+        reporter.maintain();
+
+        // Affected tenant hosts are gone from the AWS zones; the non-AWS zone and the config host remain
+        assertEquals(Set.of("host1.com", "host2.com", "host3.com"), getHostnames(nonAwsZone.getId()));
+        assertEquals(Set.of("host3.com"), getHostnames(awsZone1.getId()));
+        assertEquals(Set.of("host4.com", "confighost.com"), getHostnames(awsZone2.getId()));
+
+        // Exactly one issue is filed, and it mentions the config host that was not deprovisioned
+        Map<IssueId, MockIssueHandler.MockIssue> filedIssues = tester.serviceRegistry().issueHandler().issues();
+        assertEquals(1, filedIssues.size());
+        MockIssueHandler.MockIssue onlyIssue = filedIssues.get(IssueId.from("1"));
+        assertTrue(onlyIssue.issue().description().contains("confighost"));
+    }
+
+    /** Registers one event per AWS region with the mock event fetcher. */
+    private void mockEvents() {
+        MockAwsEventFetcher fetcher = (MockAwsEventFetcher) tester.controller().serviceRegistry().eventFetcherService();
+        Date now = new Date();
+
+        fetcher.addEvent("region-1", createEvent("event 1", now, "region-1", Set.of("host1", "host2")));
+        fetcher.addEvent("region-2", createEvent("event 2", now, "region-2", Set.of("host5", "confighost")));
+    }
+
+    /** Creates an event in which all three dates are the given instant. */
+    private CloudEvent createEvent(String eventId, Date date, String region, Set<String> affectedInstances) {
+        return new CloudEvent(eventId, "instance code", "description", date, date, date, region, affectedInstances);
+    }
+
+    /** Registers the three zones and populates the mock node repository for each of them. */
+    private void setUpZones() {
+        tester.zoneRegistry().setZones(nonAwsZone, awsZone1, awsZone2);
+
+        tester.configServer().nodeRepository().putByHostname(
+                nonAwsZone.getId(),
+                createNodesWithHostnames("host1.com", "host2.com", "host3.com"));
+        tester.configServer().nodeRepository().putByHostname(
+                awsZone1.getId(),
+                createNodesWithHostnames("host1.com", "host2.com", "host3.com"));
+        tester.configServer().nodeRepository().putByHostname(
+                awsZone2.getId(),
+                createNodesWithHostnames("host4.com", "host5.com"));
+        tester.configServer().nodeRepository().putByHostname(
+                awsZone2.getId(),
+                List.of(createNode("confighost.com", NodeType.confighost)));
+    }
+
+    /** Creates one node of type {@code NodeType.host} per hostname, in the given order. */
+    private List<Node> createNodesWithHostnames(String... hostnames) {
+        return Arrays.stream(hostnames)
+                     .map(name -> createNode(name, NodeType.host))
+                     .collect(Collectors.toUnmodifiableList());
+    }
+
+    private Node createNode(String hostname, NodeType nodeType) {
+        Node.Builder builder = new Node.Builder();
+        builder.hostname(HostName.from(hostname));
+        builder.type(nodeType);
+        return builder.build();
+    }
+
+    /** Returns the hostnames of all nodes the mock node repository currently lists for the zone. */
+    private Set<String> getHostnames(ZoneId zoneId) {
+        List<Node> nodes = tester.configServer().nodeRepository().list(zoneId);
+        return nodes.stream()
+                    .map(node -> node.hostname().value())
+                    .collect(Collectors.toSet());
+    }
+
+    private ZoneApiMock createZone(String zoneId, String cloudNativeRegionName, String cloud) {
+        return ZoneApiMock.newBuilder()
+                          .withId(zoneId)
+                          .withCloudNativeRegionName(cloudNativeRegionName)
+                          .withCloud(cloud)
+                          .build();
+    }
+
+}
\ No newline at end of file