about | summary | refs | log | tree | commit | diff | stats
path: root/controller-server/src/test/java/com
diff options
context:
space:
mode:
author Martin Polden <mpolden@mpolden.no> 2019-10-07 13:02:27 +0200
committer Martin Polden <mpolden@mpolden.no> 2019-10-07 13:59:40 +0200
commit 374b324b2f5fdbcc99761e064bb4a182e0f6aa07 (patch)
tree 074cc669ec6ded569d81aeba6c041c108e8b5bb0 /controller-server/src/test/java/com
parent dd2739ec3abce6ca0ab3341b0ef6968f48f26492 (diff)
Report metric for nodes failing OS upgrade
Diffstat (limited to 'controller-server/src/test/java/com')
-rw-r--r-- controller-server/src/test/java/com/yahoo/vespa/hosted/controller/integration/ConfigServerMock.java | 35
-rw-r--r-- controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/MetricsReporterTest.java | 56
2 files changed, 88 insertions, 3 deletions
diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/integration/ConfigServerMock.java b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/integration/ConfigServerMock.java
index 6da77a967f1..6e7a50b5f81 100644
--- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/integration/ConfigServerMock.java
+++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/integration/ConfigServerMock.java
@@ -151,15 +151,44 @@ public class ConfigServerMock extends AbstractComponent implements ConfigServer
/** Set version for an application in a given zone */
public void setVersion(ApplicationId application, ZoneId zone, Version version) {
- setVersion(application, zone, version, -1);
+ setVersion(application, zone, version, -1, false);
}
/** Set version for nodeCount number of nodes in application in a given zone */
public void setVersion(ApplicationId application, ZoneId zone, Version version, int nodeCount) {
+ setVersion(application, zone, version, nodeCount, false);
+ }
+
+ /** Set OS version for all nodes in an application in a given zone (delegates with a nodeCount of -1) */
+ public void setOsVersion(ApplicationId application, ZoneId zone, Version version) {
+ setOsVersion(application, zone, version, -1);
+ }
+
+ /** Set OS version for nodeCount number of nodes in application in a given zone */
+ public void setOsVersion(ApplicationId application, ZoneId zone, Version version, int nodeCount) {
+ setVersion(application, zone, version, nodeCount, true); // true selects the OS version fields
+ }
+
+ private void setVersion(ApplicationId application, ZoneId zone, Version version, int nodeCount, boolean osVersion) { // Shared implementation of the public setVersion/setOsVersion overloads; they pass nodeCount -1 when no limit is wanted, since the counter check below then never matches
int n = 0;
for (Node node : nodeRepository().list(zone, application)) {
- nodeRepository().putByHostname(zone, new Node(node.hostname(), node.state(), node.type(), node.owner(),
- version, version));
+ Node newNode; // copy of node in which one version pair is replaced by 'version'
+ if (osVersion) { // keep node.currentVersion()/wantedVersion(); pass 'version' in the two positions the else-branch fills with the OS versions
+ newNode = new Node(node.hostname(), node.state(), node.type(), node.owner(), node.currentVersion(),
+ node.wantedVersion(), version, version, node.serviceState(),
+ node.restartGeneration(), node.wantedRestartGeneration(), node.rebootGeneration(),
+ node.wantedRebootGeneration(), node.vcpu(), node.memoryGb(), node.diskGb(),
+ node.bandwidthGbps(), node.fastDisk(), node.cost(), node.canonicalFlavor(),
+ node.clusterId(), node.clusterType());
+ } else { // keep node.currentOsVersion()/wantedOsVersion(); pass 'version' as both leading version arguments
+ newNode = new Node(node.hostname(), node.state(), node.type(), node.owner(), version,
+ version, node.currentOsVersion(), node.wantedOsVersion(), node.serviceState(),
+ node.restartGeneration(), node.wantedRestartGeneration(), node.rebootGeneration(),
+ node.wantedRebootGeneration(), node.vcpu(), node.memoryGb(), node.diskGb(),
+ node.bandwidthGbps(), node.fastDisk(), node.cost(), node.canonicalFlavor(),
+ node.clusterId(), node.clusterType());
+ }
+ nodeRepository().putByHostname(zone, newNode); // write the rebuilt node to the node repository for this zone
if (++n == nodeCount) break;
}
}
diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/MetricsReporterTest.java b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/MetricsReporterTest.java
index 9cb40d60677..44785407874 100644
--- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/MetricsReporterTest.java
+++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/MetricsReporterTest.java
@@ -3,6 +3,7 @@ package com.yahoo.vespa.hosted.controller.maintenance;
import com.yahoo.component.Version;
import com.yahoo.config.provision.ApplicationId;
+import com.yahoo.config.provision.CloudName;
import com.yahoo.config.provision.Environment;
import com.yahoo.config.provision.zone.UpgradePolicy;
import com.yahoo.config.provision.zone.ZoneId;
@@ -262,6 +263,57 @@ public class MetricsReporterTest {
}
}
+    @Test
+    public void test_nodes_failing_os_upgrade() {
+        // Checks the value read by getNodesFailingOsUpgrade() through two consecutive OS upgrades:
+        // it is expected to be 0 at upgrade start and after 30 minutes, 2 once just over an hour has
+        // passed with only one node moved to the new version, and 0 again when all nodes are moved.
+        var tester = new DeploymentTester();
+        var reporter = createReporter(tester.controller());
+        var zone = ZoneApiMock.fromId("prod.eu-west-1");
+        var cloud = CloudName.defaultName();
+        tester.controllerTester().zoneRegistry().setOsUpgradePolicy(cloud, UpgradePolicy.create().upgrade(zone));
+        var osUpgrader = new OsUpgrader(tester.controller(), Duration.ofDays(1),
+                                        new JobControl(tester.controllerTester().curator()), cloud);
+        var statusUpdater = new OsVersionStatusUpdater(tester.controller(), Duration.ofDays(1),
+                                                       new JobControl(tester.controller().curator()));
+        tester.configServer().bootstrap(List.of(zone.getId()), SystemApplication.tenantHost);
+
+        // All nodes upgrade to initial OS version
+        var version0 = Version.fromString("8.0");
+        tester.controller().upgradeOsIn(cloud, version0, false);
+        osUpgrader.maintain();
+        tester.configServer().setOsVersion(SystemApplication.tenantHost.id(), zone.getId(), version0);
+        statusUpdater.maintain();
+        reporter.maintain();
+        assertEquals(0, getNodesFailingOsUpgrade());
+
+        for (var version : List.of(Version.fromString("8.1"), Version.fromString("8.2"))) {
+            // System starts upgrading to next OS version
+            tester.controller().upgradeOsIn(cloud, version, false);
+            osUpgrader.maintain();
+            statusUpdater.maintain();
+            reporter.maintain();
+            assertEquals(0, getNodesFailingOsUpgrade());
+
+            // 30 minutes pass and nothing happens
+            tester.clock().advance(Duration.ofMinutes(30));
+            statusUpdater.maintain();
+            reporter.maintain();
+            assertEquals(0, getNodesFailingOsUpgrade());
+
+            // 1/3 nodes upgrade; the clock then moves to just over 1 hour since the upgrade started
+            tester.configServer().setOsVersion(SystemApplication.tenantHost.id(), zone.getId(), version, 1);
+            tester.clock().advance(Duration.ofMinutes(30).plus(Duration.ofSeconds(1)));
+            statusUpdater.maintain();
+            reporter.maintain();
+            assertEquals(2, getNodesFailingOsUpgrade());
+
+            // 3/3 nodes upgrade
+            tester.configServer().setOsVersion(SystemApplication.tenantHost.id(), zone.getId(), version);
+            statusUpdater.maintain();
+            reporter.maintain();
+            assertEquals(0, getNodesFailingOsUpgrade());
+        }
+    }
+
private Duration getAverageDeploymentDuration(ApplicationId id) {
return Duration.ofSeconds(getMetric(MetricsReporter.DEPLOYMENT_AVERAGE_DURATION, id).longValue());
}
@@ -278,6 +330,10 @@ public class MetricsReporterTest {
return metrics.getMetric(MetricsReporter.NODES_FAILING_SYSTEM_UPGRADE).intValue();
}
+    /** Looks up MetricsReporter.NODES_FAILING_OS_UPGRADE in {@code metrics} and returns it as an int. */
+    private int getNodesFailingOsUpgrade() {
+        var failingNodes = metrics.getMetric(MetricsReporter.NODES_FAILING_OS_UPGRADE);
+        return failingNodes.intValue();
+    }
+
private Number getMetric(String name, ApplicationId id) {
return metrics.getMetric((dimensions) -> id.tenant().value().equals(dimensions.get("tenant")) &&
appDimension(id).equals(dimensions.get("app")),