summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Martin Polden <mpolden@mpolden.no> 2021-02-09 19:29:48 +0100
committer: GitHub <noreply@github.com> 2021-02-09 19:29:48 +0100
commit: 54f98e3e386a7b78d2f1760bab3de9b58341345e (patch)
tree: 51c52f7aef15d440390664633fdbee041a0ae068
parent: 2ca728203f339235fda01583964a4c6f3ffc15b7 (diff)
parent: 7d97abcf25a5acba2037e3f83308c9da4a3c3cba (diff)
Merge pull request #16451 from vespa-engine/mpolden/schedule-os-upgrades
Schedule OS upgrades automatically in supported clouds
-rw-r--r-- controller-server/src/main/java/com/yahoo/vespa/hosted/controller/Controller.java 2
-rw-r--r-- controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ControllerMaintenance.java 3
-rw-r--r-- controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/OsUpgradeScheduler.java 95
-rw-r--r-- controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/OsUpgradeSchedulerTest.java 66
-rw-r--r-- controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/controller/responses/maintenance.json 3
-rw-r--r-- node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/DynamicProvisioningMaintainer.java 14
-rw-r--r-- node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/GroupPreparer.java 2
7 files changed, 170 insertions, 15 deletions
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/Controller.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/Controller.java
index e12c1903426..abc0784396c 100644
--- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/Controller.java
+++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/Controller.java
@@ -235,6 +235,8 @@ public class Controller extends AbstractComponent {
targets.removeIf(target -> target.osVersion().cloud().equals(cloudName)); // Only allow a single target per cloud
targets.add(new OsVersionTarget(new OsVersion(version, cloudName), upgradeBudget));
curator.writeOsVersionTargets(targets);
+ log.info("Triggered OS upgrade to " + version.toFullString() + " in cloud " +
+ cloudName.value() + upgradeBudget.map(b -> ", with upgrade budget " + b).orElse(""));
}
}
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ControllerMaintenance.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ControllerMaintenance.java
index b32b4ec73fb..bc0295abca3 100644
--- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ControllerMaintenance.java
+++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ControllerMaintenance.java
@@ -51,6 +51,7 @@ public class ControllerMaintenance extends AbstractComponent {
maintainers.add(new SystemUpgrader(controller, intervals.systemUpgrader));
maintainers.add(new JobRunner(controller, intervals.jobRunner));
maintainers.add(new OsVersionStatusUpdater(controller, intervals.osVersionStatusUpdater));
+ maintainers.add(new OsUpgradeScheduler(controller, intervals.osUpgradeScheduler));
maintainers.add(new ContactInformationMaintainer(controller, intervals.contactInformationMaintainer));
maintainers.add(new NameServiceDispatcher(controller, intervals.nameServiceDispatcher));
maintainers.add(new CostReportMaintainer(controller, intervals.costReportMaintainer, controller.serviceRegistry().costReportConsumer()));
@@ -99,6 +100,7 @@ public class ControllerMaintenance extends AbstractComponent {
private final Duration jobRunner;
private final Duration osVersionStatusUpdater;
private final Duration osUpgrader;
+ private final Duration osUpgradeScheduler;
private final Duration contactInformationMaintainer;
private final Duration nameServiceDispatcher;
private final Duration costReportMaintainer;
@@ -124,6 +126,7 @@ public class ControllerMaintenance extends AbstractComponent {
this.jobRunner = duration(90, SECONDS);
this.osVersionStatusUpdater = duration(2, MINUTES);
this.osUpgrader = duration(1, MINUTES);
+ this.osUpgradeScheduler = duration(3, HOURS);
this.contactInformationMaintainer = duration(12, HOURS);
this.nameServiceDispatcher = duration(10, SECONDS);
this.costReportMaintainer = duration(2, HOURS);
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/OsUpgradeScheduler.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/OsUpgradeScheduler.java
new file mode 100644
index 00000000000..cb6d95a3300
--- /dev/null
+++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/OsUpgradeScheduler.java
@@ -0,0 +1,95 @@
+// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.hosted.controller.maintenance;
+
+import com.yahoo.component.Version;
+import com.yahoo.config.provision.CloudName;
+import com.yahoo.config.provision.zone.ZoneApi;
+import com.yahoo.vespa.hosted.controller.Controller;
+import com.yahoo.vespa.hosted.controller.versions.OsVersion;
+import com.yahoo.vespa.hosted.controller.versions.OsVersionTarget;
+
+import java.time.Duration;
+import java.time.Instant;
+import java.time.LocalDate;
+import java.time.ZoneOffset;
+import java.time.format.DateTimeFormatter;
+import java.time.format.DateTimeParseException;
+import java.util.Optional;
+import java.util.Set;
+import java.util.stream.Collectors;
+
+/**
+ * Automatically schedule OS upgrades.
+ *
+ * This is used in clouds where new OS versions regularly become available. A new target is only scheduled for a
+ * cloud that already has a target whose version qualifier starts with a date on the form {@code yyyyMMdd}, and
+ * only once that date is older than {@link #MAX_VERSION_AGE}.
+ *
+ * @author mpolden
+ */
+public class OsUpgradeScheduler extends ControllerMaintainer {
+
+    /** Trigger a new upgrade when the current target version reaches this age */
+    private static final Duration MAX_VERSION_AGE = Duration.ofDays(30);
+
+    /**
+     * The interval at which new versions become available. We use this to avoid scheduling upgrades to a version that
+     * may not be available yet
+     */
+    private static final Duration AVAILABILITY_INTERVAL = Duration.ofDays(7);
+
+    /** Format of the date that makes up the first 8 characters of the version qualifier */
+    private static final DateTimeFormatter VERSION_DATE_PATTERN = DateTimeFormatter.ofPattern("yyyyMMdd");
+
+    public OsUpgradeScheduler(Controller controller, Duration interval) {
+        super(controller, interval);
+    }
+
+    /**
+     * Schedules an upgrade in each supported cloud whose current target has expired.
+     *
+     * @return always true
+     */
+    @Override
+    protected boolean maintain() {
+        // Read the clock once so that the expiry check and the new qualifier are based on the same instant
+        Instant now = controller().clock().instant();
+        for (var cloud : supportedClouds()) {
+            Optional<Version> newTarget = newTargetIn(cloud, now);
+            if (newTarget.isEmpty()) continue;
+            controller().upgradeOsIn(cloud, newTarget.get(), Optional.of(upgradeBudget()), false);
+        }
+        return true;
+    }
+
+    /**
+     * Returns the new target version for given cloud, if any. The new target keeps the major, minor and micro
+     * components of the current target and uses the date {@link #AVAILABILITY_INTERVAL} before now as qualifier.
+     * Empty is returned when the cloud has no target, or when the current target has not expired.
+     */
+    private Optional<Version> newTargetIn(CloudName cloud, Instant now) {
+        Optional<Version> currentTarget = controller().osVersionTarget(cloud)
+                                                      .map(OsVersionTarget::osVersion)
+                                                      .map(OsVersion::version);
+        if (currentTarget.isEmpty()) return Optional.empty();
+        Version current = currentTarget.get();
+        if (!hasExpired(current, now)) return Optional.empty();
+
+        String qualifier = LocalDate.ofInstant(now.minus(AVAILABILITY_INTERVAL), ZoneOffset.UTC)
+                                    .format(VERSION_DATE_PATTERN);
+        return Optional.of(new Version(current.getMajor(), current.getMinor(), current.getMicro(), qualifier));
+    }
+
+    /**
+     * Returns whether we should upgrade from given version. This is the case when the qualifier consists of at least
+     * 8 digits, the first 8 of which form a valid date that is more than {@link #MAX_VERSION_AGE} before now.
+     */
+    private boolean hasExpired(Version version, Instant now) {
+        String qualifier = version.getQualifier();
+        if (!qualifier.matches("^\\d{8,}")) return false;
+
+        LocalDate versionDate;
+        try {
+            versionDate = LocalDate.parse(qualifier.substring(0, 8), VERSION_DATE_PATTERN);
+        } catch (DateTimeParseException e) {
+            // 8 digits that are not a valid date, e.g. month 13. Such a version carries no usable age, and
+            // failing here would prevent scheduling in the remaining clouds
+            return false;
+        }
+        Instant versionInstant = versionDate.atStartOfDay(ZoneOffset.UTC).toInstant();
+        return versionInstant.isBefore(now.minus(MAX_VERSION_AGE));
+    }
+
+    /** Returns the clouds where we can safely schedule OS upgrades: those of zones that reprovision to upgrade OS */
+    private Set<CloudName> supportedClouds() {
+        return controller().zoneRegistry().zones().reprovisionToUpgradeOs().zones().stream()
+                           .map(ZoneApi::getCloudName)
+                           .collect(Collectors.toUnmodifiableSet());
+    }
+
+    /** Returns the upgrade budget passed along with a scheduled upgrade: 1 hour in CD systems, otherwise 14 days */
+    private Duration upgradeBudget() {
+        return controller().system().isCd() ? Duration.ofHours(1) : Duration.ofDays(14);
+    }
+
+}
diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/OsUpgradeSchedulerTest.java b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/OsUpgradeSchedulerTest.java
new file mode 100644
index 00000000000..4aed9b0bffe
--- /dev/null
+++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/OsUpgradeSchedulerTest.java
@@ -0,0 +1,66 @@
+// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.hosted.controller.maintenance;
+
+import com.yahoo.component.Version;
+import com.yahoo.config.provision.CloudName;
+import com.yahoo.config.provision.zone.ZoneApi;
+import com.yahoo.vespa.hosted.controller.ControllerTester;
+import com.yahoo.vespa.hosted.controller.integration.ZoneApiMock;
+import org.junit.Test;
+
+import java.time.Duration;
+import java.time.Instant;
+import java.util.List;
+import java.util.Optional;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+
+/**
+ * Verifies that {@link OsUpgradeScheduler} replaces an OS version target only once it has expired.
+ *
+ * @author mpolden
+ */
+public class OsUpgradeSchedulerTest {
+
+    @Test
+    public void maintain() {
+        ControllerTester tester = new ControllerTester();
+        OsUpgradeScheduler scheduler = new OsUpgradeScheduler(tester.controller(), Duration.ofDays(1));
+        tester.clock().setInstant(Instant.parse("2021-01-23T00:00:00.00Z"));
+
+        CloudName cloud = CloudName.from("cloud");
+        ZoneApi zone = ZoneApiMock.newBuilder().withId("prod.us-west-1").with(cloud).build();
+        tester.zoneRegistry().setZones(zone).reprovisionToUpgradeOsIn(zone);
+
+        // Without an existing target the scheduler has nothing to replace
+        scheduler.maintain();
+        assertTrue("No target set", tester.controller().osVersionTarget(cloud).isEmpty());
+
+        // An initial target is set manually
+        Version initialTarget = Version.fromString("7.0.0.20210123190005");
+        tester.controller().upgradeOsIn(cloud, initialTarget, Optional.of(Duration.ofDays(1)), false);
+
+        // Running immediately leaves the target alone
+        tester.clock().advance(Duration.ZERO);
+        scheduler.maintain();
+        assertEquals(initialTarget, currentTarget(tester, cloud));
+
+        // ... and so does running 15 days later, as the target still hasn't expired
+        tester.clock().advance(Duration.ofDays(15));
+        scheduler.maintain();
+        assertEquals(initialTarget, currentTarget(tester, cloud));
+
+        // Once just over 30 days have passed in total, the expired target is replaced
+        Version scheduledTarget = Version.fromString("7.0.0.20210215");
+        tester.clock().advance(Duration.ofDays(15).plusSeconds(1));
+        scheduler.maintain();
+        assertEquals("New target set", scheduledTarget, currentTarget(tester, cloud));
+
+        // The fresh target survives a run a couple of days later
+        tester.clock().advance(Duration.ofDays(2));
+        scheduler.maintain();
+        assertEquals(scheduledTarget, currentTarget(tester, cloud));
+    }
+
+    /** Returns the version of the OS target currently set for given cloud. Requires that a target is set */
+    private static Version currentTarget(ControllerTester tester, CloudName cloud) {
+        return tester.controller().osVersionTarget(cloud).get().osVersion().version();
+    }
+
+}
diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/controller/responses/maintenance.json b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/controller/responses/maintenance.json
index 14fd7abd96c..f8ae1cfefa0 100644
--- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/controller/responses/maintenance.json
+++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/controller/responses/maintenance.json
@@ -46,6 +46,9 @@
"name": "NameServiceDispatcher"
},
{
+ "name": "OsUpgradeScheduler"
+ },
+ {
"name": "OsVersionStatusUpdater"
},
{
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/DynamicProvisioningMaintainer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/DynamicProvisioningMaintainer.java
index 5e54f09f7a3..55ee0fc9708 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/DynamicProvisioningMaintainer.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/DynamicProvisioningMaintainer.java
@@ -34,7 +34,6 @@ import com.yahoo.vespa.hosted.provision.provisioning.HostProvisioner;
import com.yahoo.vespa.hosted.provision.provisioning.HostProvisioner.HostSharing;
import com.yahoo.vespa.hosted.provision.provisioning.NodeCandidate;
import com.yahoo.vespa.hosted.provision.provisioning.NodePrioritizer;
-import com.yahoo.vespa.hosted.provision.provisioning.NodeResourceComparator;
import com.yahoo.vespa.hosted.provision.provisioning.NodeSpec;
import com.yahoo.vespa.hosted.provision.provisioning.ProvisionedHost;
import com.yahoo.yolean.Exceptions;
@@ -53,7 +52,6 @@ import java.util.function.Function;
import java.util.logging.Level;
import java.util.logging.Logger;
import java.util.stream.Collectors;
-import java.util.stream.IntStream;
/**
* @author freva
@@ -317,18 +315,6 @@ public class DynamicProvisioningMaintainer extends NodeRepositoryMaintainer {
clusterCapacity.bandwidthGbps());
}
- /** Reads node resources declared by target capacity flag */
- private List<NodeResources> targetCapacity() {
- return preprovisionCapacityFlag.value().stream()
- .flatMap(cap -> {
- NodeResources resources = new NodeResources(cap.vcpu(), cap.memoryGb(),
- cap.diskGb(), cap.bandwidthGbps());
- return IntStream.range(0, cap.count()).mapToObj(i -> resources);
- })
- .sorted(NodeResourceComparator.memoryDiskCpuOrder().reversed())
- .collect(Collectors.toList());
- }
-
/** Verify DNS configuration of given nodes */
private void verifyDns(List<Node> nodes) {
for (var node : nodes) {
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/GroupPreparer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/GroupPreparer.java
index cd381b467d4..3fbc7b213a1 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/GroupPreparer.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/GroupPreparer.java
@@ -91,7 +91,7 @@ public class GroupPreparer {
if (nodeRepository.zone().getCloud().dynamicProvisioning()) {
final Version osVersion;
if (allocateOsRequirement.equals("rhel8")) {
- osVersion = new Version(8);
+ osVersion = new Version(8, Integer.MAX_VALUE /* always use latest 8 version */, 0);
} else {
osVersion = nodeRepository.osVersions().targetFor(NodeType.host).orElse(Version.emptyVersion);
}