From f51680fbcf11daf5902bed8128f7b80b3045f3d0 Mon Sep 17 00:00:00 2001 From: Martin Polden Date: Wed, 13 May 2020 11:04:47 +0200 Subject: Send OS upgrade budget in applicable clouds --- .../yahoo/vespa/hosted/controller/Controller.java | 22 ++-- .../hosted/controller/maintenance/OsUpgrader.java | 21 +++- .../controller/integration/NodeRepositoryMock.java | 9 +- .../controller/integration/ZoneRegistryMock.java | 12 +- .../maintenance/MetricsReporterTest.java | 5 +- .../controller/maintenance/OsUpgraderTest.java | 122 ++++++++++++++++----- .../maintenance/OsVersionStatusUpdaterTest.java | 3 +- .../hosted/controller/restapi/os/OsApiTest.java | 1 + 8 files changed, 154 insertions(+), 41 deletions(-) (limited to 'controller-server/src') diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/Controller.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/Controller.java index b1ecff656cf..12a7520129a 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/Controller.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/Controller.java @@ -6,6 +6,7 @@ import com.yahoo.component.AbstractComponent; import com.yahoo.component.Version; import com.yahoo.component.Vtag; import com.yahoo.concurrent.maintenance.JobControl; +import com.yahoo.config.provision.Cloud; import com.yahoo.config.provision.CloudName; import com.yahoo.config.provision.HostName; import com.yahoo.config.provision.SystemName; @@ -33,6 +34,7 @@ import com.yahoo.vespa.hosted.rotation.config.RotationsConfig; import com.yahoo.vespa.serviceview.bindings.ApplicationView; import java.time.Clock; +import java.time.Duration; import java.util.LinkedHashMap; import java.util.Map; import java.util.Objects; @@ -199,22 +201,26 @@ public class Controller extends AbstractComponent { } /** Set the target OS version for infrastructure on cloud in this system */ - public void upgradeOsIn(CloudName cloud, Version version, boolean force) { + public void upgradeOsIn(CloudName cloudName, Version version, Optional upgradeBudget, boolean force) { if (version.isEmpty()) { throw new IllegalArgumentException("Invalid version '" + version.toFullString() + "'"); } - if (!clouds().contains(cloud)) { - throw new IllegalArgumentException("Cloud '" + cloud.value() + "' does not exist in this system"); + Cloud cloud = zoneRegistry.cloud(cloudName); + if (cloud == null) { + throw new IllegalArgumentException("Cloud '" + cloudName + "' does not exist in this system"); + } + if (cloud.reprovisionToUpgradeOs() && upgradeBudget.isEmpty()) { + throw new IllegalArgumentException("Cloud '" + cloudName.value() + "' requires a time budget for OS upgrades"); } try (Lock lock = curator.lockOsVersions()) { Set targets = new TreeSet<>(curator.readOsVersionTargets()); - if (!force && targets.stream().anyMatch(target -> target.osVersion().cloud().equals(cloud) && - target.osVersion().version().isAfter(version))) { - throw new IllegalArgumentException("Cannot downgrade cloud '" + cloud.value() + "' to version " + + if (!force && targets.stream().anyMatch(target -> target.osVersion().cloud().equals(cloudName) && + target.osVersion().version().isAfter(version))) { + throw new IllegalArgumentException("Cannot downgrade cloud '" + cloudName.value() + "' to version " + version.toFullString()); } - targets.removeIf(target -> target.osVersion().cloud().equals(cloud)); // Only allow a single target per cloud - targets.add(new OsVersionTarget(new OsVersion(version, cloud), Optional.empty())); + targets.removeIf(target -> target.osVersion().cloud().equals(cloudName)); // Only allow a single target per cloud + targets.add(new OsVersionTarget(new OsVersion(version, cloudName), upgradeBudget)); curator.writeOsVersionTargets(targets); } } diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/OsUpgrader.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/OsUpgrader.java index 344e64d8d3f..817ec9c08e8 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/OsUpgrader.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/OsUpgrader.java @@ -4,6 +4,7 @@ package com.yahoo.vespa.hosted.controller.maintenance; import com.yahoo.component.Version; import com.yahoo.config.provision.Cloud; import com.yahoo.config.provision.CloudName; +import com.yahoo.config.provision.Environment; import com.yahoo.config.provision.zone.ZoneApi; import com.yahoo.vespa.hosted.controller.Controller; import com.yahoo.vespa.hosted.controller.api.integration.configserver.Node; @@ -39,10 +40,14 @@ public class OsUpgrader extends InfrastructureUpgrader { @Override protected void upgrade(OsVersionTarget target, SystemApplication application, ZoneApi zone) { - log.info(String.format("Upgrading OS of %s to version %s in %s in cloud %s", application.id(), target, - zone.getId(), zone.getCloudName())); + Optional zoneUpgradeBudget = target.upgradeBudget() + .map(totalBudget -> zoneBudgetOf(totalBudget, zone)); + log.info(String.format("Upgrading OS of %s to version %s in %s in cloud %s%s", application.id(), target, + zone.getId(), zone.getCloudName(), + zoneUpgradeBudget.map(d -> " with time budget " + d).orElse(""))); controller().serviceRegistry().configServer().nodeRepository().upgradeOs(zone.getId(), application.nodeType(), - target.osVersion().version()); + target.osVersion().version(), + zoneUpgradeBudget); } @Override @@ -79,6 +84,16 @@ public class OsUpgrader extends InfrastructureUpgrader { return minVersion(zone, application, Node::currentOsVersion).orElse(defaultVersion); } + /** Returns the available upgrade budget for given zone */ + private Duration zoneBudgetOf(Duration totalBudget, ZoneApi zone) { + if (!zone.getEnvironment().isProduction()) return Duration.ZERO; + long consecutiveProductionZones = upgradePolicy.asList().stream() + .filter(parallelZones -> parallelZones.stream().map(ZoneApi::getEnvironment) + .anyMatch(Environment::isProduction)) + .count(); + return totalBudget.dividedBy(consecutiveProductionZones); + } + /** Returns whether node is in a state where it can be upgraded */ public static boolean canUpgrade(Node node) { return upgradableNodeStates.contains(node.state()); diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/integration/NodeRepositoryMock.java b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/integration/NodeRepositoryMock.java index 76a30c289b8..90276b6b590 100644 --- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/integration/NodeRepositoryMock.java +++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/integration/NodeRepositoryMock.java @@ -16,6 +16,7 @@ import com.yahoo.vespa.hosted.controller.api.integration.noderepository.NodeList import com.yahoo.vespa.hosted.controller.api.integration.noderepository.NodeRepositoryNode; import com.yahoo.vespa.hosted.controller.api.integration.noderepository.NodeState; +import java.time.Duration; import java.util.Collection; import java.util.Collections; import java.util.HashMap; @@ -37,6 +38,7 @@ public class NodeRepositoryMock implements NodeRepository { private final Map> nodeRepository = new HashMap<>(); private final Map> applications = new HashMap<>(); private final Map targetVersions = new HashMap<>(); + private final Map osUpgradeBudgets = new HashMap<>(); /** Add or update given nodes in zone */ public void putNodes(ZoneId zone, List nodes) { @@ -190,7 +192,8 @@ public class NodeRepositoryMock implements NodeRepository { } @Override - public void upgradeOs(ZoneId zone, NodeType type, Version version) { + public void upgradeOs(ZoneId zone, NodeType type, Version version, Optional upgradeBudget) { + upgradeBudget.ifPresent(d -> this.osUpgradeBudgets.put(Objects.hash(zone, type, version), d)); this.targetVersions.compute(zone, (ignored, targetVersions) -> { if (targetVersions == null) { targetVersions = TargetVersions.EMPTY; @@ -223,6 +226,10 @@ public class NodeRepositoryMock implements NodeRepository { nodeRepository.get(zoneId).remove(HostName.from(hostName)); } + public Optional osUpgradeBudget(ZoneId zone, NodeType type, Version version) { + return Optional.ofNullable(osUpgradeBudgets.get(Objects.hash(zone, type, version))); + } + public void doUpgrade(DeploymentId deployment, Optional hostName, Version version) { modifyNodes(deployment, hostName, node -> { assert node.wantedVersion().equals(version); diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/integration/ZoneRegistryMock.java b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/integration/ZoneRegistryMock.java index 902af26e5ba..a453a990855 100644 --- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/integration/ZoneRegistryMock.java +++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/integration/ZoneRegistryMock.java @@ -37,6 +37,7 @@ public class ZoneRegistryMock extends AbstractComponent implements ZoneRegistry private final Map defaultRegionForEnvironment = new HashMap<>(); private final Map osUpgradePolicies = new HashMap<>(); private final Map> zoneRoutingMethods = new HashMap<>(); + private final Map clouds = new HashMap<>(); private List zones; private SystemName system; @@ -61,6 +62,8 @@ public class ZoneRegistryMock extends AbstractComponent implements ZoneRegistry ZoneApiMock.fromId("prod.us-west-1"), ZoneApiMock.fromId("prod.us-central-1"), ZoneApiMock.fromId("prod.eu-west-1")); + var cloud = Cloud.defaultCloud(); + this.clouds.put(cloud.name(), cloud); // All zones use a shared routing method by default setRoutingMethod(this.zones, RoutingMethod.shared); } @@ -99,6 +102,13 @@ public class ZoneRegistryMock extends AbstractComponent implements ZoneRegistry return this; } + public ZoneRegistryMock addCloud(Cloud... clouds) { + for (var cloud : clouds) { + this.clouds.put(cloud.name(), cloud); + } + return this; + } + public ZoneRegistryMock exclusiveRoutingIn(ZoneApi... zones) { return exclusiveRoutingIn(List.of(zones)); } @@ -201,7 +211,7 @@ public class ZoneRegistryMock extends AbstractComponent implements ZoneRegistry @Override public Cloud cloud(CloudName name) { - return new Cloud(name, false, true, false, false); + return clouds.get(name); } @Override diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/MetricsReporterTest.java b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/MetricsReporterTest.java index 4c6e6cf3673..b37fb8123fa 100644 --- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/MetricsReporterTest.java +++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/MetricsReporterTest.java @@ -23,6 +23,7 @@ import org.junit.Test; import java.time.Duration; import java.util.Comparator; import java.util.List; +import java.util.Optional; import java.util.Set; import java.util.function.UnaryOperator; import java.util.stream.Collectors; @@ -298,7 +299,7 @@ public class MetricsReporterTest { // All nodes upgrade to initial OS version var version0 = Version.fromString("8.0"); - tester.controller().upgradeOsIn(cloud, version0, false); + tester.controller().upgradeOsIn(cloud, version0, Optional.empty(), false); osUpgrader.maintain(); tester.configServer().setOsVersion(version0, SystemApplication.tenantHost.id(), zone); tester.configServer().setOsVersion(version0, SystemApplication.configServerHost.id(), zone); @@ -312,7 +313,7 @@ public class MetricsReporterTest { var currentVersion = i == 0 ? version0 : targets.get(i - 1); var version = targets.get(i); // System starts upgrading to next OS version - tester.controller().upgradeOsIn(cloud, version, false); + tester.controller().upgradeOsIn(cloud, version, Optional.empty(), false); runAll(osUpgrader, statusUpdater, reporter); assertOsChangeDuration(Duration.ZERO, hosts, currentVersion); diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/OsUpgraderTest.java b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/OsUpgraderTest.java index 7d8473c94c9..c8319efa348 100644 --- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/OsUpgraderTest.java +++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/OsUpgraderTest.java @@ -17,43 +17,45 @@ import com.yahoo.vespa.hosted.controller.versions.NodeVersion; import org.junit.Test; import java.time.Duration; +import java.util.Collection; import java.util.List; +import java.util.Optional; import java.util.function.Function; import java.util.stream.Collectors; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertTrue; +import static org.junit.Assert.fail; /** * @author mpolden */ public class OsUpgraderTest { - private static final ZoneApi zone1 = ZoneApiMock.newBuilder().withId("prod.eu-west-1").build(); - private static final ZoneApi zone2 = ZoneApiMock.newBuilder().withId("prod.us-west-1").build(); - private static final ZoneApi zone3 = ZoneApiMock.newBuilder().withId("prod.us-central-1").build(); - private static final ZoneApi zone4 = ZoneApiMock.newBuilder().withId("prod.us-east-3").build(); - private static final ZoneApi zone5 = ZoneApiMock.newBuilder().withId("prod.us-north-1").withCloud("other").build(); - private final ControllerTester tester = new ControllerTester(); - private final OsVersionStatusUpdater statusUpdater = new OsVersionStatusUpdater(tester.controller(), Duration.ofDays(1) - ); + private final OsVersionStatusUpdater statusUpdater = new OsVersionStatusUpdater(tester.controller(), Duration.ofDays(1)); + @Test public void upgrade_os() { - OsUpgrader osUpgrader = osUpgrader( - UpgradePolicy.create() - .upgrade(zone1) - .upgradeInParallel(zone2, zone3) - .upgrade(zone5) // Belongs to a different cloud and is ignored by this upgrader - .upgrade(zone4), - SystemName.cd - ); + Cloud cloud1 = new Cloud(CloudName.from("c1"), false, true, false, true); + Cloud cloud2 = new Cloud(CloudName.from("c2"), false, true, false, true); + ZoneApi zone1 = zone("prod.eu-west-1", cloud1); + ZoneApi zone2 = zone("prod.us-west-1", cloud1); + ZoneApi zone3 = zone("prod.us-central-1", cloud1); + ZoneApi zone4 = zone("prod.us-east-3", cloud1); + ZoneApi zone5 = zone("prod.us-north-1", cloud2); + UpgradePolicy upgradePolicy = UpgradePolicy.create() + .upgrade(zone1) + .upgradeInParallel(zone2, zone3) + .upgrade(zone5) // Belongs to a different cloud and is ignored by this upgrader + .upgrade(zone4); + OsUpgrader osUpgrader = osUpgrader(upgradePolicy, SystemName.cd, cloud1, cloud2); // Bootstrap system tester.configServer().bootstrap(List.of(zone1.getId(), zone2.getId(), zone3.getId(), zone4.getId(), zone5.getId()), List.of(SystemApplication.tenantHost)); - // Add system applications that exist in a real system, but are currently not upgraded + // Add system applications that exist in a real system, but isn't upgraded tester.configServer().addNodes(List.of(zone1.getId(), zone2.getId(), zone3.getId(), zone4.getId(), zone5.getId()), List.of(SystemApplication.configServer)); @@ -63,9 +65,8 @@ public class OsUpgraderTest { // New OS version released Version version1 = Version.fromString("7.1"); - CloudName cloud = CloudName.defaultName(); - tester.controller().upgradeOsIn(cloud, Version.fromString("7.0"), false); - tester.controller().upgradeOsIn(cloud, version1, false); + tester.controller().upgradeOsIn(cloud1.name(), Version.fromString("7.0"), Optional.empty(), false); + tester.controller().upgradeOsIn(cloud1.name(), version1, Optional.empty(), false); assertEquals(1, tester.controller().osVersionTargets().size()); // Only allows one version per cloud statusUpdater.maintain(); @@ -103,10 +104,74 @@ public class OsUpgraderTest { osUpgrader.maintain(); assertWanted(version1, SystemApplication.tenantHost, zone1.getId(), zone2.getId(), zone3.getId(), zone4.getId()); statusUpdater.maintain(); - assertTrue("All nodes on target version", tester.controller().osVersionStatus().nodesIn(cloud).stream() + assertTrue("All nodes on target version", tester.controller().osVersionStatus().nodesIn(cloud1.name()).stream() .allMatch(node -> node.currentVersion().equals(version1))); } + @Test + public void upgrade_os_with_budget() { + Cloud cloud = new Cloud(CloudName.from("cloud"), false, true, true, true); + ZoneApi zone1 = zone("dev.us-east-1", cloud); + ZoneApi zone2 = zone("prod.us-west-1", cloud); + ZoneApi zone3 = zone("prod.us-central-1", cloud); + ZoneApi zone4 = zone("prod.eu-west-1", cloud); + UpgradePolicy upgradePolicy = UpgradePolicy.create() + .upgrade(zone1) + .upgradeInParallel(zone2, zone3) + .upgrade(zone4); + OsUpgrader osUpgrader = osUpgrader(upgradePolicy, SystemName.cd, cloud); + + // Bootstrap system + tester.configServer().bootstrap(List.of(zone1.getId(), zone2.getId(), zone3.getId(), zone4.getId()), + List.of(SystemApplication.tenantHost)); + tester.configServer().addNodes(List.of(zone1.getId(), zone2.getId(), zone3.getId(), zone4.getId()), + List.of(SystemApplication.configServerHost)); // Not supported yet + + // Upgrade without budget fails + Version version = Version.fromString("7.1"); + try { + tester.controller().upgradeOsIn(cloud.name(), version, Optional.empty(), false); + fail("Expected exception"); + } catch (IllegalArgumentException ignored) {} + + // Upgrade with budget + tester.controller().upgradeOsIn(cloud.name(), version, Optional.of(Duration.ofHours(12)), false); + assertEquals(Duration.ofHours(12), tester.controller().osVersionTarget(cloud.name()).get().upgradeBudget().get()); + statusUpdater.maintain(); + osUpgrader.maintain(); + + // First zone upgrades + assertWanted(Version.emptyVersion, SystemApplication.configServerHost, zone1.getId()); + assertEquals("Dev zone gets a zero budget", Duration.ZERO, upgradeBudget(zone1.getId(), SystemApplication.tenantHost, version)); + completeUpgrade(version, SystemApplication.tenantHost, zone1.getId()); + + // Next set of zones upgrade + osUpgrader.maintain(); + for (var zone : List.of(zone2.getId(), zone3.getId())) { + assertEquals("Parallel prod zones share the budget of a single zone", Duration.ofHours(6), + upgradeBudget(zone, SystemApplication.tenantHost, version)); + completeUpgrade(version, SystemApplication.tenantHost, zone); + } + + // Last zone upgrades + osUpgrader.maintain(); + assertEquals("Last prod zone gets the budget of a single zone", Duration.ofHours(6), + upgradeBudget(zone4.getId(), SystemApplication.tenantHost, version)); + completeUpgrade(version, SystemApplication.tenantHost, zone4.getId()); + + // All host applications upgraded + statusUpdater.maintain(); + assertTrue("All nodes on target version", tester.controller().osVersionStatus().nodesIn(cloud.name()).stream() + .allMatch(node -> node.currentVersion().equals(version))); + } + + private Duration upgradeBudget(ZoneId zone, SystemApplication application, Version version) { + var upgradeBudget = tester.configServer().nodeRepository().osUpgradeBudget(zone, application.nodeType(), version); + assertTrue("Expected budget for upgrade to " + version + " of " + application.id() + " in " + zone, + upgradeBudget.isPresent()); + return upgradeBudget.get(); + } + private List nodesOn(Version version) { return tester.controller().osVersionStatus().versions().entrySet().stream() .filter(entry -> entry.getKey().version().equals(version)) @@ -165,12 +230,19 @@ public class OsUpgraderTest { return tester.configServer().nodeRepository(); } - private OsUpgrader osUpgrader(UpgradePolicy upgradePolicy, SystemName system) { + private OsUpgrader osUpgrader(UpgradePolicy upgradePolicy, SystemName system, Cloud managedCloud, Cloud... otherClouds) { + var zones = upgradePolicy.asList().stream().flatMap(Collection::stream).collect(Collectors.toList()); tester.zoneRegistry() - .setZones(zone1, zone2, zone3, zone4, zone5) + .setZones(zones) + .addCloud(managedCloud) + .addCloud(otherClouds) .setSystemName(system) - .setOsUpgradePolicy(CloudName.defaultName(), upgradePolicy); - return new OsUpgrader(tester.controller(), Duration.ofDays(1), Cloud.defaultCloud()); + .setOsUpgradePolicy(managedCloud.name(), upgradePolicy); + return new OsUpgrader(tester.controller(), Duration.ofDays(1), managedCloud); + } + + private static ZoneApi zone(String id, Cloud cloud) { + return ZoneApiMock.newBuilder().withId(id).withCloud(cloud.name().value()).build(); } } diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/OsVersionStatusUpdaterTest.java b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/OsVersionStatusUpdaterTest.java index 5ddd2064c32..e9a1adcfe88 100644 --- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/OsVersionStatusUpdaterTest.java +++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/OsVersionStatusUpdaterTest.java @@ -11,6 +11,7 @@ import com.yahoo.vespa.hosted.controller.versions.OsVersionStatus; import org.junit.Test; import java.time.Duration; +import java.util.Optional; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertFalse; @@ -40,7 +41,7 @@ public class OsVersionStatusUpdaterTest { // Setting a new target adds it to current status Version version1 = Version.fromString("7.1"); CloudName cloud = CloudName.defaultName(); - tester.controller().upgradeOsIn(cloud, version1, false); + tester.controller().upgradeOsIn(cloud, version1, Optional.empty(), false); statusUpdater.maintain(); var osVersions = tester.controller().osVersionStatus().versions(); diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/os/OsApiTest.java b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/os/OsApiTest.java index c1f6b116de2..42515fd0d3b 100644 --- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/os/OsApiTest.java +++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/os/OsApiTest.java @@ -52,6 +52,7 @@ public class OsApiTest extends ControllerContainerTest { addUserToHostedOperatorRole(operator); zoneRegistryMock().setSystemName(SystemName.cd) .setZones(zone1, zone2, zone3) + .addCloud(cloud1, cloud2) .setOsUpgradePolicy(cloud1.name(), UpgradePolicy.create().upgrade(zone1).upgrade(zone2)) .setOsUpgradePolicy(cloud2.name(), UpgradePolicy.create().upgrade(zone3)); osUpgraders = List.of( -- cgit v1.2.3