diff options
10 files changed, 246 insertions, 92 deletions
diff --git a/config-provisioning/src/main/java/com/yahoo/config/provision/zone/NodeSlice.java b/config-provisioning/src/main/java/com/yahoo/config/provision/zone/NodeSlice.java new file mode 100644 index 00000000000..f7f6bd10603 --- /dev/null +++ b/config-provisioning/src/main/java/com/yahoo/config/provision/zone/NodeSlice.java @@ -0,0 +1,46 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.config.provision.zone; + +import java.util.Objects; +import java.util.OptionalDouble; +import java.util.OptionalLong; + +/** + * A slice of nodes, satisfied by either a minimum count or a fraction. + * + * @author mpolden + */ +public record NodeSlice(OptionalDouble fraction, OptionalLong minCount) { + + public static final NodeSlice ALL = minCount(Long.MAX_VALUE); + + public NodeSlice { + Objects.requireNonNull(fraction); + Objects.requireNonNull(minCount); + if (fraction.isEmpty() == minCount.isEmpty()) { + throw new IllegalArgumentException("Exactly one of 'fraction' or 'minCount' must be set"); + } + if (fraction.isPresent() && fraction.getAsDouble() > 1.0D) { + throw new IllegalArgumentException("Fraction must be <= 1.0, got " + fraction.getAsDouble()); + } + } + + /** Returns whether this slice is satisfied by given node count, out of totalCount */ + public boolean satisfiedBy(long count, long totalCount) { + if (fraction.isPresent()) { + return count >= totalCount * fraction.getAsDouble(); + } + return count >= Math.min(minCount.orElse(0), totalCount); + } + + /** Returns a slice matching the given fraction of nodes */ + public static NodeSlice fraction(double fraction) { + return new NodeSlice(OptionalDouble.of(fraction), OptionalLong.empty()); + } + + /** Returns a slice matching the given minimum number of nodes */ + public static NodeSlice minCount(long count) { + return new NodeSlice(OptionalDouble.empty(), OptionalLong.of(count)); + } + +} diff --git a/config-provisioning/src/main/java/com/yahoo/config/provision/zone/UpgradePolicy.java b/config-provisioning/src/main/java/com/yahoo/config/provision/zone/UpgradePolicy.java index 85cc384660d..1c5bfad4c47 100644 --- a/config-provisioning/src/main/java/com/yahoo/config/provision/zone/UpgradePolicy.java +++ b/config-provisioning/src/main/java/com/yahoo/config/provision/zone/UpgradePolicy.java @@ -4,6 +4,7 @@ package com.yahoo.config.provision.zone; import java.util.ArrayList; import java.util.Collections; import java.util.List; +import java.util.Objects; import java.util.Set; /** @@ -12,14 +13,13 @@ import java.util.Set; * * @author mpolden */ -public class UpgradePolicy { +public record UpgradePolicy(List<Step> steps) { - private final List<Set<ZoneApi>> steps; - - private UpgradePolicy(List<Set<ZoneApi>> steps) { + public UpgradePolicy(List<Step> steps) { + Objects.requireNonNull(steps); for (int i = 0; i < steps.size(); i++) { for (int j = 0; j < i; j++) { - if (!Collections.disjoint(steps.get(i), steps.get(j))) { + if (!Collections.disjoint(steps.get(i).zones(), steps.get(j).zones())) { throw new IllegalArgumentException("One or more zones are declared in multiple steps"); } } @@ -27,14 +27,9 @@ public class UpgradePolicy { this.steps = List.copyOf(steps); } - /** Returns the steps in this */ - public List<Set<ZoneApi>> steps() { - return steps; - } - /** Returns a copy of this with the step order inverted */ public UpgradePolicy inverted() { - List<Set<ZoneApi>> copy = new ArrayList<>(steps); + List<Step> copy = new ArrayList<>(steps); Collections.reverse(copy); return new UpgradePolicy(copy); } @@ -43,21 +38,23 @@ public class UpgradePolicy { return new UpgradePolicy.Builder(); } - public static class Builder { + public record Builder(List<Step> steps) { - private final List<Set<ZoneApi>> steps = new ArrayList<>(); + private Builder() { + this(new ArrayList<>()); + } - private Builder() {} + public Builder upgrade(Step step) { + this.steps.add(step); + return this; + } - /** Upgrade given zone as the next step */ public Builder upgrade(ZoneApi zone) { return upgradeInParallel(zone); } - /** Upgrade given zones in parallel as the next step */ public Builder upgradeInParallel(ZoneApi... zone) { - this.steps.add(Set.of(zone)); - return this; + return upgrade(Step.of(zone)); } public UpgradePolicy build() { @@ -66,4 +63,28 @@ public class UpgradePolicy { } + /** + * An upgrade step, consisting of one or more zones. If a step contains multiple zones, those will be upgraded in + * parallel. + */ + public record Step(Set<ZoneApi> zones, NodeSlice nodeSlice) { + + public Step(Set<ZoneApi> zones, NodeSlice nodeSlice) { + if (zones.isEmpty()) throw new IllegalArgumentException("A step must contain at least one zone"); + this.zones = Set.copyOf(Objects.requireNonNull(zones)); + this.nodeSlice = Objects.requireNonNull(nodeSlice); + } + + /** Create a step for given zones, which requires all nodes to complete upgrade */ + public static Step of(ZoneApi... zone) { + return new Step(Set.of(zone), NodeSlice.ALL); + } + + /** Returns a copy of this step, requiring only the given slice of nodes for each zone in this step to upgrade */ + public Step require(NodeSlice slice) { + return new Step(zones, slice); + } + + } + } diff --git a/config-provisioning/src/test/java/com/yahoo/config/provision/zone/NodeSliceTest.java b/config-provisioning/src/test/java/com/yahoo/config/provision/zone/NodeSliceTest.java new file mode 100644 index 00000000000..b13eba5fa32 --- /dev/null +++ b/config-provisioning/src/test/java/com/yahoo/config/provision/zone/NodeSliceTest.java @@ -0,0 +1,32 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.config.provision.zone; + +import org.junit.jupiter.api.Test; + +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertTrue; + +/** + * @author mpolden + */ +public class NodeSliceTest { + + @Test + void node_slice() { + NodeSlice fraction = NodeSlice.fraction(0.6); + assertFalse(fraction.satisfiedBy(0, 4)); + assertFalse(fraction.satisfiedBy(1, 4)); + assertFalse(fraction.satisfiedBy(2, 4)); + assertTrue(fraction.satisfiedBy(3, 4)); + assertTrue(fraction.satisfiedBy(4, 4)); + assertTrue(fraction.satisfiedBy(5, 4)); + + NodeSlice fixed = NodeSlice.minCount(5); + assertFalse(fixed.satisfiedBy(0, 5)); + assertFalse(fixed.satisfiedBy(4, 5)); + assertTrue(fixed.satisfiedBy(3, 3)); + assertTrue(fixed.satisfiedBy(5, 5)); + assertTrue(fixed.satisfiedBy(6, 5)); + } + +} diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/InfrastructureUpgrader.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/InfrastructureUpgrader.java index 82413f21222..1454d78ce33 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/InfrastructureUpgrader.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/InfrastructureUpgrader.java @@ -3,6 +3,7 @@ package com.yahoo.vespa.hosted.controller.maintenance; import com.yahoo.component.Version; import com.yahoo.config.provision.SystemName; +import com.yahoo.config.provision.zone.NodeSlice; import com.yahoo.config.provision.zone.UpgradePolicy; import com.yahoo.config.provision.zone.ZoneApi; import com.yahoo.text.Text; @@ -25,6 +26,7 @@ import java.util.Optional; import java.util.Set; import java.util.function.Function; import java.util.logging.Logger; +import java.util.stream.Collectors; /** * Base class for maintainers that upgrade zone infrastructure. @@ -57,22 +59,22 @@ public abstract class InfrastructureUpgrader<TARGET extends VersionTarget> exten int failures = 0; // Invert zone order if we're downgrading UpgradePolicy policy = target.downgrade() ? upgradePolicy.inverted() : upgradePolicy; - for (Set<ZoneApi> step : policy.steps()) { + for (UpgradePolicy.Step step : policy.steps()) { boolean converged = true; - for (ZoneApi zone : step) { + for (ZoneApi zone : step.zones()) { try { attempts++; - converged &= upgradeAll(target, applications, zone); + converged &= upgradeAll(target, applications, zone, step.nodeSlice()); } catch (UnreachableNodeRepositoryException e) { failures++; converged = false; log.warning(Text.format("%s: Failed to communicate with node repository in %s, continuing with next parallel zone: %s", - this, zone, Exceptions.toMessageString(e))); + this, zone, Exceptions.toMessageString(e))); } catch (Exception e) { failures++; converged = false; log.warning(Text.format("%s: Failed to upgrade zone: %s, continuing with next parallel zone: %s", - this, zone, Exceptions.toMessageString(e))); + this, zone, Exceptions.toMessageString(e))); } } if (!converged) { @@ -83,7 +85,7 @@ public abstract class InfrastructureUpgrader<TARGET extends VersionTarget> exten } /** Returns whether all applications have converged to the target version in zone */ - private boolean upgradeAll(TARGET target, List<SystemApplication> applications, ZoneApi zone) { + private boolean upgradeAll(TARGET target, List<SystemApplication> applications, ZoneApi zone, NodeSlice nodeSlice) { Map<SystemApplication, Set<SystemApplication>> dependenciesByApplication = new HashMap<>(); if (target.downgrade()) { // Invert dependencies when we're downgrading for (var application : applications) { @@ -100,28 +102,25 @@ public abstract class InfrastructureUpgrader<TARGET extends VersionTarget> exten for (var kv : dependenciesByApplication.entrySet()) { SystemApplication application = kv.getKey(); Set<SystemApplication> dependencies = kv.getValue(); - if (convergedOn(target, dependencies, zone)) { - if (changeTargetTo(target, application, zone)) { + boolean allConverged = dependencies.stream().allMatch(app -> convergedOn(target, app, zone, nodeSlice)); + if (allConverged) { + if (changeTargetTo(target, application, zone, nodeSlice)) { upgrade(target, application, zone); } - converged &= convergedOn(target, application, zone); + converged &= convergedOn(target, application, zone, nodeSlice); } } return converged; } - private boolean convergedOn(TARGET target, Set<SystemApplication> applications, ZoneApi zone) { - return applications.stream().allMatch(application -> convergedOn(target, application, zone)); - } - /** Returns whether target version for application in zone should be changed */ - protected abstract boolean changeTargetTo(TARGET target, SystemApplication application, ZoneApi zone); + protected abstract boolean changeTargetTo(TARGET target, SystemApplication application, ZoneApi zone, NodeSlice nodeSlice); /** Upgrade component to target version. Implementation should be idempotent */ protected abstract void upgrade(TARGET target, SystemApplication application, ZoneApi zone); /** Returns whether application has converged to target version in zone */ - protected abstract boolean convergedOn(TARGET target, SystemApplication application, ZoneApi zone); + protected abstract boolean convergedOn(TARGET target, SystemApplication application, ZoneApi zone, NodeSlice nodeSlice); /** Returns the version target for the component upgraded by this, if any */ protected abstract Optional<TARGET> target(); @@ -129,19 +128,34 @@ public abstract class InfrastructureUpgrader<TARGET extends VersionTarget> exten /** Returns whether the upgrader should expect given node to upgrade */ protected abstract boolean expectUpgradeOf(Node node, SystemApplication application, ZoneApi zone); - /** Find the minimum value of a version field in a zone by comparing all nodes */ - protected final Optional<Version> minVersion(ZoneApi zone, SystemApplication application, Function<Node, Version> versionField) { + /** Find the highest version used by nodes satisfying nodeSlice in zone. If no such slice exists, the lowest known version is returned */ + protected final Optional<Version> versionOf(NodeSlice nodeSlice, ZoneApi zone, SystemApplication application, Function<Node, Version> versionField) { try { - return controller().serviceRegistry().configServer() - .nodeRepository() - .list(zone.getVirtualId(), NodeFilter.all().applications(application.id())) - .stream() - .filter(node -> expectUpgradeOf(node, application, zone)) - .map(versionField) - .min(Comparator.naturalOrder()); + Map<Version, Long> nodeCountByVersion = controller().serviceRegistry().configServer() + .nodeRepository() + .list(zone.getVirtualId(), NodeFilter.all().applications(application.id())) + .stream() + .filter(node -> expectUpgradeOf(node, application, zone)) + .collect(Collectors.groupingBy(versionField, + Collectors.counting())); + long totalNodes = nodeCountByVersion.values().stream().reduce(Long::sum).orElse(0L); + Set<Version> versionsOfMatchingSlices = new HashSet<>(); + for (var kv : nodeCountByVersion.entrySet()) { + long nodesOnVersion = kv.getValue(); + if (nodeSlice.satisfiedBy(nodesOnVersion, totalNodes)) { + versionsOfMatchingSlices.add(kv.getKey()); + } + } + if (!versionsOfMatchingSlices.isEmpty()) { + // Choose the highest version in case we have several matching slices + return versionsOfMatchingSlices.stream().max(Comparator.naturalOrder()); + } + // No matching slices found, fall back to the lowest known version + return nodeCountByVersion.keySet().stream().min(Comparator.naturalOrder()); } catch (Exception e) { throw new UnreachableNodeRepositoryException(Text.format("Failed to get version for %s in %s: %s", - application.id(), zone, Exceptions.toMessageString(e))); + application.id(), zone, + Exceptions.toMessageString(e))); } } diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/OsUpgrader.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/OsUpgrader.java index 4850f005ac7..46b504cadff 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/OsUpgrader.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/OsUpgrader.java @@ -3,6 +3,7 @@ package com.yahoo.vespa.hosted.controller.maintenance; import com.yahoo.component.Version; import com.yahoo.config.provision.CloudName; +import com.yahoo.config.provision.zone.NodeSlice; import com.yahoo.config.provision.zone.ZoneApi; import com.yahoo.text.Text; import com.yahoo.vespa.hosted.controller.Controller; @@ -54,8 +55,9 @@ public class OsUpgrader extends InfrastructureUpgrader<OsVersionTarget> { } @Override - protected boolean convergedOn(OsVersionTarget target, SystemApplication application, ZoneApi zone) { - return !currentVersion(zone, application, target.osVersion().version()).isBefore(target.osVersion().version()); + protected boolean convergedOn(OsVersionTarget target, SystemApplication application, ZoneApi zone, NodeSlice nodeSlice) { + Version currentVersion = versionOf(nodeSlice, zone, application, Node::currentOsVersion).orElse(target.osVersion().version()); + return !currentVersion.isBefore(target.osVersion().version()); } @Override @@ -74,7 +76,7 @@ public class OsUpgrader extends InfrastructureUpgrader<OsVersionTarget> { } @Override - protected boolean changeTargetTo(OsVersionTarget target, SystemApplication application, ZoneApi zone) { + protected boolean changeTargetTo(OsVersionTarget target, SystemApplication application, ZoneApi zone, NodeSlice nodeSlice) { if (!application.shouldUpgradeOs()) return false; return controller().serviceRegistry().configServer().nodeRepository() .targetVersionsOf(zone.getVirtualId()) @@ -83,15 +85,11 @@ public class OsUpgrader extends InfrastructureUpgrader<OsVersionTarget> { .orElse(true); } - private Version currentVersion(ZoneApi zone, SystemApplication application, Version defaultVersion) { - return minVersion(zone, application, Node::currentOsVersion).orElse(defaultVersion); - } - /** Returns the available upgrade budget for given zone */ private Duration zoneBudgetOf(Duration totalBudget, ZoneApi zone) { if (!spendBudgetOn(zone)) return Duration.ZERO; long consecutiveZones = upgradePolicy.steps().stream() - .filter(parallelZones -> parallelZones.stream().anyMatch(this::spendBudgetOn)) + .filter(step -> step.zones().stream().anyMatch(this::spendBudgetOn)) .count(); return totalBudget.dividedBy(consecutiveZones); } diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/SystemUpgrader.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/SystemUpgrader.java index 8d5851be62f..86587c8e9f7 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/SystemUpgrader.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/SystemUpgrader.java @@ -2,6 +2,7 @@ package com.yahoo.vespa.hosted.controller.maintenance; import com.yahoo.component.Version; +import com.yahoo.config.provision.zone.NodeSlice; import com.yahoo.config.provision.zone.RoutingMethod; import com.yahoo.config.provision.zone.ZoneApi; import com.yahoo.text.Text; @@ -39,12 +40,12 @@ public class SystemUpgrader extends InfrastructureUpgrader<VespaVersionTarget> { } @Override - protected boolean convergedOn(VespaVersionTarget target, SystemApplication application, ZoneApi zone) { - Optional<Version> minVersion = minVersion(zone, application, Node::currentVersion); + protected boolean convergedOn(VespaVersionTarget target, SystemApplication application, ZoneApi zone, NodeSlice nodeSlice) { + Optional<Version> currentVersion = versionOf(nodeSlice, zone, application, Node::currentVersion); // Skip application convergence check if there are no nodes belonging to the application in the zone - if (minVersion.isEmpty()) return true; + if (currentVersion.isEmpty()) return true; - return minVersion.get().equals(target.version()) && + return currentVersion.get().equals(target.version()) && application.configConvergedIn(zone.getId(), controller(), Optional.of(target.version())); } @@ -73,13 +74,13 @@ public class SystemUpgrader extends InfrastructureUpgrader<VespaVersionTarget> { } @Override - protected boolean changeTargetTo(VespaVersionTarget target, SystemApplication application, ZoneApi zone) { + protected boolean changeTargetTo(VespaVersionTarget target, SystemApplication application, ZoneApi zone, NodeSlice nodeSlice) { if (application.hasApplicationPackage()) { // For applications with package we do not have a zone-wide version target. This means that we must check // the wanted version of each node. boolean zoneHasSharedRouting = controller().zoneRegistry().routingMethods(zone.getId()).stream() .anyMatch(RoutingMethod::isShared); - return minVersion(zone, application, Node::wantedVersion) + return versionOf(nodeSlice, zone, application, Node::wantedVersion) .map(wantedVersion -> !wantedVersion.equals(target.version())) .orElse(zoneHasSharedRouting); // Always upgrade if zone uses shared routing, but has no nodes allocated yet diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/versions/OsVersionStatus.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/versions/OsVersionStatus.java index d6356f294dc..6f9888b79e0 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/versions/OsVersionStatus.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/versions/OsVersionStatus.java @@ -4,6 +4,7 @@ package com.yahoo.vespa.hosted.controller.versions; import com.google.common.collect.ImmutableMap; import com.yahoo.component.Version; import com.yahoo.config.provision.CloudName; +import com.yahoo.config.provision.zone.UpgradePolicy; import com.yahoo.config.provision.zone.ZoneApi; import com.yahoo.vespa.hosted.controller.Controller; import com.yahoo.vespa.hosted.controller.api.integration.configserver.NodeFilter; @@ -83,6 +84,7 @@ public record OsVersionStatus(Map<OsVersion, List<NodeVersion>> versions) { private static List<ZoneApi> zonesToUpgrade(Controller controller) { return controller.zoneRegistry().osUpgradePolicies().stream() .flatMap(upgradePolicy -> upgradePolicy.steps().stream()) + .map(UpgradePolicy.Step::zones) .flatMap(Collection::stream) .toList(); } diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/integration/ZoneApiMock.java b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/integration/ZoneApiMock.java index d173fcb0e18..4bb35d748db 100644 --- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/integration/ZoneApiMock.java +++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/integration/ZoneApiMock.java @@ -76,6 +76,11 @@ public class ZoneApiMock implements ZoneApi { return Objects.hash(id); } + @Override + public String toString() { + return id.toString(); + } + public static class Builder { private SystemName systemName = SystemName.defaultSystem(); diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/OsUpgraderTest.java b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/OsUpgraderTest.java index a128bfb897d..1c8a7ac641d 100644 --- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/OsUpgraderTest.java +++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/OsUpgraderTest.java @@ -3,7 +3,9 @@ package com.yahoo.vespa.hosted.controller.maintenance; import com.yahoo.component.Version; import com.yahoo.config.provision.CloudName; +import com.yahoo.config.provision.zone.NodeSlice; import com.yahoo.config.provision.zone.UpgradePolicy; +import com.yahoo.config.provision.zone.UpgradePolicy.Step; import com.yahoo.config.provision.zone.ZoneApi; import com.yahoo.config.provision.zone.ZoneId; import com.yahoo.vespa.hosted.controller.ControllerTester; @@ -45,12 +47,12 @@ public class OsUpgraderTest { ZoneApi zone4 = zone("prod.us-east-3", cloud1); ZoneApi zone5 = zone("prod.us-north-1", cloud2); UpgradePolicy upgradePolicy = UpgradePolicy.builder() - .upgrade(zone0) - .upgrade(zone1) - .upgradeInParallel(zone2, zone3) - .upgrade(zone5) // Belongs to a different cloud and is ignored by this upgrader - .upgrade(zone4) - .build(); + .upgrade(zone0) + .upgrade(zone1) + .upgrade(Step.of(zone2, zone3).require(NodeSlice.minCount(1))) + .upgrade(zone5) // Belongs to a different cloud and is ignored by this upgrader + .upgrade(zone4) + .build(); OsUpgrader osUpgrader = osUpgrader(upgradePolicy, cloud1, false); // Bootstrap system @@ -103,13 +105,14 @@ public class OsUpgraderTest { // zone 4: still on previous version assertWanted(Version.emptyVersion, SystemApplication.tenantHost, zone4); - // zone 2 and 3: completes upgrade - completeUpgrade(version1, SystemApplication.tenantHost, zone2, zone3); + // zone 2 and 3: enough nodes upgrade to satisfy node slice of this step + completeUpgrade(1, version1, SystemApplication.tenantHost, zone2); + completeUpgrade(1, version1, SystemApplication.tenantHost, zone3); assertEquals(Version.emptyVersion, - nodeRepository().list(zone2.getVirtualId(), NodeFilter.all().hostnames(nodeDeferringOsUpgrade.hostname())) - .get(0) - .currentOsVersion(), - "Current version is unchanged for node deferring OS upgrade"); + nodeRepository().list(zone2.getVirtualId(), NodeFilter.all().hostnames(nodeDeferringOsUpgrade.hostname())) + .get(0) + .currentOsVersion(), + "Current version is unchanged for node deferring OS upgrade"); // zone 4: begins upgrading osUpgrader.maintain(); @@ -118,6 +121,9 @@ public class OsUpgraderTest { // zone 4: completes upgrade completeUpgrade(version1, SystemApplication.tenantHost, zone4); + // zone 2 and 3: stragglers complete upgrade + completeUpgrade(version1, SystemApplication.tenantHost, zone2, zone3); + // Next run does nothing as all zones are upgraded osUpgrader.maintain(); assertWanted(version1, SystemApplication.tenantHost, zone1, zone2, zone3, zone4); @@ -223,14 +229,14 @@ public class OsUpgraderTest { osUpgrader.maintain(); assertWanted(version, SystemApplication.tenantHost, zone1); Version chosenVersion = Version.fromString("7.1.1"); // Upgrade mechanism chooses a slightly newer version - completeUpgrade(version, chosenVersion, SystemApplication.tenantHost, zone1); + completeUpgrade(Integer.MAX_VALUE, version, chosenVersion, SystemApplication.tenantHost, zone1); statusUpdater.maintain(); assertEquals(3, nodesOn(chosenVersion).size()); // zone 2 upgrades osUpgrader.maintain(); assertWanted(version, SystemApplication.tenantHost, zone2); - completeUpgrade(version, chosenVersion, SystemApplication.tenantHost, zone2); + completeUpgrade(Integer.MAX_VALUE, version, chosenVersion, SystemApplication.tenantHost, zone2); statusUpdater.maintain(); assertEquals(6, nodesOn(chosenVersion).size()); @@ -272,7 +278,7 @@ public class OsUpgraderTest { ZoneApi... zones) { for (ZoneApi zone : zones) { for (Node node : nodesRequiredToUpgrade(zone, application)) { - assertEquals(version, versionField.apply(node), application + " version in " + zone); + assertEquals(version, versionField.apply(node), application + " version in " + zone.getId()); } } } @@ -305,18 +311,30 @@ public class OsUpgraderTest { /** Simulate OS upgrade of nodes allocated to application. In a real system this is done by the node itself */ private void completeUpgrade(Version version, SystemApplication application, ZoneApi... zones) { - completeUpgrade(version, version, application, zones); + completeUpgrade(-1, version, application, zones); + } + + private void completeUpgrade(int nodeCount, Version version, SystemApplication application, ZoneApi... zones) { + completeUpgrade(nodeCount, version, version, application, zones); } - private void completeUpgrade(Version wantedVersion, Version version, SystemApplication application, ZoneApi... zones) { + private void completeUpgrade(int nodeCount, Version wantedVersion, Version version, SystemApplication application, ZoneApi... zones) { assertWanted(wantedVersion, application, zones); for (ZoneApi zone : zones) { - for (Node node : nodesRequiredToUpgrade(zone, application)) { + int nodesUpgraded = 0; + List<Node> nodes = nodesRequiredToUpgrade(zone, application); + for (Node node : nodes) { + if (node.currentVersion().equals(wantedVersion)) continue; nodeRepository().putNodes(zone.getVirtualId(), Node.builder(node).wantedOsVersion(version) .currentOsVersion(version) .build()); + if (++nodesUpgraded == nodeCount) { + break; + } + } + if (nodesUpgraded == nodes.size()) { + assertCurrent(version, application, zone); } - assertCurrent(version, application, zone); } } @@ -325,7 +343,7 @@ public class OsUpgraderTest { } private OsUpgrader osUpgrader(UpgradePolicy upgradePolicy, CloudName cloud, boolean reprovisionToUpgradeOs) { - var zones = upgradePolicy.steps().stream().flatMap(Collection::stream).collect(Collectors.toList()); + var zones = upgradePolicy.steps().stream().map(Step::zones).flatMap(Collection::stream).collect(Collectors.toList()); tester.zoneRegistry() .setZones(zones) .setOsUpgradePolicy(cloud, upgradePolicy); diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/SystemUpgraderTest.java b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/SystemUpgraderTest.java index 3ccd7e695c7..7426e0dd23b 100644 --- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/SystemUpgraderTest.java +++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/SystemUpgraderTest.java @@ -68,13 +68,14 @@ public class SystemUpgraderTest { assertWantedVersion(SystemApplication.configServer, version2, zone1); // Other zones remain on previous version assertWantedVersion(SystemApplication.configServer, version1, zone2, zone3, zone4); - // Zone application is not upgraded yet + // Proxy application is not upgraded yet assertWantedVersion(SystemApplication.proxy, version1, zone1, zone2, zone3, zone4); - // zone1: zone-config-server upgrades + // zone1: config server upgrades and proxy application completeUpgrade(SystemApplication.configServer, version2, zone1); + systemUpgrader.maintain(); - // zone 1: proxy-application upgrades + // zone 1: proxy application upgrades systemUpgrader.maintain(); assertWantedVersion(SystemApplication.proxy, version2, zone1); completeUpgrade(SystemApplication.proxy, version2, zone1); @@ -85,41 +86,45 @@ public class SystemUpgraderTest { assertWantedVersion(SystemApplication.configServer, version1, zone2, zone3, zone4); assertWantedVersion(SystemApplication.proxy, version1, zone2, zone3, zone4); - // zone 2 and 3: upgrade does not start until zone 1 zone-application config converges + // zone 2 and 3: upgrade does not start until zone 1 proxy application config converges systemUpgrader.maintain(); assertWantedVersion(SystemApplication.configServer, version1, zone2, zone3); convergeServices(SystemApplication.proxy, zone1); - // zone 2 and 3: zone-config-server upgrades, first in zone 2, then in zone 3 + // zone 2 and 3: config server upgrades, first in zone 2, then in zone 3 systemUpgrader.maintain(); assertWantedVersion(SystemApplication.configServer, version2, zone2, zone3); assertWantedVersion(SystemApplication.configServer, version1, zone4); assertWantedVersion(SystemApplication.proxy, version1, zone2, zone3, zone4); completeUpgrade(SystemApplication.configServer, version2, zone2); - // zone-application starts upgrading in zone 2, while zone-config-server completes upgrade in zone 3 + // proxy application starts upgrading in zone 2, while config server completes upgrade in zone 3 systemUpgrader.maintain(); assertWantedVersion(SystemApplication.proxy, version2, zone2); assertWantedVersion(SystemApplication.proxy, version1, zone3); completeUpgrade(SystemApplication.configServer, version2, zone3); - // zone 2 and 3: proxy-application upgrades in parallel + // zone 2 and 3: proxy application upgrades in parallel systemUpgrader.maintain(); assertWantedVersion(SystemApplication.proxy, version2, zone2, zone3); completeUpgrade(SystemApplication.proxy, version2, zone2, zone3); convergeServices(SystemApplication.proxy, zone2, zone3); - // zone 4: zone-config-server upgrades + // zone 4: config server upgrades systemUpgrader.maintain(); assertWantedVersion(SystemApplication.configServer, version2, zone4); assertWantedVersion(SystemApplication.proxy, version1, zone4); - completeUpgrade(SystemApplication.configServer, version2, zone4); + // zone 4: proxy application does not upgrade until all config servers are done + completeUpgrade(2, SystemApplication.configServer, version2, zone4); + systemUpgrader.maintain(); + assertWantedVersion(SystemApplication.proxy, version1, zone4); + completeUpgrade(1, SystemApplication.configServer, version2, zone4); // System version remains unchanged until final application upgrades tester.computeVersionStatus(); assertSystemVersion(version1); - // zone 4: proxy-application upgrades + // zone 4: proxy application upgrades systemUpgrader.maintain(); assertWantedVersion(SystemApplication.proxy, version2, zone4); completeUpgrade(SystemApplication.proxy, version2, zone4); @@ -157,7 +162,7 @@ public class SystemUpgraderTest { systemUpgrader.maintain(); completeUpgrade(SystemApplication.proxy, version2, zone1); tester.computeVersionStatus(); - assertSystemVersion(version1); // Unchanged until proxy-application converges + assertSystemVersion(version1); // Unchanged until proxy application converges // Controller upgrades again Version version3 = Version.fromString("6.7"); @@ -165,7 +170,7 @@ public class SystemUpgraderTest { assertSystemVersion(version1); assertControllerVersion(version3); - // zone 1: proxy-application converges and system version changes + // zone 1: proxy application converges and system version changes convergeServices(SystemApplication.proxy, zone1); tester.computeVersionStatus(); assertSystemVersion(version2); @@ -370,16 +375,28 @@ public class SystemUpgraderTest { tester.computeVersionStatus(); } - /** Simulate upgrade of nodes allocated to given application. In a real system this is done by the node itself */ private void completeUpgrade(SystemApplication application, Version version, ZoneApi first, ZoneApi... rest) { + completeUpgrade(-1, application, version, first, rest); + } + + /** Simulate upgrade of nodes allocated to given application. In a real system this is done by the node itself */ + private void completeUpgrade(int nodeCount, SystemApplication application, Version version, ZoneApi first, ZoneApi... rest) { assertWantedVersion(application, version, first, rest); Stream.concat(Stream.of(first), Stream.of(rest)).forEach(zone -> { - for (Node node : listNodes(zone, application)) { + int nodesUpgraded = 0; + List<Node> nodes = listNodes(zone, application); + for (Node node : nodes) { + if (node.currentVersion().equals(node.wantedVersion())) continue; nodeRepository().putNodes( zone.getId(), Node.builder(node).currentVersion(node.wantedVersion()).build()); + if (++nodesUpgraded == nodeCount) { + break; + } + } + if (nodesUpgraded == nodes.size()) { + assertCurrentVersion(application, version, zone); } - assertCurrentVersion(application, version, zone); }); } |