From 129aba3a591be60a5194aae79e3f6c1765efc222 Mon Sep 17 00:00:00 2001 From: Martin Polden Date: Fri, 15 May 2020 17:03:37 +0200 Subject: Shuffle upgrades * Avoids instances always upgrading in the same order * Prevents clumping together upgrades of instances under the same tenant --- .../controller/application/InstanceList.java | 33 +++++++------- .../hosted/controller/maintenance/Upgrader.java | 5 ++- .../controller/maintenance/UpgraderTest.java | 52 ++++++++++++++++++++++ 3 files changed, 74 insertions(+), 16 deletions(-) (limited to 'controller-server') diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/application/InstanceList.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/application/InstanceList.java index cc72074295f..773c927b588 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/application/InstanceList.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/application/InstanceList.java @@ -21,13 +21,16 @@ import static com.yahoo.vespa.hosted.controller.deployment.RunStatus.outOfCapaci import static java.util.Comparator.comparing; import static java.util.Comparator.naturalOrder; +/** + * @author jonmv + */ public class InstanceList extends AbstractFilteringList { - private final Map statuses; + private final Map instances; - private InstanceList(Collection items, boolean negate, Map statuses) { - super(items, negate, (i, n) -> new InstanceList(i, n, statuses)); - this.statuses = statuses; + private InstanceList(Collection items, boolean negate, Map instances) { + super(items, negate, (i, n) -> new InstanceList(i, n, instances)); + this.instances = instances; } public static InstanceList from(DeploymentStatusList statuses) { @@ -53,9 +56,9 @@ public class InstanceList extends AbstractFilteringList statuses.get(id).instanceSteps().get(id.instance()) - .readyAt(Change.of(version)) - .map(readyAt -> ! readyAt.isAfter(instant)).orElse(false)); + return matching(id -> instances.get(id).instanceSteps().get(id.instance()) + .readyAt(Change.of(version)) + .map(readyAt -> ! readyAt.isAfter(instant)).orElse(false)); } /** Returns the subset of instances which have at least one productiog deployment */ @@ -71,7 +74,7 @@ public class InstanceList extends AbstractFilteringList instance(id).change().hasTargets() || statuses.get(id).outstandingChange(id.instance()).hasTargets()); + return matching(id -> instance(id).change().hasTargets() || instances.get(id).outstandingChange(id.instance()).hasTargets()); } /** Returns the subset of instances which are currently deploying a change */ @@ -81,9 +84,9 @@ public class InstanceList extends AbstractFilteringList ! statuses.get(id).instanceJobs().get(id).failing() - .not().withStatus(outOfCapacity) - .lastCompleted().on(version).isEmpty()); + return matching(id -> ! instances.get(id).instanceJobs().get(id).failing() + .not().withStatus(outOfCapacity) + .lastCompleted().on(version).isEmpty()); } /** Returns the subset of instances which are not pinned to a certain Vespa version. */ @@ -93,12 +96,12 @@ public class InstanceList extends AbstractFilteringList ! statuses.get(id).instanceJobs().get(id).failing().not().withStatus(outOfCapacity).isEmpty()); + return matching(id -> ! instances.get(id).instanceJobs().get(id).failing().not().withStatus(outOfCapacity).isEmpty()); } /** Returns the subset of instances which are currently failing an upgrade. */ public InstanceList failingUpgrade() { - return matching(id -> ! statuses.get(id).instanceJobs().get(id).failing().not().failingApplicationChange().isEmpty()); + return matching(id -> ! instances.get(id).instanceJobs().get(id).failing().not().failingApplicationChange().isEmpty()); } /** Returns the subset of instances which are upgrading (to any version), not considering block windows. */ @@ -123,7 +126,7 @@ public class InstanceList extends AbstractFilteringList ! statuses.get(id).instanceJobs().get(id).firstFailing().on(version).isEmpty()); + return matching(id -> ! instances.get(id).instanceJobs().get(id).firstFailing().on(version).isEmpty()); } /** Returns this list sorted by increasing oldest production deployment version. Applications without any deployments are ordered first. */ @@ -135,7 +138,7 @@ public class InstanceList extends AbstractFilteringList controller().applications().deploymentTrigger().triggerChange(instance, Change.of(version))); diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/UpgraderTest.java b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/UpgraderTest.java index b49686237d8..cc92c6cd271 100644 --- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/UpgraderTest.java +++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/UpgraderTest.java @@ -2,21 +2,30 @@ package com.yahoo.vespa.hosted.controller.maintenance; import com.yahoo.component.Version; +import com.yahoo.config.provision.ApplicationId; import com.yahoo.config.provision.zone.ZoneId; +import com.yahoo.test.ManualClock; +import com.yahoo.vespa.hosted.controller.api.integration.deployment.RunId; import com.yahoo.vespa.hosted.controller.application.ApplicationPackage; import com.yahoo.vespa.hosted.controller.application.Change; import com.yahoo.vespa.hosted.controller.application.Deployment; import com.yahoo.vespa.hosted.controller.deployment.ApplicationPackageBuilder; import com.yahoo.vespa.hosted.controller.deployment.DeploymentContext; import com.yahoo.vespa.hosted.controller.deployment.DeploymentTester; +import com.yahoo.vespa.hosted.controller.deployment.Run; import com.yahoo.vespa.hosted.controller.versions.VespaVersion; import org.junit.Test; import java.time.Duration; import java.time.Instant; +import java.util.ArrayList; +import java.util.HashSet; import java.util.List; +import java.util.Map; import java.util.Optional; import java.util.OptionalInt; +import java.util.Set; +import java.util.stream.Collectors; import static com.yahoo.vespa.hosted.controller.api.integration.deployment.JobType.devUsEast1; import static com.yahoo.vespa.hosted.controller.api.integration.deployment.JobType.productionUsCentral1; @@ -1063,6 +1072,49 @@ public class UpgraderTest { assertEquals(174, upgrades); } + @Test + public void testUpgradeShuffling() { + // Deploy applications on initial version + var default0 = createAndDeploy("default0", "default"); + var default1 = createAndDeploy("default1", "default"); + var default2 = createAndDeploy("default2", "default"); + var applications = Map.of(default0.instanceId(), default0, + default1.instanceId(), default1, + default2.instanceId(), default2); + + // Throttle upgrades per run + ((ManualClock) tester.controller().clock()).setInstant(Instant.ofEpochMilli(1589787109000L)); // Fixed random seed + Upgrader upgrader = new Upgrader(tester.controller(), Duration.ofMinutes(10), + tester.controllerTester().curator()); + upgrader.setUpgradesPerMinute(0.1); + + // Trigger some upgrades + List versions = List.of(Version.fromString("6.2"), Version.fromString("6.3")); + Set> upgradeOrders = new HashSet<>(versions.size()); + for (var version : versions) { + // Upgrade system + tester.controllerTester().upgradeSystem(version); + List upgraderOrder = new ArrayList<>(applications.size()); + + // Upgrade all applications + for (int i = 0; i < applications.size(); i++) { + upgrader.maintain(); + tester.triggerJobs(); + Set triggered = tester.jobs().active().stream() + .map(Run::id) + .map(RunId::application) + .collect(Collectors.toSet()); + assertEquals("Expected number of applications is triggered", 1, triggered.size()); + ApplicationId application = triggered.iterator().next(); + upgraderOrder.add(application); + applications.get(application).completeRollout(); + tester.clock().advance(Duration.ofMinutes(1)); + } + upgradeOrders.add(upgraderOrder); + } + assertEquals("Upgrade orders are distinct", versions.size(), upgradeOrders.size()); + } + private ApplicationPackage applicationPackage(String upgradePolicy) { return new ApplicationPackageBuilder().upgradePolicy(upgradePolicy) .region("us-west-1") -- cgit v1.2.3