diff options
author | Martin Polden <mpolden@mpolden.no> | 2020-05-18 10:33:27 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2020-05-18 10:33:27 +0200 |
commit | 51914f5d996ef405968ba44d08c25a0455ec56f8 (patch) | |
tree | 9a77b14790f9417640aeceee27a9d8881b572af1 /controller-server | |
parent | 8f7170a652e8abaa496b10d3c629559997e123c7 (diff) | |
parent | 129aba3a591be60a5194aae79e3f6c1765efc222 (diff) |
Merge pull request #13274 from vespa-engine/mpolden/shuffle-upgrades
Shuffle upgrades
Diffstat (limited to 'controller-server')
3 files changed, 74 insertions, 16 deletions
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/application/InstanceList.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/application/InstanceList.java index cc72074295f..773c927b588 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/application/InstanceList.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/application/InstanceList.java @@ -21,13 +21,16 @@ import static com.yahoo.vespa.hosted.controller.deployment.RunStatus.outOfCapaci import static java.util.Comparator.comparing; import static java.util.Comparator.naturalOrder; +/** + * @author jonmv + */ public class InstanceList extends AbstractFilteringList<ApplicationId, InstanceList> { - private final Map<ApplicationId, DeploymentStatus> statuses; + private final Map<ApplicationId, DeploymentStatus> instances; - private InstanceList(Collection<? extends ApplicationId> items, boolean negate, Map<ApplicationId, DeploymentStatus> statuses) { - super(items, negate, (i, n) -> new InstanceList(i, n, statuses)); - this.statuses = statuses; + private InstanceList(Collection<? extends ApplicationId> items, boolean negate, Map<ApplicationId, DeploymentStatus> instances) { + super(items, negate, (i, n) -> new InstanceList(i, n, instances)); + this.instances = instances; } public static InstanceList from(DeploymentStatusList statuses) { @@ -53,9 +56,9 @@ public class InstanceList extends AbstractFilteringList<ApplicationId, InstanceL /** Returns the subset of instances that are allowed to upgrade to the given version at the given time */ public InstanceList canUpgradeAt(Version version, Instant instant) { - return matching(id -> statuses.get(id).instanceSteps().get(id.instance()) - .readyAt(Change.of(version)) - .map(readyAt -> ! readyAt.isAfter(instant)).orElse(false)); + return matching(id -> instances.get(id).instanceSteps().get(id.instance()) + .readyAt(Change.of(version)) + .map(readyAt -> ! readyAt.isAfter(instant)).orElse(false)); } /** Returns the subset of instances which have at least one productiog deployment */ @@ -71,7 +74,7 @@ public class InstanceList extends AbstractFilteringList<ApplicationId, InstanceL /** Returns the subset of instances which have changes left to deploy; blocked, or deploying */ public InstanceList withChanges() { - return matching(id -> instance(id).change().hasTargets() || statuses.get(id).outstandingChange(id.instance()).hasTargets()); + return matching(id -> instance(id).change().hasTargets() || instances.get(id).outstandingChange(id.instance()).hasTargets()); } /** Returns the subset of instances which are currently deploying a change */ @@ -81,9 +84,9 @@ public class InstanceList extends AbstractFilteringList<ApplicationId, InstanceL /** Returns the subset of instances which currently have failing jobs on the given version */ public InstanceList failingOn(Version version) { - return matching(id -> ! statuses.get(id).instanceJobs().get(id).failing() - .not().withStatus(outOfCapacity) - .lastCompleted().on(version).isEmpty()); + return matching(id -> ! instances.get(id).instanceJobs().get(id).failing() + .not().withStatus(outOfCapacity) + .lastCompleted().on(version).isEmpty()); } /** Returns the subset of instances which are not pinned to a certain Vespa version. */ @@ -93,12 +96,12 @@ public class InstanceList extends AbstractFilteringList<ApplicationId, InstanceL /** Returns the subset of instances which are not currently failing any jobs. */ public InstanceList failing() { - return matching(id -> ! statuses.get(id).instanceJobs().get(id).failing().not().withStatus(outOfCapacity).isEmpty()); + return matching(id -> ! instances.get(id).instanceJobs().get(id).failing().not().withStatus(outOfCapacity).isEmpty()); } /** Returns the subset of instances which are currently failing an upgrade. */ public InstanceList failingUpgrade() { - return matching(id -> ! statuses.get(id).instanceJobs().get(id).failing().not().failingApplicationChange().isEmpty()); + return matching(id -> ! instances.get(id).instanceJobs().get(id).failing().not().failingApplicationChange().isEmpty()); } /** Returns the subset of instances which are upgrading (to any version), not considering block windows. */ @@ -123,7 +126,7 @@ public class InstanceList extends AbstractFilteringList<ApplicationId, InstanceL /** Returns the subset of instances which started failing on the given version */ public InstanceList startedFailingOn(Version version) { - return matching(id -> ! statuses.get(id).instanceJobs().get(id).firstFailing().on(version).isEmpty()); + return matching(id -> ! instances.get(id).instanceJobs().get(id).firstFailing().on(version).isEmpty()); } /** Returns this list sorted by increasing oldest production deployment version. Applications without any deployments are ordered first. */ @@ -135,7 +138,7 @@ public class InstanceList extends AbstractFilteringList<ApplicationId, InstanceL } private Application application(ApplicationId id) { - return statuses.get(id).application(); + return instances.get(id).application(); } private Instance instance(ApplicationId id) { diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/Upgrader.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/Upgrader.java index 6df467c8790..bdb7b0860e5 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/Upgrader.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/Upgrader.java @@ -2,7 +2,6 @@ package com.yahoo.vespa.hosted.controller.maintenance; import com.yahoo.component.Version; -import com.yahoo.concurrent.maintenance.JobControl; import com.yahoo.config.application.api.DeploymentSpec.UpgradePolicy; import com.yahoo.config.provision.ApplicationId; import com.yahoo.vespa.curator.Lock; @@ -20,6 +19,7 @@ import java.util.LinkedHashMap; import java.util.Map; import java.util.Objects; import java.util.Optional; +import java.util.Random; import java.util.function.BinaryOperator; import java.util.function.Function; import java.util.logging.Logger; @@ -39,10 +39,12 @@ public class Upgrader extends ControllerMaintainer { private static final Logger log = Logger.getLogger(Upgrader.class.getName()); private final CuratorDb curator; + private final Random random; public Upgrader(Controller controller, Duration interval, CuratorDb curator) { super(controller, interval); this.curator = Objects.requireNonNull(curator, "curator cannot be null"); + this.random = new Random(controller.clock().instant().toEpochMilli()); // Seed with clock for test determinism } /** @@ -115,6 +117,7 @@ public class Upgrader extends ControllerMaintainer { .not().deploying() .onLowerVersionThan(version) .canUpgradeAt(version, controller().clock().instant()) + .shuffle(random) // Shuffle so we do not always upgrade instances in the same order .byIncreasingDeployedVersion() .first(numberToUpgrade).asList() .forEach(instance -> controller().applications().deploymentTrigger().triggerChange(instance, Change.of(version))); diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/UpgraderTest.java b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/UpgraderTest.java index b49686237d8..cc92c6cd271 100644 --- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/UpgraderTest.java +++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/UpgraderTest.java @@ -2,21 +2,30 @@ package com.yahoo.vespa.hosted.controller.maintenance; import com.yahoo.component.Version; +import com.yahoo.config.provision.ApplicationId; import com.yahoo.config.provision.zone.ZoneId; +import com.yahoo.test.ManualClock; +import com.yahoo.vespa.hosted.controller.api.integration.deployment.RunId; import com.yahoo.vespa.hosted.controller.application.ApplicationPackage; import com.yahoo.vespa.hosted.controller.application.Change; import com.yahoo.vespa.hosted.controller.application.Deployment; import com.yahoo.vespa.hosted.controller.deployment.ApplicationPackageBuilder; import com.yahoo.vespa.hosted.controller.deployment.DeploymentContext; import com.yahoo.vespa.hosted.controller.deployment.DeploymentTester; +import com.yahoo.vespa.hosted.controller.deployment.Run; import com.yahoo.vespa.hosted.controller.versions.VespaVersion; import org.junit.Test; import java.time.Duration; import java.time.Instant; +import java.util.ArrayList; +import java.util.HashSet; import java.util.List; +import java.util.Map; import java.util.Optional; import java.util.OptionalInt; +import java.util.Set; +import java.util.stream.Collectors; import static com.yahoo.vespa.hosted.controller.api.integration.deployment.JobType.devUsEast1; import static com.yahoo.vespa.hosted.controller.api.integration.deployment.JobType.productionUsCentral1; @@ -1063,6 +1072,49 @@ public class UpgraderTest { assertEquals(174, upgrades); } + @Test + public void testUpgradeShuffling() { + // Deploy applications on initial version + var default0 = createAndDeploy("default0", "default"); + var default1 = createAndDeploy("default1", "default"); + var default2 = createAndDeploy("default2", "default"); + var applications = Map.of(default0.instanceId(), default0, + default1.instanceId(), default1, + default2.instanceId(), default2); + + // Throttle upgrades per run + ((ManualClock) tester.controller().clock()).setInstant(Instant.ofEpochMilli(1589787109000L)); // Fixed random seed + Upgrader upgrader = new Upgrader(tester.controller(), Duration.ofMinutes(10), + tester.controllerTester().curator()); + upgrader.setUpgradesPerMinute(0.1); + + // Trigger some upgrades + List<Version> versions = List.of(Version.fromString("6.2"), Version.fromString("6.3")); + Set<List<ApplicationId>> upgradeOrders = new HashSet<>(versions.size()); + for (var version : versions) { + // Upgrade system + tester.controllerTester().upgradeSystem(version); + List<ApplicationId> upgraderOrder = new ArrayList<>(applications.size()); + + // Upgrade all applications + for (int i = 0; i < applications.size(); i++) { + upgrader.maintain(); + tester.triggerJobs(); + Set<ApplicationId> triggered = tester.jobs().active().stream() + .map(Run::id) + .map(RunId::application) + .collect(Collectors.toSet()); + assertEquals("Expected number of applications is triggered", 1, triggered.size()); + ApplicationId application = triggered.iterator().next(); + upgraderOrder.add(application); + applications.get(application).completeRollout(); + tester.clock().advance(Duration.ofMinutes(1)); + } + upgradeOrders.add(upgraderOrder); + } + assertEquals("Upgrade orders are distinct", versions.size(), upgradeOrders.size()); + } + private ApplicationPackage applicationPackage(String upgradePolicy) { return new ApplicationPackageBuilder().upgradePolicy(upgradePolicy) .region("us-west-1") |