aboutsummaryrefslogtreecommitdiffstats
path: root/controller-server
diff options
context:
space:
mode:
authorValerij Fredriksen <freva@users.noreply.github.com>2022-01-05 10:50:42 +0100
committerGitHub <noreply@github.com>2022-01-05 10:50:42 +0100
commitcfaa0be19da00466a73b57a9630477eabe46a8bd (patch)
treeae162f69ebf3a80dac151fb875454f5e295b727b /controller-server
parent0188f4acf60f33b09940c35c6e1c4d435ceb2891 (diff)
parent4794cba6b41db063433e35e0d95e5bc879b06be7 (diff)
Merge pull request #20654 from vespa-engine/mpolden/skip-rescheduling
Do not reschedule upgrade for instances without production steps
Diffstat (limited to 'controller-server')
-rw-r--r--controller-server/src/main/java/com/yahoo/vespa/hosted/controller/application/InstanceList.java29
-rw-r--r--controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/Upgrader.java83
2 files changed, 60 insertions, 52 deletions
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/application/InstanceList.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/application/InstanceList.java
index 0e8a59134ed..85762cf530a 100644
--- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/application/InstanceList.java
+++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/application/InstanceList.java
@@ -1,7 +1,6 @@
// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.vespa.hosted.controller.application;
-import com.google.common.collect.ImmutableMap;
import com.yahoo.collections.AbstractFilteringList;
import com.yahoo.component.Version;
import com.yahoo.config.application.api.DeploymentSpec;
@@ -14,6 +13,7 @@ import com.yahoo.vespa.hosted.controller.deployment.DeploymentStatusList;
import java.time.Instant;
import java.util.Collection;
+import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Optional;
@@ -31,16 +31,7 @@ public class InstanceList extends AbstractFilteringList<ApplicationId, InstanceL
private InstanceList(Collection<? extends ApplicationId> items, boolean negate, Map<ApplicationId, DeploymentStatus> instances) {
super(items, negate, (i, n) -> new InstanceList(i, n, instances));
- this.instances = instances;
- }
-
- public static InstanceList from(DeploymentStatusList statuses) {
- ImmutableMap.Builder<ApplicationId, DeploymentStatus> builder = ImmutableMap.builder();
- for (DeploymentStatus status : statuses.asList())
- for (InstanceName instance : status.application().deploymentSpec().instanceNames())
- builder.put(status.application().id().instance(instance), status);
- Map<ApplicationId, DeploymentStatus> map = builder.build();
- return new InstanceList(map.keySet(), false, map);
+ this.instances = Map.copyOf(instances);
}
/**
@@ -62,7 +53,7 @@ public class InstanceList extends AbstractFilteringList<ApplicationId, InstanceL
.map(readyAt -> ! readyAt.isAfter(instant)).orElse(false));
}
- /** Returns the subset of instances which have at least one productiog deployment */
+ /** Returns the subset of instances which have at least one production deployment */
public InstanceList withProductionDeployment() {
return matching(id -> instance(id).productionDeployments().size() > 0);
}
@@ -80,9 +71,9 @@ public class InstanceList extends AbstractFilteringList<ApplicationId, InstanceL
.anyMatch(deployment -> deployment.version().isBefore(version)));
}
- /** Returns the subset of instances which have changes left to deploy; blocked, or deploying */
- public InstanceList withChanges() {
- return matching(id -> instance(id).change().hasTargets() || instances.get(id).outstandingChange(id.instance()).hasTargets());
+ /** Returns the subset of instances that has completed deployment of given change */
+ public InstanceList hasCompleted(Change change) {
+ return matching(id -> instances.get(id).jobsToRun(Map.of(id.instance(), change)).isEmpty());
}
/** Returns the subset of instances which are currently deploying a change */
@@ -153,4 +144,12 @@ public class InstanceList extends AbstractFilteringList<ApplicationId, InstanceL
return application(id).require(id.instance());
}
+ public static InstanceList from(DeploymentStatusList statuses) {
+ Map<ApplicationId, DeploymentStatus> instances = new HashMap<>();
+ for (DeploymentStatus status : statuses.asList())
+ for (InstanceName instance : status.application().deploymentSpec().instanceNames())
+ instances.put(status.application().id().instance(instance), status);
+ return new InstanceList(instances.keySet(), false, instances);
+ }
+
}
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/Upgrader.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/Upgrader.java
index 7a7eee7183b..9af838b1f55 100644
--- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/Upgrader.java
+++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/Upgrader.java
@@ -58,39 +58,8 @@ public class Upgrader extends ControllerMaintainer {
Collection<Version> defaultTargets = targetVersions(Confidence.normal, versionStatus);
Collection<Version> conservativeTargets = targetVersions(Confidence.high, versionStatus);
- // Cancel upgrades to broken targets (let other ongoing upgrades complete to avoid starvation)
- for (VespaVersion version : versionStatus.versions()) {
- if (version.confidence() == Confidence.broken)
- cancelUpgradesOf(instances().upgradingTo(version.versionNumber())
- .not().with(UpgradePolicy.canary),
- version.versionNumber() + " is broken");
- }
-
- // Canaries should always try the canary target
- cancelUpgradesOf(instances().upgrading()
- .not().upgradingTo(canaryTarget)
- .with(UpgradePolicy.canary),
- "Outdated target version for Canaries");
-
- // Cancel *failed* upgrades to earlier versions, as the new version may fix it
- String reason = "Failing on outdated version";
- cancelUpgradesOf(instances().upgrading()
- .failing()
- .not().upgradingTo(defaultTargets)
- .with(UpgradePolicy.defaultPolicy),
- reason);
- cancelUpgradesOf(instances().upgrading()
- .failing()
- .not().upgradingTo(conservativeTargets)
- .with(UpgradePolicy.conservative),
- reason);
-
- // Schedule the right upgrades
- InstanceList instances = instances();
- Optional<Integer> targetMajorVersion = targetMajorVersion();
- upgrade(instances.with(UpgradePolicy.canary), canaryTarget, targetMajorVersion, instances.size());
- defaultTargets.forEach(target -> upgrade(instances.with(UpgradePolicy.defaultPolicy), target, targetMajorVersion, numberOfApplicationsToUpgrade()));
- conservativeTargets.forEach(target -> upgrade(instances.with(UpgradePolicy.conservative), target, targetMajorVersion, numberOfApplicationsToUpgrade()));
+ cancelUpgrades(versionStatus, canaryTarget, defaultTargets, conservativeTargets);
+ upgrade(versionStatus, canaryTarget, defaultTargets, conservativeTargets);
return 1.0;
}
@@ -108,22 +77,62 @@ public class Upgrader extends ControllerMaintainer {
}
/** Returns a list of all production application instances, except those which are pinned, which we should not manipulate here. */
- private InstanceList instances() {
- return InstanceList.from(controller().jobController().deploymentStatuses(ApplicationList.from(controller().applications().readable())))
+ private InstanceList instances(Version systemVersion) {
+ return InstanceList.from(controller().jobController().deploymentStatuses(ApplicationList.from(controller().applications().readable()), systemVersion))
.withDeclaredJobs()
.unpinned();
}
+ private void cancelUpgrades(VersionStatus versionStatus, Version canaryTarget, Collection<Version> defaultTargets, Collection<Version> conservativeTargets) {
+ InstanceList instances = instances(controller().systemVersion(versionStatus));
+ // Cancel upgrades to broken targets (let other ongoing upgrades complete to avoid starvation)
+ for (VespaVersion version : versionStatus.versions()) {
+ if (version.confidence() == Confidence.broken)
+ cancelUpgradesOf(instances.upgradingTo(version.versionNumber())
+ .not().with(UpgradePolicy.canary),
+ version.versionNumber() + " is broken");
+ }
+
+ // Canaries should always try the canary target
+ cancelUpgradesOf(instances.upgrading()
+ .not().upgradingTo(canaryTarget)
+ .with(UpgradePolicy.canary),
+ "Outdated target version for Canaries");
+
+ // Cancel *failed* upgrades to earlier versions, as the new version may fix it
+ String reason = "Failing on outdated version";
+ cancelUpgradesOf(instances.upgrading()
+ .failing()
+ .not().upgradingTo(defaultTargets)
+ .with(UpgradePolicy.defaultPolicy),
+ reason);
+ cancelUpgradesOf(instances.upgrading()
+ .failing()
+ .not().upgradingTo(conservativeTargets)
+ .with(UpgradePolicy.conservative),
+ reason);
+ }
+
+ private void upgrade(VersionStatus versionStatus, Version canaryTarget, Collection<Version> defaultTargets, Collection<Version> conservativeTargets) {
+ InstanceList instances = instances(controller().systemVersion(versionStatus));
+ Optional<Integer> targetMajorVersion = targetMajorVersion();
+ upgrade(instances.with(UpgradePolicy.canary), canaryTarget, targetMajorVersion, instances.size());
+ defaultTargets.forEach(target -> upgrade(instances.with(UpgradePolicy.defaultPolicy), target, targetMajorVersion, numberOfApplicationsToUpgrade()));
+ conservativeTargets.forEach(target -> upgrade(instances.with(UpgradePolicy.conservative), target, targetMajorVersion, numberOfApplicationsToUpgrade()));
+ }
+
private void upgrade(InstanceList instances, Version version, Optional<Integer> targetMajorVersion, int numberToUpgrade) {
+ Change change = Change.of(version);
instances.not().failingOn(version)
.allowMajorVersion(version.getMajor(), targetMajorVersion.orElse(version.getMajor()))
.not().deploying()
+ .not().hasCompleted(change) // Avoid rescheduling change for instances without production steps
.onLowerVersionThan(version)
.canUpgradeAt(version, controller().clock().instant())
.shuffle(random) // Shuffle so we do not always upgrade instances in the same order
.byIncreasingDeployedVersion()
- .first(numberToUpgrade).asList()
- .forEach(instance -> controller().applications().deploymentTrigger().triggerChange(instance, Change.of(version)));
+ .first(numberToUpgrade)
+ .forEach(instance -> controller().applications().deploymentTrigger().triggerChange(instance, change));
}
private void cancelUpgradesOf(InstanceList instances, String reason) {