summaryrefslogtreecommitdiffstats
path: root/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/Upgrader.java
blob: 06712ad7862aee6d5aa3286ab2101f00426a49c7 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
// Copyright 2018 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.vespa.hosted.controller.maintenance;

import com.yahoo.component.Version;
import com.yahoo.config.application.api.DeploymentSpec.UpgradePolicy;
import com.yahoo.vespa.curator.Lock;
import com.yahoo.vespa.hosted.controller.Application;
import com.yahoo.vespa.hosted.controller.Controller;
import com.yahoo.vespa.hosted.controller.application.ApplicationList;
import com.yahoo.vespa.hosted.controller.persistence.CuratorDb;
import com.yahoo.vespa.hosted.controller.versions.VespaVersion;
import com.yahoo.vespa.hosted.controller.versions.VespaVersion.Confidence;
import com.yahoo.yolean.Exceptions;

import java.time.Duration;
import java.util.ArrayList;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.Optional;
import java.util.logging.Level;
import java.util.logging.Logger;

/**
 * Maintenance job which schedules applications for Vespa version upgrade
 *
 * @author bratseth
 * @author mpolden
 */
public class Upgrader extends Maintainer {

    private static final Logger log = Logger.getLogger(Upgrader.class.getName());

    private final CuratorDb curator;

    public Upgrader(Controller controller, Duration interval, JobControl jobControl, CuratorDb curator) {
        super(controller, interval, jobControl);
        this.curator = Objects.requireNonNull(curator, "curator cannot be null");
    }

    /**
     * Schedule application upgrades. Note that this implementation must be idempotent.
     */
    @Override
    public void maintain() {
        // Determine target versions for each upgrade policy
        Optional<Version> canaryTarget = controller().versionStatus().systemVersion().map(VespaVersion::versionNumber);
        Optional<Version> defaultTarget = newestVersionWithConfidence(Confidence.normal);
        Optional<Version> conservativeTarget = newestVersionWithConfidence(Confidence.high);

        // Cancel upgrades to broken targets (let other ongoing upgrades complete to avoid starvation)
        for (VespaVersion version : controller().versionStatus().versions()) {
            if (version.confidence() == Confidence.broken)
                cancelUpgradesOf(applications().without(UpgradePolicy.canary).upgradingTo(version.versionNumber()),
                                 version.versionNumber() + " is broken");
        }

        // Canaries should always try the canary target
        cancelUpgradesOf(applications().with(UpgradePolicy.canary).upgrading().notUpgradingTo(canaryTarget),
                         "Outdated target version for Canaries");

        // Cancel *failed* upgrades to earlier versions, as the new version may fix it
        String reason = "Failing on outdated version";
        cancelUpgradesOf(applications().with(UpgradePolicy.defaultPolicy).upgrading().failing().notUpgradingTo(defaultTarget), reason);
        cancelUpgradesOf(applications().with(UpgradePolicy.conservative).upgrading().failing().notUpgradingTo(conservativeTarget), reason);

        // Schedule the right upgrades

        canaryTarget.ifPresent(target -> upgrade(applications().with(UpgradePolicy.canary), target));
        defaultTarget.ifPresent(target -> upgrade(applications().with(UpgradePolicy.defaultPolicy), target));
        conservativeTarget.ifPresent(target -> upgrade(applications().with(UpgradePolicy.conservative), target));
    }

    private Optional<Version> newestVersionWithConfidence(Confidence confidence) {
        return reversed(controller().versionStatus().versions()).stream()
                                                                // Ensure we never pick a version newer than the system
                                                                .filter(v -> !v.versionNumber().isAfter(controller().systemVersion()))
                                                                .filter(v -> v.confidence().equalOrHigherThan(confidence))
                                                                .findFirst()
                                                                .map(VespaVersion::versionNumber);
    }

    private List<VespaVersion> reversed(List<VespaVersion> versions) {
        List<VespaVersion> reversed = new ArrayList<>(versions.size());
        for (int i = 0; i < versions.size(); i++)
            reversed.add(versions.get(versions.size() - 1 - i));
        return reversed;
    }

    /** Returns a list of all applications */
    private ApplicationList applications() { return ApplicationList.from(controller().applications().asList()); }

    private void upgrade(ApplicationList applications, Version version) {
        applications = applications.hasProductionDeployment();
        applications = applications.onLowerVersionThan(version);
        applications = applications.allowMajorVersion(version.getMajor());
        applications = applications.notDeployingAt(controller().clock().instant()); // wait with applications deploying an application change or already upgrading
        applications = applications.notFailingOn(version); // try to upgrade only if it hasn't failed on this version
        applications = applications.canUpgradeAt(controller().clock().instant()); // wait with applications that are currently blocking upgrades
        applications = applications.byIncreasingDeployedVersion(); // start with lowest versions
        applications = applications.first(numberOfApplicationsToUpgrade()); // throttle upgrades
        for (Application application : applications.asList()) {
            try {
                controller().applications().deploymentTrigger().triggerChange(application.id(), application.change().with(version));
            } catch (IllegalArgumentException e) {
                log.log(Level.INFO, "Could not trigger change: " + Exceptions.toMessageString(e));
            }
        }
    }

    private void cancelUpgradesOf(ApplicationList applications, String reason) {
        if (applications.isEmpty()) return;
        log.info("Cancelling upgrading of " + applications.asList().size() + " applications: " + reason);
        for (Application application : applications.asList())
            controller().applications().deploymentTrigger().cancelChange(application.id(), true);
    }

    /** Returns the number of applications to upgrade in this run */
    private int numberOfApplicationsToUpgrade() {
        return Math.max(1, (int) (maintenanceInterval().getSeconds() * (upgradesPerMinute() / 60)));
    }

    /** Returns number of upgrades per minute */
    public double upgradesPerMinute() {
        return curator.readUpgradesPerMinute();
    }

    /** Sets the number of upgrades per minute */
    public void setUpgradesPerMinute(double n) {
        curator.writeUpgradesPerMinute(n);
    }

    /** Override confidence for given version. This will cause the computed confidence to be ignored */
    public void overrideConfidence(Version version, Confidence confidence) {
        try (Lock lock = curator.lockConfidenceOverrides()) {
            Map<Version, Confidence> overrides = new LinkedHashMap<>(curator.readConfidenceOverrides());
            overrides.put(version, confidence);
            curator.writeConfidenceOverrides(overrides);
        }
    }

    /** Returns all confidence overrides */
    public Map<Version, Confidence> confidenceOverrides() {
        return curator.readConfidenceOverrides();
    }

    /** Remove confidence override for given version */
    public void removeConfidenceOverride(Version version) {
        controller().removeConfidenceOverride(v -> v.equals(version));
    }
}