aboutsummaryrefslogtreecommitdiffstats
path: root/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/OsController.java
blob: bec7c40d2a9e03f8f7e35a1f5e946d3dc700af4c (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.vespa.hosted.controller;

import com.yahoo.component.Version;
import com.yahoo.config.provision.CloudName;
import com.yahoo.transaction.Mutex;
import com.yahoo.vespa.hosted.controller.persistence.CuratorDb;
import com.yahoo.vespa.hosted.controller.versions.CertifiedOsVersion;
import com.yahoo.vespa.hosted.controller.versions.OsVersion;
import com.yahoo.vespa.hosted.controller.versions.OsVersionStatus;
import com.yahoo.vespa.hosted.controller.versions.OsVersionTarget;

import java.time.Instant;
import java.util.Comparator;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Objects;
import java.util.Optional;
import java.util.Set;
import java.util.TreeSet;
import java.util.function.BinaryOperator;
import java.util.function.Function;
import java.util.logging.Logger;
import java.util.stream.Collectors;

/**
 * A singleton owned by {@link Controller} which contains the methods and state for controlling OS upgrades.
 *
 * <p>All state is read from and written to the controller's {@link CuratorDb}. Every method that modifies
 * stored state does so inside a try-with-resources block holding the matching curator lock, so that the
 * read-modify-write cycle is not interleaved with another one taking the same lock.</p>
 *
 * @author mpolden
 */
public record OsController(Controller controller) {

    private static final Logger LOG = Logger.getLogger(OsController.class.getName());

    /** Creates an OS controller backed by the given, non-null, controller */
    public OsController {
        Objects.requireNonNull(controller);
    }

    /**
     * Returns the target OS version for infrastructure in this system. The controller will drive infrastructure OS
     * upgrades to this version.
     *
     * @param cloud The cloud to look up the target for
     * @return the target for the given cloud, or empty if no target is set for it
     */
    public Optional<OsVersionTarget> target(CloudName cloud) {
        return targets().stream().filter(target -> target.osVersion().cloud().equals(cloud)).findFirst();
    }

    /** Returns all target OS versions in this system */
    public Set<OsVersionTarget> targets() {
        return curator().readOsVersionTargets();
    }

    /**
     * Set the target OS version for given cloud in this system.
     *
     * <p>This is a no-op if the cloud already targets the given version with the same pin setting.</p>
     *
     * @param version The target OS version
     * @param cloud   The cloud to upgrade
     * @param force   Allow downgrades, and override pinned target (if any)
     * @param pin     Pin this version. This prevents automatic scheduling of upgrades until version is unpinned
     * @throws IllegalArgumentException if the version is empty, the cloud does not exist in this system, the
     *                                  current target is pinned and the version differs without {@code force},
     *                                  or the change is a downgrade without {@code force}
     */
    public void upgradeTo(Version version, CloudName cloud, boolean force, boolean pin) {
        requireNonEmpty(version);
        requireCloud(cloud);
        Instant scheduledAt = controller.clock().instant();
        try (Mutex lock = curator().lockOsVersions()) {
            Map<CloudName, OsVersionTarget> targets = readTargetsByCloud();

            OsVersionTarget currentTarget = targets.get(cloud);
            boolean downgrade = false; // Without an existing target there is nothing to downgrade from
            if (currentTarget != null) {
                Version currentVersion = currentTarget.osVersion().version();
                boolean versionChange = !currentVersion.equals(version);
                downgrade = version.isBefore(currentVersion);
                if (versionChange && currentTarget.pinned() && !force) {
                    throw new IllegalArgumentException("Cannot " + (downgrade ? "downgrade" : "upgrade") + " cloud '" +
                                                       cloud.value() + "' to version " + version.toFullString() +
                                                       ": Current target is pinned. Add 'force' parameter to override");
                }
                if (downgrade && !force) {
                    throw new IllegalArgumentException("Cannot downgrade cloud '" + cloud.value() + "' to version " +
                                                       version.toFullString() + ": Missing 'force' parameter");
                }
                if (!versionChange && currentTarget.pinned() == pin) return; // No change
            }

            OsVersionTarget newTarget = new OsVersionTarget(new OsVersion(version, cloud), scheduledAt, pin, downgrade);
            targets.put(cloud, newTarget);
            curator().writeOsVersionTargets(new TreeSet<>(targets.values()));
            LOG.info("Triggered OS " + (downgrade ? "downgrade" : "upgrade") + " to " + version.toFullString() +
                     " in cloud " + cloud.value());
        }
    }

    /**
     * Clear the target OS version for given cloud in this system.
     *
     * @param cloudName The cloud whose target should be removed
     * @throws IllegalArgumentException if the given cloud has no target
     */
    public void cancelUpgrade(CloudName cloudName) {
        try (Mutex lock = curator().lockOsVersions()) {
            Map<CloudName, OsVersionTarget> targets = readTargetsByCloud();
            if (targets.remove(cloudName) == null) {
                throw new IllegalArgumentException("Cloud '" + cloudName.value() + "' has no OS upgrade target");
            }
            curator().writeOsVersionTargets(new TreeSet<>(targets.values()));
        }
    }

    /** Returns the current OS version status */
    public OsVersionStatus status() {
        return curator().readOsVersionStatus();
    }

    /**
     * Replace the current OS version status with a new one.
     *
     * <p>Before writing, this logs a message for each cloud that goes from having more than one version in the
     * stored status to exactly one version in the new status.</p>
     *
     * @param newStatus The status to store
     */
    public void updateStatus(OsVersionStatus newStatus) {
        try (Mutex lock = curator().lockOsVersionStatus()) {
            OsVersionStatus currentStatus = curator().readOsVersionStatus();
            for (CloudName cloud : controller.clouds()) {
                Set<Version> newVersions = newStatus.versionsIn(cloud);
                if (currentStatus.versionsIn(cloud).size() > 1 && newVersions.size() == 1) {
                    LOG.info("All nodes in " + cloud + " cloud upgraded to OS version " +
                             newVersions.iterator().next().toFullString());
                }
            }
            curator().writeOsVersionStatus(newStatus);
        }
    }

    /**
     * Certify an OS version as compatible with given Vespa version.
     *
     * <p>If the OS version is already certified in the given cloud, the existing certification is returned
     * unchanged, regardless of the Vespa version passed here.</p>
     *
     * @param version      The OS version to certify
     * @param cloud        The cloud the OS version belongs to
     * @param vespaVersion The Vespa version the OS version is certified against
     * @return the existing certification if there is one, otherwise the newly stored certification
     * @throws IllegalArgumentException if either version is empty or the cloud does not exist in this system
     */
    public CertifiedOsVersion certify(Version version, CloudName cloud, Version vespaVersion) {
        requireNonEmpty(version);
        requireNonEmpty(vespaVersion);
        requireCloud(cloud);
        try (Mutex lock = curator().lockCertifiedOsVersions()) {
            OsVersion osVersion = new OsVersion(version, cloud);
            Set<CertifiedOsVersion> certifiedVersions = readCertified();
            Optional<CertifiedOsVersion> matching = certifiedVersions.stream()
                                                                     .filter(cv -> cv.osVersion().equals(osVersion))
                                                                     .findFirst();
            if (matching.isPresent()) {
                return matching.get();
            }
            CertifiedOsVersion certified = new CertifiedOsVersion(osVersion, vespaVersion);
            Set<CertifiedOsVersion> updatedVersions = new HashSet<>(certifiedVersions); // Copy before modifying
            updatedVersions.add(certified);
            curator().writeCertifiedOsVersions(updatedVersions);
            return certified;
        }
    }

    /**
     * Revoke certification of an OS version.
     *
     * @param version The OS version to revoke certification for
     * @param cloud   The cloud the OS version belongs to
     * @throws IllegalArgumentException if the OS version is not certified
     */
    public void uncertify(Version version, CloudName cloud) {
        try (Mutex lock = curator().lockCertifiedOsVersions()) {
            OsVersion osVersion = new OsVersion(version, cloud);
            Set<CertifiedOsVersion> certifiedVersions = readCertified();
            Optional<CertifiedOsVersion> existing = certifiedVersions.stream()
                                                                     .filter(cv -> cv.osVersion().equals(osVersion))
                                                                     .findFirst();
            if (existing.isEmpty()) {
                throw new IllegalArgumentException(osVersion + " is not certified");
            }
            Set<CertifiedOsVersion> updatedVersions = new HashSet<>(certifiedVersions); // Copy before modifying
            updatedVersions.remove(existing.get());
            curator().writeCertifiedOsVersions(updatedVersions);
        }
    }

    /**
     * Remove certifications for non-existent OS versions.
     *
     * <p>A certification is removed when its cloud has no non-empty version in the given status, or when its
     * OS version is older than the oldest non-empty version of that cloud in the given status. Nothing is
     * removed if the status contains no non-empty versions at all.</p>
     *
     * @param currentStatus The status describing which OS versions currently exist
     */
    public void removeStaleCertifications(OsVersionStatus currentStatus) {
        try (Mutex lock = curator().lockCertifiedOsVersions()) {
            Map<CloudName, Version> oldestVersionByCloud = currentStatus.versions().keySet().stream()
                                                                        .filter(v -> !v.version().isEmpty())
                                                                        .collect(Collectors.toMap(OsVersion::cloud,
                                                                                                  OsVersion::version,
                                                                                                  BinaryOperator.minBy(Comparator.naturalOrder())));
            if (oldestVersionByCloud.isEmpty()) return;

            Set<CertifiedOsVersion> certifiedVersions = new HashSet<>(readCertified());
            boolean modified = certifiedVersions.removeIf(certifiedVersion -> {
                Version oldestVersion = oldestVersionByCloud.get(certifiedVersion.osVersion().cloud());
                return oldestVersion == null || certifiedVersion.osVersion().version().isBefore(oldestVersion);
            });
            if (modified) { // Avoid a write when nothing was stale
                curator().writeCertifiedOsVersions(certifiedVersions);
            }
        }
    }

    /**
     * Returns whether given OS version is certified as compatible with the current system version.
     *
     * @param osVersion The OS version to check
     * @return true if this is a CD system, or if the OS version has a certification whose Vespa version is not
     *         after the current system version
     */
    public boolean certified(OsVersion osVersion) {
        if (controller.system().isCd()) return true; // Always certified (this is the system doing the certifying)

        Version systemVersion = controller.readSystemVersion();
        return readCertified().stream()
                              .anyMatch(certifiedOsVersion -> certifiedOsVersion.osVersion().equals(osVersion) &&
                                                              // A later system version is fine, as we don't guarantee that
                                                              // an OS upgrade will always coincide with a Vespa release
                                                              !certifiedOsVersion.vespaVersion().isAfter(systemVersion));
    }

    /** Returns all certified versions */
    public Set<CertifiedOsVersion> readCertified() {
        return controller.curator().readCertifiedOsVersions();
    }

    /**
     * Reads the stored OS version targets into a mutable map keyed by cloud.
     *
     * @throws IllegalStateException if more than one stored target refers to the same cloud
     */
    private Map<CloudName, OsVersionTarget> readTargetsByCloud() {
        // Built explicitly as a HashMap because callers add to and remove from the result
        Map<CloudName, OsVersionTarget> targetsByCloud = new HashMap<>();
        for (OsVersionTarget target : curator().readOsVersionTargets()) {
            CloudName cloud = target.osVersion().cloud();
            OsVersionTarget previous = targetsByCloud.put(cloud, target);
            if (previous != null) {
                throw new IllegalStateException("Duplicate key " + cloud + " (attempted merging values " +
                                                previous + " and " + target + ")");
            }
        }
        return targetsByCloud;
    }

    /** Throws {@link IllegalArgumentException} unless the given cloud is among the controller's clouds */
    private void requireCloud(CloudName cloud) {
        if (!controller.clouds().contains(cloud)) {
            throw new IllegalArgumentException("Cloud '" + cloud + "' does not exist in this system");
        }
    }

    /** Throws {@link IllegalArgumentException} if the given version is empty */
    private void requireNonEmpty(Version version) {
        if (version.isEmpty()) {
            throw new IllegalArgumentException("Invalid version '" + version.toFullString() + "'");
        }
    }

    /** Returns the curator database of the owning controller */
    private CuratorDb curator() {
        return controller.curator();
    }

}