aboutsummaryrefslogtreecommitdiffstats
path: root/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/InfrastructureUpgrader.java
blob: b1ea4584497673351b43cd734bb7cfa818a46c02 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.vespa.hosted.controller.maintenance;

import com.yahoo.component.Version;
import com.yahoo.config.provision.SystemName;
import com.yahoo.config.provision.zone.NodeSlice;
import com.yahoo.config.provision.zone.UpgradePolicy;
import com.yahoo.config.provision.zone.ZoneApi;
import com.yahoo.text.Text;
import com.yahoo.vespa.hosted.controller.Controller;
import com.yahoo.vespa.hosted.controller.api.integration.configserver.Node;
import com.yahoo.vespa.hosted.controller.api.integration.configserver.NodeFilter;
import com.yahoo.vespa.hosted.controller.application.SystemApplication;
import com.yahoo.vespa.hosted.controller.versions.VersionTarget;
import com.yahoo.yolean.Exceptions;

import java.time.Duration;
import java.util.Comparator;
import java.util.EnumSet;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.Optional;
import java.util.Set;
import java.util.function.Function;
import java.util.logging.Logger;
import java.util.stream.Collectors;

/**
 * Base class for maintainers that upgrade zone infrastructure.
 *
 * <p>On each maintenance run the subclass is asked for a {@link #target()}. If one is present, the steps of
 * the configured {@link UpgradePolicy} are processed in order (in inverted order when the target is a
 * downgrade). Every zone of a step is attempted, and processing stops after the first step in which at
 * least one zone failed or has not yet converged on the target.
 *
 * @param <TARGET> the type of version target the managed applications are moved towards
 * @author mpolden
 */
public abstract class InfrastructureUpgrader<TARGET extends VersionTarget> extends ControllerMaintainer {

    private static final Logger log = Logger.getLogger(InfrastructureUpgrader.class.getName());

    /** The policy deciding which zones are upgraded, in which order, and with which node slice */
    protected final UpgradePolicy upgradePolicy;

    /** Immutable snapshot of the system applications handled by this upgrader */
    private final List<SystemApplication> managedApplications;

    /**
     * Creates a new upgrader.
     *
     * @param controller          the controller this maintainer runs in
     * @param interval            the interval passed on to the maintainer base class
     * @param upgradePolicy       the policy describing the zone steps to follow
     * @param managedApplications the system applications to upgrade; copied, must be non-null
     * @param name                the name passed on to the maintainer base class
     */
    public InfrastructureUpgrader(Controller controller, Duration interval, UpgradePolicy upgradePolicy,
                                  List<SystemApplication> managedApplications, String name) {
        super(controller, interval, name, EnumSet.allOf(SystemName.class));
        this.upgradePolicy = upgradePolicy;
        this.managedApplications = List.copyOf(Objects.requireNonNull(managedApplications));
    }

    /**
     * Upgrades all managed applications towards the current target, if there is one.
     *
     * @return 0.0 when there is no target, otherwise the value computed by
     *         {@code asSuccessFactorDeviation} from the number of attempted and failed zones
     */
    @Override
    protected double maintain() {
        return target().map(target -> upgradeAll(target, managedApplications))
                       .orElse(0.0);
    }

    /**
     * Deploy a list of system applications until they converge on the given version.
     *
     * <p>Zones within a step are all attempted even if one of them fails; a failure is logged, counted, and
     * marks the step as not converged. Later steps are only entered once every zone of the current step has
     * converged.
     */
    private double upgradeAll(TARGET target, List<SystemApplication> applications) {
        int attempts = 0;
        int failures = 0;
        // Invert zone order if we're downgrading
        UpgradePolicy policy = target.downgrade() ? upgradePolicy.inverted() : upgradePolicy;
        for (UpgradePolicy.Step step : policy.steps()) {
            boolean converged = true;
            for (ZoneApi zone : step.zones()) {
                try {
                    attempts++;
                    converged &= upgradeAll(target, applications, zone, step.nodeSlice());
                } catch (UnreachableNodeRepositoryException e) {
                    failures++;
                    converged = false;
                    log.warning(Text.format("%s: Failed to communicate with node repository in %s, continuing with next parallel zone: %s",
                                            this, zone, Exceptions.toMessageString(e)));
                } catch (Exception e) {
                    failures++;
                    converged = false;
                    log.warning(Text.format("%s: Failed to upgrade zone: %s, continuing with next parallel zone: %s",
                                            this, zone, Exceptions.toMessageString(e)));
                }
            }
            // Do not proceed to the next step until every zone in this one has converged
            if (!converged) {
                break;
            }
        }
        return asSuccessFactorDeviation(attempts, failures);
    }

    /**
     * Returns whether all applications have converged to the target version in zone.
     *
     * <p>An application is only acted on once all its prerequisites have converged. When upgrading, the
     * prerequisites of an application are its own dependencies. When downgrading the relation is inverted:
     * the prerequisites of an application are the applications that depend on it.
     */
    private boolean upgradeAll(TARGET target, List<SystemApplication> applications, ZoneApi zone, NodeSlice nodeSlice) {
        Map<SystemApplication, Set<SystemApplication>> dependenciesByApplication = new HashMap<>();
        if (target.downgrade()) { // Invert dependencies when we're downgrading
            for (var application : applications) {
                // Make sure applications nothing depends on are still present as keys
                dependenciesByApplication.computeIfAbsent(application, k -> new HashSet<>());
                for (var dependency : application.dependencies()) {
                    dependenciesByApplication.computeIfAbsent(dependency, k -> new HashSet<>())
                                             .add(application);
                }
            }
        } else {
            applications.forEach(app -> dependenciesByApplication.put(app, Set.copyOf(app.dependencies())));
        }
        boolean converged = true;
        // Iteration order of the map is unspecified; prerequisites are re-checked for every application
        for (var kv : dependenciesByApplication.entrySet()) {
            SystemApplication application = kv.getKey();
            Set<SystemApplication> dependencies = kv.getValue();
            boolean allConverged = dependencies.stream().allMatch(app -> convergedOn(target, app, zone, nodeSlice));
            // NOTE(review): an application skipped here does not itself affect the result; the result only
            // becomes false through a prerequisite that is also a key of this map - confirm that is intended
            if (allConverged) {
                if (changeTargetTo(target, application, zone)) {
                    upgrade(target, application, zone);
                }
                converged &= convergedOn(target, application, zone, nodeSlice);
            }
        }
        return converged;
    }

    /** Returns whether target version for application in zone should be changed */
    protected abstract boolean changeTargetTo(TARGET target, SystemApplication application, ZoneApi zone);

    /** Upgrade component to target version. Implementation should be idempotent */
    protected abstract void upgrade(TARGET target, SystemApplication application, ZoneApi zone);

    /** Returns whether application has converged to target version in zone */
    protected abstract boolean convergedOn(TARGET target, SystemApplication application, ZoneApi zone, NodeSlice nodeSlice);

    /** Returns the version target for the component upgraded by this, if any */
    protected abstract Optional<TARGET> target();

    /** Returns whether the upgrader should expect given node to upgrade */
    protected abstract boolean expectUpgradeOf(Node node, SystemApplication application, ZoneApi zone);

    /**
     * Find the version currently used by a slice of nodes, in given zone. If no such slice exists,
     * the lowest (or highest, when downgrading) overall version is returned.
     *
     * <p>Only nodes accepted by {@link #expectUpgradeOf(Node, SystemApplication, ZoneApi)} are considered.
     * If several versions are each used by a satisfying slice, the highest of them is returned when
     * upgrading and the lowest when downgrading.
     *
     * @param nodeSlice    the slice of nodes that must be on a version for it to be considered
     * @param zone         the zone whose nodes are listed
     * @param application  the application whose nodes are listed
     * @param versionField extracts the version of interest from a node
     * @param downgrading  whether the current target is a downgrade
     * @return the selected version, or empty if no nodes are considered
     * @throws RuntimeException if listing or inspecting the nodes fails for any reason; the original
     *                          exception is available as the cause
     */
    protected final Optional<Version> versionOf(NodeSlice nodeSlice, ZoneApi zone, SystemApplication application,
                                                Function<Node, Version> versionField, boolean downgrading) {
        try {
            Map<Version, Long> nodeCountByVersion = controller().serviceRegistry().configServer()
                                                                .nodeRepository()
                                                                .list(zone.getVirtualId(), NodeFilter.all().applications(application.id()))
                                                                .stream()
                                                                .filter(node -> expectUpgradeOf(node, application, zone))
                                                                .collect(Collectors.groupingBy(versionField,
                                                                                               Collectors.counting()));
            long totalNodes = nodeCountByVersion.values().stream().mapToLong(Long::longValue).sum();
            Set<Version> versionsOfMatchingSlices = new HashSet<>();
            for (var kv : nodeCountByVersion.entrySet()) {
                long nodesOnVersion = kv.getValue();
                if (nodeSlice.satisfiedBy(nodesOnVersion, totalNodes)) {
                    versionsOfMatchingSlices.add(kv.getKey());
                }
            }
            if (!versionsOfMatchingSlices.isEmpty()) {
                return downgrading
                        ? versionsOfMatchingSlices.stream().min(Comparator.naturalOrder())
                        : versionsOfMatchingSlices.stream().max(Comparator.naturalOrder());
            }
            // No version is used by a large enough slice: fall back to the version furthest from the target
            return downgrading
                    ? nodeCountByVersion.keySet().stream().max(Comparator.naturalOrder())
                    : nodeCountByVersion.keySet().stream().min(Comparator.naturalOrder());
        } catch (Exception e) {
            // Chain the original exception so its type and stack trace stay available through getCause()
            throw new UnreachableNodeRepositoryException(Text.format("Failed to get version for %s in %s: %s",
                                                                     application.id(), zone,
                                                                     Exceptions.toMessageString(e)),
                                                         e);
        }
    }

    /** Thrown by {@link #versionOf} when the version of an application's nodes cannot be determined */
    private static class UnreachableNodeRepositoryException extends RuntimeException {
        private UnreachableNodeRepositoryException(String reason, Throwable cause) {
            super(reason, cause);
        }
    }

}