aboutsummaryrefslogtreecommitdiffstats
path: root/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/versions/VespaVersion.java
blob: b03098bf18f24dbe9bfe8e5e9135631abb21a5c0 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.vespa.hosted.controller.versions;

import com.yahoo.component.Version;
import com.yahoo.config.provision.SystemName;
import com.yahoo.vespa.hosted.controller.Controller;
import com.yahoo.vespa.hosted.controller.application.ApplicationList;
import com.yahoo.vespa.hosted.controller.application.InstanceList;
import com.yahoo.vespa.hosted.controller.application.TenantAndApplicationId;

import java.time.Instant;
import java.time.ZoneOffset;
import java.util.List;

import static com.yahoo.config.application.api.DeploymentSpec.UpgradePolicy;

/**
 * Information about a particular Vespa version.
 *
 * Vespa versions are identified by their version number and ordered by increasing version numbers.
 * 
 * @author bratseth
 */
public record VespaVersion(Version version,
                           String releaseCommit,
                           Instant committedAt,
                           boolean isControllerVersion,
                           boolean isSystemVersion,
                           boolean isReleased,
                           List<NodeVersion> nodeVersions,
                           Confidence confidence) implements Comparable<VespaVersion> {

    public static Confidence confidenceFrom(DeploymentStatistics statistics, Controller controller, VersionStatus versionStatus) {
        int thisMajorVersion = statistics.version().getMajor();
        InstanceList all = InstanceList.from(controller.jobController().deploymentStatuses(ApplicationList.from(controller.applications().asList())
                                                                                                          .withProductionDeployment()))
                                       .allowingMajorVersion(thisMajorVersion, versionStatus);
        // 'production on this': All production deployment jobs upgrading to this version have completed without failure
        InstanceList productionOnThis = all.matching(instance -> statistics.productionSuccesses().stream().anyMatch(run -> run.id().application().equals(instance)))
                                           .not().failingUpgrade()
                                           .not().upgradingTo(statistics.version());
        InstanceList failingOnThis = all.matching(instance -> statistics.failingUpgrades().stream().anyMatch(run -> run.id().application().equals(instance)));

        // 'broken' if any canary fails, and no non-canary is upgraded
        if  ( ! failingOnThis.with(UpgradePolicy.canary).isEmpty() && productionOnThis.not().with(UpgradePolicy.canary).isEmpty())
            return Confidence.broken;

        // 'broken' if 6 non-canary was broken by this, and that is at least 5% of all
        if (nonCanaryApplicationsBroken(statistics.version(), failingOnThis, productionOnThis))
            return Confidence.broken;

        // 'low' unless all unpinned canary applications are upgraded
        if (productionOnThis.with(UpgradePolicy.canary).unpinned().size() < all.withProductionDeployment().with(UpgradePolicy.canary).unpinned().size())
            return Confidence.low;

        // 'high' if 90% of all unpinned default upgrade applications upgraded
        if (productionOnThis.with(UpgradePolicy.defaultPolicy).unpinned().groupingBy(TenantAndApplicationId::from).size() >=
            all.withProductionDeployment().with(UpgradePolicy.defaultPolicy).unpinned().groupingBy(TenantAndApplicationId::from).size() * 0.9)
            return Confidence.high;

        return Confidence.normal;
    }

    /** Returns the version number of this Vespa version */
    public Version versionNumber() { return version; }

    /** Returns the sha of the release tag commit for this version in git */
    public String releaseCommit() { return releaseCommit; }
    
    /** Returns the time of the release commit */
    public Instant committedAt() { return committedAt; }
    
    /** Returns whether this is the current version of controllers in this system (the lowest version across all
     * controllers) */
    public boolean isControllerVersion() {
        return isControllerVersion;
    }

    /**
     * Returns whether this is the current version of the infrastructure of the system
     * (i.e the lowest version across all controllers and all config servers in all zones).
     * A goal of the controllers is to eventually (limited by safety and upgrade capacity) drive
     * all applications to this version.
     * 
     * Note that the self version may be higher than the current system version if
     * all config servers are not yet upgraded to the version of the controllers.
     */
    public boolean isSystemVersion() { return isSystemVersion; }

    /** Returns whether the artifacts of this release are available in the configured maven repository. */
    public boolean isReleased() { return isReleased; }

    /** Returns the versions of nodes allocated to system applications (across all zones) */
    public List<NodeVersion> nodeVersions() {
        return nodeVersions;
    }

    /** Returns the confidence we have in this versions suitability for production */
    public Confidence confidence() { return confidence; }

    @Override
    public int compareTo(VespaVersion other) {
        return this.versionNumber().compareTo(other.versionNumber());
    }
    
    @Override
    public int hashCode() { return versionNumber().hashCode(); }
    
    @Override
    public boolean equals(Object other) {
        if (other == this) return true;
        if ( ! (other instanceof VespaVersion)) return false;
        return ((VespaVersion)other).versionNumber().equals(this.versionNumber());
    }

    /** The confidence of a version. */
    public enum Confidence {

        /** Rollout was aborted. The system infrastructure should stay on, or roll back to, its current version */
        aborted,

        /** This version has been proven defective */
        broken,
        
        /** We don't have sufficient evidence that this version is working */
        low,

        /** We have sufficient evidence that this version is working */
        normal,

        /** This version works, but we want users to stop using it */
        legacy,

        /** We have overwhelming evidence that this version is working */
        high;
        
        /** Returns true if this confidence is at least as high as the given confidence */
        public boolean equalOrHigherThan(Confidence other) {
            return this.compareTo(other) >= 0;
        }

        /** Returns true if this can be changed to target at given instant */
        public boolean canChangeTo(Confidence target, SystemName system, Instant instant) {
            if (this.equalOrHigherThan(normal)) return true; // Confidence can always change from >= normal
            if (!target.equalOrHigherThan(normal)) return true; // Confidence can always change to < normal

            var hourOfDay = instant.atZone(ZoneOffset.UTC).getHour();
            var dayOfWeek = instant.atZone(ZoneOffset.UTC).getDayOfWeek();
            var hourEnd = system == SystemName.Public ? 13 : 11;
            // Confidence can only be raised between 05:00:00 and 11:59:59Z (13:59:59Z for public), and not during weekends or Friday.
            return    hourOfDay >= 5 && hourOfDay <= hourEnd
                   && dayOfWeek.getValue() < 5;
        }

    }

    private static boolean nonCanaryApplicationsBroken(Version version,
                                                       InstanceList failingOnThis,
                                                       InstanceList productionOnThis) {
        int failingNonCanaries = failingOnThis.startedFailingOn(version)
                                              .not().with(UpgradePolicy.canary)
                                              .groupingBy(TenantAndApplicationId::from).size();
        int productionNonCanaries = productionOnThis.not().with(UpgradePolicy.canary)
                                                    .groupingBy(TenantAndApplicationId::from).size();

        if (productionNonCanaries + failingNonCanaries == 0) return false;

        // 'broken' if 6 non-canary was broken by this, and that is at least 5% of all
        return failingNonCanaries >= 6 && failingNonCanaries >= productionOnThis.groupingBy(TenantAndApplicationId::from).size() * 0.05;
     }

}