summaryrefslogtreecommitdiffstats
path: root/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ControllerMaintenance.java
blob: a259ed2fdefdac24e0bfec2715e0a390974f5c17 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.vespa.hosted.controller.maintenance;

import com.yahoo.component.AbstractComponent;
import com.yahoo.component.annotation.Inject;
import com.yahoo.concurrent.maintenance.Maintainer;
import com.yahoo.config.provision.SystemName;
import com.yahoo.config.provision.zone.ZoneApi;
import com.yahoo.jdisc.Metric;
import com.yahoo.vespa.hosted.controller.Controller;
import com.yahoo.vespa.hosted.controller.api.integration.athenz.AthenzClientFactory;
import com.yahoo.vespa.hosted.controller.api.integration.user.UserManagement;

import java.time.Duration;
import java.time.temporal.TemporalUnit;
import java.util.Collections;
import java.util.List;
import java.util.Objects;
import java.util.concurrent.CopyOnWriteArrayList;
import java.util.stream.Collectors;

import static java.time.temporal.ChronoUnit.HOURS;
import static java.time.temporal.ChronoUnit.MINUTES;
import static java.time.temporal.ChronoUnit.SECONDS;

/**
 * Owner of the controller's maintenance jobs.
 * Every job is created here, once per implementing class (OS upgraders: once per cloud), and is
 * shut down again when this component is deconstructed. Each job runs in its own dedicated thread.
 *
 * @author bratseth
 */
public class ControllerMaintenance extends AbstractComponent {

    /** Kept as a separate reference so that it can be handed out through {@link #upgrader()} */
    private final Upgrader upgrader;

    /** All maintainers owned by this, in the order they were created */
    private final List<Maintainer> maintainers = new CopyOnWriteArrayList<>();

    /**
     * Creates all maintenance jobs, using run intervals suited to the system of the given controller.
     * The userManagement argument is part of the injected signature, but is not referenced here.
     */
    @Inject
    @SuppressWarnings("unused") // instantiated by Dependency Injection
    public ControllerMaintenance(Controller controller, Metric metric, UserManagement userManagement, AthenzClientFactory athenzClientFactory) {
        Intervals interval = new Intervals(controller.system());

        // Creation order is also the order maintainers are shut down in, see deconstruct()
        upgrader = new Upgrader(controller, interval.defaultInterval);
        maintainers.add(upgrader);
        maintainers.addAll(osUpgraders(controller, interval.osUpgrader));
        maintainers.add(new DeploymentExpirer(controller, interval.defaultInterval));
        maintainers.add(new DeploymentUpgrader(controller, interval.defaultInterval));
        maintainers.add(new DeploymentIssueReporter(controller, controller.serviceRegistry().deploymentIssues(), interval.defaultInterval));
        maintainers.add(new MetricsReporter(controller, metric, athenzClientFactory.createZmsClient()));
        maintainers.add(new OutstandingChangeDeployer(controller, interval.outstandingChangeDeployer));
        maintainers.add(new VersionStatusUpdater(controller, interval.versionStatusUpdater));
        maintainers.add(new ReadyJobsTrigger(controller, interval.readyJobsTrigger));
        maintainers.add(new DeploymentMetricsMaintainer(controller, interval.deploymentMetricsMaintainer));
        maintainers.add(new ApplicationOwnershipConfirmer(controller, interval.applicationOwnershipConfirmer, controller.serviceRegistry().ownershipIssues()));
        maintainers.add(new SystemUpgrader(controller, interval.systemUpgrader));
        maintainers.add(new JobRunner(controller, interval.jobRunner));
        maintainers.add(new OsVersionStatusUpdater(controller, interval.osVersionStatusUpdater));
        maintainers.add(new OsUpgradeScheduler(controller, interval.osUpgradeScheduler));
        maintainers.add(new ContactInformationMaintainer(controller, interval.contactInformationMaintainer));
        maintainers.add(new NameServiceDispatcher(controller, interval.nameServiceDispatcher));
        maintainers.add(new CostReportMaintainer(controller, interval.costReportMaintainer, controller.serviceRegistry().costReportConsumer()));
        maintainers.add(new ResourceMeterMaintainer(controller, interval.resourceMeterMaintainer, metric, controller.serviceRegistry().resourceDatabase()));
        maintainers.add(new ResourceTagMaintainer(controller, interval.resourceTagMaintainer, controller.serviceRegistry().resourceTagger()));
        maintainers.add(new ApplicationMetaDataGarbageCollector(controller, interval.applicationMetaDataGarbageCollector));
        maintainers.add(new ArtifactExpirer(controller, interval.artifactExpirer));
        maintainers.add(new HostInfoUpdater(controller, interval.hostInfoUpdater));
        maintainers.add(new ReindexingTriggerer(controller, interval.reindexingTriggerer));
        maintainers.add(new EndpointCertificateMaintainer(controller, interval.endpointCertificateMaintainer));
        maintainers.add(new TrafficShareUpdater(controller, interval.trafficShareUpdater));
        maintainers.add(new ArchiveUriUpdater(controller, interval.archiveUriUpdater));
        maintainers.add(new ArchiveAccessMaintainer(controller, metric, interval.archiveAccessMaintainer));
        maintainers.add(new TenantRoleMaintainer(controller, interval.tenantRoleMaintainer));
        maintainers.add(new ChangeRequestMaintainer(controller, interval.changeRequestMaintainer));
        maintainers.add(new VcmrMaintainer(controller, interval.vcmrMaintainer, metric));
        maintainers.add(new CloudTrialExpirer(controller, interval.defaultInterval));
        maintainers.add(new RetriggerMaintainer(controller, interval.retriggerMaintainer));
        maintainers.add(new UserManagementMaintainer(controller, interval.userManagementMaintainer, controller.serviceRegistry().roleMaintainer()));
        maintainers.add(new BillingDatabaseMaintainer(controller, interval.billingDatabaseMaintainer));
        maintainers.add(new MeteringMonitorMaintainer(controller, interval.meteringMonitorMaintainer, controller.serviceRegistry().resourceDatabase(), metric));
    }

    /** Returns the upgrader created by this */
    public Upgrader upgrader() { return upgrader; }

    /** Asks every maintainer to shut down, and only then waits for each of them to complete */
    @Override
    public void deconstruct() {
        for (Maintainer maintainer : maintainers) {
            maintainer.shutdown();
        }
        for (Maintainer maintainer : maintainers) {
            maintainer.awaitShutdown();
        }
    }

    /** Returns an unmodifiable list with one OS upgrader for each distinct cloud in the zone registry of controller */
    private static List<OsUpgrader> osUpgraders(Controller controller, Duration interval) {
        List<OsUpgrader> upgraders = controller.zoneRegistry().zones().controllerUpgraded().zones().stream()
                                               .map(ZoneApi::getCloudName)
                                               .distinct()
                                               .sorted()
                                               .map(cloudName -> new OsUpgrader(controller, interval, cloudName))
                                               .collect(Collectors.toList());
        return Collections.unmodifiableList(upgraders);
    }

    /**
     * The run interval of each maintainer, resolved for a given system.
     * In CD systems no interval is longer than {@link #MAX_CD_INTERVAL}.
     */
    private static class Intervals {

        /** Upper bound applied to all intervals when the system is a CD system */
        private static final Duration MAX_CD_INTERVAL = Duration.ofHours(1);

        private final SystemName system;

        private final Duration defaultInterval;
        private final Duration outstandingChangeDeployer;
        private final Duration versionStatusUpdater;
        private final Duration readyJobsTrigger;
        private final Duration deploymentMetricsMaintainer;
        private final Duration applicationOwnershipConfirmer;
        private final Duration systemUpgrader;
        private final Duration jobRunner;
        private final Duration osVersionStatusUpdater;
        private final Duration osUpgrader;
        private final Duration osUpgradeScheduler;
        private final Duration contactInformationMaintainer;
        private final Duration nameServiceDispatcher;
        private final Duration costReportMaintainer;
        private final Duration resourceMeterMaintainer;
        private final Duration resourceTagMaintainer;
        private final Duration applicationMetaDataGarbageCollector;
        private final Duration artifactExpirer;
        private final Duration hostInfoUpdater;
        private final Duration reindexingTriggerer;
        private final Duration endpointCertificateMaintainer;
        private final Duration trafficShareUpdater;
        private final Duration archiveUriUpdater;
        private final Duration archiveAccessMaintainer;
        private final Duration tenantRoleMaintainer;
        private final Duration changeRequestMaintainer;
        private final Duration vcmrMaintainer;
        private final Duration retriggerMaintainer;
        private final Duration userManagementMaintainer;
        private final Duration billingDatabaseMaintainer;
        private final Duration meteringMonitorMaintainer;

        public Intervals(SystemName system) {
            // Must be assigned first: duration(...) below reads this field
            this.system = Objects.requireNonNull(system);

            this.defaultInterval = duration(system.isCd() ? 1 : 5, MINUTES);
            this.outstandingChangeDeployer = duration(3, MINUTES);
            this.versionStatusUpdater = duration(3, MINUTES);
            this.readyJobsTrigger = duration(1, MINUTES);
            this.deploymentMetricsMaintainer = duration(10, MINUTES);
            this.applicationOwnershipConfirmer = duration(3, HOURS);
            this.systemUpgrader = duration(2, MINUTES);
            this.jobRunner = duration(system.isCd() ? 45 : 90, SECONDS);
            this.osVersionStatusUpdater = duration(2, MINUTES);
            this.osUpgrader = duration(1, MINUTES);
            this.osUpgradeScheduler = duration(3, HOURS);
            this.contactInformationMaintainer = duration(12, HOURS);
            this.nameServiceDispatcher = duration(10, SECONDS);
            this.costReportMaintainer = duration(2, HOURS);
            this.resourceMeterMaintainer = duration(3, MINUTES);
            this.resourceTagMaintainer = duration(30, MINUTES);
            this.applicationMetaDataGarbageCollector = duration(12, HOURS);
            this.artifactExpirer = duration(12, HOURS);
            this.hostInfoUpdater = duration(12, HOURS);
            this.reindexingTriggerer = duration(1, HOURS);
            this.endpointCertificateMaintainer = duration(1, HOURS);
            this.trafficShareUpdater = duration(5, MINUTES);
            this.archiveUriUpdater = duration(5, MINUTES);
            this.archiveAccessMaintainer = duration(10, MINUTES);
            this.tenantRoleMaintainer = duration(5, MINUTES);
            this.changeRequestMaintainer = duration(1, HOURS);
            this.vcmrMaintainer = duration(1, HOURS);
            this.retriggerMaintainer = duration(1, MINUTES);
            this.userManagementMaintainer = duration(12, HOURS);
            this.billingDatabaseMaintainer = duration(5, MINUTES);
            this.meteringMonitorMaintainer = duration(30, MINUTES);
        }

        /** Returns the given duration, capped at {@link #MAX_CD_INTERVAL} if this is a CD system */
        private Duration duration(long amount, TemporalUnit unit) {
            Duration requested = Duration.of(amount, unit);
            // Ensure that maintainer is given enough time to run in CD
            boolean exceedsCdLimit = system.isCd() && requested.compareTo(MAX_CD_INTERVAL) > 0;
            return exceedsCdLimit ? MAX_CD_INTERVAL : requested;
        }

    }

}