about summary refs log tree commit diff stats
path: root/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ApplicationOwnershipConfirmer.java
blob: d998413e675d7d9d3c227bf78465b795079c916c (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.vespa.hosted.controller.maintenance;

import com.yahoo.vespa.hosted.controller.Application;
import com.yahoo.vespa.hosted.controller.ApplicationController;
import com.yahoo.vespa.hosted.controller.Controller;
import com.yahoo.vespa.hosted.controller.Instance;
import com.yahoo.vespa.hosted.controller.api.identifiers.DeploymentId;
import com.yahoo.vespa.hosted.controller.api.integration.organization.AccountId;
import com.yahoo.vespa.hosted.controller.api.integration.organization.ApplicationSummary;
import com.yahoo.vespa.hosted.controller.api.integration.organization.IssueId;
import com.yahoo.vespa.hosted.controller.api.integration.organization.OwnershipIssues;
import com.yahoo.vespa.hosted.controller.api.integration.organization.User;
import com.yahoo.vespa.hosted.controller.application.ApplicationList;
import com.yahoo.vespa.hosted.controller.application.TenantAndApplicationId;
import com.yahoo.vespa.hosted.controller.tenant.Tenant;
import com.yahoo.yolean.Exceptions;

import java.time.Duration;
import java.util.HashMap;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.logging.Level;

/**
 * Periodically request application ownership confirmation through filing issues.
 *
 * When to file new issues, escalate inactive ones, etc., is handled by the enclosed OwnershipIssues.
 *
 * Each run only considers the applications which belong to the shard selected for that run,
 * see {@link #currentTick()} and {@link #isInShard(long, TenantAndApplicationId)}.
 *
 * @author jonmv
 */
public class ApplicationOwnershipConfirmer extends ControllerMaintainer {

    private final OwnershipIssues ownershipIssues;
    private final ApplicationController applications;
    private final int shards;

    /** Creates an ownership confirmer which divides the applications into 24 shards. */
    public ApplicationOwnershipConfirmer(Controller controller, Duration interval, OwnershipIssues ownershipIssues) {
        this(controller, interval, ownershipIssues, 24);
    }

    /**
     * Creates an ownership confirmer which divides the applications into the given number of shards.
     *
     * @throws IllegalArgumentException if {@code shards} is not a positive number
     */
    public ApplicationOwnershipConfirmer(Controller controller, Duration interval, OwnershipIssues ownershipIssues, int shards) {
        super(controller, interval);
        this.ownershipIssues = ownershipIssues;
        this.applications = controller.applications();
        if (shards <= 0) throw new IllegalArgumentException("shards must be a positive number, but got " + shards);
        this.shards = shards;
    }

    /** Runs the three maintenance passes and returns the mean of the values they report. */
    @Override
    protected double maintain() {
        return ( confirmApplicationOwnerships() +
                 ensureConfirmationResponses() +
                 updateConfirmedApplicationOwners() )
                / 3;
    }

    /**
     * File an ownership issue with the owners of all applications we know about.
     *
     * Only applications with a project id and a production deployment, which were created more than
     * 90 days ago, and which belong to the current shard, are considered. Applications whose tenant
     * has no contact are counted as attempts, but no issue is filed for them.
     */
    private double confirmApplicationOwnerships() {
        AtomicInteger attempts = new AtomicInteger(0);
        AtomicInteger failures = new AtomicInteger(0);
        // Both of these are the same for every application, so they are computed once per pass.
        var createdBefore = controller().clock().instant().minus(Duration.ofDays(90));
        long tick = currentTick();
        applications()
                .withProjectId()
                .withProductionDeployment()
                .asList()
                .stream()
                .filter(application -> application.createdAt().isBefore(createdBefore))
                .filter(application -> isInShard(tick, application.id()))
                .forEach(application -> {
                    try {
                        attempts.incrementAndGet();
                        tenantOf(application.id()).contact()
                                .flatMap(contact -> ownershipIssues.confirmOwnership(application.ownershipIssueId(),
                                                                                      summaryOf(application.id()),
                                                                                      determineAssignee(application),
                                                                                      determineLegacyAssignee(application),
                                                                                      contact))
                                .ifPresent(newIssueId -> store(newIssueId, application.id()));
                    }
                    catch (RuntimeException e) { // Catch errors due to wrong data in the controller, or issues client timeout.
                        failures.incrementAndGet();
                        log.log(Level.INFO, "Exception caught when attempting to file an issue for '" + application.id() + "': " + Exceptions.toMessageString(e));
                    }
                });
        return asSuccessFactorDeviation(attempts.get(), failures.get());
    }

    /**
     * Returns the number of shard ticks since epoch: the current time in millis, multiplied by the number
     * of participants in the curator cluster (at least 1), divided by the maintenance interval in millis,
     * and rounded to the nearest whole number.
     */
    private long currentTick() {
        // NOTE(review): scaling by cluster size presumably accounts for the controllers taking turns
        // running this maintainer, so the shard advances once per run across the cluster — confirm.
        double participants = Math.max(1, controller().curator().cluster().size());
        return Math.round((controller().clock().millis() * participants / interval().toMillis()));
    }

    /**
     * Returns whether the given application belongs to the shard selected by the given tick, i.e.,
     * whether the tick plus the hash code of the application id is a multiple of the number of shards.
     */
    private boolean isInShard(long tick, TenantAndApplicationId id) {
        return (tick + id.hashCode()) % shards == 0;
    }

    /**
     * Returns a summary of the given application: its activity, the build time of its last revision, if any,
     * and document count, query rate and write rate for each deployment of each of its instances.
     */
    private ApplicationSummary summaryOf(TenantAndApplicationId application) {
        var app = applications.requireApplication(application);
        var metrics = new HashMap<DeploymentId, ApplicationSummary.Metric>();
        for (Instance instance : app.instances().values()) {
            for (var kv : instance.deployments().entrySet()) {
                var zone = kv.getKey();
                var deploymentMetrics = kv.getValue().metrics();
                metrics.put(new DeploymentId(instance.id(), zone),
                            new ApplicationSummary.Metric(deploymentMetrics.documentCount(),
                                                          deploymentMetrics.queriesPerSecond(),
                                                          deploymentMetrics.writesPerSecond()));
            }
        }
        return new ApplicationSummary(app.id().defaultInstance(), app.activity().lastQueried(), app.activity().lastWritten(),
                                      app.revisions().last().flatMap(version -> version.buildTime()), metrics);
    }

    /** Escalate ownership issues which have not been closed before a defined amount of time has passed. */
    private double ensureConfirmationResponses() {
        AtomicInteger attempts = new AtomicInteger(0);
        AtomicInteger failures = new AtomicInteger(0);
        long tick = currentTick(); // Computed once, so the whole pass agrees on which shard is current.
        for (Application application : applications())
            if (isInShard(tick, application.id()))
                application.ownershipIssueId().ifPresent(issueId -> {
                    try {
                        attempts.incrementAndGet();
                        Tenant tenant = tenantOf(application.id());
                        ownershipIssues.ensureResponse(issueId, tenant.contact());
                    }
                    catch (RuntimeException e) {
                        failures.incrementAndGet();
                        log.log(Level.INFO, "Exception caught when attempting to escalate issue with id '" + issueId + "': " + Exceptions.toMessageString(e));
                    }
                });
        return asSuccessFactorDeviation(attempts.get(), failures.get());
    }

    /**
     * Stores the confirmed owner, when the ownership issue reports one, for each application in the current
     * shard which has a project id, a production deployment, and an ownership issue.
     */
    private double updateConfirmedApplicationOwners() {
        AtomicInteger attempts = new AtomicInteger(0);
        AtomicInteger failures = new AtomicInteger(0);
        long tick = currentTick(); // Computed once, so the whole pass agrees on which shard is current.
        applications()
                .withProjectId()
                .withProductionDeployment()
                .asList()
                .stream()
                .filter(application -> isInShard(tick, application.id()))
                .filter(application -> application.ownershipIssueId().isPresent())
                .forEach(application -> {
                    attempts.incrementAndGet();
                    IssueId issueId = application.ownershipIssueId().get();
                    try {
                        ownershipIssues.getConfirmedOwner(issueId).ifPresent(owner -> {
                            controller().applications().lockApplicationIfPresent(application.id(), lockedApplication ->
                                    controller().applications().store(lockedApplication.withOwner(owner)));
                        });
                    }
                    catch (RuntimeException e) {
                        failures.incrementAndGet();
                        log.log(Level.INFO, "Exception caught when attempting to find confirmed owner of issue with id '" + issueId + "': " + Exceptions.toMessageString(e));
                    }
                });
        return asSuccessFactorDeviation(attempts.get(), failures.get());
    }

    /** Returns the applications which are currently readable from the controller. */
    private ApplicationList applications() {
        return ApplicationList.from(controller().applications().readable());
    }

    /** Returns the issue owner of the given application, or null if it has none. */
    private AccountId determineAssignee(Application application) {
        return application.issueOwner().orElse(null);
    }

    /** Returns the user owner of the given application, or null if it has none. */
    private User determineLegacyAssignee(Application application) {
        return application.userOwner().orElse(null);
    }

    /**
     * Returns the tenant which owns the given application.
     *
     * @throws IllegalStateException if no such tenant exists
     */
    private Tenant tenantOf(TenantAndApplicationId applicationId) {
        return controller().tenants().get(applicationId.tenant())
                .orElseThrow(() -> new IllegalStateException("No tenant found for application " + applicationId));
    }

    /** Stores the given ownership issue id with the given application, under lock, if the application is present. */
    protected void store(IssueId issueId, TenantAndApplicationId applicationId) {
        controller().applications().lockApplicationIfPresent(applicationId, application ->
                controller().applications().store(application.withOwnershipIssueId(issueId)));
    }
}