summaryrefslogtreecommitdiffstats
path: root/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/NodeRepository.java
blob: a4a4c42d9c1480c98c4ac202c8717ef3b748a2de (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.vespa.hosted.provision;

import com.yahoo.component.AbstractComponent;
import com.yahoo.component.annotation.Inject;
import com.yahoo.concurrent.maintenance.JobControl;
import com.yahoo.config.provision.ApplicationTransaction;
import com.yahoo.config.provision.ClusterSpec;
import com.yahoo.config.provision.DockerImage;
import com.yahoo.config.provision.EndpointsChecker.HealthChecker;
import com.yahoo.config.provision.EndpointsChecker.HealthCheckerProvider;
import com.yahoo.config.provision.NodeFlavors;
import com.yahoo.config.provision.Zone;
import com.yahoo.config.provisioning.NodeRepositoryConfig;
import com.yahoo.vespa.curator.Curator;
import com.yahoo.vespa.flags.FlagSource;
import com.yahoo.vespa.flags.JacksonFlag;
import com.yahoo.vespa.flags.PermanentFlags;
import com.yahoo.vespa.flags.custom.SharedHost;
import com.yahoo.vespa.hosted.provision.Node.State;
import com.yahoo.vespa.hosted.provision.applications.Applications;
import com.yahoo.vespa.hosted.provision.archive.ArchiveUriManager;
import com.yahoo.vespa.hosted.provision.autoscale.MetricsDb;
import com.yahoo.vespa.hosted.provision.lb.LoadBalancer;
import com.yahoo.vespa.hosted.provision.lb.LoadBalancerInstance;
import com.yahoo.vespa.hosted.provision.lb.LoadBalancers;
import com.yahoo.vespa.hosted.provision.maintenance.InfrastructureVersions;
import com.yahoo.vespa.hosted.provision.node.Agent;
import com.yahoo.vespa.hosted.provision.node.NodeAcl;
import com.yahoo.vespa.hosted.provision.node.Nodes;
import com.yahoo.vespa.hosted.provision.os.OsVersions;
import com.yahoo.vespa.hosted.provision.persistence.CuratorDb;
import com.yahoo.vespa.hosted.provision.persistence.DnsNameResolver;
import com.yahoo.vespa.hosted.provision.persistence.JobControlFlags;
import com.yahoo.vespa.hosted.provision.persistence.NameResolver;
import com.yahoo.vespa.hosted.provision.provisioning.ContainerImages;
import com.yahoo.vespa.hosted.provision.provisioning.FirmwareChecks;
import com.yahoo.vespa.hosted.provision.provisioning.HostResourcesCalculator;
import com.yahoo.vespa.hosted.provision.provisioning.NodeResourceLimits;
import com.yahoo.vespa.hosted.provision.provisioning.ProvisionServiceProvider;
import com.yahoo.vespa.hosted.provision.provisioning.ProvisionServiceProvider.ProtoHealthChecker;
import com.yahoo.vespa.orchestrator.Orchestrator;

import java.time.Clock;
import java.util.List;
import java.util.Optional;

/**
 * The top level singleton in the node repo, providing access to all its state as child objects.
 *
 * @author bratseth
 */
public class NodeRepository extends AbstractComponent implements HealthCheckerProvider {

    private final CuratorDb db;
    private final Clock clock;
    private final Zone zone;
    private final Nodes nodes;
    private final NodeFlavors flavors;
    private final HostResourcesCalculator resourcesCalculator;
    private final NodeResourceLimits nodeResourceLimits;
    private final NameResolver nameResolver;
    private final OsVersions osVersions;
    private final InfrastructureVersions infrastructureVersions;
    private final FirmwareChecks firmwareChecks;
    private final ContainerImages containerImages;
    private final ArchiveUriManager archiveUriManager;
    private final JobControl jobControl;
    private final Applications applications;
    private final LoadBalancers loadBalancers;
    private final FlagSource flagSource;
    private final MetricsDb metricsDb;
    private final Orchestrator orchestrator;
    private final int spareCount;
    private final ProtoHealthChecker healthChecker;
    private final JacksonFlag<SharedHost> sharedHosts;

    /**
     * Creates a node repository from a zookeeper provider.
     * This will use the system time to make time-sensitive decisions
     */
    @Inject
    public NodeRepository(NodeRepositoryConfig config,
                          NodeFlavors flavors,
                          ProvisionServiceProvider provisionServiceProvider,
                          Curator curator,
                          Zone zone,
                          FlagSource flagSource,
                          MetricsDb metricsDb,
                          Orchestrator orchestrator) {
        this(flavors,
             provisionServiceProvider,
             curator,
             Clock.systemUTC(),
             zone,
             new DnsNameResolver(),
             DockerImage.fromString(config.containerImage()),
             optionalImage(config.tenantContainerImage()),
             optionalImage(config.tenantGpuContainerImage()),
             flagSource,
             metricsDb,
             orchestrator,
             config.useCuratorClientCache(),
             zone.environment().isProduction() && !zone.cloud().dynamicProvisioning() && !zone.system().isCd() ? 1 : 0);
    }

    /**
     * Creates a node repository from a zookeeper provider and a clock instance
     * which will be used for time-sensitive decisions.
     */
    public NodeRepository(NodeFlavors flavors,
                          ProvisionServiceProvider provisionServiceProvider,
                          Curator curator,
                          Clock clock,
                          Zone zone,
                          NameResolver nameResolver,
                          DockerImage containerImage,
                          Optional<DockerImage> tenantContainerImage,
                          Optional<DockerImage> tenantGpuContainerImage,
                          FlagSource flagSource,
                          MetricsDb metricsDb,
                          Orchestrator orchestrator,
                          boolean useCuratorClientCache,
                          int spareCount) {
        if (provisionServiceProvider.getHostProvisioner().isPresent() != zone.cloud().dynamicProvisioning())
            throw new IllegalArgumentException(String.format(
                    "dynamicProvisioning property must be 1-to-1 with availability of HostProvisioner, was: dynamicProvisioning=%s, hostProvisioner=%s",
                    zone.cloud().dynamicProvisioning(), provisionServiceProvider.getHostProvisioner().map(__ -> "present").orElse("empty")));

        this.flagSource = flagSource;
        this.db = new CuratorDb(flavors, curator, clock, useCuratorClientCache);
        this.zone = zone;
        this.clock = clock;
        this.applications = new Applications(db);
        this.nodes = new Nodes(db, zone, clock, orchestrator, applications);
        this.flavors = flavors;
        this.resourcesCalculator = provisionServiceProvider.getHostResourcesCalculator();
        this.nodeResourceLimits = new NodeResourceLimits(this);
        this.nameResolver = nameResolver;
        this.osVersions = new OsVersions(this);
        this.infrastructureVersions = new InfrastructureVersions(db);
        this.firmwareChecks = new FirmwareChecks(db, clock);
        this.containerImages = new ContainerImages(containerImage, tenantContainerImage, tenantGpuContainerImage);
        this.archiveUriManager = new ArchiveUriManager(db, zone);
        this.jobControl = new JobControl(new JobControlFlags(db, flagSource));
        this.loadBalancers = new LoadBalancers(db);
        this.metricsDb = metricsDb;
        this.orchestrator = orchestrator;
        this.spareCount = spareCount;
        this.sharedHosts = PermanentFlags.SHARED_HOST.bindTo(flagSource());
        this.healthChecker = provisionServiceProvider.getHealthChecker();
        nodes.rewrite();
    }

    /** Returns the curator database client used by this */
    public CuratorDb database() { return db; }

    /** Returns the nodes of the node repo. */
    public Nodes nodes() { return nodes; }

    /** Returns the name resolver used to resolve hostname and ip addresses */
    public NameResolver nameResolver() { return nameResolver; }

    /** Returns the OS versions to use for nodes in this */
    public OsVersions osVersions() { return osVersions; }

    /** Returns the infrastructure versions to use for nodes in this */
    public InfrastructureVersions infrastructureVersions() { return infrastructureVersions; }

    /** Returns the status of firmware checks for hosts managed by this. */
    public FirmwareChecks firmwareChecks() { return firmwareChecks; }

    /** Returns the container images to use for nodes in this. */
    public ContainerImages containerImages() { return containerImages; }

    /** Returns the archive URIs to use for nodes in this. */
    public ArchiveUriManager archiveUriManager() { return archiveUriManager; }

    /** Returns the status of maintenance jobs managed by this. */
    public JobControl jobControl() { return jobControl; }

    /** Returns this node repo's view of the applications deployed to it */
    public Applications applications() { return applications; }

    /** Returns the load balancers available in this node repo */
    public LoadBalancers loadBalancers() { return loadBalancers; }

    public NodeFlavors flavors() { return flavors; }

    public HostResourcesCalculator resourcesCalculator() { return resourcesCalculator; }

    public NodeResourceLimits nodeResourceLimits() { return nodeResourceLimits; }

    public FlagSource flagSource() { return flagSource; }

    public MetricsDb metricsDb() { return metricsDb; }

    public Orchestrator orchestrator() { return orchestrator; }

    public NodeRepoStats computeStats() { return NodeRepoStats.computeOver(this); }

    /** Returns the time-keeper of this */
    public Clock clock() { return clock; }

    /** Returns the zone of this */
    public Zone zone() { return zone; }

    /** The number of nodes we should ensure has free capacity for node failures whenever possible */
    public int spareCount() { return spareCount; }

    /** Returns whether nodes must be allocated to hosts that are exclusive to the cluster type. */
    public boolean exclusiveClusterType(ClusterSpec cluster) {
        return sharedHosts.value().hasClusterType(cluster.type().name());
    }

    /**
     * Returns whether nodes are allocated exclusively in this instance given this cluster spec.
     * Exclusive allocation requires that the wanted node resources matches the advertised resources of the node
     * perfectly.
     */
    public boolean exclusiveAllocation(ClusterSpec clusterSpec) {
        return clusterSpec.isExclusive() ||
               ( clusterSpec.type().isContainer() && zone.system().isPublic() && !zone.environment().isTest() ) ||
               ( !zone().cloud().allowHostSharing() && !sharedHosts.value().supportsClusterType(clusterSpec.type().name()));
    }

    /** Whether the nodes of this cluster must be running on hosts that are specifically provisioned for the application. */
    public boolean exclusiveProvisioning(ClusterSpec clusterSpec) {
        return !zone.cloud().allowHostSharing() && clusterSpec.isExclusive();
    }

    /**
     * Returns ACLs for the children of the given host.
     *
     * @param host node for which to generate ACLs
     * @return the list of node ACLs
     */
    public List<NodeAcl> getChildAcls(Node host) {
        if ( ! host.type().isHost()) throw new IllegalArgumentException("Only hosts have children");
        NodeList allNodes = nodes().list();
        return allNodes.childrenOf(host)
                       .mapToList(childNode -> childNode.acl(allNodes, loadBalancers, zone));
    }

    /** Removes this application: all nodes are set dirty. */
    public void remove(ApplicationTransaction transaction) {
        NodeList applicationNodes = nodes().list().owner(transaction.application());
        db.writeTo(State.dirty,
                   applicationNodes.asList(),
                   Agent.system,
                   Optional.of("Application is removed"),
                   transaction);
        applications.remove(transaction);
    }

    private static Optional<DockerImage> optionalImage(String image) {
        return Optional.of(image).filter(s -> !s.isEmpty()).map(DockerImage::fromString);
    }

    @Override
    public HealthChecker getHealthChecker() {
        return endpoint -> healthChecker.healthy(endpoint,
                                                 loadBalancers.list(endpoint.applicationId())
                                                              .cluster(endpoint.clusterName())
                                                              .first()
                                                              .flatMap(LoadBalancer::instance)
                                                              .flatMap(LoadBalancerInstance::idSeed));
    }

}