aboutsummaryrefslogtreecommitdiffstats
path: root/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/NodeList.java
blob: 957996f05e40d6651ad248be53ac1691f0d97bc7 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.vespa.hosted.provision;

import com.yahoo.collections.AbstractFilteringList;
import com.yahoo.component.Version;
import com.yahoo.config.provision.ApplicationId;
import com.yahoo.config.provision.ClusterResources;
import com.yahoo.config.provision.ClusterSpec;
import com.yahoo.config.provision.NodeResources;
import com.yahoo.config.provision.NodeType;
import com.yahoo.vespa.hosted.provision.node.ClusterId;

import java.util.ArrayList;
import java.util.Comparator;
import java.util.EnumSet;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.Set;
import java.util.concurrent.atomic.AtomicReference;
import java.util.function.Function;
import java.util.stream.Collectors;
import java.util.stream.Stream;

import static java.util.stream.Collectors.collectingAndThen;

/**
 * A filterable node list. The result of a filter operation is immutable.
 *
 * @author bratseth
 * @author mpolden
 */
public class NodeList extends AbstractFilteringList<Node, NodeList> {

    private static final NodeList EMPTY = new NodeList(List.of(), false);

    /**
     * A lazily populated cache of parent-child relationships. This exists to improve the speed of parent<->child
     * lookup which is a frequent operation
     */
    private final AtomicReference<Map<String, NodeFamily>> nodeCache = new AtomicReference<>(null);

    /** A lazily populated cache of the primary addresses in the IP config of every node in this list */
    private final AtomicReference<Set<String>> ipCache = new AtomicReference<>(null);

    /**
     * Creates a node list of the given nodes.
     *
     * @param nodes  the nodes of this list
     * @param negate forwarded as-is to {@link AbstractFilteringList}
     */
    protected NodeList(List<Node> nodes, boolean negate) {
        super(nodes, negate, NodeList::new);
    }

    /** Returns the node with the given hostname from this list, or empty if it is not present */
    public Optional<Node> node(String hostname) {
        return get(hostname).map(NodeFamily::node);
    }

    /** Returns the subset of nodes which are retired */
    public NodeList retired() {
        return matching(node -> node.allocation().isPresent() && node.allocation().get().membership().retired());
    }

    /** Returns the subset of nodes that are being deprovisioned */
    public NodeList deprovisioning() {
        return matching(node -> node.status().wantToRetire() && node.status().wantToDeprovision());
    }

    /**
     * Returns the subset of nodes that are being rebuilt
     *
     * @param soft if true, returns the nodes that want to rebuild without wanting to retire,
     *             otherwise the nodes that want to both rebuild and retire
     */
    public NodeList rebuilding(boolean soft) {
        return matching(node -> {
            if (soft) {
                return !node.status().wantToRetire() && node.status().wantToRebuild();
            }
            return node.status().wantToRetire() && node.status().wantToRebuild();
        });
    }

    /** Returns the subset of nodes which are removable */
    public NodeList removable() {
        return matching(node -> node.allocation().isPresent() && node.allocation().get().removable());
    }

    /** Returns the subset of nodes which are reusable immediately after removal */
    public NodeList reusable() {
        return matching(node -> node.allocation().isPresent() && node.allocation().get().reusable());
    }

    /** Returns the subset of nodes having exactly the given resources */
    public NodeList resources(NodeResources resources) { return matching(node -> node.resources().equals(resources)); }

    /** Returns the subset of nodes which have a replaceable root disk */
    public NodeList replaceableRootDisk() {
        return matching(node -> node.resources().storageType() == NodeResources.StorageType.remote);
    }

    /** Returns the subset of nodes which satisfy the given resources */
    public NodeList satisfies(NodeResources resources) { return matching(node -> node.resources().satisfies(resources)); }

    /** Returns the subset of nodes not in the given set */
    public NodeList except(Set<Node> nodes) {
        return matching(node -> ! nodes.contains(node));
    }

    /** Returns the subset of nodes excluding given node */
    public NodeList except(Node node) {
        return except(Set.of(node));
    }

    /** Returns the subset of nodes assigned to the given cluster type */
    public NodeList type(ClusterSpec.Type type) {
        return matching(node -> node.allocation().isPresent() && node.allocation().get().membership().cluster().type().equals(type));
    }

    /** Returns the subset of nodes that run containers */
    public NodeList container() {
        return matching(node -> node.allocation().isPresent() && node.allocation().get().membership().cluster().type().isContainer());
    }

    /** Returns the subset of nodes that run a stateless service */
    public NodeList stateless() {
        return matching(node -> node.allocation().isPresent() && ! node.allocation().get().membership().cluster().isStateful());
    }

    /** Returns the subset of nodes that run a stateful service */
    public NodeList stateful() {
        return matching(node -> node.allocation().isPresent() && node.allocation().get().membership().cluster().isStateful());
    }

    /** Returns the subset of nodes that are currently changing their Vespa version */
    public NodeList changingVersion() {
        return matching(node -> node.status().vespaVersion().isPresent() &&
                                node.allocation().isPresent() &&
                                !node.status().vespaVersion().get().equals(node.allocation().get().membership().cluster().vespaVersion()));
    }

    /** Returns the subset of nodes with want to fail set to true */
    public NodeList failing() {
        return matching(node -> node.status().wantToFail());
    }

    /** Returns the subset of nodes that are currently changing their OS version to given version */
    public NodeList changingOsVersionTo(Version version) {
        return matching(node -> node.status().osVersion().changingTo(version));
    }

    /** Returns the subset of nodes that are currently changing their OS version */
    public NodeList changingOsVersion() {
        return matching(node -> node.status().osVersion().changing());
    }

    /** Returns a copy of this sorted by current OS version (lowest to highest) */
    public NodeList byIncreasingOsVersion() {
        return sortedBy(Comparator.comparing(node -> node.status()
                                                         .osVersion()
                                                         .current()
                                                         .orElse(Version.emptyVersion)));
    }

    /** Returns the subset of nodes that are currently on a lower version than the given version */
    public NodeList osVersionIsBefore(Version version) {
        return matching(node -> node.status().osVersion().isBefore(version));
    }

    /** Returns the subset of nodes that are currently on the given OS version */
    public NodeList onOsVersion(Version version) {
        return matching(node -> node.status().osVersion().matches(version));
    }

    /** Returns the subset of nodes assigned to the given cluster */
    public NodeList cluster(ClusterSpec.Id cluster) {
        return matching(node -> node.allocation().isPresent() && node.allocation().get().membership().cluster().id().equals(cluster));
    }

    /** Returns the subset of nodes owned by the given application */
    public NodeList owner(ApplicationId application) {
        return matching(node -> node.allocation().map(a -> a.owner().equals(application)).orElse(false));
    }

    /** Returns the subset of nodes allocated to a tester instance */
    public NodeList tester() {
        return matching(node -> node.allocation().isPresent() && node.allocation().get().owner().instance().isTester());
    }

    /** Returns the subset of nodes matching any of the given node type(s) */
    public NodeList nodeType(NodeType first, NodeType... rest) {
        if (rest.length == 0) {
            return matching(node -> node.type() == first);
        }
        EnumSet<NodeType> nodeTypes = EnumSet.of(first, rest);
        return matching(node -> nodeTypes.contains(node.type()));
    }

    /** Returns the subset of nodes of the host type */
    public NodeList hosts() {
        return nodeType(NodeType.host);
    }

    /** Returns the subset of nodes that are parents, i.e. the nodes which have no parent hostname themselves */
    public NodeList parents() {
        return matching(node -> node.parentHostname().isEmpty());
    }

    /** Returns the child nodes of the given parent node */
    public NodeList childrenOf(String hostname) {
        NodeList children = get(hostname).map(NodeFamily::children).map(NodeList::copyOf).orElse(EMPTY);
        // Fallback, in case the parent itself is not in this list
        return children.isEmpty() ? matching(node -> node.hasParent(hostname)) : children;
    }

    /** Returns the child nodes of the given parent node, looked up by the hostname of the parent */
    public NodeList childrenOf(Node parent) {
        return childrenOf(parent.hostname());
    }

    /** Returns the subset of nodes that are in any of the given state(s) */
    public NodeList state(Node.State first, Node.State... rest) {
        if (rest.length == 0) {
            return matching(node -> node.state() == first);
        }
        return state(EnumSet.of(first, rest));
    }

    /** Returns the subset of nodes that are in any of the given state(s) */
    public NodeList state(Set<Node.State> nodeStates) {
        return matching(node -> nodeStates.contains(node.state()));
    }

    /** Returns the subset of nodes which have a record of being down */
    public NodeList down() { return matching(Node::isDown); }

    /** Returns the subset of nodes which are being retired */
    public NodeList retiring() {
        return matching(node -> node.status().wantToRetire() || node.status().preferToRetire());
    }

    /** Returns the parent nodes of the given child nodes. Children whose parent is not in this list are skipped */
    public NodeList parentsOf(NodeList children) {
        return children.stream()
                       .map(this::parentOf)
                       .flatMap(Optional::stream)
                       .collect(collectingAndThen(Collectors.toList(), NodeList::copyOf));
    }

    /** Returns the parent node of the given child node, or empty if it has no parent or the parent is not in this list */
    public Optional<Node> parentOf(Node child) {
        return child.parentHostname().flatMap(this::node);
    }

    /** Returns the nodes contained in the group identified by given index */
    public NodeList group(int index) {
        return matching(n -> n.allocation().isPresent() &&
                             n.allocation().get().membership().cluster().group().equals(Optional.of(ClusterSpec.Group.from(index))));
    }

    /** Returns the hostnames of nodes in this */
    public Set<String> hostnames() {
        return stream().map(Node::hostname).collect(Collectors.toUnmodifiableSet());
    }

    /** Returns the stateful clusters on nodes in this */
    public Set<ClusterId> statefulClusters() {
        return stream().filter(node -> node.allocation().isPresent() &&
                                       node.allocation().get().membership().cluster().isStateful())
                       .map(node -> new ClusterId(node.allocation().get().owner(),
                                                  node.allocation().get().membership().cluster().id()))
                       .collect(Collectors.toUnmodifiableSet());
    }

    /**
     * Returns the requested resources of the nodes in this
     *
     * @throws IllegalStateException if there are no nodes in this list, or they do not all belong to the same cluster
     */
    public NodeResources requestedResources() {
        ensureSingleCluster();
        if (isEmpty()) throw new IllegalStateException("No nodes");
        return first().get().allocation().get().requestedResources();
    }

    /**
     * Returns the cluster spec of the nodes in this, without any group designation
     *
     * @throws IllegalStateException if there are no nodes in this list, or they do not all belong to the same cluster
     */
    public ClusterSpec clusterSpec() {
        ensureSingleCluster();
        if (isEmpty()) throw new IllegalStateException("No nodes");
        return first().get().allocation().get().membership().cluster().with(Optional.empty());
    }

    /**
     * Returns the resources of the nodes of this: The node count, the number of distinct groups,
     * and the node resources. An empty list yields zero nodes, zero groups and unspecified node resources.
     *
     * NOTE: If the nodes do not all have the same values of node resources, the resources of the first node
     *       in this list are returned.
     *
     * @throws IllegalStateException if the nodes in this do not all belong to the same cluster
     */
    public ClusterResources toResources() {
        ensureSingleCluster();
        if (isEmpty()) return new ClusterResources(0, 0, NodeResources.unspecified());
        return new ClusterResources(size(),
                                    // Every node is expected to have a group here; get() throws if one does not
                                    (int)stream().map(node -> node.allocation().get().membership().cluster().group().get())
                                                 .distinct()
                                                 .count(),
                                    first().get().resources());
    }

    /**
     * Returns the nodes that are allocated on an exclusive network switch within its cluster.
     * Nodes with no known switch are included in the result.
     *
     * @param clusterHosts the hosts of the nodes in this, used to look up the switch of each node
     * @throws IllegalStateException if the nodes in this do not all belong to the same cluster
     * @throws IllegalArgumentException if a node has a parent which is not among the given cluster hosts
     */
    public NodeList onExclusiveSwitch(NodeList clusterHosts) {
        ensureSingleCluster();
        // Number of cluster hosts connected to each switch
        Map<String, Long> switchCount = clusterHosts.stream()
                                                    .flatMap(host -> host.switchHostname().stream())
                                                    .collect(Collectors.groupingBy(Function.identity(),
                                                                                   Collectors.counting()));
        return matching(node -> {
            Optional<Node> nodeOnSwitch = clusterHosts.parentOf(node);
            if (node.parentHostname().isPresent()) {
                if (nodeOnSwitch.isEmpty()) {
                    throw new IllegalArgumentException("Parent of " + node + ", " + node.parentHostname().get() +
                                                       ", not found in given cluster hosts");
                }
            } else {
                // A node without a parent is itself the one connected to the switch
                nodeOnSwitch = Optional.of(node);
            }
            Optional<String> allocatedSwitch = nodeOnSwitch.flatMap(Node::switchHostname);
            // A parentless node that is not among the given cluster hosts may be on a switch which was never
            // counted above. No cluster host shares such a switch, so count it as 0 instead of unboxing null
            return allocatedSwitch.isEmpty() || switchCount.getOrDefault(allocatedSwitch.get(), 0L) <= 1;
        });
    }

    /**
     * Returns the number of unused IP addresses in the pool, assuming any and all unaccounted for hostnames
     * in the pool are resolved to exactly 1 IP address (or 2 if dual-stack).
     */
    public int eventuallyUnusedIpAddressCount(Node host) {
        // The count in this method relies on the size of the IP address pool if that's non-empty,
        // otherwise fall back to the address/hostname pool.
        if (host.ipConfig().pool().ips().isEmpty()) {
            Set<String> allHostnames = cache().keySet();
            return (int) host.ipConfig().pool().hostnames().stream()
                             .filter(hostname -> !allHostnames.contains(hostname.value()))
                             .count();
        }
        // Populate the IP cache on first use, and reuse it on later calls
        Set<String> allIps = ipCache.updateAndGet(old ->
            old != null ? old : stream().flatMap(node -> node.ipConfig().primary().stream())
                                        .collect(Collectors.toUnmodifiableSet())
        );
        return (int) host.ipConfig().pool().ips().stream()
                         .filter(address -> !allIps.contains(address))
                         .count();
    }

    /**
     * Verifies that all nodes in this are allocated, and to a cluster with the same id. An empty list passes.
     *
     * @throws IllegalStateException if some node is not allocated, or the nodes have different cluster ids
     */
    private void ensureSingleCluster() {
        if (isEmpty()) return;

        if (stream().anyMatch(node -> node.allocation().isEmpty()))
            throw new IllegalStateException("Some nodes are not allocated to a cluster");

        ClusterSpec firstNodeSpec = first().get().allocation().get().membership().cluster().with(Optional.empty());
        if (stream().map(node -> node.allocation().get().membership().cluster().with(Optional.empty()))
                    .anyMatch(clusterSpec -> ! clusterSpec.id().equals(firstNodeSpec.id())))
            throw new IllegalStateException("Nodes belong to multiple clusters");
    }

    /** Returns the nodes of this as a stream */
    public Stream<Node> stream() { return asList().stream(); }

    /** Returns a node list containing the given nodes */
    public static NodeList of(Node ... nodes) {
        return copyOf(List.of(nodes));
    }

    /** Returns a node list containing the given nodes. The shared empty instance is returned for an empty list */
    public static NodeList copyOf(List<Node> nodes) {
        if (nodes.isEmpty()) return EMPTY;
        return new NodeList(nodes, false);
    }

    @Override
    public String toString() {
        return asList().toString();
    }

    @Override
    public int hashCode() { return asList().hashCode(); }

    @Override
    public boolean equals(Object other) {
        if (other == this) return true;
        if ( ! (other instanceof NodeList that)) return false;
        return this.asList().equals(that.asList());
    }

    /** Get node family, by given hostname */
    private Optional<NodeFamily> get(String hostname) {
        return Optional.ofNullable(cache().get(hostname));
    }

    /** Returns the node family of each node in this, by hostname. The map is built on first use and then reused */
    private Map<String, NodeFamily> cache() {
        return nodeCache.updateAndGet((cached) -> {
            if (cached != null)
                return cached;

            // First pass: Collect the children of each parent hostname
            Map<String, List<Node>> children = new HashMap<>();
            for (Node node : this)
                node.parentHostname().ifPresent(parent -> children.computeIfAbsent(parent, __ -> new ArrayList<>()).add(node));

            // Second pass: Pair each node in this list with its children, if any
            Map<String, NodeFamily> families = new HashMap<>();
            for (Node node : this)
                families.put(node.hostname(), new NodeFamily(node, children.getOrDefault(node.hostname(), List.of())));

            return families;
        });
    }

    /** A node and its children, if any */
    private record NodeFamily(Node node, List<Node> children) {}

}