aboutsummaryrefslogtreecommitdiffstats
path: root/config-model/src/main/java/com/yahoo/vespa/model/builder/xml/dom/NodesSpecification.java
blob: baf752cb4be2849e0000459327f93acf5626cf53 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.vespa.model.builder.xml.dom;

import com.yahoo.config.provision.ClusterInfo;
import com.yahoo.config.provision.IntRange;
import com.yahoo.collections.Pair;
import com.yahoo.component.Version;
import com.yahoo.config.application.api.DeployLogger;
import com.yahoo.config.provision.ZoneEndpoint;
import com.yahoo.config.model.ConfigModelContext;
import com.yahoo.config.provision.Capacity;
import com.yahoo.config.provision.CloudAccount;
import com.yahoo.config.provision.ClusterMembership;
import com.yahoo.config.provision.ClusterResources;
import com.yahoo.config.provision.ClusterSpec;
import com.yahoo.config.provision.DockerImage;
import com.yahoo.config.provision.NodeResources;
import com.yahoo.text.XML;
import com.yahoo.vespa.model.HostResource;
import com.yahoo.vespa.model.HostSystem;
import com.yahoo.vespa.model.container.xml.ContainerModelBuilder;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.function.Function;
import java.util.logging.Level;

/**
 * A common utility class to represent a requirement for nodes during model building.
 * Such a requirement is commonly specified in services.xml as a <code>nodes</code> element.
 *
 * @author bratseth
 */
public class NodesSpecification {

    private final ClusterResources min, max;

    private final IntRange groupSize;

    private final boolean dedicated;

    /** The Vespa version we want the nodes to run */
    private final Version version;

    /** 
     * Whether the capacity amount specified is required or can be relaxed
     * at the discretion of the component fulfilling it
     */
    private final boolean required;

    private final boolean canFail;

    private final boolean exclusive;

    /** The repo part of a docker image (without tag), optional */
    private final Optional<DockerImage> dockerImageRepo;

    /** The ID of the cluster referencing this node specification, if any */
    private final Optional<String> combinedId;

    /** The cloud account to use for nodes in this spec, if any */
    private final Optional<CloudAccount> cloudAccount;

    /** Whether the count attribute was present on the nodes element. */
    private final boolean hasCountAttribute;

    /**
     * Creates a node specification after validating that min and max are consistent.
     *
     * @throws IllegalArgumentException if max is smaller than min, or if min and max differ in their
     *                                  non-numeric node resource settings or in bandwidth
     */
    private NodesSpecification(ClusterResources min,
                               ClusterResources max,
                               IntRange groupSize,
                               boolean dedicated, Version version,
                               boolean required, boolean canFail, boolean exclusive,
                               Optional<DockerImage> dockerImageRepo,
                               Optional<String> combinedId,
                               Optional<CloudAccount> cloudAccount,
                               boolean hasCountAttribute) {
        // The rule being enforced is min <= max; the message must say so
        if (max.smallerThan(min))
            throw new IllegalArgumentException("Min resources must be smaller or equal to max resources, but " +
                                               max + " is smaller than " + min);

        // Non-scaled resources must be equal
        if ( ! min.nodeResources().justNonNumbers().equals(max.nodeResources().justNonNumbers()))
            throw new IllegalArgumentException("Min and max resources must have the same non-numeric settings, but " +
                                               "min is " + min + " and max " + max);
        if (min.nodeResources().bandwidthGbps() != max.nodeResources().bandwidthGbps())
            throw new IllegalArgumentException("Min and max resources must have the same bandwidth, but " +
                                               "min is " + min + " and max " + max);

        this.min = min;
        this.max = max;
        this.groupSize = groupSize;
        this.dedicated = dedicated;
        this.version = version;
        this.required = required;
        this.canFail = canFail;
        this.exclusive = exclusive;
        this.dockerImageRepo = dockerImageRepo;
        this.combinedId = combinedId;
        this.cloudAccount = cloudAccount;
        this.hasCountAttribute = hasCountAttribute;
    }

    /**
     * Builds a specification from the given nodes element. If the element refers to the nodes of
     * another cluster (see {@code resolveElement}), the attributes are read from the referred element.
     */
    static NodesSpecification create(boolean dedicated, boolean canFail, Version version,
                                     ModelElement nodesElement, Optional<DockerImage> dockerImageRepo,
                                     Optional<CloudAccount> cloudAccount) {
        ModelElement resolved = resolveElement(nodesElement);
        Optional<String> combinedId = findCombinedId(nodesElement, resolved);
        ResourceConstraints constraints = toResourceConstraints(resolved);
        boolean countGiven = resolved.stringAttribute("count") != null;
        boolean required = resolved.booleanAttribute("required", false);
        boolean exclusive = resolved.booleanAttribute("exclusive", false);
        Optional<DockerImage> imageToUse = dockerImageToUse(resolved, dockerImageRepo);

        return new NodesSpecification(constraints.min(), constraints.max(), constraints.groupSize(),
                                      dedicated, version,
                                      required, canFail, exclusive,
                                      imageToUse,
                                      combinedId,
                                      cloudAccount,
                                      countGiven);
    }

    /**
     * Reads the count, groups and group-size ranges and the node resources from a nodes element
     * and turns them into min and max cluster resources.
     *
     * @throws IllegalArgumentException if an attribute cannot be parsed, or if the default number of groups
     *                                  cannot be computed because the divisor (a group-size bound, or the
     *                                  min node count when no max group size is given) is 0
     */
    private static ResourceConstraints toResourceConstraints(ModelElement nodesElement) {
        var nodes = rangeFrom(nodesElement, "count");
        var groups = rangeFrom(nodesElement, "groups");
        var groupSize = rangeFrom(nodesElement, "group-size");

        int minNodes = nodes.from().orElse(1);
        int maxNodes = nodes.to().orElse(1);

        // Find the tightest possible limits for groups to avoid falsely concluding we are autoscaling
        // when only specifying group size
        int maxGroupSize = groupSize.to().orElse(minNodes);
        if (maxGroupSize == 0) // would otherwise surface as an ArithmeticException from the division below
            throw new IllegalArgumentException("Illegal " + (groupSize.to().isPresent() ? "group-size" : "count") +
                                               " value: Must be greater than 0");
        int defaultMinGroups = minNodes / maxGroupSize;

        int defaultMaxGroups = 1;
        if ( ! groupSize.isEmpty()) {
            int minGroupSize = groupSize.from().orElse(1);
            if (minGroupSize == 0)
                throw new IllegalArgumentException("Illegal group-size value: Must be greater than 0");
            defaultMaxGroups = maxNodes / minGroupSize;
        }

        // Parse the resources once and use the pair for both min and max
        var resources = nodeResources(nodesElement);
        var min = new ClusterResources(minNodes, groups.from().orElse(defaultMinGroups), resources.getFirst());
        var max = new ClusterResources(maxNodes, groups.to().orElse(defaultMaxGroups), resources.getSecond());
        return new ResourceConstraints(min, max, groupSize);
    }

    /**
     * Parses the named attribute of the given element as an integer range.
     * A missing attribute is parsed as the empty string.
     *
     * @throws IllegalArgumentException naming the attribute if its value cannot be parsed
     */
    private static IntRange rangeFrom(ModelElement element, String name) {
        try {
            String attributeValue = element.stringAttribute(name, "");
            return IntRange.from(attributeValue);
        }
        catch (IllegalArgumentException cause) {
            String message = "Illegal " + name + " value";
            throw new IllegalArgumentException(message, cause);
        }
    }

    /** The min and max cluster resources and the group size range read from a nodes element. */
    private record ResourceConstraints(ClusterResources min,
                                       ClusterResources max,
                                       IntRange groupSize) {
    }

    /**
     * Returns the ID of the cluster referencing this node specification, if any.
     *
     * @param nodesElement    the nodes element as written
     * @param resolvedElement the element {@code nodesElement} resolves to; the same instance when
     *                        {@code nodesElement} does not refer to another element
     */
    private static Optional<String> findCombinedId(ModelElement nodesElement, ModelElement resolvedElement) {
        boolean refersToOtherElement = resolvedElement != nodesElement;
        return refersToOtherElement
               ? containerIdOf(nodesElement)            // container cluster referencing nodes in a content cluster
               : containerIdReferencing(nodesElement);  // content cluster that is referenced by a container cluster
    }

    /**
     * Returns a requirement for dedicated nodes taken from the given <code>nodes</code> element.
     * Version, docker image repo and cloud account are taken from the deploy state of the context,
     * and the nodes can fail unless this is a bootstrap deployment.
     */
    public static NodesSpecification from(ModelElement nodesElement, ConfigModelContext context) {
        boolean canFail = ! context.getDeployState().getProperties().isBootstrap();
        var wantedVersion = context.getDeployState().getWantedNodeVespaVersion();
        var wantedImageRepo = context.getDeployState().getWantedDockerImageRepo();
        var account = context.getDeployState().getProperties().cloudAccount();
        return create(true, canFail, wantedVersion, nodesElement, wantedImageRepo, account);
    }

    /**
     * Returns a requirement for nodes taken from the <code>nodes</code> element contained in the given
     * parent element. The nodes are dedicated only if the element has <code>dedicated</code> set to true.
     * Returns empty if the parent element is null or has no <code>nodes</code> child.
     */
    public static Optional<NodesSpecification> optionalDedicatedFromParent(ModelElement parentElement,
                                                                           ConfigModelContext context) {
        return Optional.ofNullable(parentElement)
                       .map(parent -> parent.child("nodes"))   // a null child yields an empty Optional
                       .map(nodes -> create(nodes.booleanAttribute("dedicated", false),
                                            ! context.getDeployState().getProperties().isBootstrap(),
                                            context.getDeployState().getWantedNodeVespaVersion(),
                                            nodes,
                                            context.getDeployState().getWantedDockerImageRepo(),
                                            context.getDeployState().getProperties().cloudAccount()));
    }

    /**
     * Returns a requirement for <code>count</code> non-dedicated nodes in one group,
     * with unspecified node resources and min equal to max.
     */
    public static NodesSpecification nonDedicated(int count, ConfigModelContext context) {
        var minResources = new ClusterResources(count, 1, NodeResources.unspecified());
        var maxResources = new ClusterResources(count, 1, NodeResources.unspecified());
        var noGroupSize = IntRange.empty();
        var wantedVersion = context.getDeployState().getWantedNodeVespaVersion();
        boolean canFail = ! context.getDeployState().getProperties().isBootstrap();
        var wantedImageRepo = context.getDeployState().getWantedDockerImageRepo();
        var account = context.getDeployState().getProperties().cloudAccount();
        return new NodesSpecification(minResources, maxResources, noGroupSize,
                                      false, wantedVersion,
                                      false, canFail, false,
                                      wantedImageRepo,
                                      Optional.empty(),
                                      account,
                                      false);
    }

    /**
     * Returns a requirement for <code>count</code> dedicated nodes in one group,
     * with unspecified node resources and min equal to max.
     */
    public static NodesSpecification dedicated(int count, ConfigModelContext context) {
        var minResources = new ClusterResources(count, 1, NodeResources.unspecified());
        var maxResources = new ClusterResources(count, 1, NodeResources.unspecified());
        var noGroupSize = IntRange.empty();
        var wantedVersion = context.getDeployState().getWantedNodeVespaVersion();
        boolean canFail = ! context.getDeployState().getProperties().isBootstrap();
        var wantedImageRepo = context.getDeployState().getWantedDockerImageRepo();
        var account = context.getDeployState().getProperties().cloudAccount();
        return new NodesSpecification(minResources, maxResources, noGroupSize,
                                      true, wantedVersion,
                                      false, canFail, false,
                                      wantedImageRepo,
                                      Optional.empty(),
                                      account,
                                      false);
    }

    /**
     * Returns a requirement for {@code count} shared nodes with the given resources, where
     * {@code required} is the OR over all content clusters under the enclosing services element
     * that have a nodes element with a count attribute.
     */
    public static NodesSpecification requiredFromSharedParents(int count, NodeResources resources,
                                                               ModelElement element, ConfigModelContext context) {
        var contentElements = findParentByTag("services", element.getXml())
                                      .map(services -> XML.getChildren(services, "content"))
                                      .orElse(List.of());

        // Every qualifying content cluster is turned into a specification (and thereby validated),
        // also after one that is required has been found
        boolean anyRequired = false;
        for (var content : contentElements) {
            ModelElement nodes = new ModelElement(content).child("nodes");
            if (nodes == null || nodes.stringAttribute("count") == null) continue;
            if (from(nodes, context).required)
                anyRequired = true;
        }

        var minResources = new ClusterResources(count, 1, resources);
        var maxResources = new ClusterResources(count, 1, resources);
        return new NodesSpecification(minResources, maxResources,
                                      IntRange.empty(),
                                      true,
                                      context.getDeployState().getWantedNodeVespaVersion(),
                                      anyRequired,
                                      ! context.getDeployState().getProperties().isBootstrap(),
                                      false,
                                      context.getDeployState().getWantedDockerImageRepo(),
                                      Optional.empty(),
                                      context.getDeployState().getProperties().cloudAccount(),
                                      false);
    }

    /** Returns the minimum cluster resources of this specification */
    public ClusterResources minResources() {
        return min;
    }

    /** Returns the maximum cluster resources of this specification */
    public ClusterResources maxResources() {
        return max;
    }

    /** Returns the group size range of this specification */
    public IntRange groupSize() {
        return groupSize;
    }

    /**
     * Returns whether dedicated nodes are required by this specification.
     * When false, the model encountering this request should reuse nodes requested
     * for other purposes whenever possible.
     */
    public boolean isDedicated() {
        return dedicated;
    }

    /**
     * Returns whether this specification requests exclusive nodes. The value is passed on
     * unchanged to the cluster spec when provisioning.
     * NOTE(review): presumably true means the physical hosts running these nodes can NOT also run
     * nodes of other applications, which increases security and cost; confirm against ClusterSpec.
     */
    public boolean isExclusive() { return exclusive; }

    /**
     * Returns whether the {@code <nodes>} element this was created from had a count attribute.
     * Always false for specifications not created from an element.
     */
    public boolean hasCountAttribute() { return hasCountAttribute; }

    /**
     * Provisions hosts for this specification using the default zone endpoint.
     * Otherwise identical to the overload taking a {@link ZoneEndpoint}.
     */
    public Map<HostResource, ClusterMembership> provision(HostSystem hostSystem,
                                                          ClusterSpec.Type clusterType,
                                                          ClusterSpec.Id clusterId,
                                                          DeployLogger logger,
                                                          boolean stateful,
                                                          ClusterInfo clusterInfo) {
        ZoneEndpoint endpoint = ZoneEndpoint.defaultEndpoint;
        return provision(hostSystem, clusterType, clusterId, endpoint, logger, stateful, clusterInfo);
    }

    /**
     * Provisions hosts for this specification by requesting them from the given host system.
     * If this specification has a combined id, the cluster is requested with type
     * {@code combined} regardless of the given cluster type.
     *
     * @return the allocation returned by the host system
     */
    public Map<HostResource, ClusterMembership> provision(HostSystem hostSystem,
                                                          ClusterSpec.Type clusterType,
                                                          ClusterSpec.Id clusterId,
                                                          ZoneEndpoint zoneEndpoint,
                                                          DeployLogger logger,
                                                          boolean stateful,
                                                          ClusterInfo info) {
        ClusterSpec.Type effectiveType = combinedId.isPresent() ? ClusterSpec.Type.combined : clusterType;
        ClusterSpec cluster = ClusterSpec.request(effectiveType, clusterId)
                                         .vespaVersion(version)
                                         .exclusive(exclusive)
                                         .combinedId(combinedId.map(ClusterSpec.Id::from))
                                         .dockerImageRepository(dockerImageRepo)
                                         .loadBalancerSettings(zoneEndpoint)
                                         .stateful(stateful)
                                         .build();
        Capacity capacity = Capacity.from(min, max, groupSize, required, canFail, cloudAccount, info);
        return hostSystem.allocateHosts(cluster, capacity, logger);
    }

    /**
     * Returns the (min, max) node resources of a nodes element: taken from its resources child if
     * present, otherwise from its legacy flavor attribute if present, otherwise unspecified.
     */
    private static Pair<NodeResources, NodeResources> nodeResources(ModelElement nodesElement) {
        ModelElement resourcesElement = nodesElement.child("resources");
        if (resourcesElement != null)
            return nodeResourcesFromResourcesElement(resourcesElement);

        String flavor = nodesElement.stringAttribute("flavor");
        if (flavor == null)
            return new Pair<>(NodeResources.unspecified(), NodeResources.unspecified());

        // legacy fallback
        NodeResources fromFlavor = NodeResources.fromLegacyName(flavor);
        return new Pair<>(fromFlavor, fromFlavor);
    }

    /**
     * Parses a resources element into (min, max) node resources. The numeric attributes may each be
     * a single value or a range; the non-numeric settings and gpu resources are shared by min and max.
     * Missing vcpu, memory and disk default to 0, missing bandwidth to 0.3.
     */
    private static Pair<NodeResources, NodeResources> nodeResourcesFromResourcesElement(ModelElement element) {
        Pair<Double, Double> vcpuRange = toRange(element.stringAttribute("vcpu"), .0, Double::parseDouble);
        Pair<Double, Double> memoryRange = toRange(element.stringAttribute("memory"), .0, s -> parseGbAmount(s, "B"));
        Pair<Double, Double> diskRange = toRange(element.stringAttribute("disk"), .0, s -> parseGbAmount(s, "B"));
        Pair<Double, Double> bandwidthRange = toRange(element.stringAttribute("bandwidth"), .3, s -> parseGbAmount(s, "BPS"));

        var diskSpeed = parseOptionalDiskSpeed(element.stringAttribute("disk-speed"));
        var storageType = parseOptionalStorageType(element.stringAttribute("storage-type"));
        var architecture = parseOptionalArchitecture(element.stringAttribute("architecture"));
        var gpu = parseOptionalGpuResources(element.child("gpu"));

        NodeResources lower = new NodeResources(vcpuRange.getFirst(), memoryRange.getFirst(),
                                                diskRange.getFirst(), bandwidthRange.getFirst(),
                                                diskSpeed, storageType, architecture, gpu);
        NodeResources upper = new NodeResources(vcpuRange.getSecond(), memoryRange.getSecond(),
                                                diskRange.getSecond(), bandwidthRange.getSecond(),
                                                diskSpeed, storageType, architecture, gpu);
        return new Pair<>(lower, upper);
    }

    /**
     * Returns the gpu resources given by a gpu element, which must have count and memory attributes,
     * or the default gpu resources if the element is null.
     */
    private static NodeResources.GpuResources parseOptionalGpuResources(ModelElement element) {
        if (element != null) {
            return new NodeResources.GpuResources(element.requiredIntegerAttribute("count"),
                                                  parseGbAmount(element.requiredStringAttribute("memory"), "B"));
        }
        return NodeResources.GpuResources.getDefault();
    }

    /**
     * Parses an amount such as "100Gb", "1.5 TB" or "512" into a number of giga-units.
     * The amount is a floating point number optionally followed by a decimal size prefix
     * (k, M, G, T, P, E, Z or Y, case insensitive) and optionally by the unit.
     * An amount without a prefix is taken to be in base units (e.g. bytes).
     *
     * @param byteAmount the amount to parse
     * @param unit the upper case unit which may trail the amount, e.g. "B" or "BPS"
     * @return the amount in giga-units
     * @throws IllegalArgumentException if the numeric part is not a valid floating point number
     */
    private static double parseGbAmount(String byteAmount, String unit) {
        String amount = byteAmount.strip().toUpperCase();
        if (amount.endsWith(unit))
            amount = amount.substring(0, amount.length() - unit.length());

        // Exponent of 1000 relative to giga: no prefix means base units, i.e. 1000^-3
        int exponent = -3;
        if ( ! amount.isEmpty()) {
            // Prefixes in order of magnitude: K is 1000^-2 of a giga-unit, M 1000^-1, G 1000^0, and so on
            int prefixIndex = "KMGTPEZY".indexOf(amount.charAt(amount.length() - 1));
            if (prefixIndex >= 0) {
                exponent = prefixIndex - 2;
                // Only drop the last character when it actually is a prefix:
                // dropping it unconditionally would turn "100" into "10"
                amount = amount.substring(0, amount.length() - 1);
            }
        }

        try {
            return Double.parseDouble(amount.strip()) * Math.pow(1000, exponent);
        }
        catch (NumberFormatException e) {
            throw new IllegalArgumentException("Invalid byte amount '" + byteAmount +
                                               "': Must be a floating point number " +
                                               "optionally followed by k, M, G, T, P, E, Z or Y", e);
        }
    }

    /**
     * Returns the disk speed named by the given string, or the default disk speed if it is null.
     *
     * @throws IllegalArgumentException if the string is not 'fast', 'slow' or 'any'
     */
    private static NodeResources.DiskSpeed parseOptionalDiskSpeed(String diskSpeedString) {
        if (diskSpeedString == null) return NodeResources.DiskSpeed.getDefault();
        return switch (diskSpeedString) {
            case "fast" -> NodeResources.DiskSpeed.fast;
            case "slow" -> NodeResources.DiskSpeed.slow;
            case "any" -> NodeResources.DiskSpeed.any;
            default -> throw new IllegalArgumentException("Illegal disk-speed value '" + diskSpeedString +
                                                          "': Legal values are 'fast', 'slow' and 'any'");
        };
    }

    /**
     * Returns the storage type named by the given string, or the default storage type if it is null.
     *
     * @throws IllegalArgumentException if the string is not 'remote', 'local' or 'any'
     */
    private static NodeResources.StorageType parseOptionalStorageType(String storageTypeString) {
        if (storageTypeString == null) return NodeResources.StorageType.getDefault();
        return switch (storageTypeString) {
            case "remote" -> NodeResources.StorageType.remote;
            case "local" -> NodeResources.StorageType.local;
            case "any" -> NodeResources.StorageType.any;
            default -> throw new IllegalArgumentException("Illegal storage-type value '" + storageTypeString +
                                                          "': Legal values are 'remote', 'local' and 'any'");
        };
    }

    /**
     * Returns the architecture named by the given string, or the default architecture if it is null.
     *
     * @throws IllegalArgumentException if the string is not 'x86_64', 'arm64' or 'any'
     */
    private static NodeResources.Architecture parseOptionalArchitecture(String architecture) {
        if (architecture == null) return NodeResources.Architecture.getDefault();
        return switch (architecture) {
            case "x86_64" -> NodeResources.Architecture.x86_64;
            case "arm64" -> NodeResources.Architecture.arm64;
            case "any" -> NodeResources.Architecture.any;
            default -> throw new IllegalArgumentException("Illegal architecture value '" + architecture +
                                                          "': Legal values are 'x86_64', 'arm64' and 'any'");
        };
    }

    /**
     * Follows the "of" reference of nodesElement, if it has one, and returns the nodes element
     * of the content service it refers to.
     *
     * If nodesElement has no "of" attribute, the same instance is returned.
     *
     * @throws IllegalArgumentException if the referenced service is not defined, is not a content
     *                                  service, or has no nodes element
     */
    private static ModelElement resolveElement(ModelElement nodesElement) {
        Element xml = nodesElement.getXml();
        String referenceId = xml.getAttribute("of");
        if (referenceId.isEmpty())
            return nodesElement;

        Element services = findParentByTag("services", xml)
                                   .orElseThrow(() -> clusterReferenceNotFoundException(referenceId));
        Element referenced = findChildById(services, referenceId)
                                     .orElseThrow(() -> clusterReferenceNotFoundException(referenceId));

        boolean isContent = referenced.getTagName().equals("content");
        if ( ! isContent)
            throw new IllegalArgumentException("service '" + referenceId + "' is not a content service");

        Element referencedNodes = XML.getChild(referenced, "nodes");
        if (referencedNodes != null)
            return new ModelElement(referencedNodes);

        throw new IllegalArgumentException("expected reference to service '" + referenceId + "' to supply nodes, " +
                                           "but that service has no <nodes> element");
    }

    /** Returns the id attribute of the closest enclosing container element of nodesElement, if any */
    private static Optional<String> containerIdOf(ModelElement nodesElement) {
        return findParentByTag("container", nodesElement.getXml())
                       .map(container -> container.getAttribute("id"));
    }

    /**
     * Returns the ID of the first container element under services whose nodes element
     * refers (by "of") to the content cluster enclosing nodesElement, if any.
     * Empty if nodesElement is not inside both a services and a content element,
     * or if the content element has no id.
     */
    private static Optional<String> containerIdReferencing(ModelElement nodesElement) {
        Element xml = nodesElement.getXml();

        Optional<Element> servicesElement = findParentByTag("services", xml);
        if (servicesElement.isEmpty()) return Optional.empty();

        Optional<Element> contentElement = findParentByTag("content", xml);
        if (contentElement.isEmpty()) return Optional.empty();

        String contentClusterId = contentElement.get().getAttribute("id");
        if (contentClusterId.isEmpty()) return Optional.empty();

        return XML.getChildren(servicesElement.get()).stream()
                  .filter(child -> ContainerModelBuilder.isContainerTag(child))
                  .filter(container -> {
                      Element containerNodes = XML.getChild(container, "nodes");
                      return containerNodes != null && contentClusterId.equals(containerNodes.getAttribute("of"));
                  })
                  .map(container -> container.getAttribute("id"))
                  .findFirst();
    }

    /** Returns the first child element of parent whose id attribute equals the given id, if any */
    private static Optional<Element> findChildById(Element parent, String id) {
        return XML.getChildren(parent).stream()
                  .filter(child -> id.equals(child.getAttribute("id")))
                  .findFirst();
    }

    /**
     * Returns the closest ancestor element of the given element having the given tag name, if any.
     * The element itself is not considered, and the search stops at the first ancestor
     * which is not an element.
     */
    private static Optional<Element> findParentByTag(String tag, Element element) {
        Node ancestor = element.getParentNode();
        while (ancestor instanceof Element ancestorElement) { // false for null and for non-element nodes
            if (ancestorElement.getTagName().equals(tag))
                return Optional.of(ancestorElement);
            ancestor = ancestorElement.getParentNode();
        }
        return Optional.empty();
    }

    /** Returns (without throwing) the exception to use when the service with the given id is not defined */
    private static IllegalArgumentException clusterReferenceNotFoundException(String referenceId) {
        String message = "referenced service '" + referenceId + "' is not defined";
        return new IllegalArgumentException(message);
    }

    /**
     * Returns the docker image given by the docker-image attribute of the nodes element if present,
     * and the given docker image otherwise.
     */
    private static Optional<DockerImage> dockerImageToUse(ModelElement nodesElement, Optional<DockerImage> dockerImage) {
        String imageAttribute = nodesElement.stringAttribute("docker-image");
        if (imageAttribute != null)
            return Optional.of(DockerImage.fromString(imageAttribute));
        return dockerImage;
    }

    /**
     * Parses a value ("value") or value range ("[min-value, max-value]") into a (min, max) pair.
     * A single value gives a pair with that value as both min and max.
     *
     * @param s the string to parse, or null to use the default value as both min and max
     * @param defaultValue the value to use when s is null
     * @param valueParser the function parsing each single value
     * @throws IllegalArgumentException if a range does not have exactly two parts or a value cannot be parsed
     */
    private static <T> Pair<T, T> toRange(String s, T defaultValue, Function<String, T> valueParser) {
        if (s == null) return new Pair<>(defaultValue, defaultValue);

        String text = s.trim();
        try {
            boolean bracketed = text.startsWith("[") && text.endsWith("]");
            if ( ! bracketed)
                return new Pair<>(valueParser.apply(text), valueParser.apply(text));

            String[] bounds = text.substring(1, text.length() - 1).split(",");
            if (bounds.length != 2) throw new IllegalArgumentException();
            return new Pair<>(valueParser.apply(bounds[0].trim()), valueParser.apply(bounds[1].trim()));
        }
        catch (IllegalArgumentException e) {
            throw new IllegalArgumentException("Expected a number or range on the form [min, max], but got '" + text + "'", e);
        }
    }

    /** Returns a description with the resources given once when min equals max, and as min and max otherwise */
    @Override
    public String toString() {
        StringBuilder description = new StringBuilder("specification of ");
        if (dedicated)
            description.append("dedicated ");
        if (min.equals(max))
            description.append(min);
        else
            description.append("min ").append(min).append(" max ").append(max);
        return description.toString();
    }
}