From bcaf74cc7cddd26f315ea9c60ceb8a5f9b665168 Mon Sep 17 00:00:00 2001 From: Jon Bratseth Date: Fri, 27 Mar 2020 12:57:10 +0100 Subject: Maintain application min, max and target resources --- .../yahoo/config/provision/ClusterResources.java | 7 ++ .../vespa/hosted/provision/NodeRepository.java | 6 ++ .../hosted/provision/applications/Application.java | 37 ++++++++++ .../provision/applications/Applications.java | 24 +++++++ .../hosted/provision/applications/Cluster.java | 46 ++++++++++++ .../provision/provisioning/CapacityPolicies.java | 34 ++++----- .../provisioning/NodeRepositoryProvisioner.java | 84 +++++++++++++++------- .../provision/provisioning/ProvisioningTest.java | 10 +-- 8 files changed, 200 insertions(+), 48 deletions(-) create mode 100644 node-repository/src/main/java/com/yahoo/vespa/hosted/provision/applications/Application.java create mode 100644 node-repository/src/main/java/com/yahoo/vespa/hosted/provision/applications/Applications.java create mode 100644 node-repository/src/main/java/com/yahoo/vespa/hosted/provision/applications/Cluster.java diff --git a/config-provisioning/src/main/java/com/yahoo/config/provision/ClusterResources.java b/config-provisioning/src/main/java/com/yahoo/config/provision/ClusterResources.java index 11873bc908c..48a201f4f65 100644 --- a/config-provisioning/src/main/java/com/yahoo/config/provision/ClusterResources.java +++ b/config-provisioning/src/main/java/com/yahoo/config/provision/ClusterResources.java @@ -44,6 +44,13 @@ public class ClusterResources { return false; } + /** Returns true if this is within the given limits (inclusive) */ + public boolean isWithin(ClusterResources min, ClusterResources max) { + if (this.smallerThan(min)) return false; + if (max.smallerThan(this)) return false; + return true; + } + @Override public boolean equals(Object o) { if (o == this) return true; diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/NodeRepository.java 
b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/NodeRepository.java index 3cf2442f6f7..4ba480b73b1 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/NodeRepository.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/NodeRepository.java @@ -19,6 +19,7 @@ import com.yahoo.transaction.NestedTransaction; import com.yahoo.vespa.curator.Curator; import com.yahoo.vespa.flags.FlagSource; import com.yahoo.vespa.flags.Flags; +import com.yahoo.vespa.hosted.provision.applications.Applications; import com.yahoo.vespa.hosted.provision.lb.LoadBalancer; import com.yahoo.vespa.hosted.provision.lb.LoadBalancerId; import com.yahoo.vespa.hosted.provision.lb.LoadBalancerInstance; @@ -98,6 +99,7 @@ public class NodeRepository extends AbstractComponent { private final FirmwareChecks firmwareChecks; private final DockerImages dockerImages; private final JobControl jobControl; + private final Applications applications; /** * Creates a node repository from a zookeeper provider. @@ -124,6 +126,7 @@ public class NodeRepository extends AbstractComponent { this.firmwareChecks = new FirmwareChecks(db, clock); this.dockerImages = new DockerImages(db, dockerImage); this.jobControl = new JobControl(db); + this.applications = new Applications(); // read and write all nodes to make sure they are stored in the latest version of the serialized format for (State state : State.values()) @@ -154,6 +157,9 @@ public class NodeRepository extends AbstractComponent { /** Returns the status of maintenance jobs managed by this. 
*/ public JobControl jobControl() { return jobControl; } + /** Returns this node repo's view of the applications deployed to it */ + public Applications applications() { return applications; } + // ---------------- Query API ---------------------------------------------------------------- /** diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/applications/Application.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/applications/Application.java new file mode 100644 index 00000000000..7dd2dc7be17 --- /dev/null +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/applications/Application.java @@ -0,0 +1,37 @@ +// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.vespa.hosted.provision.applications; + +import com.yahoo.config.provision.ClusterResources; +import com.yahoo.config.provision.ClusterSpec; +import com.yahoo.transaction.Mutex; + +import java.util.Map; +import java.util.HashMap; +import java.util.Optional; + +/** + * The node repository's view of an application deployment. + * + * This is immutable, and must be locked with the application lock on read-modify-write. + * + * @author bratseth + */ +public class Application { + + private Map clusters = new HashMap<>(); + + /** Returns the cluster with the given id or null if none */ + public Cluster cluster(ClusterSpec.Id id) { return clusters.get(id); } + + /** + * Sets the min and max resource limits of the given cluster. + * This will create the cluster with these limits if it does not exist. + * If the cluster has a target which is not inside the new limits, the target is removed. + */ + public void setClusterLimits(ClusterSpec.Id id, ClusterResources min, ClusterResources max, Mutex applicationLock) { + Cluster cluster = clusters.computeIfAbsent(id, clusterId -> new Cluster(min, max, Optional.empty())); + if (cluster.targetResources().isPresent() && ! 
cluster.targetResources().get().isWithin(min, max)) + clusters.put(id, cluster.withoutTarget()); + } + +} diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/applications/Applications.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/applications/Applications.java new file mode 100644 index 00000000000..1409857df1a --- /dev/null +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/applications/Applications.java @@ -0,0 +1,24 @@ +// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.vespa.hosted.provision.applications; + +import com.yahoo.config.provision.ApplicationId; + +import java.util.concurrent.ConcurrentHashMap; + +/** + * An (in-memory, for now) repository of the node repo's view of applications. + * + * This is multithread safe. + * + * @author bratseth + */ +public class Applications { + + private final ConcurrentHashMap applications = new ConcurrentHashMap<>(); + + /** Returns the application with the given id, or null if it does not exist and should not be created */ + public Application get(ApplicationId applicationId, boolean create) { + return applications.computeIfAbsent(applicationId, id -> create ? new Application() : null); + } + +} diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/applications/Cluster.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/applications/Cluster.java new file mode 100644 index 00000000000..d4f7f10abfd --- /dev/null +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/applications/Cluster.java @@ -0,0 +1,46 @@ +// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.vespa.hosted.provision.applications; + +import com.yahoo.config.provision.ClusterResources; + +import java.util.Optional; + +/** + * The node repo's view of a cluster in an application deployment. + * + * This is immutable, and must be locked with the application lock on read-modify-write. + * + * @author bratseth + */ +public class Cluster { + + private final ClusterResources minResources, maxResources; + private final Optional targetResources; + + Cluster(ClusterResources minResources, ClusterResources maxResources, Optional targetResources) { + this.minResources = minResources; + this.maxResources = maxResources; + this.targetResources = targetResources; + } + + /** Returns the configured minimal resources in this cluster */ + public ClusterResources minResources() { return minResources; } + + /** Returns the configured maximal resources in this cluster */ + public ClusterResources maxResources() { return maxResources; } + + /** + * Returns the computed resources (between min and max, inclusive) this cluster should + * have allocated at the moment, or empty if the system currently has no opinion on this. 
+ */ + public Optional targetResources() { return targetResources; } + + public Cluster withTarget(ClusterResources target) { + return new Cluster(minResources, maxResources, Optional.of(target)); + } + + public Cluster withoutTarget() { + return new Cluster(minResources, maxResources, Optional.empty()); + } + +} diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/CapacityPolicies.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/CapacityPolicies.java index a23c1a932d4..d1cd4752c93 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/CapacityPolicies.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/CapacityPolicies.java @@ -3,6 +3,7 @@ package com.yahoo.vespa.hosted.provision.provisioning; import com.yahoo.config.provision.ApplicationId; import com.yahoo.config.provision.Capacity; +import com.yahoo.config.provision.ClusterResources; import com.yahoo.config.provision.ClusterSpec; import com.yahoo.config.provision.Environment; import com.yahoo.config.provision.NodeResources; @@ -31,42 +32,37 @@ public class CapacityPolicies { this.isUsingAdvertisedResources = zone.cloud().value().equals("aws"); } - public int decideSize(Capacity capacity, ClusterSpec cluster, ApplicationId application) { - int requestedNodes = capacity.minResources().nodes(); - + public int decideSize(int requested, Capacity capacity, ClusterSpec cluster, ApplicationId application) { if (application.instance().isTester()) return 1; - ensureRedundancy(requestedNodes, cluster, capacity.canFail()); - - if (capacity.isRequired()) return requestedNodes; - + ensureRedundancy(requested, cluster, capacity.canFail()); + if (capacity.isRequired()) return requested; switch(zone.environment()) { case dev : case test : return 1; - case perf : return Math.min(capacity.minResources().nodes(), 3); - case staging: return requestedNodes <= 1 ? 
requestedNodes : Math.max(2, requestedNodes / 10); - case prod : return requestedNodes; + case perf : return Math.min(requested, 3); + case staging: return requested <= 1 ? requested : Math.max(2, requested / 10); + case prod : return requested; default : throw new IllegalArgumentException("Unsupported environment " + zone.environment()); } } - public NodeResources decideNodeResources(Capacity capacity, ClusterSpec cluster) { - NodeResources resources = capacity.minResources().nodeResources(); - if (resources == NodeResources.unspecified) - resources = defaultNodeResources(cluster.type()); - ensureSufficientResources(resources, cluster); + public NodeResources decideNodeResources(NodeResources requested, Capacity capacity, ClusterSpec cluster) { + if (requested == NodeResources.unspecified) + requested = defaultNodeResources(cluster.type()); + ensureSufficientResources(requested, cluster); - if (capacity.isRequired()) return resources; + if (capacity.isRequired()) return requested; // Allow slow storage in zones which are not performance sensitive if (zone.system().isCd() || zone.environment() == Environment.dev || zone.environment() == Environment.test) - resources = resources.with(NodeResources.DiskSpeed.any).with(NodeResources.StorageType.any); + requested = requested.with(NodeResources.DiskSpeed.any).with(NodeResources.StorageType.any); // Dev does not cap the cpu of containers since usage is spotty: Allocate just a small amount exclusively // Do not cap in AWS as hosts are allocated on demand and 1-to-1, so the node can use the entire host if (zone.environment() == Environment.dev && !zone.region().value().contains("aws-")) - resources = resources.withVcpu(0.1); + requested = requested.withVcpu(0.1); - return resources; + return requested; } private void ensureSufficientResources(NodeResources resources, ClusterSpec cluster) { diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeRepositoryProvisioner.java 
b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeRepositoryProvisioner.java index 2c2c927034b..083f8db5aa5 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeRepositoryProvisioner.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeRepositoryProvisioner.java @@ -4,6 +4,7 @@ package com.yahoo.vespa.hosted.provision.provisioning; import com.google.inject.Inject; import com.yahoo.config.provision.ApplicationId; import com.yahoo.config.provision.Capacity; +import com.yahoo.config.provision.ClusterResources; import com.yahoo.config.provision.ClusterSpec; import com.yahoo.config.provision.DockerImage; import com.yahoo.config.provision.Environment; @@ -15,10 +16,14 @@ import com.yahoo.config.provision.ProvisionLogger; import com.yahoo.config.provision.Provisioner; import com.yahoo.config.provision.Zone; import com.yahoo.log.LogLevel; +import com.yahoo.transaction.Mutex; import com.yahoo.transaction.NestedTransaction; import com.yahoo.vespa.flags.FlagSource; import com.yahoo.vespa.hosted.provision.Node; +import com.yahoo.vespa.hosted.provision.NodeList; import com.yahoo.vespa.hosted.provision.NodeRepository; +import com.yahoo.vespa.hosted.provision.applications.Application; +import com.yahoo.vespa.hosted.provision.applications.Cluster; import com.yahoo.vespa.hosted.provision.node.Allocation; import com.yahoo.vespa.hosted.provision.node.filter.ApplicationFilter; import com.yahoo.vespa.hosted.provision.node.filter.NodeHostFilter; @@ -87,38 +92,36 @@ public class NodeRepositoryProvisioner implements Provisioner { * The nodes are ordered by increasing index number. 
*/ @Override - public List prepare(ApplicationId application, ClusterSpec cluster, Capacity requestedCapacity, + public List prepare(ApplicationId application, ClusterSpec cluster, Capacity requested, ProvisionLogger logger) { - if (cluster.group().isPresent()) throw new IllegalArgumentException("Node requests cannot specify a group"); - log.log(zone.system().isCd() ? Level.INFO : LogLevel.DEBUG, - () -> "Received deploy prepare request for " + requestedCapacity + + () -> "Received deploy prepare request for " + requested + " for application " + application + ", cluster " + cluster); - int effectiveGroups; - NodeSpec requestedNodes; - NodeResources resources = requestedCapacity.minResources().nodeResources(); - if ( requestedCapacity.type() == NodeType.tenant) { - int nodeCount = capacityPolicies.decideSize(requestedCapacity, cluster, application); - if (zone.environment().isManuallyDeployed() && nodeCount < requestedCapacity.minResources().nodes()) - logger.log(Level.INFO, "Requested " + requestedCapacity.minResources().nodes() + " nodes for " + cluster + - ", downscaling to " + nodeCount + " nodes in " + zone.environment()); - resources = capacityPolicies.decideNodeResources(requestedCapacity, cluster); - boolean exclusive = capacityPolicies.decideExclusivity(cluster.isExclusive()); - effectiveGroups = Math.min(requestedCapacity.minResources().groups(), nodeCount); // cannot have more groups than nodes - requestedNodes = NodeSpec.from(nodeCount, resources, exclusive, requestedCapacity.canFail()); + if (cluster.group().isPresent()) throw new IllegalArgumentException("Node requests cannot specify a group"); - if ( ! hasQuota(application, nodeCount)) - throw new IllegalArgumentException(requestedCapacity + " requested for " + cluster + - (requestedCapacity.minResources().nodes() != nodeCount ? " resolved to " + nodeCount + " nodes" : "") + - " exceeds your quota. Resolve this at https://cloud.vespa.ai/quota"); + if ( ! 
hasQuota(application, requested.maxResources().nodes())) + throw new IllegalArgumentException(requested + " requested for " + cluster + + ". Max value exceeds your quota. Resolve this at https://cloud.vespa.ai/quota"); + + int groups; + NodeResources resources; + NodeSpec nodeSpec; + if ( requested.type() == NodeType.tenant) { + ClusterResources target = decideTargetResources(application, cluster.id(), requested); + int nodeCount = capacityPolicies.decideSize(target.nodes(), requested, cluster, application); + resources = capacityPolicies.decideNodeResources(target.nodeResources(), requested, cluster); + boolean exclusive = capacityPolicies.decideExclusivity(cluster.isExclusive()); + groups = Math.min(target.groups(), nodeCount); // cannot have more groups than nodes + nodeSpec = NodeSpec.from(nodeCount, resources, exclusive, requested.canFail()); + logIfDownscaled(target.nodes(), nodeCount, cluster, logger); } else { - requestedNodes = NodeSpec.from(requestedCapacity.type()); - effectiveGroups = 1; // type request with multiple groups is not supported + groups = 1; // type request with multiple groups is not supported + resources = requested.minResources().nodeResources(); + nodeSpec = NodeSpec.from(requested.type()); } - - return asSortedHosts(preparer.prepare(application, cluster, requestedNodes, effectiveGroups), resources); + return asSortedHosts(preparer.prepare(application, cluster, nodeSpec, groups), resources); } @Override @@ -138,6 +141,39 @@ public class NodeRepositoryProvisioner implements Provisioner { loadBalancerProvisioner.ifPresent(lbProvisioner -> lbProvisioner.deactivate(application, transaction)); } + /** + * Returns the target cluster resources, a value between the min and max in the requested capacity, + * and updates the application store with the received min and max, + */ + private ClusterResources decideTargetResources(ApplicationId applicationId, ClusterSpec.Id clusterId, Capacity requested) { + try (Mutex lock = 
nodeRepository.lock(applicationId)) { + Application application = nodeRepository.applications().get(applicationId, true); + application.setClusterLimits(clusterId, requested.minResources(), requested.maxResources(), lock); + return application.cluster(clusterId).targetResources() + .orElse(currentResources(applicationId, clusterId, requested) + .orElse(requested.minResources())); + } + } + + /** Returns the current resources of this cluster, if it's already deployed and inside the requested limits */ + private Optional currentResources(ApplicationId applicationId, + ClusterSpec.Id clusterId, + Capacity requested) { + List nodes = NodeList.copyOf(nodeRepository.getNodes(applicationId, Node.State.active)) + .cluster(clusterId).not().retired().asList(); + if (nodes.size() < 1) return Optional.empty(); + long groups = nodes.stream().map(node -> node.allocation().get().membership().cluster().group()).distinct().count(); + var resources = new ClusterResources(nodes.size(), (int)groups, nodes.get(0).flavor().resources()); + if ( ! resources.isWithin(requested.minResources(), requested.maxResources())) return Optional.empty(); + return Optional.of(resources); + } + + private void logIfDownscaled(int targetNodes, int actualNodes, ClusterSpec cluster, ProvisionLogger logger) { + if (zone.environment().isManuallyDeployed() && actualNodes < targetNodes) + logger.log(Level.INFO, "Requested " + targetNodes + " nodes for " + cluster + + ", downscaling to " + actualNodes + " nodes in " + zone.environment()); + } + private boolean hasQuota(ApplicationId application, int requestedNodes) { if ( ! 
this.zone.system().isPublic()) return true; // no quota management diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/ProvisioningTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/ProvisioningTest.java index f88cb839946..76258e86de9 100644 --- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/ProvisioningTest.java +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/ProvisioningTest.java @@ -493,7 +493,7 @@ public class ProvisioningTest { fail("Expected exception"); } catch (IllegalArgumentException e) { - assertEquals("6 nodes with [vcpu: 1.0, memory: 4.0 Gb, disk 10.0 Gb, bandwidth: 4.0 Gbps] requested for content cluster 'content0' 6.42 exceeds your quota. Resolve this at https://cloud.vespa.ai/quota", + assertEquals("6 nodes with [vcpu: 1.0, memory: 4.0 Gb, disk 10.0 Gb, bandwidth: 4.0 Gbps] requested for content cluster 'content0' 6.42. Max value exceeds your quota. 
Resolve this at https://cloud.vespa.ai/quota", e.getMessage()); } } @@ -772,10 +772,10 @@ public class ProvisioningTest { allHosts.addAll(content1); Function capacity = count -> Capacity.from(new ClusterResources(count, 1, NodeResources.unspecified), required, true); - int expectedContainer0Size = tester.capacityPolicies().decideSize(capacity.apply(container0Size), containerCluster0, application); - int expectedContainer1Size = tester.capacityPolicies().decideSize(capacity.apply(container1Size), containerCluster1, application); - int expectedContent0Size = tester.capacityPolicies().decideSize(capacity.apply(content0Size), contentCluster0, application); - int expectedContent1Size = tester.capacityPolicies().decideSize(capacity.apply(content1Size), contentCluster1, application); + int expectedContainer0Size = tester.capacityPolicies().decideSize(container0Size, capacity.apply(container0Size), containerCluster0, application); + int expectedContainer1Size = tester.capacityPolicies().decideSize(container1Size, capacity.apply(container1Size), containerCluster1, application); + int expectedContent0Size = tester.capacityPolicies().decideSize(content0Size, capacity.apply(content0Size), contentCluster0, application); + int expectedContent1Size = tester.capacityPolicies().decideSize(content1Size, capacity.apply(content1Size), contentCluster1, application); assertEquals("Hosts in each group cluster is disjunct and the total number of unretired nodes is correct", expectedContainer0Size + expectedContainer1Size + expectedContent0Size + expectedContent1Size, -- cgit v1.2.3 From b18e2939b6632e7bf02cc12dea096dabecb9e754 Mon Sep 17 00:00:00 2001 From: Jon Bratseth Date: Fri, 27 Mar 2020 15:02:03 +0100 Subject: Activate autoscaling --- .../hosted/provision/applications/Application.java | 36 ++++++++++++++---- .../provision/applications/Applications.java | 5 +++ .../hosted/provision/applications/Cluster.java | 12 ++++-- .../autoscale/AllocatableClusterResources.java | 4 ++ 
.../hosted/provision/autoscale/Autoscaler.java | 3 +- .../maintenance/AutoscalingMaintainer.java | 44 ++++++++++++++++++---- .../maintenance/MaintenanceDeployment.java | 19 ++++++++-- .../provisioning/NodeRepositoryProvisioner.java | 3 +- .../autoscale/AutoscalingIntegrationTest.java | 11 +++++- .../provision/autoscale/AutoscalingTest.java | 41 +++++++++++++------- .../provision/autoscale/AutoscalingTester.java | 9 ++++- 11 files changed, 148 insertions(+), 39 deletions(-) diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/applications/Application.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/applications/Application.java index 7dd2dc7be17..e56e426b499 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/applications/Application.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/applications/Application.java @@ -18,20 +18,42 @@ import java.util.Optional; */ public class Application { - private Map clusters = new HashMap<>(); + private final Map clusters; + + public Application() { + this(Map.of()); + } + + private Application(Map clusters) { + this.clusters = Map.copyOf(clusters); + } /** Returns the cluster with the given id or null if none */ public Cluster cluster(ClusterSpec.Id id) { return clusters.get(id); } + public Application with(ClusterSpec.Id id, Cluster cluster) { + Map clusters = new HashMap<>(this.clusters); + clusters.put(id, cluster); + return new Application(clusters); + } + /** - * Sets the min and max resource limits of the given cluster. - * This will create the cluster with these limits if it does not exist. + * Returns an application with the given cluster having the min and max resource limits of the given cluster. * If the cluster has a target which is not inside the new limits, the target is removed. 
*/ - public void setClusterLimits(ClusterSpec.Id id, ClusterResources min, ClusterResources max, Mutex applicationLock) { - Cluster cluster = clusters.computeIfAbsent(id, clusterId -> new Cluster(min, max, Optional.empty())); - if (cluster.targetResources().isPresent() && ! cluster.targetResources().get().isWithin(min, max)) - clusters.put(id, cluster.withoutTarget()); + public Application withClusterLimits(ClusterSpec.Id id, ClusterResources min, ClusterResources max) { + Cluster cluster = clusters.get(id); + return with(id, new Cluster(min, max, cluster == null ? Optional.empty() : cluster.targetResources())); + } + + /** + * Returns an application with the given target for the given cluster, + * if it exists and the target is within the bounds + */ + public Application withClusterTarget(ClusterSpec.Id id, ClusterResources target) { + Cluster cluster = clusters.get(id); + if (cluster == null) return this; + return with(id, cluster.withTarget(target)); } } diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/applications/Applications.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/applications/Applications.java index 1409857df1a..879fcc5f6cb 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/applications/Applications.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/applications/Applications.java @@ -2,6 +2,7 @@ package com.yahoo.vespa.hosted.provision.applications; import com.yahoo.config.provision.ApplicationId; +import com.yahoo.transaction.Mutex; import java.util.concurrent.ConcurrentHashMap; @@ -21,4 +22,8 @@ public class Applications { return applications.computeIfAbsent(applicationId, id -> create ? 
new Application() : null); } + public void set(ApplicationId id, Application application, Mutex applicationLock) { + applications.put(id, application); + } + } diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/applications/Cluster.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/applications/Cluster.java index d4f7f10abfd..6ff827ac92b 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/applications/Cluster.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/applications/Cluster.java @@ -3,6 +3,7 @@ package com.yahoo.vespa.hosted.provision.applications; import com.yahoo.config.provision.ClusterResources; +import java.util.Objects; import java.util.Optional; /** @@ -18,9 +19,14 @@ public class Cluster { private final Optional targetResources; Cluster(ClusterResources minResources, ClusterResources maxResources, Optional targetResources) { - this.minResources = minResources; - this.maxResources = maxResources; - this.targetResources = targetResources; + this.minResources = Objects.requireNonNull(minResources); + this.maxResources = Objects.requireNonNull(maxResources); + Objects.requireNonNull(targetResources); + + if (targetResources.isPresent() && ! 
targetResources.get().isWithin(minResources, maxResources)) + this.targetResources = Optional.empty(); + else + this.targetResources = targetResources; } /** Returns the configured minimal resources in this cluster */ diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/AllocatableClusterResources.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/AllocatableClusterResources.java index 7a36103f337..f553a4c76a4 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/AllocatableClusterResources.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/AllocatableClusterResources.java @@ -73,6 +73,10 @@ public class AllocatableClusterResources { public int groups() { return groups; } public ClusterSpec.Type clusterType() { return clusterType; } + public ClusterResources toAdvertisedClusterResources() { + return new ClusterResources(nodes, groups, advertisedResources); + } + @Override public String toString() { return "$" + cost() + ": " + realResources(); diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/Autoscaler.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/Autoscaler.java index dc9699a7a0b..6612525685a 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/Autoscaler.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/Autoscaler.java @@ -9,6 +9,7 @@ import com.yahoo.config.provision.NodeResources; import com.yahoo.config.provision.host.FlavorOverrides; import com.yahoo.vespa.hosted.provision.Node; import com.yahoo.vespa.hosted.provision.NodeRepository; +import com.yahoo.vespa.hosted.provision.applications.Cluster; import com.yahoo.vespa.hosted.provision.provisioning.HostResourcesCalculator; import com.yahoo.vespa.hosted.provision.provisioning.NodeResourceLimits; @@ -66,7 +67,7 @@ public class Autoscaler { * @param 
clusterNodes the list of all the active nodes in a cluster * @return a new suggested allocation for this cluster, or empty if it should not be rescaled at this time */ - public Optional autoscale(List clusterNodes) { + public Optional autoscale(Cluster cluster, List clusterNodes) { if (clusterNodes.stream().anyMatch(node -> node.status().wantToRetire() || node.allocation().get().membership().retired() || node.allocation().get().isRemovable())) { diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/AutoscalingMaintainer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/AutoscalingMaintainer.java index 3f26725da15..064c7db5e60 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/AutoscalingMaintainer.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/AutoscalingMaintainer.java @@ -6,8 +6,11 @@ import com.yahoo.config.provision.ApplicationId; import com.yahoo.config.provision.ClusterSpec; import com.yahoo.config.provision.Deployer; import com.yahoo.config.provision.NodeResources; +import com.yahoo.transaction.Mutex; import com.yahoo.vespa.hosted.provision.Node; import com.yahoo.vespa.hosted.provision.NodeRepository; +import com.yahoo.vespa.hosted.provision.applications.Application; +import com.yahoo.vespa.hosted.provision.applications.Cluster; import com.yahoo.vespa.hosted.provision.autoscale.AllocatableClusterResources; import com.yahoo.vespa.hosted.provision.autoscale.Autoscaler; import com.yahoo.vespa.hosted.provision.autoscale.NodeMetricsDb; @@ -52,22 +55,49 @@ public class AutoscalingMaintainer extends Maintainer { private void autoscale(ApplicationId application, List applicationNodes) { try (MaintenanceDeployment deployment = new MaintenanceDeployment(application, deployer, nodeRepository())) { if ( ! 
deployment.isValid()) return; // Another config server will consider this application - nodesByCluster(applicationNodes).forEach((clusterId, clusterNodes) -> autoscale(application, clusterId, clusterNodes)); + nodesByCluster(applicationNodes).forEach((clusterId, clusterNodes) -> autoscale(application, clusterId, clusterNodes, deployment)); } } - private void autoscale(ApplicationId application, ClusterSpec.Id clusterId, List clusterNodes) { - Optional target = autoscaler.autoscale(clusterNodes); - if (target.isEmpty()) return; + private void autoscale(ApplicationId applicationId, + ClusterSpec.Id clusterId, + List clusterNodes, + MaintenanceDeployment deployment) { + Application application = nodeRepository().applications().get(applicationId, true); + Cluster cluster = application.cluster(clusterId); + if (cluster == null) return; // no information on limits for this cluster + Optional target = autoscaler.autoscale(cluster, clusterNodes); + if (target.isEmpty()) return; // current resources are fine + if (cluster.minResources().equals(cluster.maxResources())) // autoscaling is deactivated + logAutoscaleSuggestion(target.get(), applicationId, clusterId, clusterNodes); + else + autoscaleTo(target.get(), applicationId, clusterId, application, deployment); + } + + private void autoscaleTo(AllocatableClusterResources target, + ApplicationId applicationId, + ClusterSpec.Id clusterId, + Application application, + MaintenanceDeployment deployment) { + nodeRepository().applications().set(applicationId, + application.withClusterTarget(clusterId, target.toAdvertisedClusterResources()), + deployment.applicationLock().get()); + deployment.activate(); + } + + private void logAutoscaleSuggestion(AllocatableClusterResources target, + ApplicationId application, + ClusterSpec.Id clusterId, + List clusterNodes) { Instant lastLogTime = lastLogged.get(new Pair<>(application, clusterId)); if (lastLogTime != null && 
lastLogTime.isAfter(nodeRepository().clock().instant().minus(Duration.ofHours(1)))) return; - int currentGroups = (int) clusterNodes.stream().map(node -> node.allocation().get().membership().cluster().group()).distinct().count(); + int currentGroups = (int)clusterNodes.stream().map(node -> node.allocation().get().membership().cluster().group()).distinct().count(); ClusterSpec.Type clusterType = clusterNodes.get(0).allocation().get().membership().cluster().type(); - log.info("Autoscale: " + application + " " + clusterType + " " + clusterId + + log.info("Scaling suggestion: " + application + " " + clusterType + " " + clusterId + "\nfrom " + toString(clusterNodes.size(), currentGroups, clusterNodes.get(0).flavor().resources()) + - "\nto " + toString(target.get().nodes(), target.get().groups(), target.get().advertisedResources())); + "\nto " + toString(target.nodes(), target.groups(), target.advertisedResources())); lastLogged.put(new Pair<>(application, clusterId), nodeRepository().clock().instant()); } diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/MaintenanceDeployment.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/MaintenanceDeployment.java index 856de2609be..c21c9d68c8a 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/MaintenanceDeployment.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/MaintenanceDeployment.java @@ -36,10 +36,8 @@ class MaintenanceDeployment implements Closeable { public MaintenanceDeployment(ApplicationId application, Deployer deployer, NodeRepository nodeRepository) { this.application = application; Optional lock = tryLock(application, nodeRepository); - try { deployment = tryDeployment(lock, application, deployer, nodeRepository); - this.lock = lock; lock = Optional.empty(); } finally { @@ -52,6 +50,16 @@ class MaintenanceDeployment implements Closeable { return deployment.isPresent(); } + /** 
+ * Returns the application lock held by this, or empty if it is not held. + * + * @throws IllegalStateException if this is called when closed + */ + public Optional applicationLock() { + if (closed) throw new IllegalStateException(this + "is closed"); + return lock; + } + public boolean prepare() { return doStep(() -> deployment.get().prepare()); } @@ -61,7 +69,7 @@ class MaintenanceDeployment implements Closeable { } private boolean doStep(Runnable action) { - if (closed) throw new IllegalStateException("Deployment of '" + application + "' is closed"); + if (closed) throw new IllegalStateException(this + "' is closed"); if ( ! isValid()) return false; try { action.run(); @@ -101,4 +109,9 @@ class MaintenanceDeployment implements Closeable { closed = true; } + @Override + public String toString() { + return "deployment of " + application; + } + } diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeRepositoryProvisioner.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeRepositoryProvisioner.java index 083f8db5aa5..6ae89f6c9ed 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeRepositoryProvisioner.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeRepositoryProvisioner.java @@ -148,7 +148,8 @@ public class NodeRepositoryProvisioner implements Provisioner { private ClusterResources decideTargetResources(ApplicationId applicationId, ClusterSpec.Id clusterId, Capacity requested) { try (Mutex lock = nodeRepository.lock(applicationId)) { Application application = nodeRepository.applications().get(applicationId, true); - application.setClusterLimits(clusterId, requested.minResources(), requested.maxResources(), lock); + application = application.withClusterLimits(clusterId, requested.minResources(), requested.maxResources()); + nodeRepository.applications().set(applicationId, application, lock); return 
application.cluster(clusterId).targetResources() .orElse(currentResources(applicationId, clusterId, requested) .orElse(requested.minResources())); diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingIntegrationTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingIntegrationTest.java index d154af4f025..f02acdc1fca 100644 --- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingIntegrationTest.java +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingIntegrationTest.java @@ -2,12 +2,15 @@ package com.yahoo.vespa.hosted.provision.autoscale; import com.yahoo.config.provision.ApplicationId; +import com.yahoo.config.provision.ClusterResources; import com.yahoo.config.provision.ClusterSpec; import com.yahoo.config.provision.Flavor; import com.yahoo.config.provision.HostSpec; import com.yahoo.config.provision.NodeResources; import com.yahoo.test.ManualClock; import com.yahoo.vespa.hosted.provision.Node; +import com.yahoo.vespa.hosted.provision.applications.Application; +import com.yahoo.vespa.hosted.provision.applications.Cluster; import com.yahoo.vespa.hosted.provision.provisioning.HostResourcesCalculator; import com.yahoo.vespa.hosted.provision.testutils.OrchestratorMock; import org.junit.Test; @@ -47,7 +50,13 @@ public class AutoscalingIntegrationTest { tester.nodeMetricsDb().gc(tester.clock()); } - var scaledResources = autoscaler.autoscale(tester.nodeRepository().getNodes(application1)); + ClusterResources min = new ClusterResources(2, 1, nodes); + ClusterResources max = new ClusterResources(2, 1, nodes); + + Application application = tester.nodeRepository().applications().get(application1, true).withClusterLimits(cluster1.id(), min, max); + tester.nodeRepository().applications().set(application1, application, tester.nodeRepository().lock(application1)); + var scaledResources = 
autoscaler.autoscale(application.cluster(cluster1.id()), + tester.nodeRepository().getNodes(application1)); assertTrue(scaledResources.isPresent()); } diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTest.java index 39259bf44f8..f3b6606a970 100644 --- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTest.java +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTest.java @@ -4,6 +4,7 @@ package com.yahoo.vespa.hosted.provision.autoscale; import com.google.common.collect.Sets; import com.yahoo.config.provision.ApplicationId; import com.yahoo.config.provision.CloudName; +import com.yahoo.config.provision.ClusterResources; import com.yahoo.config.provision.ClusterSpec; import com.yahoo.config.provision.Environment; import com.yahoo.config.provision.Flavor; @@ -31,6 +32,8 @@ public class AutoscalingTest { @Test public void testAutoscalingSingleContentGroup() { NodeResources resources = new NodeResources(3, 100, 100, 1); + ClusterResources min = new ClusterResources( 2, 1, new NodeResources(1, 1, 1, 1)); + ClusterResources max = new ClusterResources(20, 1, new NodeResources(100, 1000, 1000, 1)); AutoscalingTester tester = new AutoscalingTester(resources); ApplicationId application1 = tester.applicationId("application1"); @@ -39,37 +42,39 @@ public class AutoscalingTest { // deploy tester.deploy(application1, cluster1, 5, 1, resources); - assertTrue("No measurements -> No change", tester.autoscale(application1).isEmpty()); + assertTrue("No measurements -> No change", tester.autoscale(application1, cluster1.id(), min, max).isEmpty()); tester.addMeasurements(Resource.cpu, 0.25f, 1f, 60, application1); - assertTrue("Too few measurements -> No change", tester.autoscale(application1).isEmpty()); + assertTrue("Too few measurements -> No change", 
tester.autoscale(application1, cluster1.id(), min, max).isEmpty()); tester.addMeasurements(Resource.cpu, 0.25f, 1f, 60, application1); AllocatableClusterResources scaledResources = tester.assertResources("Scaling up since resource usage is too high", 15, 1, 1.3, 28.6, 28.6, - tester.autoscale(application1)); + tester.autoscale(application1, cluster1.id(), min, max)); tester.deploy(application1, cluster1, scaledResources); - assertTrue("Cluster in flux -> No further change", tester.autoscale(application1).isEmpty()); + assertTrue("Cluster in flux -> No further change", tester.autoscale(application1, cluster1.id(), min, max).isEmpty()); tester.deactivateRetired(application1, cluster1, scaledResources); tester.addMeasurements(Resource.cpu, 0.8f, 1f, 3, application1); assertTrue("Load change is large, but insufficient measurements for new config -> No change", - tester.autoscale(application1).isEmpty()); + tester.autoscale(application1, cluster1.id(), min, max).isEmpty()); tester.addMeasurements(Resource.cpu, 0.19f, 1f, 100, application1); - assertEquals("Load change is small -> No change", Optional.empty(), tester.autoscale(application1)); + assertEquals("Load change is small -> No change", Optional.empty(), tester.autoscale(application1, cluster1.id(), min, max)); tester.addMeasurements(Resource.cpu, 0.1f, 1f, 120, application1); tester.assertResources("Scaling down since resource usage has gone down significantly", 26, 1, 0.6, 16.0, 16.0, - tester.autoscale(application1)); + tester.autoscale(application1, cluster1.id(), min, max)); } /** We prefer fewer nodes for container clusters as (we assume) they all use the same disk and memory */ @Test public void testAutoscalingSingleContainerGroup() { NodeResources resources = new NodeResources(3, 100, 100, 1); + ClusterResources min = new ClusterResources( 2, 1, new NodeResources(1, 1, 1, 1)); + ClusterResources max = new ClusterResources(20, 1, new NodeResources(100, 1000, 1000, 1)); AutoscalingTester tester = new 
AutoscalingTester(resources); ApplicationId application1 = tester.applicationId("application1"); @@ -81,7 +86,7 @@ public class AutoscalingTest { tester.addMeasurements(Resource.cpu, 0.25f, 1f, 120, application1); AllocatableClusterResources scaledResources = tester.assertResources("Scaling up since cpu usage is too high", 7, 1, 2.6, 80.0, 80.0, - tester.autoscale(application1)); + tester.autoscale(application1, cluster1.id(), min, max)); tester.deploy(application1, cluster1, scaledResources); tester.deactivateRetired(application1, cluster1, scaledResources); @@ -89,12 +94,14 @@ public class AutoscalingTest { tester.addMeasurements(Resource.cpu, 0.1f, 1f, 120, application1); tester.assertResources("Scaling down since cpu usage has gone down", 4, 1, 2.4, 68.6, 68.6, - tester.autoscale(application1)); + tester.autoscale(application1, cluster1.id(), min, max)); } @Test public void testAutoscalingGroupSize1() { NodeResources resources = new NodeResources(3, 100, 100, 1); + ClusterResources min = new ClusterResources( 2, 1, new NodeResources(1, 1, 1, 1)); + ClusterResources max = new ClusterResources(20, 1, new NodeResources(100, 1000, 1000, 1)); AutoscalingTester tester = new AutoscalingTester(resources); ApplicationId application1 = tester.applicationId("application1"); @@ -105,12 +112,14 @@ public class AutoscalingTest { tester.addMeasurements(Resource.cpu, 0.25f, 1f, 120, application1); tester.assertResources("Scaling up since resource usage is too high", 7, 7, 2.5, 80.0, 80.0, - tester.autoscale(application1)); + tester.autoscale(application1, cluster1.id(), min, max)); } @Test public void testAutoscalingGroupSize3() { NodeResources resources = new NodeResources(3, 100, 100, 1); + ClusterResources min = new ClusterResources( 2, 1, new NodeResources(1, 1, 1, 1)); + ClusterResources max = new ClusterResources(20, 1, new NodeResources(100, 1000, 1000, 1)); AutoscalingTester tester = new AutoscalingTester(resources); ApplicationId application1 = 
tester.applicationId("application1"); @@ -121,12 +130,14 @@ public class AutoscalingTest { tester.addMeasurements(Resource.cpu, 0.22f, 1f, 120, application1); tester.assertResources("Scaling up since resource usage is too high", 9, 3, 2.7, 83.3, 83.3, - tester.autoscale(application1)); + tester.autoscale(application1, cluster1.id(), min, max)); } @Test public void testAutoscalingAvoidsIllegalConfigurations() { NodeResources resources = new NodeResources(3, 100, 100, 1); + ClusterResources min = new ClusterResources( 2, 1, new NodeResources(1, 1, 1, 1)); + ClusterResources max = new ClusterResources(20, 1, new NodeResources(100, 1000, 1000, 1)); AutoscalingTester tester = new AutoscalingTester(resources); ApplicationId application1 = tester.applicationId("application1"); @@ -137,11 +148,13 @@ public class AutoscalingTest { tester.addMeasurements(Resource.memory, 0.02f, 1f, 120, application1); tester.assertResources("Scaling down", 6, 1, 3.0, 4.0, 100.0, - tester.autoscale(application1)); + tester.autoscale(application1, cluster1.id(), min, max)); } @Test public void testAutoscalingAws() { + ClusterResources min = new ClusterResources( 2, 1, new NodeResources(1, 1, 1, 1)); + ClusterResources max = new ClusterResources(20, 1, new NodeResources(100, 1000, 1000, 1)); List flavors = new ArrayList<>(); flavors.add(new Flavor("aws-xlarge", new NodeResources(3, 200, 100, 1, NodeResources.DiskSpeed.fast, NodeResources.StorageType.remote))); flavors.add(new Flavor("aws-large", new NodeResources(3, 150, 100, 1, NodeResources.DiskSpeed.fast, NodeResources.StorageType.remote))); @@ -160,7 +173,7 @@ public class AutoscalingTest { tester.addMeasurements(Resource.memory, 0.9f, 0.6f, 120, application1); AllocatableClusterResources scaledResources = tester.assertResources("Scaling up since resource usage is too high.", 8, 1, 3, 83, 34.3, - tester.autoscale(application1)); + tester.autoscale(application1, cluster1.id(), min, max)); tester.deploy(application1, cluster1, 
scaledResources); tester.deactivateRetired(application1, cluster1, scaledResources); @@ -168,7 +181,7 @@ public class AutoscalingTest { tester.addMeasurements(Resource.memory, 0.3f, 0.6f, 1000, application1); tester.assertResources("Scaling down since resource usage has gone down", 5, 1, 3, 83, 36, - tester.autoscale(application1)); + tester.autoscale(application1, cluster1.id(), min, max)); } } diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTester.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTester.java index efb97841623..8043d5b2d39 100644 --- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTester.java +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTester.java @@ -20,6 +20,7 @@ import com.yahoo.vespa.flags.Flags; import com.yahoo.vespa.flags.InMemoryFlagSource; import com.yahoo.vespa.hosted.provision.Node; import com.yahoo.vespa.hosted.provision.NodeRepository; +import com.yahoo.vespa.hosted.provision.applications.Application; import com.yahoo.vespa.hosted.provision.node.Agent; import com.yahoo.vespa.hosted.provision.node.IP; import com.yahoo.vespa.hosted.provision.provisioning.FatalProvisioningException; @@ -140,8 +141,12 @@ class AutoscalingTester { } } - public Optional autoscale(ApplicationId application) { - return autoscaler.autoscale(nodeRepository().getNodes(application, Node.State.active)); + public Optional autoscale(ApplicationId applicationId, ClusterSpec.Id clusterId, + ClusterResources min, ClusterResources max) { + Application application = nodeRepository().applications().get(applicationId, true).withClusterLimits(clusterId, min, max); + nodeRepository().applications().set(applicationId, application, nodeRepository().lock(applicationId)); + return autoscaler.autoscale(application.cluster(clusterId), + nodeRepository().getNodes(applicationId, Node.State.active)); } public 
AllocatableClusterResources assertResources(String message, -- cgit v1.2.3 From 3124be24e2419dbd17ae448b4cd8203e22278fea Mon Sep 17 00:00:00 2001 From: Jon Bratseth Date: Sat, 28 Mar 2020 13:32:18 +0100 Subject: Respect node resource limits --- .../com/yahoo/config/provision/NodeResources.java | 10 +- .../hosted/provision/applications/Cluster.java | 36 ++++--- .../hosted/provision/autoscale/Autoscaler.java | 8 +- .../vespa/hosted/provision/autoscale/Resource.java | 8 +- .../provision/autoscale/ResourceIterator.java | 63 +++++++++--- .../maintenance/AutoscalingMaintainer.java | 6 +- .../provision/autoscale/AutoscalingTest.java | 89 +++++++++++++++-- .../provision/autoscale/AutoscalingTester.java | 13 +++ .../maintenance/AutoscalingMaintainerTest.java | 110 +++++++++++++++++++++ .../provision/maintenance/MaintenanceTester.java | 3 + .../provision/maintenance/RebalancerTest.java | 16 +-- .../provision/provisioning/ProvisioningTester.java | 29 +++++- 12 files changed, 331 insertions(+), 60 deletions(-) create mode 100644 node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/AutoscalingMaintainerTest.java diff --git a/config-provisioning/src/main/java/com/yahoo/config/provision/NodeResources.java b/config-provisioning/src/main/java/com/yahoo/config/provision/NodeResources.java index 6d7fe752e46..5fc05a87a7d 100644 --- a/config-provisioning/src/main/java/com/yahoo/config/provision/NodeResources.java +++ b/config-provisioning/src/main/java/com/yahoo/config/provision/NodeResources.java @@ -114,26 +114,32 @@ public class NodeResources { public StorageType storageType() { return storageType; } public NodeResources withVcpu(double vcpu) { + if (vcpu == this.vcpu) return this; return new NodeResources(vcpu, memoryGb, diskGb, bandwidthGbps, diskSpeed, storageType); } public NodeResources withMemoryGb(double memoryGb) { + if (memoryGb == this.memoryGb) return this; return new NodeResources(vcpu, memoryGb, diskGb, bandwidthGbps, diskSpeed, storageType); } 
public NodeResources withDiskGb(double diskGb) { + if (diskGb == this.diskGb) return this; return new NodeResources(vcpu, memoryGb, diskGb, bandwidthGbps, diskSpeed, storageType); } public NodeResources withBandwidthGbps(double bandwidthGbps) { + if (bandwidthGbps == this.bandwidthGbps) return this; return new NodeResources(vcpu, memoryGb, diskGb, bandwidthGbps, diskSpeed, storageType); } - public NodeResources with(DiskSpeed speed) { - return new NodeResources(vcpu, memoryGb, diskGb, bandwidthGbps, speed, storageType); + public NodeResources with(DiskSpeed diskSpeed) { + if (diskSpeed == this.diskSpeed) return this; + return new NodeResources(vcpu, memoryGb, diskGb, bandwidthGbps, diskSpeed, storageType); } public NodeResources with(StorageType storageType) { + if (storageType == this.storageType) return this; return new NodeResources(vcpu, memoryGb, diskGb, bandwidthGbps, diskSpeed, storageType); } diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/applications/Cluster.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/applications/Cluster.java index 6ff827ac92b..6ff7f41be8f 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/applications/Cluster.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/applications/Cluster.java @@ -2,6 +2,7 @@ package com.yahoo.vespa.hosted.provision.applications; import com.yahoo.config.provision.ClusterResources; +import com.yahoo.config.provision.NodeResources; import java.util.Objects; import java.util.Optional; @@ -15,38 +16,51 @@ import java.util.Optional; */ public class Cluster { - private final ClusterResources minResources, maxResources; - private final Optional targetResources; + private final ClusterResources min, max; + private final Optional target; Cluster(ClusterResources minResources, ClusterResources maxResources, Optional targetResources) { - this.minResources = Objects.requireNonNull(minResources); - this.maxResources = 
Objects.requireNonNull(maxResources); + this.min = Objects.requireNonNull(minResources); + this.max = Objects.requireNonNull(maxResources); Objects.requireNonNull(targetResources); if (targetResources.isPresent() && ! targetResources.get().isWithin(minResources, maxResources)) - this.targetResources = Optional.empty(); + this.target = Optional.empty(); else - this.targetResources = targetResources; + this.target = targetResources; } /** Returns the configured minimal resources in this cluster */ - public ClusterResources minResources() { return minResources; } + public ClusterResources minResources() { return min; } /** Returns the configured maximal resources in this cluster */ - public ClusterResources maxResources() { return maxResources; } + public ClusterResources maxResources() { return max; } /** * Returns the computed resources (between min and max, inclusive) this cluster should * have allocated at the moment, or empty if the system currently have no opinion on this. */ - public Optional targetResources() { return targetResources; } + public Optional targetResources() { return target; } public Cluster withTarget(ClusterResources target) { - return new Cluster(minResources, maxResources, Optional.of(target)); + return new Cluster(min, max, Optional.of(target)); } public Cluster withoutTarget() { - return new Cluster(minResources, maxResources, Optional.empty()); + return new Cluster(min, max, Optional.empty()); + } + + public NodeResources capAtLimits(NodeResources resources) { + resources = resources.withVcpu(between(min.nodeResources().vcpu(), max.nodeResources().vcpu(), resources.vcpu())); + resources = resources.withMemoryGb(between(min.nodeResources().memoryGb(), max.nodeResources().memoryGb(), resources.memoryGb())); + resources = resources.withDiskGb(between(min.nodeResources().diskGb(), max.nodeResources().diskGb(), resources.diskGb())); + return resources; + } + + private double between(double min, double max, double value) { + value = 
Math.max(min, value); + value = Math.min(max, value); + return value; } } diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/Autoscaler.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/Autoscaler.java index 6612525685a..dc873ce4e69 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/Autoscaler.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/Autoscaler.java @@ -84,7 +84,8 @@ public class Autoscaler { Optional bestAllocation = findBestAllocation(cpuLoad.get(), memoryLoad.get(), diskLoad.get(), - currentAllocation); + currentAllocation, + cluster); if (bestAllocation.isEmpty()) return Optional.empty(); if (closeToIdeal(Resource.cpu, cpuLoad.get()) && @@ -97,9 +98,10 @@ public class Autoscaler { } private Optional findBestAllocation(double cpuLoad, double memoryLoad, double diskLoad, - AllocatableClusterResources currentAllocation) { + AllocatableClusterResources currentAllocation, + Cluster cluster) { Optional bestAllocation = Optional.empty(); - for (ResourceIterator i = new ResourceIterator(cpuLoad, memoryLoad, diskLoad, currentAllocation); i.hasNext(); ) { + for (ResourceIterator i = new ResourceIterator(cpuLoad, memoryLoad, diskLoad, currentAllocation, cluster); i.hasNext(); ) { ClusterResources allocation = i.next(); Optional allocatableResources = toAllocatableResources(allocation, currentAllocation.clusterType()); diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/Resource.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/Resource.java index e84544e7e7b..3d5ce8881e0 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/Resource.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/Resource.java @@ -12,7 +12,7 @@ public enum Resource { /** Cpu utilization ratio */ cpu { - String metricName() { return 
"cpu.util"; } + public String metricName() { return "cpu.util"; } double idealAverageLoad() { return 0.2; } double valueFrom(NodeResources resources) { return resources.vcpu(); } double valueFromMetric(double metricValue) { return metricValue / 100; } // % to ratio @@ -20,7 +20,7 @@ public enum Resource { /** Memory utilization ratio */ memory { - String metricName() { return "mem_total.util"; } + public String metricName() { return "mem_total.util"; } double idealAverageLoad() { return 0.7; } double valueFrom(NodeResources resources) { return resources.memoryGb(); } double valueFromMetric(double metricValue) { return metricValue / 100; } // % to ratio @@ -28,13 +28,13 @@ public enum Resource { /** Disk utilization ratio */ disk { - String metricName() { return "disk.util"; } + public String metricName() { return "disk.util"; } double idealAverageLoad() { return 0.6; } double valueFrom(NodeResources resources) { return resources.diskGb(); } double valueFromMetric(double metricValue) { return metricValue / 100; } // % to ratio }; - abstract String metricName(); + public abstract String metricName(); /** The load we should have of this resource on average, when one node in the cluster is down */ abstract double idealAverageLoad(); diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ResourceIterator.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ResourceIterator.java index 82c07345c7f..19909e40441 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ResourceIterator.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ResourceIterator.java @@ -3,6 +3,7 @@ package com.yahoo.vespa.hosted.provision.autoscale; import com.yahoo.config.provision.ClusterResources; import com.yahoo.config.provision.NodeResources; +import com.yahoo.vespa.hosted.provision.applications.Cluster; /** * Provides iteration over possible cluster resource allocations given 
a target total load @@ -10,16 +11,19 @@ import com.yahoo.config.provision.NodeResources; */ public class ResourceIterator { - // Configured min and max nodes TODO: These should come from the application package - private static final int minimumNodesPerCluster = 3; // Since this is with redundancy it cannot be lower than 2 - private static final int maximumNodesPerCluster = 150; + // Configured min and max nodes for suggestions for apps which have not activated autoscaling + private static final int minimumNodes = 3; // Since this is with redundancy it cannot be lower than 2 + private static final int maximumNodes = 150; // When a query is issued on a node the cost is the sum of a fixed cost component and a cost component // proportional to document count. We must account for this when comparing configurations with more or fewer nodes. // TODO: Measure this, and only take it into account with queries private static final double fixedCpuCostFraction = 0.1; - // Describes the observed state + // Prescribed state + private final Cluster cluster; + + // Observed state private final AllocatableClusterResources allocation; private final double cpuLoad; private final double memoryLoad; @@ -33,7 +37,9 @@ public class ResourceIterator { // Iterator state private int currentNodes; - public ResourceIterator(double cpuLoad, double memoryLoad, double diskLoad, AllocatableClusterResources currentAllocation) { + public ResourceIterator(double cpuLoad, double memoryLoad, double diskLoad, + AllocatableClusterResources currentAllocation, + Cluster cluster) { this.cpuLoad = cpuLoad; this.memoryLoad = memoryLoad; this.diskLoad = diskLoad; @@ -42,6 +48,8 @@ public class ResourceIterator { groupSize = (int)Math.ceil((double)currentAllocation.nodes() / currentAllocation.groups()); allocation = currentAllocation; + this.cluster = cluster; + // What number of nodes is it effective to add or remove at the time from this cluster? 
// This is the group size, since we (for now) assume the group size is decided by someone wiser than us // and we decide the number of groups. @@ -49,30 +57,53 @@ public class ResourceIterator { singleGroupMode = currentAllocation.groups() == 1; nodeIncrement = singleGroupMode ? 1 : groupSize; + // Step down to the right starting point currentNodes = currentAllocation.nodes(); - while (currentNodes - nodeIncrement >= minimumNodesPerCluster - && (singleGroupMode || currentNodes - nodeIncrement > groupSize)) // group level redundancy + while (currentNodes - nodeIncrement >= minNodes() + && ( singleGroupMode || currentNodes - nodeIncrement > groupSize)) // group level redundancy currentNodes -= nodeIncrement; } + /** If autoscaling is not enabled (meaning max and min resources are the same), we want to suggest */ + private boolean suggestMode() { + return cluster.minResources().equals(cluster.maxResources()); + } + public ClusterResources next() { - int nodesWithRedundancy = currentNodes - (singleGroupMode ? 1 : groupSize); - ClusterResources next = new ClusterResources(currentNodes, - singleGroupMode ? 
1 : currentNodes / groupSize, - resourcesFor(nodesWithRedundancy)); + ClusterResources next = resourcesWith(currentNodes); currentNodes += nodeIncrement; + System.out.println("Candidate: " + next); return next; } public boolean hasNext() { - return currentNodes <= maximumNodesPerCluster; + return currentNodes <= maxNodes(); + } + + private int minNodes() { + if (suggestMode()) return minimumNodes; + if (singleGroupMode) return cluster.minResources().nodes(); + return Math.max(cluster.minResources().nodes(), cluster.minResources().groups() * groupSize ); + } + + private int maxNodes() { + if (suggestMode()) return maximumNodes; + if (singleGroupMode) return cluster.maxResources().nodes(); + return Math.min(cluster.maxResources().nodes(), cluster.maxResources().groups() * groupSize ); + } + + private ClusterResources resourcesWith(int nodes) { + int nodesWithRedundancy = nodes - (singleGroupMode ? 1 : groupSize); + return new ClusterResources(nodes, + singleGroupMode ? 1 : nodes / groupSize, + nodeResourcesWith(nodesWithRedundancy)); } /** * For the observed load this instance is initialized with, returns the resources needed per node to be at * ideal load given a target node count */ - private NodeResources resourcesFor(int nodeCount) { + private NodeResources nodeResourcesWith(int nodeCount) { // Cpu: Scales with cluster size (TODO: Only reads, writes scales with group size) // Memory and disk: Scales with group size @@ -103,7 +134,11 @@ public class ResourceIterator { disk = nodeUsage(Resource.disk, diskLoad) / Resource.disk.idealAverageLoad(); } } - return allocation.realResources().withVcpu(cpu).withMemoryGb(memory).withDiskGb(disk); + + NodeResources resources = allocation.realResources().withVcpu(cpu).withMemoryGb(memory).withDiskGb(disk); + if ( ! 
suggestMode()) + resources = cluster.capAtLimits(resources); + return resources; } private double clusterUsage(Resource resource, double load) { diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/AutoscalingMaintainer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/AutoscalingMaintainer.java index 064c7db5e60..7073ab5d1a9 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/AutoscalingMaintainer.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/AutoscalingMaintainer.java @@ -95,7 +95,7 @@ public class AutoscalingMaintainer extends Maintainer { int currentGroups = (int)clusterNodes.stream().map(node -> node.allocation().get().membership().cluster().group()).distinct().count(); ClusterSpec.Type clusterType = clusterNodes.get(0).allocation().get().membership().cluster().type(); - log.info("Scaling suggestion: " + application + " " + clusterType + " " + clusterId + + log.info("Scaling suggestion for " + application + " " + clusterType + " " + clusterId + ":" + "\nfrom " + toString(clusterNodes.size(), currentGroups, clusterNodes.get(0).flavor().resources()) + "\nto " + toString(target.nodes(), target.groups(), target.advertisedResources())); lastLogged.put(new Pair<>(application, clusterId), nodeRepository().clock().instant()); @@ -103,8 +103,8 @@ public class AutoscalingMaintainer extends Maintainer { private String toString(int nodes, int groups, NodeResources resources) { return String.format(nodes + (groups > 1 ? 
" (in " + groups + " groups)" : "") + - " * [vcpu: %1$.1f, memory: %2$.1f Gb, disk %3$.1f Gb]" + - " (total: [vcpu: %4$.1f, memory: %5$.1f Gb, disk: %6$.1f Gb])," + + " * [vcpu: %0$.1f, memory: %1$.1f Gb, disk %2$.1f Gb]" + + " (total: [vcpu: %3$.1f, memory: %4$.1f Gb, disk: %5$.1f Gb])", resources.vcpu(), resources.memoryGb(), resources.diskGb(), nodes * resources.vcpu(), nodes * resources.memoryGb(), nodes * resources.diskGb()); } diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTest.java index f3b6606a970..0e6d2365490 100644 --- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTest.java +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTest.java @@ -64,8 +64,8 @@ public class AutoscalingTest { assertEquals("Load change is small -> No change", Optional.empty(), tester.autoscale(application1, cluster1.id(), min, max)); tester.addMeasurements(Resource.cpu, 0.1f, 1f, 120, application1); - tester.assertResources("Scaling down since resource usage has gone down significantly", - 26, 1, 0.6, 16.0, 16.0, + tester.assertResources("Scaling down to minimum since usage has gone down significantly", + 14, 1, 1.0, 30.8, 30.8, tester.autoscale(application1, cluster1.id(), min, max)); } @@ -98,10 +98,87 @@ public class AutoscalingTest { } @Test - public void testAutoscalingGroupSize1() { + public void testAutoscalingRespectsUpperLimit() { NodeResources resources = new NodeResources(3, 100, 100, 1); ClusterResources min = new ClusterResources( 2, 1, new NodeResources(1, 1, 1, 1)); - ClusterResources max = new ClusterResources(20, 1, new NodeResources(100, 1000, 1000, 1)); + ClusterResources max = new ClusterResources( 6, 1, new NodeResources(2.4, 78, 79, 1)); + AutoscalingTester tester = new AutoscalingTester(resources); + + ApplicationId application1 = 
tester.applicationId("application1"); + ClusterSpec cluster1 = tester.clusterSpec(ClusterSpec.Type.container, "cluster1"); + + // deploy + tester.deploy(application1, cluster1, 5, 1, resources); + tester.addMeasurements(Resource.cpu, 0.25f, 120, application1); + tester.addMeasurements(Resource.memory, 0.95f, 120, application1); + tester.addMeasurements(Resource.disk, 0.95f, 120, application1); + tester.assertResources("Scaling up to limit since resource usage is too high", + 6, 1, 2.4, 78.0, 79.0, + tester.autoscale(application1, cluster1.id(), min, max)); + } + + @Test + public void testAutoscalingRespectsLowerLimit() { + NodeResources resources = new NodeResources(3, 100, 100, 1); + ClusterResources min = new ClusterResources( 3, 1, new NodeResources(1.8, 7.4, 8.5, 1)); + ClusterResources max = new ClusterResources( 6, 1, new NodeResources(2.4, 78, 79, 1)); + AutoscalingTester tester = new AutoscalingTester(resources); + + ApplicationId application1 = tester.applicationId("application1"); + ClusterSpec cluster1 = tester.clusterSpec(ClusterSpec.Type.container, "cluster1"); + + // deploy + tester.deploy(application1, cluster1, 5, 1, resources); + tester.addMeasurements(Resource.cpu, 0.05f, 120, application1); + tester.addMeasurements(Resource.memory, 0.05f, 120, application1); + tester.addMeasurements(Resource.disk, 0.05f, 120, application1); + tester.assertResources("Scaling down to limit since resource usage is low", + 3, 1, 1.8, 7.4, 8.5, + tester.autoscale(application1, cluster1.id(), min, max)); + } + + @Test + public void testAutoscalingRespectsGroupLimit() { + NodeResources resources = new NodeResources(3, 100, 100, 1); + ClusterResources min = new ClusterResources( 2, 2, new NodeResources(1, 1, 1, 1)); + ClusterResources max = new ClusterResources(18, 6, new NodeResources(100, 1000, 1000, 1)); + AutoscalingTester tester = new AutoscalingTester(resources); + + ApplicationId application1 = tester.applicationId("application1"); + ClusterSpec cluster1 = 
tester.clusterSpec(ClusterSpec.Type.container, "cluster1"); + + // deploy + tester.deploy(application1, cluster1, 5, 5, resources); + tester.addMeasurements(Resource.cpu, 0.25f, 1f, 120, application1); + tester.assertResources("Scaling up since resource usage is too high", + 6, 6, 2.5, 80.0, 80.0, + tester.autoscale(application1, cluster1.id(), min, max)); + } + + /** This condition ensures we get recommendation suggestions when deactivated */ + @Test + public void testAutoscalingLimitsAreIgnoredIfMinEqualsMax() { + NodeResources resources = new NodeResources(3, 100, 100, 1); + ClusterResources min = new ClusterResources( 2, 1, new NodeResources(1, 1, 1, 1)); + ClusterResources max = min; + AutoscalingTester tester = new AutoscalingTester(resources); + + ApplicationId application1 = tester.applicationId("application1"); + ClusterSpec cluster1 = tester.clusterSpec(ClusterSpec.Type.container, "cluster1"); + + // deploy + tester.deploy(application1, cluster1, 5, 1, resources); + tester.addMeasurements(Resource.cpu, 0.25f, 1f, 120, application1); + tester.assertResources("Scaling up since resource usage is too high", + 7, 1, 2.6, 80.0, 80.0, + tester.autoscale(application1, cluster1.id(), min, max)); + } + + @Test + public void testAutoscalingGroupSize1() { + NodeResources resources = new NodeResources(3, 100, 100, 1); + ClusterResources min = new ClusterResources( 2, 2, new NodeResources(1, 1, 1, 1)); + ClusterResources max = new ClusterResources(20, 20, new NodeResources(100, 1000, 1000, 1)); AutoscalingTester tester = new AutoscalingTester(resources); ApplicationId application1 = tester.applicationId("application1"); @@ -118,8 +195,8 @@ public class AutoscalingTest { @Test public void testAutoscalingGroupSize3() { NodeResources resources = new NodeResources(3, 100, 100, 1); - ClusterResources min = new ClusterResources( 2, 1, new NodeResources(1, 1, 1, 1)); - ClusterResources max = new ClusterResources(20, 1, new NodeResources(100, 1000, 1000, 1)); + 
ClusterResources min = new ClusterResources( 3, 1, new NodeResources(1, 1, 1, 1)); + ClusterResources max = new ClusterResources(21, 7, new NodeResources(100, 1000, 1000, 1)); AutoscalingTester tester = new AutoscalingTester(resources); ApplicationId application1 = tester.applicationId("application1"); diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTester.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTester.java index 8043d5b2d39..ebc4d158ded 100644 --- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTester.java +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTester.java @@ -141,6 +141,19 @@ class AutoscalingTester { } } + public void addMeasurements(Resource resource, float value, int count, ApplicationId applicationId) { + List nodes = nodeRepository().getNodes(applicationId, Node.State.active); + for (int i = 0; i < count; i++) { + clock().advance(Duration.ofMinutes(1)); + for (Node node : nodes) { + db.add(List.of(new NodeMetrics.MetricValue(node.hostname(), + resource.metricName(), + clock().instant().toEpochMilli(), + value * 100))); // the metrics are in % + } + } + } + public Optional autoscale(ApplicationId applicationId, ClusterSpec.Id clusterId, ClusterResources min, ClusterResources max) { Application application = nodeRepository().applications().get(applicationId, true).withClusterLimits(clusterId, min, max); diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/AutoscalingMaintainerTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/AutoscalingMaintainerTest.java new file mode 100644 index 00000000000..413c14d132d --- /dev/null +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/AutoscalingMaintainerTest.java @@ -0,0 +1,110 @@ +// Copyright Verizon Media. 
Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.vespa.hosted.provision.maintenance; + +import com.yahoo.config.provision.ApplicationId; +import com.yahoo.config.provision.Capacity; +import com.yahoo.config.provision.ClusterResources; +import com.yahoo.config.provision.ClusterSpec; +import com.yahoo.config.provision.Environment; +import com.yahoo.config.provision.Flavor; +import com.yahoo.config.provision.NodeResources; +import com.yahoo.config.provision.NodeType; +import com.yahoo.config.provision.RegionName; +import com.yahoo.config.provision.Zone; +import com.yahoo.config.provisioning.FlavorsConfig; +import com.yahoo.vespa.hosted.provision.Node; +import com.yahoo.vespa.hosted.provision.NodeRepository; +import com.yahoo.vespa.hosted.provision.autoscale.NodeMetrics; +import com.yahoo.vespa.hosted.provision.autoscale.NodeMetricsDb; +import com.yahoo.vespa.hosted.provision.autoscale.Resource; +import com.yahoo.vespa.hosted.provision.provisioning.FlavorConfigBuilder; +import com.yahoo.vespa.hosted.provision.provisioning.ProvisioningTester; +import com.yahoo.vespa.hosted.provision.testutils.MockDeployer; +import org.junit.Test; + +import java.time.Duration; +import java.util.List; +import java.util.Map; + +import static org.junit.Assert.assertTrue; + +/** + * Tests the autoscaling maintainer integration. + * The specific recommendations of the autoscaler are not tested here. 
+ * + * @author bratseth + */ +public class AutoscalingMaintainerTest { + + @Test + public void testAutoscalingMaintainer() { + ProvisioningTester tester = new ProvisioningTester.Builder().zone(new Zone(Environment.prod, RegionName.from("us-east3"))).flavorsConfig(flavorsConfig()).build(); + + ApplicationId app1 = tester.makeApplicationId("app1"); + ClusterSpec cluster1 = tester.clusterSpec(); + + ApplicationId app2 = tester.makeApplicationId("app2"); + ClusterSpec cluster2 = tester.clusterSpec(); + + NodeResources lowResources = new NodeResources(4, 4, 10, 0.1); + NodeResources highResources = new NodeResources(6.5, 9, 20, 0.1); + + Map apps = Map.of( + app1, new MockDeployer.ApplicationContext(app1, cluster1, Capacity.from(new ClusterResources(2, 1, lowResources))), + app2, new MockDeployer.ApplicationContext(app2, cluster2, Capacity.from(new ClusterResources(2, 1, highResources)))); + MockDeployer deployer = new MockDeployer(tester.provisioner(), tester.clock(), apps); + + NodeMetricsDb nodeMetricsDb = new NodeMetricsDb(); + AutoscalingMaintainer maintainer = new AutoscalingMaintainer(tester.nodeRepository(), + tester.identityHostResourcesCalculator(), + nodeMetricsDb, + deployer, + Duration.ofMinutes(1)); + maintainer.maintain(); // noop + assertTrue(deployer.lastDeployTime(app1).isEmpty()); + assertTrue(deployer.lastDeployTime(app2).isEmpty()); + + tester.makeReadyNodes(20, "flt", NodeType.host, 8); + tester.deployZoneApp(); + + tester.deploy(app1, cluster1, Capacity.from(new ClusterResources(5, 1, lowResources), false, true)); + tester.deploy(app2, cluster2, Capacity.from(new ClusterResources(5, 1, lowResources), + new ClusterResources(10, 1, highResources), false, true)); + + maintainer.maintain(); // noop + assertTrue(deployer.lastDeployTime(app1).isEmpty()); + assertTrue(deployer.lastDeployTime(app2).isEmpty()); + + addMeasurements(Resource.cpu, 0.9f, 500, app1, tester.nodeRepository(), nodeMetricsDb); + addMeasurements(Resource.memory, 0.9f, 500, app1, 
tester.nodeRepository(), nodeMetricsDb); + addMeasurements(Resource.disk, 0.9f, 500, app1, tester.nodeRepository(), nodeMetricsDb); + addMeasurements(Resource.cpu, 0.9f, 500, app2, tester.nodeRepository(), nodeMetricsDb); + addMeasurements(Resource.memory, 0.9f, 500, app2, tester.nodeRepository(), nodeMetricsDb); + addMeasurements(Resource.disk, 0.9f, 500, app2, tester.nodeRepository(), nodeMetricsDb); + + maintainer.maintain(); + assertTrue(deployer.lastDeployTime(app1).isEmpty()); // since autoscaling is off + assertTrue(deployer.lastDeployTime(app2).isPresent()); + } + + public void addMeasurements(Resource resource, float value, int count, ApplicationId applicationId, + NodeRepository nodeRepository, NodeMetricsDb db) { + List nodes = nodeRepository.getNodes(applicationId, Node.State.active); + for (int i = 0; i < count; i++) { + for (Node node : nodes) + db.add(List.of(new NodeMetrics.MetricValue(node.hostname(), + resource.metricName(), + nodeRepository.clock().instant().toEpochMilli(), + value * 100))); // the metrics are in % + } + } + + private FlavorsConfig flavorsConfig() { + FlavorConfigBuilder b = new FlavorConfigBuilder(); + b.addFlavor("flt", 30, 30, 40, 3, Flavor.Type.BARE_METAL); + b.addFlavor("cpu", 40, 20, 40, 3, Flavor.Type.BARE_METAL); + b.addFlavor("mem", 20, 40, 40, 3, Flavor.Type.BARE_METAL); + return b.build(); + } + +} diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/MaintenanceTester.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/MaintenanceTester.java index 4344016c6fe..664809dc3ab 100644 --- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/MaintenanceTester.java +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/MaintenanceTester.java @@ -3,7 +3,9 @@ package com.yahoo.vespa.hosted.provision.maintenance; import com.yahoo.config.provision.DockerImage; import com.yahoo.config.provision.Environment; +import 
com.yahoo.config.provision.Flavor; import com.yahoo.config.provision.NodeFlavors; +import com.yahoo.config.provision.NodeResources; import com.yahoo.config.provision.NodeType; import com.yahoo.config.provision.RegionName; import com.yahoo.config.provision.Zone; @@ -14,6 +16,7 @@ import com.yahoo.vespa.hosted.provision.Node; import com.yahoo.vespa.hosted.provision.NodeRepository; import com.yahoo.vespa.hosted.provision.node.Agent; import com.yahoo.vespa.hosted.provision.provisioning.FlavorConfigBuilder; +import com.yahoo.vespa.hosted.provision.provisioning.HostResourcesCalculator; import com.yahoo.vespa.hosted.provision.testutils.MockNameResolver; import java.time.Instant; diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/RebalancerTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/RebalancerTest.java index 8c9d5cca54b..387f614c5eb 100644 --- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/RebalancerTest.java +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/RebalancerTest.java @@ -55,7 +55,7 @@ public class RebalancerTest { Rebalancer rebalancer = new Rebalancer(deployer, tester.nodeRepository(), - new IdentityHostResourcesCalculator(), + tester.identityHostResourcesCalculator(), Optional.empty(), metric, tester.clock(), @@ -149,18 +149,4 @@ public class RebalancerTest { return b.build(); } - private static class IdentityHostResourcesCalculator implements HostResourcesCalculator { - - @Override - public NodeResources realResourcesOf(Node node) { - return node.flavor().resources(); - } - - @Override - public NodeResources advertisedResourcesOf(Flavor flavor) { - return flavor.resources(); - } - - } - } diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/ProvisioningTester.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/ProvisioningTester.java index 
3e7104380a0..a8df47aab1a 100644 --- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/ProvisioningTester.java +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/ProvisioningTester.java @@ -233,6 +233,10 @@ public class ProvisioningTester { InstanceName.from(UUID.randomUUID().toString())); } + public ApplicationId makeApplicationId(String applicationName) { + return ApplicationId.from("tenant", applicationName, "default"); + } + public List makeReadyNodes(int n, String flavor) { return makeReadyNodes(n, flavor, NodeType.tenant); } @@ -418,12 +422,15 @@ public class ProvisioningTester { } public List deploy(ApplicationId application, Capacity capacity) { - List prepared = prepare(application, clusterSpec(), capacity); + return deploy(application, clusterSpec(), capacity); + } + + public List deploy(ApplicationId application, ClusterSpec cluster, Capacity capacity) { + List prepared = prepare(application, cluster, capacity); activate(application, Set.copyOf(prepared)); return getNodes(application, Node.State.active).asList(); } - /** Returns the hosts from the input list which are not retired */ public List nonRetired(Collection hosts) { return hosts.stream().filter(host -> ! 
host.membership().get().retired()).collect(Collectors.toList()); @@ -522,4 +529,22 @@ public class ProvisioningTester { @Override public void log(Level level, String message) { } } + public IdentityHostResourcesCalculator identityHostResourcesCalculator() { + return new IdentityHostResourcesCalculator(); + } + + private static class IdentityHostResourcesCalculator implements HostResourcesCalculator { + + @Override + public NodeResources realResourcesOf(Node node) { + return node.flavor().resources(); + } + + @Override + public NodeResources advertisedResourcesOf(Flavor flavor) { + return flavor.resources(); + } + + } + } -- cgit v1.2.3 From b59e13352d2f6c5c9a1f70a039e6e95bf7c246f4 Mon Sep 17 00:00:00 2001 From: Jon Bratseth Date: Sat, 28 Mar 2020 14:29:29 +0100 Subject: Prefer the best fulfilment over lowest cost --- .../autoscale/AllocatableClusterResources.java | 34 +++++++++++++++++++--- .../hosted/provision/autoscale/Autoscaler.java | 23 +++++++++++---- .../provision/autoscale/ResourceIterator.java | 6 +--- .../provision/autoscale/AutoscalingTest.java | 14 ++++----- 4 files changed, 55 insertions(+), 22 deletions(-) diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/AllocatableClusterResources.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/AllocatableClusterResources.java index f553a4c76a4..1c3ea55163a 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/AllocatableClusterResources.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/AllocatableClusterResources.java @@ -26,26 +26,32 @@ public class AllocatableClusterResources { private final ClusterSpec.Type clusterType; + private final double fulfilment; + public AllocatableClusterResources(List nodes, HostResourcesCalculator calculator) { this.advertisedResources = nodes.get(0).flavor().resources(); this.realResources = calculator.realResourcesOf(nodes.get(0)); this.nodes = 
nodes.size(); this.groups = (int)nodes.stream().map(node -> node.allocation().get().membership().cluster().group()).distinct().count(); this.clusterType = nodes.get(0).allocation().get().membership().cluster().type(); + this.fulfilment = 1; } public AllocatableClusterResources(ClusterResources realResources, NodeResources advertisedResources, + NodeResources idealResources, ClusterSpec.Type clusterType) { this.realResources = realResources.nodeResources(); this.advertisedResources = advertisedResources; this.nodes = realResources.nodes(); this.groups = realResources.groups(); this.clusterType = clusterType; + this.fulfilment = fulfilment(realResources.nodeResources(), idealResources); } public AllocatableClusterResources(ClusterResources realResources, Flavor flavor, + NodeResources idealResources, ClusterSpec.Type clusterType, HostResourcesCalculator calculator) { this.realResources = realResources.nodeResources(); @@ -53,6 +59,7 @@ public class AllocatableClusterResources { this.nodes = realResources.nodes(); this.groups = realResources.groups(); this.clusterType = clusterType; + this.fulfilment = fulfilment(realResources.nodeResources(), idealResources); } /** @@ -67,19 +74,38 @@ public class AllocatableClusterResources { */ public NodeResources advertisedResources() { return advertisedResources; } - public double cost() { return nodes * Autoscaler.costOf(advertisedResources); } + public ClusterResources toAdvertisedClusterResources() { + return new ClusterResources(nodes, groups, advertisedResources); + } public int nodes() { return nodes; } public int groups() { return groups; } public ClusterSpec.Type clusterType() { return clusterType; } - public ClusterResources toAdvertisedClusterResources() { - return new ClusterResources(nodes, groups, advertisedResources); + public double cost() { return nodes * Autoscaler.costOf(advertisedResources); } + + /** + * Returns the fraction measuring how well the real resources fulfils the ideal: 1 means completely fulfiled, 
+ * 0 means we have zero real resources. + * The real may be short of the ideal due to resource limits imposed by the system or application. + */ + public double fulfilment() { return fulfilment; } + + private static double fulfilment(NodeResources realResources, NodeResources idealResources) { + double vcpuFulfilment = Math.min(1, realResources.vcpu() / idealResources.vcpu()); + double memoryGbFulfilment = Math.min(1, realResources.memoryGb() / idealResources.memoryGb()); + double diskGbFulfilment = Math.min(1, realResources.diskGb() / idealResources.diskGb()); + return (vcpuFulfilment + memoryGbFulfilment + diskGbFulfilment) / 3; + } + + public boolean preferableTo(AllocatableClusterResources other) { + if (this.fulfilment > other.fulfilment) return true; // we always want to fulfil as much as possible + return this.cost() < other.cost(); // otherwise, prefer lower cost } @Override public String toString() { - return "$" + cost() + ": " + realResources(); + return "$" + cost() + " (fulfilment " + fulfilment + "): " + realResources(); } } diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/Autoscaler.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/Autoscaler.java index dc873ce4e69..bfaf6874b51 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/Autoscaler.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/Autoscaler.java @@ -102,11 +102,11 @@ public class Autoscaler { Cluster cluster) { Optional bestAllocation = Optional.empty(); for (ResourceIterator i = new ResourceIterator(cpuLoad, memoryLoad, diskLoad, currentAllocation, cluster); i.hasNext(); ) { - ClusterResources allocation = i.next(); - Optional allocatableResources = toAllocatableResources(allocation, - currentAllocation.clusterType()); + Optional allocatableResources = toAllocatableResources(i.next(), + currentAllocation.clusterType(), + cluster); if 
(allocatableResources.isEmpty()) continue; - if (bestAllocation.isEmpty() || allocatableResources.get().cost() < bestAllocation.get().cost()) + if (bestAllocation.isEmpty() || allocatableResources.get().preferableTo(bestAllocation.get())) bestAllocation = allocatableResources; } return bestAllocation; @@ -129,16 +129,26 @@ public class Autoscaler { * or empty if none available. */ private Optional toAllocatableResources(ClusterResources resources, - ClusterSpec.Type clusterType) { - NodeResources nodeResources = nodeResourceLimits.enlargeToLegal(resources.nodeResources(), clusterType); + ClusterSpec.Type clusterType, + Cluster cluster) { + System.out.println("Candidate: " + resources); + NodeResources nodeResources = resources.nodeResources(); + if ( ! cluster.minResources().equals(cluster.maxResources())) // enforce application limits unless suggest mode + nodeResources = cluster.capAtLimits(nodeResources); + nodeResources = nodeResourceLimits.enlargeToLegal(nodeResources, clusterType); // enforce system limits + if (allowsHostSharing(nodeRepository.zone().cloud())) { // return the requested resources, or empty if they cannot fit on existing hosts for (Flavor flavor : nodeRepository.getAvailableFlavors().getFlavors()) { if (flavor.resources().satisfies(nodeResources)) return Optional.of(new AllocatableClusterResources(resources.with(nodeResources), nodeResources, + resources.nodeResources(), clusterType)); + else + System.out.println(" " + flavor + " with " + flavor.resources() + " does not satisfy " + nodeResources); } + System.out.println(" ... 
returning empty"); return Optional.empty(); } else { @@ -151,6 +161,7 @@ public class Autoscaler { flavor = flavor.with(FlavorOverrides.ofDisk(nodeResources.diskGb())); var candidate = new AllocatableClusterResources(resources.with(flavor.resources()), flavor, + resources.nodeResources(), clusterType, resourcesCalculator); diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ResourceIterator.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ResourceIterator.java index 19909e40441..bc14ca1779c 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ResourceIterator.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ResourceIterator.java @@ -72,7 +72,6 @@ public class ResourceIterator { public ClusterResources next() { ClusterResources next = resourcesWith(currentNodes); currentNodes += nodeIncrement; - System.out.println("Candidate: " + next); return next; } @@ -135,10 +134,7 @@ public class ResourceIterator { } } - NodeResources resources = allocation.realResources().withVcpu(cpu).withMemoryGb(memory).withDiskGb(disk); - if ( ! 
suggestMode()) - resources = cluster.capAtLimits(resources); - return resources; + return allocation.realResources().withVcpu(cpu).withMemoryGb(memory).withDiskGb(disk); } private double clusterUsage(Resource resource, double load) { diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTest.java index 0e6d2365490..3bb0676da4f 100644 --- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTest.java +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTest.java @@ -120,7 +120,7 @@ public class AutoscalingTest { @Test public void testAutoscalingRespectsLowerLimit() { NodeResources resources = new NodeResources(3, 100, 100, 1); - ClusterResources min = new ClusterResources( 3, 1, new NodeResources(1.8, 7.4, 8.5, 1)); + ClusterResources min = new ClusterResources( 4, 1, new NodeResources(1.8, 7.4, 8.5, 1)); ClusterResources max = new ClusterResources( 6, 1, new NodeResources(2.4, 78, 79, 1)); AutoscalingTester tester = new AutoscalingTester(resources); @@ -133,25 +133,25 @@ public class AutoscalingTest { tester.addMeasurements(Resource.memory, 0.05f, 120, application1); tester.addMeasurements(Resource.disk, 0.05f, 120, application1); tester.assertResources("Scaling down to limit since resource usage is low", - 3, 1, 1.8, 7.4, 8.5, + 4, 1, 1.8, 7.4, 8.5, tester.autoscale(application1, cluster1.id(), min, max)); } @Test public void testAutoscalingRespectsGroupLimit() { - NodeResources resources = new NodeResources(3, 100, 100, 1); + NodeResources hostResources = new NodeResources(30.0, 100, 100, 1); ClusterResources min = new ClusterResources( 2, 2, new NodeResources(1, 1, 1, 1)); ClusterResources max = new ClusterResources(18, 6, new NodeResources(100, 1000, 1000, 1)); - AutoscalingTester tester = new AutoscalingTester(resources); + AutoscalingTester tester = new 
AutoscalingTester(hostResources); ApplicationId application1 = tester.applicationId("application1"); ClusterSpec cluster1 = tester.clusterSpec(ClusterSpec.Type.container, "cluster1"); // deploy - tester.deploy(application1, cluster1, 5, 5, resources); - tester.addMeasurements(Resource.cpu, 0.25f, 1f, 120, application1); + tester.deploy(application1, cluster1, 5, 5, new NodeResources(3.0, 10, 10, 1)); + tester.addMeasurements(Resource.cpu, 0.3f, 1f, 240, application1); tester.assertResources("Scaling up since resource usage is too high", - 6, 6, 2.5, 80.0, 80.0, + 6, 6, 3.6, 8.0, 8.0, tester.autoscale(application1, cluster1.id(), min, max)); } -- cgit v1.2.3 From 8243b7a6c219e0bb28f15d112b02834a68977a89 Mon Sep 17 00:00:00 2001 From: Jon Bratseth Date: Sat, 28 Mar 2020 14:35:17 +0100 Subject: Log in both cases, remove printlns --- .../hosted/provision/autoscale/Autoscaler.java | 4 ---- .../maintenance/AutoscalingMaintainer.java | 21 ++++++++++++--------- 2 files changed, 12 insertions(+), 13 deletions(-) diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/Autoscaler.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/Autoscaler.java index bfaf6874b51..1040ffdf0e4 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/Autoscaler.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/Autoscaler.java @@ -131,7 +131,6 @@ public class Autoscaler { private Optional toAllocatableResources(ClusterResources resources, ClusterSpec.Type clusterType, Cluster cluster) { - System.out.println("Candidate: " + resources); NodeResources nodeResources = resources.nodeResources(); if ( ! 
cluster.minResources().equals(cluster.maxResources())) // enforce application limits unless suggest mode nodeResources = cluster.capAtLimits(nodeResources); @@ -145,10 +144,7 @@ public class Autoscaler { nodeResources, resources.nodeResources(), clusterType)); - else - System.out.println(" " + flavor + " with " + flavor.resources() + " does not satisfy " + nodeResources); } - System.out.println(" ... returning empty"); return Optional.empty(); } else { diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/AutoscalingMaintainer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/AutoscalingMaintainer.java index 7073ab5d1a9..abfe65408b6 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/AutoscalingMaintainer.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/AutoscalingMaintainer.java @@ -6,7 +6,6 @@ import com.yahoo.config.provision.ApplicationId; import com.yahoo.config.provision.ClusterSpec; import com.yahoo.config.provision.Deployer; import com.yahoo.config.provision.NodeResources; -import com.yahoo.transaction.Mutex; import com.yahoo.vespa.hosted.provision.Node; import com.yahoo.vespa.hosted.provision.NodeRepository; import com.yahoo.vespa.hosted.provision.applications.Application; @@ -69,10 +68,13 @@ public class AutoscalingMaintainer extends Maintainer { Optional target = autoscaler.autoscale(cluster, clusterNodes); if (target.isEmpty()) return; // current resources are fine - if (cluster.minResources().equals(cluster.maxResources())) // autoscaling is deactivated - logAutoscaleSuggestion(target.get(), applicationId, clusterId, clusterNodes); - else + if (cluster.minResources().equals(cluster.maxResources())) { // autoscaling is deactivated + logAutoscaling("Scaling suggestion for ", target.get(), applicationId, clusterId, clusterNodes); + } + else { + logAutoscaling("Autoscaling ", target.get(), applicationId, clusterId, 
clusterNodes); autoscaleTo(target.get(), applicationId, clusterId, application, deployment); + } } private void autoscaleTo(AllocatableClusterResources target, @@ -86,16 +88,17 @@ public class AutoscalingMaintainer extends Maintainer { deployment.activate(); } - private void logAutoscaleSuggestion(AllocatableClusterResources target, - ApplicationId application, - ClusterSpec.Id clusterId, - List clusterNodes) { + private void logAutoscaling(String prefix, + AllocatableClusterResources target, + ApplicationId application, + ClusterSpec.Id clusterId, + List clusterNodes) { Instant lastLogTime = lastLogged.get(new Pair<>(application, clusterId)); if (lastLogTime != null && lastLogTime.isAfter(nodeRepository().clock().instant().minus(Duration.ofHours(1)))) return; int currentGroups = (int)clusterNodes.stream().map(node -> node.allocation().get().membership().cluster().group()).distinct().count(); ClusterSpec.Type clusterType = clusterNodes.get(0).allocation().get().membership().cluster().type(); - log.info("Scaling suggestion for " + application + " " + clusterType + " " + clusterId + ":" + + log.info(prefix + application + " " + clusterType + " " + clusterId + ":" + "\nfrom " + toString(clusterNodes.size(), currentGroups, clusterNodes.get(0).flavor().resources()) + "\nto " + toString(target.nodes(), target.groups(), target.advertisedResources())); lastLogged.put(new Pair<>(application, clusterId), nodeRepository().clock().instant()); -- cgit v1.2.3 From a1e11e67da2f5f3df7da1248b58b6dc983c4d0b2 Mon Sep 17 00:00:00 2001 From: Jon Bratseth Date: Sat, 28 Mar 2020 16:45:48 +0100 Subject: Update node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeRepositoryProvisioner.java Co-Authored-By: Valerij Fredriksen --- .../vespa/hosted/provision/provisioning/NodeRepositoryProvisioner.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeRepositoryProvisioner.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeRepositoryProvisioner.java index 6ae89f6c9ed..7207b5a65d1 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeRepositoryProvisioner.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeRepositoryProvisioner.java @@ -156,7 +156,7 @@ public class NodeRepositoryProvisioner implements Provisioner { } } - /** Returns the current resources of this cluster, if it'1s already depoyed and inside the requested limits */ + /** Returns the current resources of this cluster, if it's already deployed and inside the requested limits */ private Optional currentResources(ApplicationId applicationId, ClusterSpec.Id clusterId, Capacity requested) { -- cgit v1.2.3 From 12b4ab36a248541156f80e66a66c077811507379 Mon Sep 17 00:00:00 2001 From: Jon Bratseth Date: Sat, 28 Mar 2020 16:46:04 +0100 Subject: Update node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeRepositoryProvisioner.java Co-Authored-By: Valerij Fredriksen --- .../vespa/hosted/provision/provisioning/NodeRepositoryProvisioner.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeRepositoryProvisioner.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeRepositoryProvisioner.java index 7207b5a65d1..d03aa0cac91 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeRepositoryProvisioner.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeRepositoryProvisioner.java @@ -151,7 +151,7 @@ public class NodeRepositoryProvisioner implements Provisioner { application = application.withClusterLimits(clusterId, requested.minResources(), 
requested.maxResources()); nodeRepository.applications().set(applicationId, application, lock); return application.cluster(clusterId).targetResources() - .orElse(currentResources(applicationId, clusterId, requested) + .orElseGet(() -> currentResources(applicationId, clusterId, requested) .orElse(requested.minResources())); } } -- cgit v1.2.3 From 0fc3a51689475497b609524459c01152a11ea443 Mon Sep 17 00:00:00 2001 From: Jon Bratseth Date: Sat, 28 Mar 2020 16:46:16 +0100 Subject: Update node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/MaintenanceDeployment.java Co-Authored-By: Valerij Fredriksen --- .../yahoo/vespa/hosted/provision/maintenance/MaintenanceDeployment.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/MaintenanceDeployment.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/MaintenanceDeployment.java index c21c9d68c8a..d9e06f87db7 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/MaintenanceDeployment.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/MaintenanceDeployment.java @@ -56,7 +56,7 @@ class MaintenanceDeployment implements Closeable { * @throws IllegalStateException id this is called when closed */ public Optional applicationLock() { - if (closed) throw new IllegalStateException(this + "is closed"); + if (closed) throw new IllegalStateException(this + " is closed"); return lock; } -- cgit v1.2.3