diff options
author | Jon Bratseth <bratseth@verizonmedia.com> | 2020-02-26 16:32:06 +0100 |
---|---|---|
committer | Jon Bratseth <bratseth@verizonmedia.com> | 2020-02-26 16:32:06 +0100 |
commit | 5168d102ca8c5d82c6f93753acc01cd2668d46a5 (patch) | |
tree | 8bef310c37d1508c5faecba3b4cf81d5d986e748 | |
parent | 2aa1ee8401290bfce3b97409e8fc634b8386f247 (diff) |
Revert "Merge pull request #12351 from vespa-engine/revert-11875-bratseth/node-metrics"
This reverts commit 37f6c5b31cb2809a54c34dc3a4e51307f3320fbd, reversing
changes made to 76221d0b9b7bda577ac61ce8a79c7b6ee3a8dbb4.
53 files changed, 1659 insertions, 71 deletions
diff --git a/config-model/src/main/java/com/yahoo/searchdefinition/document/SDField.java b/config-model/src/main/java/com/yahoo/searchdefinition/document/SDField.java index 4fe6c3a96f2..30ce142d503 100644 --- a/config-model/src/main/java/com/yahoo/searchdefinition/document/SDField.java +++ b/config-model/src/main/java/com/yahoo/searchdefinition/document/SDField.java @@ -91,7 +91,7 @@ public class SDField extends Field implements TypedKey, FieldOperationContainer, private NormalizeLevel normalizing = new NormalizeLevel(); /** Extra query commands of this field */ - private List<String> queryCommands=new java.util.ArrayList<>(0); + private List<String> queryCommands = new java.util.ArrayList<>(0); /** Summary fields defined in this field */ private Map<String, SummaryField> summaryFields = new java.util.LinkedHashMap<>(0); @@ -749,20 +749,11 @@ public class SDField extends Field implements TypedKey, FieldOperationContainer, return queryCommands.contains(name); } - /** - * A list of query commands - * - * @return a list of strings with query commands. 
- */ + /** Returns a list of query commands */ @Override - public List<String> getQueryCommands() { - return queryCommands; - } + public List<String> getQueryCommands() { return queryCommands; } - /** - * The document that this field was declared in, or null - * - */ + /** Returns the document that this field was declared in, or null */ private SDDocumentType getOwnerDocType() { return ownerDocType; } diff --git a/config-model/src/main/java/com/yahoo/searchdefinition/fieldoperation/IndexingRewriteOperation.java b/config-model/src/main/java/com/yahoo/searchdefinition/fieldoperation/IndexingRewriteOperation.java index a0d47d7fa81..0a29fae04bf 100644 --- a/config-model/src/main/java/com/yahoo/searchdefinition/fieldoperation/IndexingRewriteOperation.java +++ b/config-model/src/main/java/com/yahoo/searchdefinition/fieldoperation/IndexingRewriteOperation.java @@ -4,9 +4,10 @@ package com.yahoo.searchdefinition.fieldoperation; import com.yahoo.searchdefinition.document.SDField; /** - * @author <a href="mailto:einarmr@yahoo-inc.com">Einar M R Rosenvinge</a> + * @author Einar M R Rosenvinge */ public class IndexingRewriteOperation implements FieldOperation { - public void apply(SDField field) { - } + + public void apply(SDField field) { } + } diff --git a/config-model/src/main/java/com/yahoo/vespa/model/admin/monitoring/MetricsConsumer.java b/config-model/src/main/java/com/yahoo/vespa/model/admin/monitoring/MetricsConsumer.java index 9c752f3aa0d..a8fbcf50b02 100644 --- a/config-model/src/main/java/com/yahoo/vespa/model/admin/monitoring/MetricsConsumer.java +++ b/config-model/src/main/java/com/yahoo/vespa/model/admin/monitoring/MetricsConsumer.java @@ -17,14 +17,15 @@ import static java.util.Collections.unmodifiableList; */ @Immutable public class MetricsConsumer { + private final String id; private final MetricSet metricSet; private final List<CloudWatch> cloudWatches = new ArrayList<>(); /** - * @param id The consumer - * @param metricSet The metrics for this consumer + * 
@param id the consumer + * @param metricSet the metrics for this consumer */ public MetricsConsumer(String id, MetricSet metricSet) { this.id = Objects.requireNonNull(id, "A consumer must have a non-null id.");; diff --git a/config-model/src/main/java/com/yahoo/vespa/model/admin/monitoring/SystemMetrics.java b/config-model/src/main/java/com/yahoo/vespa/model/admin/monitoring/SystemMetrics.java index 58b77ee1297..c05cad89852 100644 --- a/config-model/src/main/java/com/yahoo/vespa/model/admin/monitoring/SystemMetrics.java +++ b/config-model/src/main/java/com/yahoo/vespa/model/admin/monitoring/SystemMetrics.java @@ -9,6 +9,7 @@ import java.util.Set; * @author gjoranv */ public class SystemMetrics { + public static final String CPU_UTIL = "cpu.util"; public static final String CPU_SYS_UTIL = "cpu.sys.util"; public static final String CPU_THROTTLED_TIME = "cpu.throttled_time.rate"; diff --git a/config-provisioning/abi-spec.json b/config-provisioning/abi-spec.json index f2ae997a164..9a091f1161c 100644 --- a/config-provisioning/abi-spec.json +++ b/config-provisioning/abi-spec.json @@ -391,6 +391,7 @@ "methods": [ "public void <init>(com.yahoo.config.provisioning.FlavorsConfig$Flavor)", "public void <init>(com.yahoo.config.provision.NodeResources)", + "public void <init>(java.lang.String, com.yahoo.config.provision.NodeResources)", "public com.yahoo.config.provision.Flavor with(com.yahoo.config.provision.host.FlavorOverrides)", "public com.yahoo.config.provision.Flavor with(com.yahoo.config.provision.NodeResources)", "public java.lang.String name()", diff --git a/config-provisioning/src/main/java/com/yahoo/config/provision/ClusterSpec.java b/config-provisioning/src/main/java/com/yahoo/config/provision/ClusterSpec.java index 16369d82f9f..5aed5d8e2e7 100644 --- a/config-provisioning/src/main/java/com/yahoo/config/provision/ClusterSpec.java +++ b/config-provisioning/src/main/java/com/yahoo/config/provision/ClusterSpec.java @@ -158,7 +158,6 @@ public final class ClusterSpec { 
} /** Identifier of a group within a cluster */ - @SuppressWarnings("deprecation") public static final class Group { private final int index; diff --git a/config-provisioning/src/main/java/com/yahoo/config/provision/Flavor.java b/config-provisioning/src/main/java/com/yahoo/config/provision/Flavor.java index 2711406c216..d11d7137226 100644 --- a/config-provisioning/src/main/java/com/yahoo/config/provision/Flavor.java +++ b/config-provisioning/src/main/java/com/yahoo/config/provision/Flavor.java @@ -49,6 +49,11 @@ public class Flavor { this(resources.toString(), resources, Optional.empty(), Type.DOCKER_CONTAINER, false, 0, resources.vcpu()); } + /** Creates a *host* flavor for testing */ + public Flavor(String name, NodeResources resources) { + this(name, resources, Optional.empty(), Flavor.Type.VIRTUAL_MACHINE, true, 0, resources.vcpu()); + } + private Flavor(String name, NodeResources resources, Optional<FlavorOverrides> flavorOverrides, diff --git a/config-provisioning/src/main/java/com/yahoo/config/provision/HostName.java b/config-provisioning/src/main/java/com/yahoo/config/provision/HostName.java index 510122c2342..25c42884295 100644 --- a/config-provisioning/src/main/java/com/yahoo/config/provision/HostName.java +++ b/config-provisioning/src/main/java/com/yahoo/config/provision/HostName.java @@ -4,7 +4,7 @@ package com.yahoo.config.provision; import java.util.Objects; /** - * Represents a host name + * A host name * * @author mortent */ @@ -18,12 +18,7 @@ public class HostName implements Comparable<HostName> { public String value() { return name; } - /** - * Create a {@link HostName} with a given name. - * - * @param name Name - * @return instance of {@link HostName}. 
- */ + /** Create a {@link HostName} with a given name */ public static HostName from(String name) { return new HostName(name); } diff --git a/config-provisioning/src/main/java/com/yahoo/config/provision/NodeFlavors.java b/config-provisioning/src/main/java/com/yahoo/config/provision/NodeFlavors.java index a9f031cae70..eb462c86f4f 100644 --- a/config-provisioning/src/main/java/com/yahoo/config/provision/NodeFlavors.java +++ b/config-provisioning/src/main/java/com/yahoo/config/provision/NodeFlavors.java @@ -14,7 +14,7 @@ import java.util.Optional; import java.util.stream.Collectors; /** - * All the flavors *configured* in this zone (i.e this should be called HostFlavors). + * All the flavors configured in this zone (i.e this should be called HostFlavors). * * @author bratseth */ diff --git a/container-search/src/main/java/com/yahoo/prelude/IndexFacts.java b/container-search/src/main/java/com/yahoo/prelude/IndexFacts.java index 76eef33d6c0..aa3d6a2c0f8 100644 --- a/container-search/src/main/java/com/yahoo/prelude/IndexFacts.java +++ b/container-search/src/main/java/com/yahoo/prelude/IndexFacts.java @@ -48,7 +48,6 @@ public class IndexFacts { static final String unionName = "unionOfAllKnown"; /** A search definition which contains the union of all settings. */ - @SuppressWarnings("deprecation") private SearchDefinition unionSearchDefinition = new SearchDefinition(unionName); private boolean frozen; diff --git a/container-search/src/main/java/com/yahoo/search/Result.java b/container-search/src/main/java/com/yahoo/search/Result.java index 4080b09f40b..ab48d5797b2 100644 --- a/container-search/src/main/java/com/yahoo/search/Result.java +++ b/container-search/src/main/java/com/yahoo/search/Result.java @@ -89,7 +89,6 @@ public final class Result extends com.yahoo.processing.Response implements Clone * with a result. It should <b>always</b> be called when adding * hits from a result, but there is no constraints on the order of the calls. 
*/ - @SuppressWarnings("deprecation") public void mergeWith(Result result) { totalHitCount += result.getTotalHitCount(); deepHitCount += result.getDeepHitCount(); diff --git a/container-search/src/main/java/com/yahoo/search/federation/FederationResult.java b/container-search/src/main/java/com/yahoo/search/federation/FederationResult.java index 5f1cfccf549..6243dc694c2 100644 --- a/container-search/src/main/java/com/yahoo/search/federation/FederationResult.java +++ b/container-search/src/main/java/com/yahoo/search/federation/FederationResult.java @@ -39,8 +39,8 @@ class FederationResult { } /** - * Wait on each target for that targets timeout - * On the worst case this is the same as waiting for the max target timeout, + * Wait on each target for that targets timeout. + * In the worst case this is the same as waiting for the max target timeout, * in the average case it may be much better because lower timeout sources do not get to * drive the timeout above their own timeout value. * When this completes, results can be accessed from the TargetResults with no blocking diff --git a/container-search/src/main/java/com/yahoo/search/federation/FederationSearcher.java b/container-search/src/main/java/com/yahoo/search/federation/FederationSearcher.java index 421544b5b49..60c5d42c531 100644 --- a/container-search/src/main/java/com/yahoo/search/federation/FederationSearcher.java +++ b/container-search/src/main/java/com/yahoo/search/federation/FederationSearcher.java @@ -354,7 +354,7 @@ public class FederationSearcher extends ForkingSearcher { } private void warnIfUnresolvedSearchChains(List<UnresolvedSearchChainException> missingTargets, - HitGroup errorHitGroup) { + HitGroup errorHitGroup) { if (!missingTargets.isEmpty()) { errorHitGroup.addError(missingSearchChainsErrorMessage(missingTargets)); } @@ -492,9 +492,9 @@ public class FederationSearcher extends ForkingSearcher { * TODO This is probably a dirty hack for bug 4711376. There are probably better ways. 
* But I will leave that to trd-processing@ * - * @param group The merging hitgroup to be updated if necessary - * @param orderer The per provider hit orderer. - * @return The hitorderer chosen + * @param group the merging hitgroup to be updated if necessary + * @param orderer the per provider hit orderer + * @return he hitorderer chosen */ private HitOrderer dirtyCopyIfModifiedOrderer(HitGroup group, HitOrderer orderer) { if (orderer != null) { diff --git a/defaults/src/main/java/com/yahoo/vespa/defaults/Defaults.java b/defaults/src/main/java/com/yahoo/vespa/defaults/Defaults.java index 6fb6e4f0860..0565b1cff09 100644 --- a/defaults/src/main/java/com/yahoo/vespa/defaults/Defaults.java +++ b/defaults/src/main/java/com/yahoo/vespa/defaults/Defaults.java @@ -39,6 +39,7 @@ public class Defaults { vespaPortConfigServerHttp = vespaPortConfigServerRpc + 1; vespaPortConfigProxyRpc = findConfigProxyPort(vespaPortBase + 90); } + static private String findVespaHome(String defHome) { Optional<String> vespaHomeEnv = Optional.ofNullable(System.getenv("VESPA_HOME")); if ( ! 
vespaHomeEnv.isPresent() || vespaHomeEnv.get().trim().isEmpty()) { diff --git a/metrics-proxy/src/main/java/ai/vespa/metricsproxy/core/VespaMetrics.java b/metrics-proxy/src/main/java/ai/vespa/metricsproxy/core/VespaMetrics.java index c9d7618b9d7..c04dca465a1 100644 --- a/metrics-proxy/src/main/java/ai/vespa/metricsproxy/core/VespaMetrics.java +++ b/metrics-proxy/src/main/java/ai/vespa/metricsproxy/core/VespaMetrics.java @@ -35,6 +35,7 @@ import static com.google.common.base.Strings.isNullOrEmpty; * @author gjoranv */ public class VespaMetrics { + private static final Logger log = Logger.getLogger(VespaMetrics.class.getPackage().getName()); public static final ConsumerId VESPA_CONSUMER_ID = toConsumerId("Vespa"); diff --git a/metrics-proxy/src/main/java/ai/vespa/metricsproxy/http/ValuesFetcher.java b/metrics-proxy/src/main/java/ai/vespa/metricsproxy/http/ValuesFetcher.java index ae0ef2fa57a..51bdae1aab3 100644 --- a/metrics-proxy/src/main/java/ai/vespa/metricsproxy/http/ValuesFetcher.java +++ b/metrics-proxy/src/main/java/ai/vespa/metricsproxy/http/ValuesFetcher.java @@ -21,6 +21,7 @@ import static ai.vespa.metricsproxy.metric.model.ConsumerId.toConsumerId; * @author gjoranv */ public class ValuesFetcher { + private static final Logger log = Logger.getLogger(ValuesFetcher.class.getName()); public static final ConsumerId DEFAULT_PUBLIC_CONSUMER_ID = toConsumerId("default"); diff --git a/metrics-proxy/src/main/java/ai/vespa/metricsproxy/http/application/ApplicationMetricsRetriever.java b/metrics-proxy/src/main/java/ai/vespa/metricsproxy/http/application/ApplicationMetricsRetriever.java index c51970ce3ae..9ddd7885fcb 100644 --- a/metrics-proxy/src/main/java/ai/vespa/metricsproxy/http/application/ApplicationMetricsRetriever.java +++ b/metrics-proxy/src/main/java/ai/vespa/metricsproxy/http/application/ApplicationMetricsRetriever.java @@ -33,6 +33,7 @@ import static java.util.stream.Collectors.toMap; * @author gjoranv */ public class ApplicationMetricsRetriever extends 
AbstractComponent { + private static final Logger log = Logger.getLogger(ApplicationMetricsRetriever.class.getName()); private static final int PARALLELISM = 20; diff --git a/metrics-proxy/src/main/java/ai/vespa/metricsproxy/http/application/NodeMetricsClient.java b/metrics-proxy/src/main/java/ai/vespa/metricsproxy/http/application/NodeMetricsClient.java index f2ee326029a..01cf6b19836 100644 --- a/metrics-proxy/src/main/java/ai/vespa/metricsproxy/http/application/NodeMetricsClient.java +++ b/metrics-proxy/src/main/java/ai/vespa/metricsproxy/http/application/NodeMetricsClient.java @@ -32,6 +32,7 @@ import static java.util.Collections.emptyList; * @author gjoranv */ public class NodeMetricsClient { + private static final Logger log = Logger.getLogger(NodeMetricsClient.class.getName()); static final Duration METRICS_TTL = Duration.ofSeconds(30); @@ -80,7 +81,6 @@ public class NodeMetricsClient { return snapshotsRetrieved; } - /** * Convenience class for storing a metrics snapshot with its timestamp. 
*/ diff --git a/metrics-proxy/src/main/java/ai/vespa/metricsproxy/metric/model/ConsumerId.java b/metrics-proxy/src/main/java/ai/vespa/metricsproxy/metric/model/ConsumerId.java index 62de9649bb0..795d1005b10 100644 --- a/metrics-proxy/src/main/java/ai/vespa/metricsproxy/metric/model/ConsumerId.java +++ b/metrics-proxy/src/main/java/ai/vespa/metricsproxy/metric/model/ConsumerId.java @@ -7,6 +7,7 @@ import java.util.Objects; * @author gjoranv */ public class ConsumerId { + public final String id; private ConsumerId(String id) { this.id = id; } diff --git a/metrics-proxy/src/main/java/ai/vespa/metricsproxy/service/CpuJiffies.java b/metrics-proxy/src/main/java/ai/vespa/metricsproxy/service/CpuJiffies.java index 481068f0df2..e07a67770bc 100644 --- a/metrics-proxy/src/main/java/ai/vespa/metricsproxy/service/CpuJiffies.java +++ b/metrics-proxy/src/main/java/ai/vespa/metricsproxy/service/CpuJiffies.java @@ -2,6 +2,7 @@ package ai.vespa.metricsproxy.service; class CpuJiffies { + private int cpuId; private long jiffies; @@ -37,4 +38,5 @@ class CpuJiffies { public long getTotalJiffies() { return jiffies; } + } diff --git a/metrics-proxy/src/main/java/ai/vespa/metricsproxy/service/HttpMetricFetcher.java b/metrics-proxy/src/main/java/ai/vespa/metricsproxy/service/HttpMetricFetcher.java index 922a2a15ffd..9068be81b65 100644 --- a/metrics-proxy/src/main/java/ai/vespa/metricsproxy/service/HttpMetricFetcher.java +++ b/metrics-proxy/src/main/java/ai/vespa/metricsproxy/service/HttpMetricFetcher.java @@ -20,6 +20,7 @@ import java.util.logging.Logger; * @author bjorncs */ public abstract class HttpMetricFetcher { + private final static Logger log = Logger.getLogger(HttpMetricFetcher.class.getPackage().getName()); public final static String STATE_PATH = "/state/v1/"; // The call to apache will do 3 retries. As long as we check the services in series, we can't have this too high. 
@@ -31,8 +32,8 @@ public abstract class HttpMetricFetcher { /** - * @param service The service to fetch metrics from - * @param port The port to use + * @param service the service to fetch metrics from + * @param port the port to use */ HttpMetricFetcher(VespaService service, int port, String path) { this.service = service; @@ -86,4 +87,5 @@ public abstract class HttpMetricFetcher { .build()) .build(); } + } diff --git a/metrics-proxy/src/main/java/ai/vespa/metricsproxy/service/SystemPoller.java b/metrics-proxy/src/main/java/ai/vespa/metricsproxy/service/SystemPoller.java index 379e5296bb8..c8fbc83eb59 100644 --- a/metrics-proxy/src/main/java/ai/vespa/metricsproxy/service/SystemPoller.java +++ b/metrics-proxy/src/main/java/ai/vespa/metricsproxy/service/SystemPoller.java @@ -24,6 +24,7 @@ import java.util.logging.Logger; * @author Eirik Nygaard */ public class SystemPoller { + final private static Logger log = Logger.getLogger(SystemPoller.class.getName()); private final int pollingIntervalSecs; diff --git a/node-repository/src/main/config/node-repository.xml b/node-repository/src/main/config/node-repository.xml index 274be6d572a..186f052a274 100644 --- a/node-repository/src/main/config/node-repository.xml +++ b/node-repository/src/main/config/node-repository.xml @@ -1,6 +1,8 @@ <!-- services.xml snippet for the node repository. Included in config server services.xml if the package is installed--> <!-- Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
--> <component id="com.yahoo.vespa.hosted.provision.provisioning.InfraDeployerImpl" bundle="node-repository"/> +<component id="com.yahoo.vespa.hosted.provision.autoscale.NodeMetricsFetcher" bundle="node-repository"/> +<component id="com.yahoo.vespa.hosted.provision.autoscale.NodeMetricsDb" bundle="node-repository"/> <component id="com.yahoo.vespa.hosted.provision.provisioning.NodeRepositoryProvisioner" bundle="node-repository" /> <component id="NodeRepository" class="com.yahoo.vespa.hosted.provision.NodeRepository" bundle="node-repository"/> <component id="com.yahoo.vespa.hosted.provision.maintenance.NodeRepositoryMaintenance" bundle="node-repository"/> diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/Node.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/Node.java index efb2a71264a..f881f888752 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/Node.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/Node.java @@ -83,7 +83,7 @@ public final class Node { this.reservedTo = Objects.requireNonNull(reservedTo, "reservedTo cannot be null"); if (state == State.active) - requireNonEmpty(ipConfig.primary(), "An active node must have at least one valid IP address"); + requireNonEmpty(ipConfig.primary(), "Active node " + hostname + " must have at least one valid IP address"); if (parentHostname.isPresent()) { if (!ipConfig.pool().asSet().isEmpty()) throw new IllegalArgumentException("A child node cannot have an IP address pool"); @@ -375,8 +375,6 @@ public final class Node { .deviation(); } - - @Override public boolean equals(Object o) { if (this == o) return true; @@ -436,6 +434,7 @@ public final class Node { public static Set<State> allocatedStates() { return Set.of(reserved, active, inactive, failed, parked); } + } /** The mean and mean deviation (squared difference) of a bunch of numbers */ diff --git 
a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/Autoscaler.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/Autoscaler.java new file mode 100644 index 00000000000..71f7dc3701e --- /dev/null +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/Autoscaler.java @@ -0,0 +1,185 @@ +// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.vespa.hosted.provision.autoscale; + +import com.yahoo.config.provision.ApplicationId; +import com.yahoo.config.provision.CloudName; +import com.yahoo.config.provision.ClusterSpec; +import com.yahoo.config.provision.Flavor; +import com.yahoo.config.provision.NodeResources; +import com.yahoo.vespa.hosted.provision.Node; +import com.yahoo.vespa.hosted.provision.NodeRepository; +import com.yahoo.vespa.hosted.provision.provisioning.HostResourcesCalculator; +import com.yahoo.vespa.hosted.provision.provisioning.NodeResourceLimits; + +import java.time.Duration; +import java.util.List; +import java.util.Optional; +import java.util.stream.Collectors; + +/** + * The autoscaler makes decisions about the flavor and node count that should be allocated to a cluster + * based on observed behavior. + * + * @author bratseth + */ +public class Autoscaler { + + /* + TODO: + - Scale group size + - Have a better idea about whether we have sufficient information to make decisions + - Consider taking spikes/variance into account + - Measure observed regulation lag (startup+redistribution) into account when deciding regulation observation window + - Test AutoscalingMaintainer + - Scale by performance not just load+cost + */ + + private static final int minimumMeasurements = 500; // TODO: Per node instead? Also say something about interval? + + /** What cost difference factor warrants reallocation? 
*/ + private static final double costDifferenceRatioWorthReallocation = 0.1; + /** What difference factor from ideal (for any resource) warrants a change? */ + private static final double idealDivergenceWorthReallocation = 0.1; + + // We only depend on the ratios between these values + private static final double cpuUnitCost = 12.0; + private static final double memoryUnitCost = 1.2; + private static final double diskUnitCost = 0.045; + + private final HostResourcesCalculator hostResourcesCalculator; + private final NodeMetricsDb metricsDb; + private final NodeRepository nodeRepository; + private final NodeResourceLimits nodeResourceLimits; + + public Autoscaler(HostResourcesCalculator hostResourcesCalculator, + NodeMetricsDb metricsDb, + NodeRepository nodeRepository) { + this.hostResourcesCalculator = hostResourcesCalculator; + this.metricsDb = metricsDb; + this.nodeRepository = nodeRepository; + this.nodeResourceLimits = new NodeResourceLimits(nodeRepository.zone()); + } + + public Optional<ClusterResources> autoscale(ApplicationId applicationId, ClusterSpec cluster, List<Node> clusterNodes) { + if (clusterNodes.stream().anyMatch(node -> node.status().wantToRetire() || + node.allocation().get().membership().retired() || + node.allocation().get().isRemovable())) + return Optional.empty(); // Don't autoscale clusters that are in flux + ClusterResources currentAllocation = new ClusterResources(clusterNodes); + Optional<Double> cpuLoad = averageLoad(Resource.cpu, cluster, clusterNodes); + Optional<Double> memoryLoad = averageLoad(Resource.memory, cluster, clusterNodes); + Optional<Double> diskLoad = averageLoad(Resource.disk, cluster, clusterNodes); + if (cpuLoad.isEmpty() || memoryLoad.isEmpty() || diskLoad.isEmpty()) return Optional.empty(); + + Optional<ClusterResourcesWithCost> bestAllocation = findBestAllocation(cpuLoad.get(), + memoryLoad.get(), + diskLoad.get(), + currentAllocation, + cluster); + if (bestAllocation.isEmpty()) return Optional.empty(); + + if 
(closeToIdeal(Resource.cpu, cpuLoad.get()) && + closeToIdeal(Resource.memory, memoryLoad.get()) && + closeToIdeal(Resource.disk, diskLoad.get()) && + similarCost(bestAllocation.get().cost(), currentAllocation.nodes() * costOf(currentAllocation.nodeResources()))) + return Optional.empty(); // Avoid small, unnecessary changes + return bestAllocation.map(a -> a.clusterResources()); + } + + private Optional<ClusterResourcesWithCost> findBestAllocation(double cpuLoad, double memoryLoad, double diskLoad, + ClusterResources currentAllocation, ClusterSpec cluster) { + Optional<ClusterResourcesWithCost> bestAllocation = Optional.empty(); + for (ResourceIterator i = new ResourceIterator(cpuLoad, memoryLoad, diskLoad, currentAllocation); i.hasNext(); ) { + ClusterResources allocation = i.next(); + Optional<ClusterResourcesWithCost> allocatableResources = toAllocatableResources(allocation, cluster); + if (allocatableResources.isEmpty()) continue; + if (bestAllocation.isEmpty() || allocatableResources.get().cost() < bestAllocation.get().cost()) + bestAllocation = allocatableResources; + } + return bestAllocation; + } + + private boolean similarCost(double cost1, double cost2) { + return similar(cost1, cost2, costDifferenceRatioWorthReallocation); + } + + private boolean closeToIdeal(Resource resource, double value) { + return similar(resource.idealAverageLoad(), value, idealDivergenceWorthReallocation); + } + + private boolean similar(double r1, double r2, double threshold) { + return Math.abs(r1 - r2) / r1 < threshold; + } + + /** + * Returns the smallest allocatable node resources larger than the given node resources, + * or empty if none available. 
+ */ + private Optional<ClusterResourcesWithCost> toAllocatableResources(ClusterResources resources, ClusterSpec cluster) { + if (allowsHostSharing(nodeRepository.zone().cloud())) { + // Return the requested resources, adjusted to be legal or empty if they cannot fit on existing hosts + NodeResources nodeResources = nodeResourceLimits.enlargeToLegal(resources.nodeResources(), cluster.type()); + for (Flavor flavor : nodeRepository.getAvailableFlavors().getFlavors()) + if (flavor.resources().satisfies(nodeResources)) + return Optional.of(new ClusterResourcesWithCost(resources.with(nodeResources), + costOf(nodeResources) * resources.nodes())); + return Optional.empty(); + } + else { + // return the cheapest flavor satisfying the target resources, if any + double bestCost = Double.MAX_VALUE; + Optional<Flavor> bestFlavor = Optional.empty(); + for (Flavor flavor : nodeRepository.getAvailableFlavors().getFlavors()) { + if ( ! flavor.resources().satisfies(resources.nodeResources())) continue; + if (bestFlavor.isEmpty() || bestCost > costOf(flavor.resources())) { + bestFlavor = Optional.of(flavor); + bestCost = costOf(flavor); + } + } + if (bestFlavor.isEmpty()) + return Optional.empty(); + else + return Optional.of(new ClusterResourcesWithCost(resources.with(bestFlavor.get().resources()), + bestCost * resources.nodes())); + } + } + + /** + * Returns the average load of this resource in the measurement window, + * or empty if we are not in a position to make decisions from these measurements at this time. 
+ */ + private Optional<Double> averageLoad(Resource resource, ClusterSpec cluster, List<Node> clusterNodes) { + NodeMetricsDb.Window window = metricsDb.getWindow(nodeRepository.clock().instant().minus(scalingWindow(cluster.type())), + resource, + clusterNodes.stream().map(Node::hostname).collect(Collectors.toList())); + + if (window.measurementCount() < minimumMeasurements) return Optional.empty(); + if (window.hostnames() != clusterNodes.size()) return Optional.empty(); // Regulate only when all nodes are measured + + return Optional.of(window.average()); + } + + /** The duration of the window we need to consider to make a scaling decision */ + private Duration scalingWindow(ClusterSpec.Type clusterType) { + if (clusterType.isContent()) return Duration.ofHours(12); // Ideally we should use observed redistribution time + return Duration.ofHours(12); // TODO: Measure much more often to get this down to minutes. And, ideally we should take node startup time into account + } + + // TODO: Put this in zone config instead? + private boolean allowsHostSharing(CloudName cloudName) { + if (cloudName.value().equals("aws")) return false; + return true; + } + + private double costOf(Flavor flavor) { + NodeResources chargedResources = hostResourcesCalculator.availableCapacityOf(flavor.name(), flavor.resources()); + return costOf(chargedResources); + } + + private double costOf(NodeResources resources) { + return resources.vcpu() * cpuUnitCost + + resources.memoryGb() * memoryUnitCost + + resources.diskGb() * diskUnitCost; + } + +} diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterResources.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterResources.java new file mode 100644 index 00000000000..e068b4404d8 --- /dev/null +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterResources.java @@ -0,0 +1,65 @@ +// Copyright Verizon Media. 
Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.vespa.hosted.provision.autoscale; + +import com.yahoo.config.provision.NodeResources; +import com.yahoo.vespa.hosted.provision.Node; + +import java.util.List; +import java.util.Objects; + +/** A description of the resources of a cluster */ +public class ClusterResources { + + /** The node count in the cluster */ + private final int nodes; + + /** The number of node groups in the cluster */ + private final int groups; + + /** The resources of each node in the cluster */ + private final NodeResources nodeResources; + + public ClusterResources(List<Node> nodes) { + this(nodes.size(), + (int)nodes.stream().map(node -> node.allocation().get().membership().cluster().group()).distinct().count(), + nodes.get(0).flavor().resources()); + } + + public ClusterResources(int nodes, int groups, NodeResources nodeResources) { + this.nodes = nodes; + this.groups = groups; + this.nodeResources = nodeResources; + } + + /** Returns the total number of allocated nodes (over all groups) */ + public int nodes() { return nodes; } + public int groups() { return groups; } + public NodeResources nodeResources() { return nodeResources; } + + public ClusterResources with(NodeResources resources) { + return new ClusterResources(nodes, groups, resources); + } + + @Override + public boolean equals(Object o) { + if (o == this) return true; + if ( ! (o instanceof ClusterResources)) return false; + + ClusterResources other = (ClusterResources)o; + if (other.nodes != this.nodes) return false; + if (other.groups != this.groups) return false; + if (other.nodeResources != this.nodeResources) return false; + return true; + } + + @Override + public int hashCode() { + return Objects.hash(nodes, groups, nodeResources); + } + + @Override + public String toString() { + return "cluster resources: " + nodes + " * " + nodeResources + (groups > 1 ? 
" in " + groups + " groups" : ""); + } + +} diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterResourcesWithCost.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterResourcesWithCost.java new file mode 100644 index 00000000000..55b28ef3ce1 --- /dev/null +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterResourcesWithCost.java @@ -0,0 +1,26 @@ +// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.vespa.hosted.provision.autoscale; + +/** + * @author bratseth + */ +public class ClusterResourcesWithCost { + + private final ClusterResources resources; + private final double cost; + + public ClusterResourcesWithCost(ClusterResources resources, double cost) { + this.resources = resources; + this.cost = cost; + } + + public ClusterResources clusterResources() { return resources;} + + public double cost() { return cost; } + + @Override + public String toString() { + return "$" + cost + ": " + clusterResources(); + } + +} diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/MetricsResponse.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/MetricsResponse.java new file mode 100644 index 00000000000..a599606c314 --- /dev/null +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/MetricsResponse.java @@ -0,0 +1,74 @@ +// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
package com.yahoo.vespa.hosted.provision.autoscale;

import com.yahoo.slime.ArrayTraverser;
import com.yahoo.slime.Inspector;
import com.yahoo.slime.ObjectTraverser;
import com.yahoo.slime.Slime;
import com.yahoo.slime.SlimeUtils;

import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

/**
 * Consumes a response from the metrics/v2 API and populates the fields of this with the resulting values
 *
 * @author bratseth
 */
public class MetricsResponse {

    private final List<NodeMetrics.MetricValue> metricValues = new ArrayList<>();

    /** Parses the given JSON response */
    public MetricsResponse(byte[] response) {
        this(SlimeUtils.jsonToSlime(response));
    }

    /** Parses the given JSON response */
    public MetricsResponse(String response) {
        this(SlimeUtils.jsonToSlime(response));
    }

    private MetricsResponse(Slime response) {
        Inspector root = response.get();
        Inspector nodes = root.field("nodes");
        nodes.traverse((ArrayTraverser)(__, node) -> consumeNode(node));
    }

    /** Returns a read-only view of the metric values found in the response */
    public List<NodeMetrics.MetricValue> metrics() { return Collections.unmodifiableList(metricValues); }

    private void consumeNode(Inspector node) {
        String hostname = node.field("hostname").asString();
        consumeNodeMetrics(hostname, node.field("node"));
        consumeServiceMetrics(hostname, node.field("services"));
    }

    /** Adds one metric value for each resource whose metric is present in the node level metrics */
    private void consumeNodeMetrics(String hostname, Inspector node) {
        long timestamp = node.field("timestamp").asLong();
        Map<String, Double> values = consumeMetrics(node.field("metrics"));
        for (Resource resource : Resource.values())
            addMetricIfPresent(hostname, resource.metricName(), timestamp, values);
    }

    private void addMetricIfPresent(String hostname, String metricName, long timestamp, Map<String, Double> values) {
        Double value = values.get(metricName);
        if (value != null)
            metricValues.add(new NodeMetrics.MetricValue(hostname, metricName, timestamp, value.floatValue()));
    }

    /**
     * Service level metrics are not used for anything yet: The previous implementation read the
     * name, timestamp and values here and discarded them, so this is deliberately a no-op.
     */
    private void consumeServiceMetrics(String hostname, Inspector services) {
        // TODO: Consume service metrics when something needs them
    }

    /** Collects all name-value pairs found under "values" in each item of the given metrics array */
    private Map<String, Double> consumeMetrics(Inspector metrics) {
        Map<String, Double> values = new HashMap<>();
        metrics.traverse((ArrayTraverser) (__, item) -> consumeMetricsItem(item, values));
        return values;
    }

    private void consumeMetricsItem(Inspector item, Map<String, Double> values) {
        item.field("values").traverse((ObjectTraverser)(name, value) -> values.put(name, value.asDouble()));
    }

}

// File: node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/NodeMetrics.java
// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.vespa.hosted.provision.autoscale;

import com.yahoo.config.provision.ApplicationId;

import java.util.Collection;

/**
 * Interface to retrieve metrics on (tenant) nodes.
 *
 * @author bratseth
 */
public interface NodeMetrics {

    /**
     * Fetches metrics for an application. This call may be expensive.
     *
     * @param application the application to fetch metrics from
     */
    Collection<MetricValue> fetchMetrics(ApplicationId application);

    /** An immutable value of a named metric, measured on a host at a point in time */
    final class MetricValue {

        private final String hostname;
        private final String name;
        private final long timestamp;
        private final float value;

        public MetricValue(String hostname, String name, long timestamp, float value) {
            this.hostname = hostname;
            this.name = name;
            this.timestamp = timestamp;
            this.value = value;
        }

        public String hostname() { return hostname; }
        public String name() { return name; }
        public long timestamp() { return timestamp; }
        public float value() { return value; }

        @Override
        public String toString() {
            return "metric value " + name + ": " + value + " at " + timestamp + " for " + hostname;
        }

    }

}

// File: node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/NodeMetricsDb.java
// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.vespa.hosted.provision.autoscale;

import java.time.Clock;
import java.time.Duration;
import java.time.Instant;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.stream.Collectors;
import java.util.stream.Stream;

/**
 * An in-memory time-series "database" of node metrics.
 * Thread model: One writer, many readers.
 *
 * @author bratseth
 */
public class NodeMetricsDb {

    private static final Duration dbWindow = Duration.ofHours(24);

    /** Measurements by key. Each list of measurements is sorted by increasing timestamp */
    private final Map<MeasurementKey, List<Measurement>> db = new HashMap<>();

    /** Lock all access for now since we modify lists inside a map */
    private final Object lock = new Object();

    /**
     * Adds the given measurements to this.
     *
     * @throws IllegalArgumentException if a metric name does not map to a resource (see Resource.fromMetric)
     */
    public void add(Collection<NodeMetrics.MetricValue> metricValues) {
        synchronized (lock) {
            for (var value : metricValues) {
                List<Measurement> measurements = db.computeIfAbsent(new MeasurementKey(value.hostname(),
                                                                                       Resource.fromMetric(value.name())),
                                                                    (__) -> new ArrayList<>());
                measurements.add(new Measurement(value.timestamp(), value.value()));
            }
        }
    }

    /** Must be called intermittently (as long as add is called) to gc old measurements */
    public void gc(Clock clock) {
        synchronized (lock) {
            // TODO: We may need to do something more complicated to avoid spending too much memory to
            // lower the measurement interval (see NodeRepositoryMaintenance)
            // Each measurement is Object + long + float = 16 + 8 + 4 = 28 bytes
            // 24 hours with 1k nodes and 3 resources and 1 measurement/sec is about 10Gb

            long oldestTimestamp = clock.instant().minus(dbWindow).toEpochMilli();
            for (Iterator<List<Measurement>> i = db.values().iterator(); i.hasNext(); ) {
                List<Measurement> measurements = i.next();

                // Find the leading run of expired measurements and drop it in one operation,
                // rather than shifting the whole list once per removed element
                int firstRetained = 0;
                while (firstRetained < measurements.size() && measurements.get(firstRetained).timestamp < oldestTimestamp)
                    firstRetained++;
                measurements.subList(0, firstRetained).clear();

                if (measurements.isEmpty())
                    i.remove();
            }
        }
    }

    /** Returns a window within which we can ask for specific information from this db */
    public Window getWindow(Instant startTime, Resource resource, List<String> hostnames) {
        return new Window(startTime, resource, hostnames);
    }

    /** A view of the measurements of one resource on a set of hosts, from a start time and onwards */
    public class Window {

        private final long startTime;
        private final List<MeasurementKey> keys;

        private Window(Instant startTime, Resource resource, List<String> hostnames) {
            this.startTime = startTime.toEpochMilli();
            keys = hostnames.stream().map(hostname -> new MeasurementKey(hostname, resource)).collect(Collectors.toList());
        }

        /** Returns the number of measurements at or after the start time, over all hosts of this window */
        public int measurementCount() {
            synchronized (lock) {
                return (int) keys.stream()
                                 .flatMap(key -> db.getOrDefault(key, List.of()).stream())
                                 .filter(measurement -> measurement.timestamp >= startTime)
                                 .count();
            }
        }

        /** Returns the count of hostnames which have measurements in this window */
        public int hostnames() {
            synchronized (lock) {
                int count = 0;
                for (MeasurementKey key : keys) {
                    List<Measurement> measurements = db.get(key);
                    if (measurements == null || measurements.isEmpty()) continue;

                    // Lists are sorted by increasing timestamp, so it suffices to check the newest one
                    if (measurements.get(measurements.size() - 1).timestamp >= startTime)
                        count++;
                }
                return count;
            }
        }

        /**
         * Returns the average of all measurements at or after the start time, over all hosts of this window.
         * This is NaN if there are no such measurements, so check measurementCount() first.
         */
        public double average() {
            synchronized (lock) {
                double sum = 0;
                int count = 0;
                for (MeasurementKey key : keys) {
                    List<Measurement> measurements = db.get(key);
                    if (measurements == null) continue;

                    // Walk backwards from the newest measurement until we leave the window
                    int index = measurements.size() - 1;
                    while (index >= 0 && measurements.get(index).timestamp >= startTime) {
                        sum += measurements.get(index).value;
                        count++;

                        index--;
                    }
                }
                return sum / count;
            }
        }

    }

    /** The key of a series of measurements: A resource on a host */
    private static class MeasurementKey {

        private final String hostname;
        private final Resource resource;

        public MeasurementKey(String hostname, Resource resource) {
            this.hostname = hostname;
            this.resource = resource;
        }

        @Override
        public int hashCode() {
            return Objects.hash(hostname, resource);
        }

        @Override
        public boolean equals(Object o) {
            if (this == o) return true;
            if ( ! (o instanceof MeasurementKey)) return false;
            MeasurementKey other = (MeasurementKey)o;
            if ( ! this.hostname.equals(other.hostname)) return false;
            if ( ! this.resource.equals(other.resource)) return false;
            return true;
        }

    }

    private static class Measurement {

        /** The time of this measurement in epoch millis */
        private final long timestamp;

        /** The measured value */
        private final float value;

        public Measurement(long timestamp, float value) {
            this.timestamp = timestamp;
            this.value = value;
        }

    }

}

// File: node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/NodeMetricsFetcher.java
// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.vespa.hosted.provision.autoscale;

import ai.vespa.util.http.VespaHttpClientBuilder;
import com.google.inject.Inject;
import com.yahoo.component.AbstractComponent;
import com.yahoo.config.provision.ApplicationId;
import com.yahoo.vespa.applicationmodel.HostName;
import com.yahoo.vespa.hosted.provision.Node;
import com.yahoo.vespa.hosted.provision.NodeRepository;
import com.yahoo.vespa.orchestrator.HostNameNotFoundException;
import com.yahoo.vespa.orchestrator.Orchestrator;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.impl.client.BasicResponseHandler;
import org.apache.http.impl.client.CloseableHttpClient;

import java.io.IOException;
import java.io.UncheckedIOException;
import java.util.Collection;
import java.util.List;
import java.util.logging.Level;
import java.util.logging.Logger;

/**
 * Fetches node metrics over the metrics/v2 API
 *
 * @author bratseth
 */
public class NodeMetricsFetcher extends AbstractComponent implements NodeMetrics {

    private static final Logger log = Logger.getLogger(NodeMetricsFetcher.class.getName());

    private static final String apiPath = "/metrics/v2/values";

    /** The port the metrics API is requested on */
    private static final int apiPort = 4080;

    private final NodeRepository nodeRepository;
    private final Orchestrator orchestrator;
    private final HttpClient httpClient;

    @Inject
    public NodeMetricsFetcher(NodeRepository nodeRepository, Orchestrator orchestrator) {
        this(nodeRepository, orchestrator, new ApacheHttpClient());
    }

    NodeMetricsFetcher(NodeRepository nodeRepository, Orchestrator orchestrator, HttpClient httpClient) {
        this.nodeRepository = nodeRepository;
        this.orchestrator = orchestrator;
        this.httpClient = httpClient;
    }

    /**
     * Fetches metrics by asking the first active container node of the application which is not suspended.
     *
     * @return the metric values in the response, or an empty collection if there is no such container node
     */
    @Override
    public Collection<MetricValue> fetchMetrics(ApplicationId application) {
        List<Node> candidates = nodeRepository.list()
                                              .owner(application)
                                              .state(Node.State.active)
                                              .container()
                                              .filter(this::expectedUp)
                                              .asList();
        if (candidates.isEmpty()) return List.of(); // Nobody to ask: Not an error, there is just nothing to fetch

        Node metricsV2Container = candidates.get(0);
        String url = "http://" + metricsV2Container.hostname() + ":" + apiPort + apiPath + "?consumer=vespa-consumer-metrics";
        String response = httpClient.get(url);
        return new MetricsResponse(response).metrics();
    }

    @Override
    public void deconstruct() {
        httpClient.close();
    }

    /** Returns whether the node is known to the orchestrator and not suspended */
    private boolean expectedUp(Node node) {
        try {
            return ! orchestrator.getNodeStatus(new HostName(node.hostname())).isSuspended();
        }
        catch (HostNameNotFoundException e) {
            return false;
        }
    }

    /** The simplest possible http client interface */
    public interface HttpClient {

        String get(String url);
        void close();

    }

    /** Implements the HttpClient interface by delegating to an Apache HTTP client */
    public static class ApacheHttpClient implements HttpClient {

        private final CloseableHttpClient httpClient = VespaHttpClientBuilder.createWithBasicConnectionManager().build();

        @Override
        public String get(String url) {
            try {
                return httpClient.execute(new HttpGet(url), new BasicResponseHandler());
            }
            catch (IOException e) {
                throw new UncheckedIOException("Could not get " + url, e);
            }
        }

        @Override
        public void close() {
            try {
                httpClient.close();
            }
            catch (IOException e) {
                log.log(Level.WARNING, "Exception deconstructing", e);
            }
        }

    }

}

// File: node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/Resource.java
// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.vespa.hosted.provision.autoscale;

import com.yahoo.config.provision.NodeResources;

/**
 * A resource subject to autoscaling
 *
 * @author bratseth
 */
public enum Resource {

    cpu {
        String metricName() { return "cpu.util"; }
        double idealAverageLoad() { return 0.2; }
        double valueFrom(NodeResources resources) { return resources.vcpu(); }
    },

    memory {
        String metricName() { return "memory.util"; }
        double idealAverageLoad() { return 0.7; }
        double valueFrom(NodeResources resources) { return resources.memoryGb(); }
    },

    disk {
        String metricName() { return "disk.util"; }
        double idealAverageLoad() { return 0.7; }
        double valueFrom(NodeResources resources) { return resources.diskGb(); }
    };

    /** The name of the metric which measures the utilization of this resource */
    abstract String metricName();

    /** The load we should have of this resource on average, when one node in the cluster is down */
    abstract double idealAverageLoad();

    /** Returns the amount of this resource in the given node resources */
    abstract double valueFrom(NodeResources resources);

    /**
     * Returns the resource measured by the given metric.
     *
     * @throws IllegalArgumentException if no resource has this metric name
     */
    public static Resource fromMetric(String metricName) {
        for (Resource resource : values())
            if (resource.metricName().equals(metricName)) return resource;
        throw new IllegalArgumentException("Metric '" + metricName + "' does not map to a resource");
    }

}

// File: node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ResourceIterator.java
// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.vespa.hosted.provision.autoscale;

import com.yahoo.config.provision.NodeResources;

/**
 * Provides iteration over possible cluster resource allocations given a target total load
 * and current groups/nodes allocation.
 */
public class ResourceIterator {

    // Configured min and max nodes TODO: These should come from the application package
    private static final int minimumNodesPerCluster = 3; // Since this is with redundancy it cannot be lower than 2
    private static final int maximumNodesPerCluster = 150;

    // When a query is issued on a node the cost is the sum of a fixed cost component and a cost component
    // proportional to document count. We must account for this when comparing configurations with more or fewer nodes.
    // TODO: Measure this, and only take it into account with queries
    private static final double fixedCpuCostFraction = 0.1;

    // Describes the observed state
    private final ClusterResources allocation;
    private final double cpuLoad;
    private final double memoryLoad;
    private final double diskLoad;
    private final int groupSize;

    // Derived from the observed state
    private final int nodeIncrement;
    private final boolean singleGroupMode;

    // Iterator state
    private int currentNodes;

    /**
     * Creates an iterator starting at the lowest node count considered for the cluster.
     *
     * @param cpuLoad the observed cpu load
     * @param memoryLoad the observed memory load
     * @param diskLoad the observed disk load
     * @param currentAllocation the current resources of the cluster
     */
    public ResourceIterator(double cpuLoad, double memoryLoad, double diskLoad, ClusterResources currentAllocation) {
        this.cpuLoad = cpuLoad;
        this.memoryLoad = memoryLoad;
        this.diskLoad = diskLoad;

        // ceil: If the division does not produce a whole number we assume some node is missing
        groupSize = (int)Math.ceil((double)currentAllocation.nodes() / currentAllocation.groups());
        allocation = currentAllocation;

        // What number of nodes is it effective to add or remove at a time from this cluster?
        // This is the group size, since we (for now) assume the group size is decided by someone wiser than us
        // and we decide the number of groups.
        // The exception is when we only have one group, where we can add and remove single nodes in it.
        singleGroupMode = currentAllocation.groups() == 1;
        nodeIncrement = singleGroupMode ? 1 : groupSize;

        // Step down from the current node count to the lowest count we are willing to consider
        currentNodes = currentAllocation.nodes();
        while (currentNodes - nodeIncrement >= minimumNodesPerCluster
               && (singleGroupMode || currentNodes - nodeIncrement > groupSize)) // group level redundancy
            currentNodes -= nodeIncrement;
    }

    /** Returns the allocation at the current node count and advances to the next node count */
    public ClusterResources next() {
        // Size node resources for the situation where one node (or one group) is down
        // NOTE(review): For a single group cluster with one node this becomes 0, which makes
        //               resourcesFor divide by zero - confirm that callers never pass such a cluster
        int nodesWithRedundancy = currentNodes - (singleGroupMode ? 1 : groupSize);
        ClusterResources next = new ClusterResources(currentNodes,
                                                     singleGroupMode ? 1 : currentNodes / groupSize,
                                                     resourcesFor(nodesWithRedundancy));
        currentNodes += nodeIncrement;
        return next;
    }

    /** Returns whether the current node count is within the maximum number of nodes per cluster */
    public boolean hasNext() {
        return currentNodes <= maximumNodesPerCluster;
    }

    /**
     * For the observed load this instance is initialized with, returns the resources needed per node to be at
     * ideal load given a target node count
     */
    private NodeResources resourcesFor(int nodeCount) {
        // Cpu: Scales with cluster size (TODO: Only reads, writes scale with group size)
        // Memory and disk: Scales with group size

        double cpu, memory, disk;
        if (singleGroupMode) {
            // The fixed cost portion of cpu does not scale with changes to the node count
            // TODO: Only for the portion of cpu consumed by queries
            double totalCpu = totalUsage(Resource.cpu, cpuLoad);
            cpu = fixedCpuCostFraction * totalCpu / groupSize / Resource.cpu.idealAverageLoad() +
                  (1 - fixedCpuCostFraction) * totalCpu / nodeCount / Resource.cpu.idealAverageLoad();
            memory = totalGroupUsage(Resource.memory, memoryLoad) / nodeCount / Resource.memory.idealAverageLoad();
            disk = totalGroupUsage(Resource.disk, diskLoad) / nodeCount / Resource.disk.idealAverageLoad();
        }
        else {
            cpu = totalUsage(Resource.cpu, cpuLoad) / nodeCount / Resource.cpu.idealAverageLoad();
            memory = totalGroupUsage(Resource.memory, memoryLoad) / groupSize / Resource.memory.idealAverageLoad();
            disk = totalGroupUsage(Resource.disk, diskLoad) / groupSize / Resource.disk.idealAverageLoad();
        }
        return allocation.nodeResources().withVcpu(cpu).withMemoryGb(memory).withDiskGb(disk);
    }

    /** Returns the usage of the resource summed over all nodes in the current allocation */
    private double totalUsage(Resource resource, double load) {
        return load * resource.valueFrom(allocation.nodeResources()) * allocation.nodes();
    }

    /** Returns the usage of the resource summed over the nodes of a single group */
    private double totalGroupUsage(Resource resource, double load) {
        return load * resource.valueFrom(allocation.nodeResources()) * groupSize;
    }

}

// File: node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/AutoscalingMaintainer.java
// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.vespa.hosted.provision.maintenance;

import com.yahoo.config.provision.ApplicationId;
import com.yahoo.config.provision.ClusterSpec;
import com.yahoo.config.provision.Deployer;
import com.yahoo.vespa.hosted.provision.Node;
import com.yahoo.vespa.hosted.provision.NodeRepository;
import com.yahoo.vespa.hosted.provision.autoscale.Autoscaler;
import com.yahoo.vespa.hosted.provision.autoscale.ClusterResources;
import com.yahoo.vespa.hosted.provision.autoscale.NodeMetricsDb;
import com.yahoo.vespa.hosted.provision.provisioning.HostResourcesCalculator;

import java.time.Duration;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.stream.Collectors;

/**
 * Maintainer making automatic scaling decisions
 *
 * @author bratseth
 */
public class AutoscalingMaintainer extends Maintainer {

    private final Autoscaler autoscaler;
    private final Deployer deployer;

    public AutoscalingMaintainer(NodeRepository nodeRepository,
                                 HostResourcesCalculator hostResourcesCalculator,
                                 NodeMetricsDb metricsDb,
                                 Deployer deployer,
                                 Duration interval) {
        super(nodeRepository, interval);
        this.autoscaler = new Autoscaler(hostResourcesCalculator, metricsDb, nodeRepository);
        this.deployer = deployer;
    }

    @Override
    protected void maintain() {
        if ( ! nodeRepository().zone().environment().isProduction()) return;

        activeNodesByApplication().forEach((applicationId, nodes) -> autoscale(applicationId, nodes));
    }

    /** Asks the autoscaler for a target for each cluster of the application, and logs the targets it suggests */
    private void autoscale(ApplicationId application, List<Node> applicationNodes) {
        // NOTE(review): The deployment is only used for the validity check here - confirm whether
        //               MaintenanceDeployment holds anything (e.g. a lock) which must be released
        MaintenanceDeployment deployment = new MaintenanceDeployment(application, deployer, nodeRepository());
        if ( ! deployment.isValid()) return; // Another config server will consider this application
        nodesByCluster(applicationNodes).forEach((clusterSpec, clusterNodes) -> {
            Optional<ClusterResources> target = autoscaler.autoscale(application, clusterSpec, clusterNodes);
            // Report the current size and resources of this cluster, not of the application as a whole
            target.ifPresent(t -> log.info("Autoscale: Application " + application + " cluster " + clusterSpec +
                                           " from " + clusterNodes.size() + " * " + clusterNodes.get(0).flavor().resources() +
                                           " to " + t.nodes() + " * " + t.nodeResources()));
        });
    }

    private Map<ClusterSpec, List<Node>> nodesByCluster(List<Node> applicationNodes) {
        return applicationNodes.stream().collect(Collectors.groupingBy(n -> n.allocation().get().membership().cluster()));
    }

}

// File: node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/Maintainer.java (modified)
com.yahoo.config.provision.NodeType; +import com.yahoo.vespa.hosted.provision.Node; import com.yahoo.vespa.hosted.provision.NodeRepository; import java.time.Duration; import java.time.Instant; import java.util.List; +import java.util.Map; import java.util.concurrent.ScheduledExecutorService; import java.util.concurrent.ScheduledThreadPoolExecutor; import java.util.concurrent.TimeUnit; import java.util.logging.Level; import java.util.logging.Logger; +import java.util.stream.Collectors; /** * A maintainer is some job which runs at a fixed rate to perform some maintenance task on the node repo. @@ -75,6 +80,12 @@ public abstract class Maintainer extends AbstractComponent implements Runnable { private String name() { return this.getClass().getSimpleName(); } + /** A utility to group active tenant applications by application */ + protected Map<ApplicationId, List<Node>> activeNodesByApplication() { + return nodeRepository().list().nodeType(NodeType.tenant).state(Node.State.active).asList() + .stream().collect(Collectors.groupingBy(n -> n.allocation().get().owner())); + } + static long staggeredDelay(List<HostName> cluster, HostName host, Instant now, Duration interval) { if ( ! cluster.contains(host)) return interval.toMillis(); diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeMetricsDbMaintainer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeMetricsDbMaintainer.java new file mode 100644 index 00000000000..178e8385008 --- /dev/null +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeMetricsDbMaintainer.java @@ -0,0 +1,49 @@ +// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
package com.yahoo.vespa.hosted.provision.maintenance;

import com.yahoo.config.provision.ApplicationId;
import com.yahoo.vespa.hosted.provision.autoscale.NodeMetrics;
import com.yahoo.vespa.hosted.provision.NodeRepository;
import com.yahoo.vespa.hosted.provision.autoscale.NodeMetricsDb;

import java.time.Duration;
import java.util.logging.Level;

/**
 * Maintainer which keeps the node metric db up to date by periodically fetching metrics from all
 * active nodes.
 *
 * @author bratseth
 */
public class NodeMetricsDbMaintainer extends Maintainer {

    /** At most this many fetch failures are logged per run, to avoid flooding the log */
    private static final int maxWarningsPerInvocation = 2;

    private final NodeMetrics nodeMetrics;
    private final NodeMetricsDb nodeMetricsDb;

    public NodeMetricsDbMaintainer(NodeRepository nodeRepository,
                                   NodeMetrics nodeMetrics,
                                   NodeMetricsDb nodeMetricsDb,
                                   Duration interval) {
        super(nodeRepository, interval);
        this.nodeMetrics = nodeMetrics;
        this.nodeMetricsDb = nodeMetricsDb;
    }

    /** Fetches and stores metrics for each application with active nodes, then discards old measurements */
    @Override
    protected void maintain() {
        int failures = 0;
        for (ApplicationId application : activeNodesByApplication().keySet()) {
            try {
                var fetched = nodeMetrics.fetchMetrics(application);
                nodeMetricsDb.add(fetched);
            }
            catch (Exception e) {
                // A failure for one application should not stop us from updating the others
                boolean withinLogBudget = failures < maxWarningsPerInvocation;
                failures++;
                if (withinLogBudget)
                    log.log(Level.WARNING, "Could not update metrics for " + application, e);
            }
        }
        nodeMetricsDb.gc(nodeRepository().clock());
    }

}

// File: node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRepositoryMaintenance.java (modified)
com.yahoo.config.provision.HostLivenessTracker; import com.yahoo.config.provision.InfraDeployer; import com.yahoo.config.provision.Zone; +import com.yahoo.vespa.hosted.provision.autoscale.NodeMetrics; import com.yahoo.jdisc.Metric; import com.yahoo.vespa.flags.FlagSource; import com.yahoo.vespa.hosted.provision.NodeRepository; +import com.yahoo.vespa.hosted.provision.autoscale.NodeMetricsDb; import com.yahoo.vespa.hosted.provision.provisioning.ProvisionServiceProvider; import com.yahoo.vespa.orchestrator.Orchestrator; import com.yahoo.vespa.service.monitor.ServiceMonitor; @@ -48,22 +50,25 @@ public class NodeRepositoryMaintenance extends AbstractComponent { private final CapacityReportMaintainer capacityReportMaintainer; private final OsUpgradeActivator osUpgradeActivator; private final Rebalancer rebalancer; + private final NodeMetricsDbMaintainer nodeMetricsDbMaintainer; + private final AutoscalingMaintainer autoscalingMaintainer; @SuppressWarnings("unused") @Inject public NodeRepositoryMaintenance(NodeRepository nodeRepository, Deployer deployer, InfraDeployer infraDeployer, HostLivenessTracker hostLivenessTracker, ServiceMonitor serviceMonitor, Zone zone, Orchestrator orchestrator, Metric metric, - ProvisionServiceProvider provisionServiceProvider, - FlagSource flagSource) { + ProvisionServiceProvider provisionServiceProvider, FlagSource flagSource, + NodeMetrics nodeMetrics, NodeMetricsDb nodeMetricsDb) { this(nodeRepository, deployer, infraDeployer, hostLivenessTracker, serviceMonitor, zone, Clock.systemUTC(), - orchestrator, metric, provisionServiceProvider, flagSource); + orchestrator, metric, provisionServiceProvider, flagSource, nodeMetrics, nodeMetricsDb); } public NodeRepositoryMaintenance(NodeRepository nodeRepository, Deployer deployer, InfraDeployer infraDeployer, HostLivenessTracker hostLivenessTracker, ServiceMonitor serviceMonitor, Zone zone, Clock clock, Orchestrator orchestrator, Metric metric, - ProvisionServiceProvider 
provisionServiceProvider, FlagSource flagSource) { + ProvisionServiceProvider provisionServiceProvider, FlagSource flagSource, + NodeMetrics nodeMetrics, NodeMetricsDb nodeMetricsDb) { DefaultTimes defaults = new DefaultTimes(zone); nodeFailer = new NodeFailer(deployer, hostLivenessTracker, serviceMonitor, nodeRepository, defaults.failGrace, clock, orchestrator, throttlePolicyFromEnv().orElse(defaults.throttlePolicy), metric); @@ -85,6 +90,8 @@ public class NodeRepositoryMaintenance extends AbstractComponent { capacityReportMaintainer = new CapacityReportMaintainer(nodeRepository, metric, defaults.capacityReportInterval); osUpgradeActivator = new OsUpgradeActivator(nodeRepository, defaults.osUpgradeActivatorInterval); rebalancer = new Rebalancer(deployer, nodeRepository, provisionServiceProvider.getHostResourcesCalculator(), provisionServiceProvider.getHostProvisioner(), metric, clock, defaults.rebalancerInterval); + nodeMetricsDbMaintainer = new NodeMetricsDbMaintainer(nodeRepository, nodeMetrics, nodeMetricsDb, defaults.nodeMetricsCollectionInterval); + autoscalingMaintainer = new AutoscalingMaintainer(nodeRepository, provisionServiceProvider.getHostResourcesCalculator(), nodeMetricsDb, deployer, defaults.autoscalingInterval); // The DuperModel is filled with infrastructure applications by the infrastructure provisioner, so explicitly run that now infrastructureProvisioner.maintainButThrowOnException(); @@ -109,6 +116,8 @@ public class NodeRepositoryMaintenance extends AbstractComponent { dynamicProvisioningMaintainer.ifPresent(Maintainer::deconstruct); osUpgradeActivator.deconstruct(); rebalancer.deconstruct(); + nodeMetricsDbMaintainer.deconstruct(); + autoscalingMaintainer.deconstruct(); } private static Optional<NodeFailer.ThrottlePolicy> throttlePolicyFromEnv() { @@ -149,6 +158,8 @@ public class NodeRepositoryMaintenance extends AbstractComponent { private final Duration dynamicProvisionerInterval; private final Duration osUpgradeActivatorInterval; private 
final Duration rebalancerInterval; + private final Duration nodeMetricsCollectionInterval; + private final Duration autoscalingInterval; private final NodeFailer.ThrottlePolicy throttlePolicy; @@ -169,6 +180,8 @@ public class NodeRepositoryMaintenance extends AbstractComponent { dynamicProvisionerInterval = Duration.ofMinutes(5); osUpgradeActivatorInterval = zone.system().isCd() ? Duration.ofSeconds(30) : Duration.ofMinutes(5); rebalancerInterval = Duration.ofMinutes(40); + nodeMetricsCollectionInterval = Duration.ofMinutes(1); + autoscalingInterval = Duration.ofMinutes(5); if (zone.environment().equals(Environment.prod) && ! zone.system().isCd()) { inactiveExpiry = Duration.ofHours(4); // enough time for the application owner to discover and redeploy diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/CapacityPolicies.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/CapacityPolicies.java index 7c5ff35878b..179d7f2703c 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/CapacityPolicies.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/CapacityPolicies.java @@ -19,11 +19,15 @@ import java.util.Locale; public class CapacityPolicies { private final Zone zone; + + private final NodeResourceLimits nodeResourceLimits; + /* Deployments must match 1-to-1 the advertised resources of a physical host */ private final boolean isUsingAdvertisedResources; public CapacityPolicies(Zone zone) { this.zone = zone; + this.nodeResourceLimits = new NodeResourceLimits(zone); this.isUsingAdvertisedResources = zone.cloud().value().equals("aws"); } @@ -64,7 +68,7 @@ public class CapacityPolicies { } private void ensureSufficientResources(NodeResources resources, ClusterSpec cluster) { - double minMemoryGb = minMemoryGb(cluster.type()); + double minMemoryGb = nodeResourceLimits.minMemoryGb(cluster.type()); if (resources.memoryGb() >= minMemoryGb) 
return; throw new IllegalArgumentException(String.format(Locale.ENGLISH, @@ -72,12 +76,6 @@ public class CapacityPolicies { minMemoryGb, cluster.type().name(), cluster.id().value(), resources.memoryGb())); } - private int minMemoryGb(ClusterSpec.Type clusterType) { - if (zone.system() == SystemName.dev) return 1; // Allow small containers in dev system - if (clusterType == ClusterSpec.Type.admin) return 2; - return 4; - } - private NodeResources defaultNodeResources(ClusterSpec.Type clusterType) { if (clusterType == ClusterSpec.Type.admin) { if (zone.system() == SystemName.dev) { diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/GroupPreparer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/GroupPreparer.java index 5753bbb3c5a..af6fa8edf64 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/GroupPreparer.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/GroupPreparer.java @@ -64,7 +64,6 @@ public class GroupPreparer { .with(FetchVector.Dimension.APPLICATION_ID, application.serializedForm()) .value(); boolean allocateFully = dynamicProvisioningEnabled && preprovisionCapacityFlag.value().isEmpty(); - try (Mutex lock = nodeRepository.lock(application)) { // Lock ready pool to ensure that the same nodes are not simultaneously allocated by others diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/HostProvisioner.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/HostProvisioner.java index 394549e4141..0423f762f2b 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/HostProvisioner.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/HostProvisioner.java @@ -18,7 +18,7 @@ public interface HostProvisioner { /** * Schedule provisioning of a given number of hosts. 
* - * @param provisionIndexes List of unique provision indexes which will be used to generate the node hostnames + * @param provisionIndexes list of unique provision indexes which will be used to generate the node hostnames * on the form of <code>[prefix][index].[domain]</code> * @param resources the resources needed per node * @param applicationId id of the application that will own the provisioned host diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeAllocation.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeAllocation.java index ebd6a01e61f..c92f7889496 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeAllocation.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeAllocation.java @@ -390,4 +390,5 @@ class NodeAllocation { return count; } } + } diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeResourceLimits.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeResourceLimits.java new file mode 100644 index 00000000000..ca04bf66ce3 --- /dev/null +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeResourceLimits.java @@ -0,0 +1,32 @@ +// Copyright 2019 Oath Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.vespa.hosted.provision.provisioning; + +import com.yahoo.config.provision.ClusterSpec; +import com.yahoo.config.provision.NodeResources; +import com.yahoo.config.provision.SystemName; +import com.yahoo.config.provision.Zone; + +/** + * Defines the resource limits for nodes in various zones + * + * @author bratseth + */ +public class NodeResourceLimits { + + private final Zone zone; + + public NodeResourceLimits(Zone zone) { + this.zone = zone; + } + + public int minMemoryGb(ClusterSpec.Type clusterType) { + if (zone.system() == SystemName.dev) return 1; // Allow small containers in dev system + if (clusterType == ClusterSpec.Type.admin) return 2; + return 4; + } + + public NodeResources enlargeToLegal(NodeResources resources, ClusterSpec.Type clusterType) { + return resources.withMemoryGb(Math.max(minMemoryGb(clusterType), resources.memoryGb())); + } + +} diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/testutils/ContainerConfig.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/testutils/ContainerConfig.java index 49d0ba5cf70..d26accd7a84 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/testutils/ContainerConfig.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/testutils/ContainerConfig.java @@ -25,6 +25,8 @@ public class ContainerConfig { " <component id='com.yahoo.vespa.hosted.provision.testutils.ServiceMonitorStub'/>\n" + " <component id='com.yahoo.vespa.hosted.provision.testutils.MockDuperModel'/>\n" + " <component id='com.yahoo.vespa.hosted.provision.testutils.MockNodeFlavors'/>\n" + + " <component id='com.yahoo.vespa.hosted.provision.autoscale.NodeMetricsDb'/>\n" + + " <component id='com.yahoo.vespa.hosted.provision.testutils.MockNodeMetrics'/>\n" + " <component id='com.yahoo.vespa.hosted.provision.testutils.MockNodeRepository'/>\n" + " <component id='com.yahoo.vespa.hosted.provision.testutils.MockProvisionServiceProvider'/>\n" + " <component 
id='com.yahoo.vespa.hosted.provision.maintenance.NodeRepositoryMaintenance'/>\n" + diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/testutils/MockDuperModel.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/testutils/MockDuperModel.java index 915ef0d9125..e7ebf049e51 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/testutils/MockDuperModel.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/testutils/MockDuperModel.java @@ -18,6 +18,7 @@ import java.util.concurrent.ConcurrentHashMap; * @author hakonhall */ public class MockDuperModel implements DuperModelInfraApi { + private final Map<ApplicationId, InfraApplicationApi> supportedInfraApps = new HashMap<>(); private final ConcurrentHashMap<ApplicationId, List<HostName>> activeApps = new ConcurrentHashMap<>(); diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/testutils/MockNodeMetrics.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/testutils/MockNodeMetrics.java new file mode 100644 index 00000000000..d5397aa421c --- /dev/null +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/testutils/MockNodeMetrics.java @@ -0,0 +1,20 @@ +// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.vespa.hosted.provision.testutils; + +import com.yahoo.config.provision.ApplicationId; +import com.yahoo.vespa.hosted.provision.autoscale.NodeMetrics; + +import java.util.ArrayList; +import java.util.Collection; + +/** + * @author bratseth + */ +public class MockNodeMetrics implements NodeMetrics { + + @Override + public Collection<MetricValue> fetchMetrics(ApplicationId application) { + return new ArrayList<>(); + } + +} diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/NodeRepositoryTester.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/NodeRepositoryTester.java index ab813ddeb5a..95555185292 100644 --- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/NodeRepositoryTester.java +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/NodeRepositoryTester.java @@ -59,7 +59,7 @@ public class NodeRepositoryTester { public Node addNode(String id, String hostname, String parentHostname, String flavor, NodeType type) { Node node = nodeRepository.createNode(id, hostname, Optional.of(parentHostname), - nodeFlavors.getFlavorOrThrow(flavor), type); + nodeFlavors.getFlavorOrThrow(flavor), type); return nodeRepository.addNodes(Collections.singletonList(node)).get(0); } diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTest.java new file mode 100644 index 00000000000..fd0517a6e50 --- /dev/null +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTest.java @@ -0,0 +1,147 @@ +// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.vespa.hosted.provision.autoscale; + +import com.yahoo.config.provision.ApplicationId; +import com.yahoo.config.provision.CloudName; +import com.yahoo.config.provision.ClusterSpec; +import com.yahoo.config.provision.Environment; +import com.yahoo.config.provision.Flavor; +import com.yahoo.config.provision.NodeResources; +import com.yahoo.config.provision.RegionName; +import com.yahoo.config.provision.SystemName; +import com.yahoo.config.provision.Zone; +import org.junit.Test; + +import java.util.ArrayList; +import java.util.List; +import java.util.Optional; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertTrue; + +/** + * @author bratseth + */ +public class AutoscalingTest { + + @Test + public void testAutoscalingSingleGroup() { + NodeResources resources = new NodeResources(3, 100, 100, 1); + AutoscalingTester tester = new AutoscalingTester(resources); + + ApplicationId application1 = tester.applicationId("application1"); + ClusterSpec cluster1 = tester.clusterSpec(ClusterSpec.Type.container, "cluster1"); + + // deploy + tester.deploy(application1, cluster1, 5, 1, resources); + + assertTrue("No measurements -> No change", tester.autoscale(application1, cluster1).isEmpty()); + + tester.addMeasurements(Resource.cpu, 0.25f, 1f, 60, application1); + assertTrue("Too few measurements -> No change", tester.autoscale(application1, cluster1).isEmpty()); + + tester.addMeasurements(Resource.cpu, 0.25f, 1f, 60, application1); + ClusterResources scaledResources = tester.assertResources("Scaling up since resource usage is too high", + 15, 1, 1.3, 28.6, 28.6, + tester.autoscale(application1, cluster1)); + + tester.deploy(application1, cluster1, scaledResources); + assertTrue("Cluster in flux -> No further change", tester.autoscale(application1, cluster1).isEmpty()); + + tester.deactivateRetired(application1, cluster1, scaledResources); + tester.addMeasurements(Resource.cpu, 0.8f, 1f, 3, application1); + assertTrue("Load 
change is large, but insufficient measurements for new config -> No change", + tester.autoscale(application1, cluster1).isEmpty()); + + tester.addMeasurements(Resource.cpu, 0.19f, 1f, 100, application1); + assertEquals("Load change is small -> No change", Optional.empty(), tester.autoscale(application1, cluster1)); + + tester.addMeasurements(Resource.cpu, 0.1f, 1f, 120, application1); + tester.assertResources("Scaling down since resource usage has gone down significantly", + 26, 1, 0.6, 16.0, 16.0, + tester.autoscale(application1, cluster1)); + } + + @Test + public void testAutoscalingGroupSize1() { + NodeResources resources = new NodeResources(3, 100, 100, 1); + AutoscalingTester tester = new AutoscalingTester(resources); + + ApplicationId application1 = tester.applicationId("application1"); + ClusterSpec cluster1 = tester.clusterSpec(ClusterSpec.Type.container, "cluster1"); + + // deploy + tester.deploy(application1, cluster1, 5, 5, resources); + tester.addMeasurements(Resource.cpu, 0.25f, 1f, 120, application1); + tester.assertResources("Scaling up since resource usage is too high", + 7, 7, 2.5, 80.0, 80.0, + tester.autoscale(application1, cluster1)); + } + + @Test + public void testAutoscalingGroupSize3() { + NodeResources resources = new NodeResources(3, 100, 100, 1); + AutoscalingTester tester = new AutoscalingTester(resources); + + ApplicationId application1 = tester.applicationId("application1"); + ClusterSpec cluster1 = tester.clusterSpec(ClusterSpec.Type.container, "cluster1"); + + // deploy + tester.deploy(application1, cluster1, 6, 2, resources); + tester.addMeasurements(Resource.cpu, 0.22f, 1f, 120, application1); + tester.assertResources("Scaling up since resource usage is too high", + 9, 3, 2.7, 83.3, 83.3, + tester.autoscale(application1, cluster1)); + } + + @Test + public void testAutoscalingAvoidsIllegalConfigurations() { + NodeResources resources = new NodeResources(3, 100, 100, 1); + AutoscalingTester tester = new AutoscalingTester(resources); + 
+ ApplicationId application1 = tester.applicationId("application1"); + ClusterSpec cluster1 = tester.clusterSpec(ClusterSpec.Type.container, "cluster1"); + + // deploy + tester.deploy(application1, cluster1, 6, 1, resources); + tester.addMeasurements(Resource.memory, 0.02f, 1f, 120, application1); + tester.assertResources("Scaling down", + 6, 1, 3.0, 4.0, 100.0, + tester.autoscale(application1, cluster1)); + } + + @Test + public void testAutoscalingAws() { + List<Flavor> flavors = new ArrayList<>(); + flavors.add(new Flavor("aws-xlarge", new NodeResources(3, 200, 100, 1, NodeResources.DiskSpeed.fast, NodeResources.StorageType.remote))); + flavors.add(new Flavor("aws-large", new NodeResources(3, 150, 100, 1, NodeResources.DiskSpeed.fast, NodeResources.StorageType.remote))); + flavors.add(new Flavor("aws-medium", new NodeResources(3, 100, 100, 1, NodeResources.DiskSpeed.fast, NodeResources.StorageType.remote))); + flavors.add(new Flavor("aws-small", new NodeResources(3, 80, 100, 1, NodeResources.DiskSpeed.fast, NodeResources.StorageType.remote))); + AutoscalingTester tester = new AutoscalingTester(new Zone(CloudName.from("aws"), SystemName.main, + Environment.prod, RegionName.from("us-east")), + flavors); + + ApplicationId application1 = tester.applicationId("application1"); + ClusterSpec cluster1 = tester.clusterSpec(ClusterSpec.Type.container, "cluster1"); + + // deploy + tester.deploy(application1, cluster1, 5, 1, new NodeResources(3, 100, 100, 1)); + + tester.addMeasurements(Resource.memory, 0.9f, 0.6f, 120, application1); + ClusterResources scaledResources = tester.assertResources("Scaling up since resource usage is too high." 
+ + "Scaling flavor not count since the latter is more expensive due to " + + "memory charged but taken by aws, see MockHostResourcesCalculator", + 5, 1, 3, 150, 100, + tester.autoscale(application1, cluster1)); + + tester.deploy(application1, cluster1, scaledResources); + tester.deactivateRetired(application1, cluster1, scaledResources); + + tester.addMeasurements(Resource.memory, 0.3f, 0.6f, 1000, application1); + System.out.println("Low memory usage"); + tester.assertResources("Scaling down since resource usage has gone down", + 4, 1, 3, 100, 100, + tester.autoscale(application1, cluster1)); + } + +} diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTester.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTester.java new file mode 100644 index 00000000000..f15b7e4220b --- /dev/null +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTester.java @@ -0,0 +1,248 @@ +// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.vespa.hosted.provision.autoscale; + +import com.yahoo.component.Version; +import com.yahoo.config.provision.ApplicationId; +import com.yahoo.config.provision.Capacity; +import com.yahoo.config.provision.ClusterSpec; +import com.yahoo.config.provision.Environment; +import com.yahoo.config.provision.Flavor; +import com.yahoo.config.provision.HostSpec; +import com.yahoo.config.provision.NodeResources; +import com.yahoo.config.provision.NodeType; +import com.yahoo.config.provision.RegionName; +import com.yahoo.config.provision.Zone; +import com.yahoo.config.provisioning.FlavorsConfig; +import com.yahoo.test.ManualClock; +import com.yahoo.transaction.Mutex; +import com.yahoo.vespa.flags.FlagSource; +import com.yahoo.vespa.flags.Flags; +import com.yahoo.vespa.flags.InMemoryFlagSource; +import com.yahoo.vespa.hosted.provision.Node; +import com.yahoo.vespa.hosted.provision.NodeRepository; +import com.yahoo.vespa.hosted.provision.node.Agent; +import com.yahoo.vespa.hosted.provision.node.IP; +import com.yahoo.vespa.hosted.provision.provisioning.FatalProvisioningException; +import com.yahoo.vespa.hosted.provision.provisioning.HostProvisioner; +import com.yahoo.vespa.hosted.provision.provisioning.HostResourcesCalculator; +import com.yahoo.vespa.hosted.provision.provisioning.ProvisionedHost; +import com.yahoo.vespa.hosted.provision.provisioning.ProvisioningTester; + +import java.time.Duration; +import java.util.ArrayList; +import java.util.List; +import java.util.Optional; +import java.util.Set; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertTrue; + +class AutoscalingTester { + + private final ProvisioningTester provisioningTester; + private final Autoscaler autoscaler; + private final NodeMetricsDb db; + private final MockHostResourcesCalculator hostResourcesCalculator; + + /** Creates an autoscaling tester with a single host type ready */ + public AutoscalingTester(NodeResources hostResources) { + this(new 
Zone(Environment.prod, RegionName.from("us-east")), null, null, asConfig(hostResources)); + provisioningTester.makeReadyNodes(20, "hostFlavor", NodeType.host, 8); // "hostFlavor" generated by asConfig + provisioningTester.deployZoneApp(); + } + + public AutoscalingTester(Zone zone, List<Flavor> flavors) { + this(zone, + new MockHostProvisioner(flavors), + new InMemoryFlagSource().withBooleanFlag(Flags.ENABLE_DYNAMIC_PROVISIONING.id(), true), + asConfig(flavors)); + } + + private AutoscalingTester(Zone zone, MockHostProvisioner hostProvisioner, FlagSource flagSource, FlavorsConfig flavorsConfig) { + provisioningTester = new ProvisioningTester.Builder().zone(zone) + .flavorsConfig(flavorsConfig) + .hostProvisioner(hostProvisioner) + .flagSource(flagSource) + .build(); + + hostResourcesCalculator = new MockHostResourcesCalculator(zone); + db = new NodeMetricsDb(); + autoscaler = new Autoscaler(hostResourcesCalculator, db, nodeRepository()); + } + + public ApplicationId applicationId(String applicationName) { + return ApplicationId.from("tenant1", applicationName, "instance1"); + } + + public ClusterSpec clusterSpec(ClusterSpec.Type type, String clusterId) { + return ClusterSpec.request(type, + ClusterSpec.Id.from(clusterId), + Version.fromString("7"), + false); + } + + public void deploy(ApplicationId application, ClusterSpec cluster, ClusterResources resources) { + deploy(application, cluster, resources.nodes(), resources.groups(), resources.nodeResources()); + } + + public void deploy(ApplicationId application, ClusterSpec cluster, int nodes, int groups, NodeResources resources) { + List<HostSpec> hosts = provisioningTester.prepare(application, cluster, Capacity.fromCount(nodes, resources), groups); + for (HostSpec host : hosts) + makeReady(host.hostname()); + provisioningTester.deployZoneApp(); + provisioningTester.activate(application, hosts); + } + + public void makeReady(String hostname) { + Node node = nodeRepository().getNode(hostname).get(); + 
nodeRepository().write(node.with(new IP.Config(Set.of("::" + 0 + ":0"), Set.of())), nodeRepository().lock(node)); + Node host = nodeRepository().getNode(node.parentHostname().get()).get(); + host = host.with(new IP.Config(Set.of("::" + 0 + ":0"), Set.of("::" + 0 + ":2"))); + if (host.state() == Node.State.provisioned) + nodeRepository().setReady(List.of(host), Agent.system, getClass().getSimpleName()); + } + + public void deactivateRetired(ApplicationId application, ClusterSpec cluster, ClusterResources resources) { + try (Mutex lock = nodeRepository().lock(application)){ + for (Node node : nodeRepository().getNodes(application, Node.State.active)) { + if (node.allocation().get().membership().retired()) + nodeRepository().write(node.with(node.allocation().get().removable()), lock); + } + } + deploy(application, cluster, resources); + } + + /** + * Adds measurements with the given resource value and ideal values for the other resources, + * scaled to take one node redundancy into account. + * (I.e we adjust to measure a bit lower load than "naively" wanted to offset for the autoscaler + * wanting to see the ideal load with one node missing.) + * + * @param resource the resource we are explicitly setting the value of + * @param otherResourcesLoad the load factor relative to ideal to use for other resources + * @param count the number of measurements + * @param applicationId the application we're adding measurements for all nodes of + */ + public void addMeasurements(Resource resource, float value, float otherResourcesLoad, + int count, ApplicationId applicationId) { + List<Node> nodes = nodeRepository().getNodes(applicationId, Node.State.active); + float oneExtraNodeFactor = (float)(nodes.size() - 1.0) / (nodes.size()); + for (int i = 0; i < count; i++) { + clock().advance(Duration.ofMinutes(1)); + for (Node node : nodes) { + for (Resource r : Resource.values()) { + float effectiveValue = (r == resource ? 
value : (float) r.idealAverageLoad() * otherResourcesLoad) + * oneExtraNodeFactor; + db.add(List.of(new NodeMetrics.MetricValue(node.hostname(), + r.metricName(), + clock().instant().toEpochMilli(), + effectiveValue))); + } + } + } + } + + public Optional<ClusterResources> autoscale(ApplicationId application, ClusterSpec cluster) { + return autoscaler.autoscale(application, cluster, nodeRepository().getNodes(application, Node.State.active)); + } + + public ClusterResources assertResources(String message, + int nodeCount, int groupCount, + double approxCpu, double approxMemory, double approxDisk, + Optional<ClusterResources> actualResources) { + double delta = 0.0000000001; + assertTrue(message, actualResources.isPresent()); + assertEquals("Node count: " + message, nodeCount, actualResources.get().nodes()); + assertEquals("Group count: " + message, groupCount, actualResources.get().groups()); + assertEquals("Cpu: " + message, approxCpu, Math.round(actualResources.get().nodeResources().vcpu() * 10) / 10.0, delta); + assertEquals("Memory: " + message, approxMemory, Math.round(actualResources.get().nodeResources().memoryGb() * 10) / 10.0, delta); + assertEquals("Disk: " + message, approxDisk, Math.round(actualResources.get().nodeResources().diskGb() * 10) / 10.0, delta); + return actualResources.get(); + } + + public ManualClock clock() { + return provisioningTester.clock(); + } + + public NodeRepository nodeRepository() { + return provisioningTester.nodeRepository(); + } + + private static FlavorsConfig asConfig(NodeResources hostResources) { + FlavorsConfig.Builder b = new FlavorsConfig.Builder(); + b.flavor(asFlavorConfig("hostFlavor", hostResources)); + return b.build(); + } + + private static FlavorsConfig asConfig(List<Flavor> flavors) { + FlavorsConfig.Builder b = new FlavorsConfig.Builder(); + for (Flavor flavor : flavors) + b.flavor(asFlavorConfig(flavor.name(), flavor.resources())); + return b.build(); + } + + private static FlavorsConfig.Flavor.Builder 
asFlavorConfig(String flavorName, NodeResources resources) { + FlavorsConfig.Flavor.Builder flavor = new FlavorsConfig.Flavor.Builder(); + flavor.name(flavorName); + flavor.minCpuCores(resources.vcpu()); + flavor.minMainMemoryAvailableGb(resources.memoryGb()); + flavor.minDiskAvailableGb(resources.diskGb()); + flavor.bandwidth(resources.bandwidthGbps() * 1000); + return flavor; + } + + private static class MockHostResourcesCalculator implements HostResourcesCalculator { + + private final Zone zone; + + public MockHostResourcesCalculator(Zone zone) { + this.zone = zone; + } + + @Override + public NodeResources availableCapacityOf(String flavorName, NodeResources hostResources) { + if (zone.cloud().value().equals("aws")) + return hostResources.withMemoryGb(hostResources.memoryGb() + 3); + else + return hostResources; + } + + } + + private static class MockHostProvisioner implements HostProvisioner { + + private final List<Flavor> hostFlavors; + + public MockHostProvisioner(List<Flavor> hostFlavors) { + this.hostFlavors = hostFlavors; + } + + @Override + public List<ProvisionedHost> provisionHosts(List<Integer> provisionIndexes, NodeResources resources, ApplicationId applicationId) { + Flavor hostFlavor = hostFlavors.stream().filter(f -> f.resources().justNumbers().equals(resources.justNumbers())).findAny() + .orElseThrow(() -> new RuntimeException("No flavor matching " + resources + ". 
Flavors: " + hostFlavors)); + + List<ProvisionedHost> hosts = new ArrayList<>(); + for (int index : provisionIndexes) { + hosts.add(new ProvisionedHost("host" + index, + "hostname" + index, + hostFlavor, + "nodename" + index, + resources)); + } + return hosts; + } + + @Override + public List<Node> provision(Node host, Set<Node> children) throws FatalProvisioningException { + throw new RuntimeException("Not implemented"); + } + + @Override + public void deprovision(Node host) { + throw new RuntimeException("Not implemented"); + } + + } + +} diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/NodeMetricsDbTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/NodeMetricsDbTest.java new file mode 100644 index 00000000000..519235857f1 --- /dev/null +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/NodeMetricsDbTest.java @@ -0,0 +1,33 @@ +// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.vespa.hosted.provision.autoscale; + +import com.yahoo.test.ManualClock; +import org.junit.Test; + +import java.time.Duration; +import java.util.ArrayList; +import java.util.List; + +import static org.junit.Assert.assertEquals; + +public class NodeMetricsDbTest { + + @Test + public void testNodeMetricsDb() { + ManualClock clock = new ManualClock(); + NodeMetricsDb db = new NodeMetricsDb(); + List<NodeMetrics.MetricValue> values = new ArrayList<>(); + for (int i = 0; i < 40; i++) { + values.add(new NodeMetrics.MetricValue("host0", "cpu.util", clock.instant().toEpochMilli(), 0.9f)); + clock.advance(Duration.ofHours(1)); + } + db.add(values); + + assertEquals(30, db.getWindow(clock.instant().minus(Duration.ofHours(30)), Resource.cpu, List.of("host0")).measurementCount()); + assertEquals( 0, db.getWindow(clock.instant().minus(Duration.ofHours(30)), Resource.memory, List.of("host0")).measurementCount()); + db.gc(clock); + assertEquals(24, db.getWindow(clock.instant().minus(Duration.ofHours(30)), Resource.cpu, List.of("host0")).measurementCount()); + assertEquals( 0, db.getWindow(clock.instant().minus(Duration.ofHours(30)), Resource.memory, List.of("host0")).measurementCount()); + } + +} diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/NodeMetricsFetcherTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/NodeMetricsFetcherTest.java new file mode 100644 index 00000000000..4376bfd38b0 --- /dev/null +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/NodeMetricsFetcherTest.java @@ -0,0 +1,147 @@ +// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.vespa.hosted.provision.autoscale; + +import com.yahoo.config.provision.ApplicationId; +import com.yahoo.config.provision.Capacity; +import com.yahoo.config.provision.NodeResources; +import com.yahoo.vespa.hosted.provision.provisioning.ProvisioningTester; +import com.yahoo.vespa.hosted.provision.testutils.OrchestratorMock; +import com.yahoo.vespa.applicationmodel.HostName; +import org.junit.Test; + +import java.util.ArrayList; +import java.util.List; + +import static org.junit.Assert.assertEquals; + +public class NodeMetricsFetcherTest { + + @Test + public void testMetricsFetch() { + NodeResources resources = new NodeResources(1, 10, 100, 1); + ProvisioningTester tester = new ProvisioningTester.Builder().build(); + OrchestratorMock orchestrator = new OrchestratorMock(); + MockHttpClient httpClient = new MockHttpClient(); + NodeMetricsFetcher fetcher = new NodeMetricsFetcher(tester.nodeRepository(), orchestrator, httpClient); + + tester.makeReadyNodes(4, resources); // Creates (in order) host-1.yahoo.com, host-2.yahoo.com, host-3.yahoo.com, host-4.yahoo.com + tester.deployZoneApp(); + + ApplicationId application1 = tester.makeApplicationId(); + ApplicationId application2 = tester.makeApplicationId(); + tester.deploy(application1, Capacity.fromCount(2, resources)); // host-1.yahoo.com, host-2.yahoo.com + tester.deploy(application2, Capacity.fromCount(2, resources)); // host-4.yahoo.com, host-3.yahoo.com + + orchestrator.suspend(new HostName("host-4.yahoo.com")); + + { + httpClient.cannedResponse = cannedResponseForApplication1; + List<NodeMetrics.MetricValue> values = new ArrayList<>(fetcher.fetchMetrics(application1)); + assertEquals("http://host-1.yahoo.com:4080/metrics/v2/values?consumer=vespa-consumer-metrics", + httpClient.requestsReceived.get(0)); + assertEquals(5, values.size()); + assertEquals("metric value cpu.util: 16.2 at 1234 for host-1.yahoo.com", values.get(0).toString()); + assertEquals("metric value memory.util: 23.1 at 1234 for 
host-1.yahoo.com", values.get(1).toString()); + assertEquals("metric value disk.util: 82.0 at 1234 for host-1.yahoo.com", values.get(2).toString()); + assertEquals("metric value cpu.util: 20.0 at 1200 for host-2.yahoo.com", values.get(3).toString()); + assertEquals("metric value disk.util: 40.0 at 1200 for host-2.yahoo.com", values.get(4).toString()); + } + + { + httpClient.cannedResponse = cannedResponseForApplication2; + List<NodeMetrics.MetricValue> values = new ArrayList<>(fetcher.fetchMetrics(application2)); + assertEquals("http://host-3.yahoo.com:4080/metrics/v2/values?consumer=vespa-consumer-metrics", + httpClient.requestsReceived.get(1)); + assertEquals(3, values.size()); + assertEquals("metric value cpu.util: 10.0 at 1300 for host-3.yahoo.com", values.get(0).toString()); + assertEquals("metric value memory.util: 15.0 at 1300 for host-3.yahoo.com", values.get(1).toString()); + assertEquals("metric value disk.util: 20.0 at 1300 for host-3.yahoo.com", values.get(2).toString()); + } + } + + private static class MockHttpClient implements NodeMetricsFetcher.HttpClient { + + List<String> requestsReceived = new ArrayList<>(); + + String cannedResponse = null; + @Override + public String get(String url) { + requestsReceived.add(url); + return cannedResponse; + } + + @Override + public void close() { } + + } + + final String cannedResponseForApplication1 = + "{\n" + + " \"nodes\": [\n" + + " {\n" + + " \"hostname\": \"host-1.yahoo.com\",\n" + + " \"role\": \"role0\",\n" + + " \"node\": {\n" + + " \"timestamp\": 1234,\n" + + " \"metrics\": [\n" + + " {\n" + + " \"values\": {\n" + + " \"cpu.util\": 16.2,\n" + + " \"memory.util\": 23.1,\n" + + " \"disk.util\": 82\n" + + " },\n" + + " \"dimensions\": {\n" + + " \"state\": \"active\"\n" + + " }\n" + + " }\n" + + " ]\n" + + " }\n" + + " },\n" + + " {\n" + + " \"hostname\": \"host-2.yahoo.com\",\n" + + " \"role\": \"role1\",\n" + + " \"node\": {\n" + + " \"timestamp\": 1200,\n" + + " \"metrics\": [\n" + + " {\n" + + " 
\"values\": {\n" + + " \"cpu.util\": 20,\n" + + " \"disk.util\": 40\n" + + " },\n" + + " \"dimensions\": {\n" + + " \"state\": \"active\"\n" + + " }\n" + + " }\n" + + " ]\n" + + " }\n" + + " }\n" + + " ]\n" + + "}\n"; + + + final String cannedResponseForApplication2 = + "{\n" + + " \"nodes\": [\n" + + " {\n" + + " \"hostname\": \"host-3.yahoo.com\",\n" + + " \"role\": \"role0\",\n" + + " \"node\": {\n" + + " \"timestamp\": 1300,\n" + + " \"metrics\": [\n" + + " {\n" + + " \"values\": {\n" + + " \"cpu.util\": 10,\n" + + " \"memory.util\": 15,\n" + + " \"disk.util\": 20\n" + + " },\n" + + " \"dimensions\": {\n" + + " \"state\": \"active\"\n" + + " }\n" + + " }\n" + + " ]\n" + + " }\n" + + " }\n" + + " ]\n" + + "}\n"; + +} diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/DynamicDockerProvisionTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/DynamicDockerProvisionTest.java index 677aaf93336..8706661f261 100644 --- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/DynamicDockerProvisionTest.java +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/DynamicDockerProvisionTest.java @@ -69,7 +69,7 @@ public class DynamicDockerProvisionTest { @Test public void does_not_allocate_to_available_empty_hosts() { tester.makeReadyNodes(3, "small", NodeType.host, 10); - deployZoneApp(tester); + tester.deployZoneApp(); ApplicationId application = tester.makeApplicationId(); NodeResources flavor = new NodeResources(1, 4, 10, 1); @@ -89,7 +89,7 @@ public class DynamicDockerProvisionTest { tester.prepare(application, clusterSpec("myContent.t2.a2"), 2, 1, flavor); verify(hostProvisioner).provisionHosts(expectedProvisionIndexes, flavor, application); - // Ready the provisioned hosts, add an IP addreses to pool and activate them + // Ready the provisioned hosts, add an IP addresses to pool and activate them for (Integer i : expectedProvisionIndexes) { 
String hostname = "host-" + i; var ipConfig = new IP.Config(Set.of("::" + i + ":0"), Set.of("::" + i + ":2")); @@ -97,7 +97,7 @@ public class DynamicDockerProvisionTest { tester.nodeRepository().setReady(List.of(host), Agent.system, getClass().getSimpleName()); nameResolver.addRecord(hostname + "-2", "::" + i + ":2"); } - deployZoneApp(tester); + tester.deployZoneApp(); mockHostProvisioner(hostProvisioner, tester.nodeRepository().getAvailableFlavors().getFlavorOrThrow("small")); tester.prepare(application, clusterSpec("another-id"), 2, 1, flavor); @@ -145,19 +145,6 @@ public class DynamicDockerProvisionTest { assertTrue(indices.containsAll(IntStream.range(0, 10).boxed().collect(Collectors.toList()))); } - private static void deployZoneApp(ProvisioningTester tester) { - ApplicationId applicationId = tester.makeApplicationId(); - List<HostSpec> list = tester.prepare(applicationId, - ClusterSpec.request(ClusterSpec.Type.container, - ClusterSpec.Id.from("node-admin"), - Version.fromString("6.42"), - false), - Capacity.fromRequiredNodeType(NodeType.host), - 1); - tester.activate(applicationId, ImmutableSet.copyOf(list)); - } - - private static ClusterSpec clusterSpec(String clusterId) { return ClusterSpec.request(ClusterSpec.Type.content, ClusterSpec.Id.from(clusterId), Version.fromString("6.42"), false); } @@ -172,4 +159,5 @@ public class DynamicDockerProvisionTest { .collect(Collectors.toList()); }).when(hostProvisioner).provisionHosts(any(), any(), any()); } + } diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/ProvisioningTester.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/ProvisioningTester.java index e464ed07472..85a6ed31073 100644 --- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/ProvisioningTester.java +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/ProvisioningTester.java @@ -447,6 +447,7 @@ public class 
ProvisioningTester { } public static final class Builder { + private Curator curator; private FlavorsConfig flavorsConfig; private Zone zone; diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/restapi/v2/responses/maintenance.json b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/restapi/v2/responses/maintenance.json index 02746f1c79a..ab608bac2b4 100644 --- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/restapi/v2/responses/maintenance.json +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/restapi/v2/responses/maintenance.json @@ -1,6 +1,9 @@ { "jobs": [ { + "name": "AutoscalingMaintainer" + }, + { "name": "CapacityReportMaintainer" }, { @@ -25,6 +28,9 @@ "name": "NodeFailer" }, { + "name": "NodeMetricsDbMaintainer" + }, + { "name": "NodeRebooter" }, { |