summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorJon Bratseth <bratseth@verizonmedia.com>2020-02-26 16:32:06 +0100
committerJon Bratseth <bratseth@verizonmedia.com>2020-02-26 16:32:06 +0100
commit5168d102ca8c5d82c6f93753acc01cd2668d46a5 (patch)
tree8bef310c37d1508c5faecba3b4cf81d5d986e748
parent2aa1ee8401290bfce3b97409e8fc634b8386f247 (diff)
Revert "Merge pull request #12351 from vespa-engine/revert-11875-bratseth/node-metrics"
This reverts commit 37f6c5b31cb2809a54c34dc3a4e51307f3320fbd, reversing changes made to 76221d0b9b7bda577ac61ce8a79c7b6ee3a8dbb4.
-rw-r--r--config-model/src/main/java/com/yahoo/searchdefinition/document/SDField.java17
-rw-r--r--config-model/src/main/java/com/yahoo/searchdefinition/fieldoperation/IndexingRewriteOperation.java7
-rw-r--r--config-model/src/main/java/com/yahoo/vespa/model/admin/monitoring/MetricsConsumer.java5
-rw-r--r--config-model/src/main/java/com/yahoo/vespa/model/admin/monitoring/SystemMetrics.java1
-rw-r--r--config-provisioning/abi-spec.json1
-rw-r--r--config-provisioning/src/main/java/com/yahoo/config/provision/ClusterSpec.java1
-rw-r--r--config-provisioning/src/main/java/com/yahoo/config/provision/Flavor.java5
-rw-r--r--config-provisioning/src/main/java/com/yahoo/config/provision/HostName.java9
-rw-r--r--config-provisioning/src/main/java/com/yahoo/config/provision/NodeFlavors.java2
-rw-r--r--container-search/src/main/java/com/yahoo/prelude/IndexFacts.java1
-rw-r--r--container-search/src/main/java/com/yahoo/search/Result.java1
-rw-r--r--container-search/src/main/java/com/yahoo/search/federation/FederationResult.java4
-rw-r--r--container-search/src/main/java/com/yahoo/search/federation/FederationSearcher.java8
-rw-r--r--defaults/src/main/java/com/yahoo/vespa/defaults/Defaults.java1
-rw-r--r--metrics-proxy/src/main/java/ai/vespa/metricsproxy/core/VespaMetrics.java1
-rw-r--r--metrics-proxy/src/main/java/ai/vespa/metricsproxy/http/ValuesFetcher.java1
-rw-r--r--metrics-proxy/src/main/java/ai/vespa/metricsproxy/http/application/ApplicationMetricsRetriever.java1
-rw-r--r--metrics-proxy/src/main/java/ai/vespa/metricsproxy/http/application/NodeMetricsClient.java2
-rw-r--r--metrics-proxy/src/main/java/ai/vespa/metricsproxy/metric/model/ConsumerId.java1
-rw-r--r--metrics-proxy/src/main/java/ai/vespa/metricsproxy/service/CpuJiffies.java2
-rw-r--r--metrics-proxy/src/main/java/ai/vespa/metricsproxy/service/HttpMetricFetcher.java6
-rw-r--r--metrics-proxy/src/main/java/ai/vespa/metricsproxy/service/SystemPoller.java1
-rw-r--r--node-repository/src/main/config/node-repository.xml2
-rw-r--r--node-repository/src/main/java/com/yahoo/vespa/hosted/provision/Node.java5
-rw-r--r--node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/Autoscaler.java185
-rw-r--r--node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterResources.java65
-rw-r--r--node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterResourcesWithCost.java26
-rw-r--r--node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/MetricsResponse.java74
-rw-r--r--node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/NodeMetrics.java48
-rw-r--r--node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/NodeMetricsDb.java169
-rw-r--r--node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/NodeMetricsFetcher.java112
-rw-r--r--node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/Resource.java44
-rw-r--r--node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ResourceIterator.java104
-rw-r--r--node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/AutoscalingMaintainer.java62
-rw-r--r--node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/Maintainer.java11
-rw-r--r--node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeMetricsDbMaintainer.java49
-rw-r--r--node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRepositoryMaintenance.java21
-rw-r--r--node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/CapacityPolicies.java12
-rw-r--r--node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/GroupPreparer.java1
-rw-r--r--node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/HostProvisioner.java2
-rw-r--r--node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeAllocation.java1
-rw-r--r--node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeResourceLimits.java32
-rw-r--r--node-repository/src/main/java/com/yahoo/vespa/hosted/provision/testutils/ContainerConfig.java2
-rw-r--r--node-repository/src/main/java/com/yahoo/vespa/hosted/provision/testutils/MockDuperModel.java1
-rw-r--r--node-repository/src/main/java/com/yahoo/vespa/hosted/provision/testutils/MockNodeMetrics.java20
-rw-r--r--node-repository/src/test/java/com/yahoo/vespa/hosted/provision/NodeRepositoryTester.java2
-rw-r--r--node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTest.java147
-rw-r--r--node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTester.java248
-rw-r--r--node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/NodeMetricsDbTest.java33
-rw-r--r--node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/NodeMetricsFetcherTest.java147
-rw-r--r--node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/DynamicDockerProvisionTest.java20
-rw-r--r--node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/ProvisioningTester.java1
-rw-r--r--node-repository/src/test/java/com/yahoo/vespa/hosted/provision/restapi/v2/responses/maintenance.json6
53 files changed, 1659 insertions, 71 deletions
diff --git a/config-model/src/main/java/com/yahoo/searchdefinition/document/SDField.java b/config-model/src/main/java/com/yahoo/searchdefinition/document/SDField.java
index 4fe6c3a96f2..30ce142d503 100644
--- a/config-model/src/main/java/com/yahoo/searchdefinition/document/SDField.java
+++ b/config-model/src/main/java/com/yahoo/searchdefinition/document/SDField.java
@@ -91,7 +91,7 @@ public class SDField extends Field implements TypedKey, FieldOperationContainer,
private NormalizeLevel normalizing = new NormalizeLevel();
/** Extra query commands of this field */
- private List<String> queryCommands=new java.util.ArrayList<>(0);
+ private List<String> queryCommands = new java.util.ArrayList<>(0);
/** Summary fields defined in this field */
private Map<String, SummaryField> summaryFields = new java.util.LinkedHashMap<>(0);
@@ -749,20 +749,11 @@ public class SDField extends Field implements TypedKey, FieldOperationContainer,
return queryCommands.contains(name);
}
- /**
- * A list of query commands
- *
- * @return a list of strings with query commands.
- */
+ /** Returns a list of query commands */
@Override
- public List<String> getQueryCommands() {
- return queryCommands;
- }
+ public List<String> getQueryCommands() { return queryCommands; }
- /**
- * The document that this field was declared in, or null
- *
- */
+ /** Returns the document that this field was declared in, or null */
private SDDocumentType getOwnerDocType() {
return ownerDocType;
}
diff --git a/config-model/src/main/java/com/yahoo/searchdefinition/fieldoperation/IndexingRewriteOperation.java b/config-model/src/main/java/com/yahoo/searchdefinition/fieldoperation/IndexingRewriteOperation.java
index a0d47d7fa81..0a29fae04bf 100644
--- a/config-model/src/main/java/com/yahoo/searchdefinition/fieldoperation/IndexingRewriteOperation.java
+++ b/config-model/src/main/java/com/yahoo/searchdefinition/fieldoperation/IndexingRewriteOperation.java
@@ -4,9 +4,10 @@ package com.yahoo.searchdefinition.fieldoperation;
import com.yahoo.searchdefinition.document.SDField;
/**
- * @author <a href="mailto:einarmr@yahoo-inc.com">Einar M R Rosenvinge</a>
+ * @author Einar M R Rosenvinge
*/
public class IndexingRewriteOperation implements FieldOperation {
- public void apply(SDField field) {
- }
+
+ public void apply(SDField field) { }
+
}
diff --git a/config-model/src/main/java/com/yahoo/vespa/model/admin/monitoring/MetricsConsumer.java b/config-model/src/main/java/com/yahoo/vespa/model/admin/monitoring/MetricsConsumer.java
index 9c752f3aa0d..a8fbcf50b02 100644
--- a/config-model/src/main/java/com/yahoo/vespa/model/admin/monitoring/MetricsConsumer.java
+++ b/config-model/src/main/java/com/yahoo/vespa/model/admin/monitoring/MetricsConsumer.java
@@ -17,14 +17,15 @@ import static java.util.Collections.unmodifiableList;
*/
@Immutable
public class MetricsConsumer {
+
private final String id;
private final MetricSet metricSet;
private final List<CloudWatch> cloudWatches = new ArrayList<>();
/**
- * @param id The consumer
- * @param metricSet The metrics for this consumer
+ * @param id the consumer
+ * @param metricSet the metrics for this consumer
*/
public MetricsConsumer(String id, MetricSet metricSet) {
this.id = Objects.requireNonNull(id, "A consumer must have a non-null id.");;
diff --git a/config-model/src/main/java/com/yahoo/vespa/model/admin/monitoring/SystemMetrics.java b/config-model/src/main/java/com/yahoo/vespa/model/admin/monitoring/SystemMetrics.java
index 58b77ee1297..c05cad89852 100644
--- a/config-model/src/main/java/com/yahoo/vespa/model/admin/monitoring/SystemMetrics.java
+++ b/config-model/src/main/java/com/yahoo/vespa/model/admin/monitoring/SystemMetrics.java
@@ -9,6 +9,7 @@ import java.util.Set;
* @author gjoranv
*/
public class SystemMetrics {
+
public static final String CPU_UTIL = "cpu.util";
public static final String CPU_SYS_UTIL = "cpu.sys.util";
public static final String CPU_THROTTLED_TIME = "cpu.throttled_time.rate";
diff --git a/config-provisioning/abi-spec.json b/config-provisioning/abi-spec.json
index f2ae997a164..9a091f1161c 100644
--- a/config-provisioning/abi-spec.json
+++ b/config-provisioning/abi-spec.json
@@ -391,6 +391,7 @@
"methods": [
"public void <init>(com.yahoo.config.provisioning.FlavorsConfig$Flavor)",
"public void <init>(com.yahoo.config.provision.NodeResources)",
+ "public void <init>(java.lang.String, com.yahoo.config.provision.NodeResources)",
"public com.yahoo.config.provision.Flavor with(com.yahoo.config.provision.host.FlavorOverrides)",
"public com.yahoo.config.provision.Flavor with(com.yahoo.config.provision.NodeResources)",
"public java.lang.String name()",
diff --git a/config-provisioning/src/main/java/com/yahoo/config/provision/ClusterSpec.java b/config-provisioning/src/main/java/com/yahoo/config/provision/ClusterSpec.java
index 16369d82f9f..5aed5d8e2e7 100644
--- a/config-provisioning/src/main/java/com/yahoo/config/provision/ClusterSpec.java
+++ b/config-provisioning/src/main/java/com/yahoo/config/provision/ClusterSpec.java
@@ -158,7 +158,6 @@ public final class ClusterSpec {
}
/** Identifier of a group within a cluster */
- @SuppressWarnings("deprecation")
public static final class Group {
private final int index;
diff --git a/config-provisioning/src/main/java/com/yahoo/config/provision/Flavor.java b/config-provisioning/src/main/java/com/yahoo/config/provision/Flavor.java
index 2711406c216..d11d7137226 100644
--- a/config-provisioning/src/main/java/com/yahoo/config/provision/Flavor.java
+++ b/config-provisioning/src/main/java/com/yahoo/config/provision/Flavor.java
@@ -49,6 +49,11 @@ public class Flavor {
this(resources.toString(), resources, Optional.empty(), Type.DOCKER_CONTAINER, false, 0, resources.vcpu());
}
+ /** Creates a *host* flavor for testing */
+ public Flavor(String name, NodeResources resources) {
+ this(name, resources, Optional.empty(), Flavor.Type.VIRTUAL_MACHINE, true, 0, resources.vcpu());
+ }
+
private Flavor(String name,
NodeResources resources,
Optional<FlavorOverrides> flavorOverrides,
diff --git a/config-provisioning/src/main/java/com/yahoo/config/provision/HostName.java b/config-provisioning/src/main/java/com/yahoo/config/provision/HostName.java
index 510122c2342..25c42884295 100644
--- a/config-provisioning/src/main/java/com/yahoo/config/provision/HostName.java
+++ b/config-provisioning/src/main/java/com/yahoo/config/provision/HostName.java
@@ -4,7 +4,7 @@ package com.yahoo.config.provision;
import java.util.Objects;
/**
- * Represents a host name
+ * A host name
*
* @author mortent
*/
@@ -18,12 +18,7 @@ public class HostName implements Comparable<HostName> {
public String value() { return name; }
- /**
- * Create a {@link HostName} with a given name.
- *
- * @param name Name
- * @return instance of {@link HostName}.
- */
+ /** Create a {@link HostName} with a given name */
public static HostName from(String name) {
return new HostName(name);
}
diff --git a/config-provisioning/src/main/java/com/yahoo/config/provision/NodeFlavors.java b/config-provisioning/src/main/java/com/yahoo/config/provision/NodeFlavors.java
index a9f031cae70..eb462c86f4f 100644
--- a/config-provisioning/src/main/java/com/yahoo/config/provision/NodeFlavors.java
+++ b/config-provisioning/src/main/java/com/yahoo/config/provision/NodeFlavors.java
@@ -14,7 +14,7 @@ import java.util.Optional;
import java.util.stream.Collectors;
/**
- * All the flavors *configured* in this zone (i.e this should be called HostFlavors).
+ * All the flavors configured in this zone (i.e. this should be called HostFlavors).
*
* @author bratseth
*/
diff --git a/container-search/src/main/java/com/yahoo/prelude/IndexFacts.java b/container-search/src/main/java/com/yahoo/prelude/IndexFacts.java
index 76eef33d6c0..aa3d6a2c0f8 100644
--- a/container-search/src/main/java/com/yahoo/prelude/IndexFacts.java
+++ b/container-search/src/main/java/com/yahoo/prelude/IndexFacts.java
@@ -48,7 +48,6 @@ public class IndexFacts {
static final String unionName = "unionOfAllKnown";
/** A search definition which contains the union of all settings. */
- @SuppressWarnings("deprecation")
private SearchDefinition unionSearchDefinition = new SearchDefinition(unionName);
private boolean frozen;
diff --git a/container-search/src/main/java/com/yahoo/search/Result.java b/container-search/src/main/java/com/yahoo/search/Result.java
index 4080b09f40b..ab48d5797b2 100644
--- a/container-search/src/main/java/com/yahoo/search/Result.java
+++ b/container-search/src/main/java/com/yahoo/search/Result.java
@@ -89,7 +89,6 @@ public final class Result extends com.yahoo.processing.Response implements Clone
* with a result. It should <b>always</b> be called when adding
* hits from a result, but there is no constraints on the order of the calls.
*/
- @SuppressWarnings("deprecation")
public void mergeWith(Result result) {
totalHitCount += result.getTotalHitCount();
deepHitCount += result.getDeepHitCount();
diff --git a/container-search/src/main/java/com/yahoo/search/federation/FederationResult.java b/container-search/src/main/java/com/yahoo/search/federation/FederationResult.java
index 5f1cfccf549..6243dc694c2 100644
--- a/container-search/src/main/java/com/yahoo/search/federation/FederationResult.java
+++ b/container-search/src/main/java/com/yahoo/search/federation/FederationResult.java
@@ -39,8 +39,8 @@ class FederationResult {
}
/**
- * Wait on each target for that targets timeout
- * On the worst case this is the same as waiting for the max target timeout,
+     * Wait on each target for that target's timeout.
+ * In the worst case this is the same as waiting for the max target timeout,
* in the average case it may be much better because lower timeout sources do not get to
* drive the timeout above their own timeout value.
* When this completes, results can be accessed from the TargetResults with no blocking
diff --git a/container-search/src/main/java/com/yahoo/search/federation/FederationSearcher.java b/container-search/src/main/java/com/yahoo/search/federation/FederationSearcher.java
index 421544b5b49..60c5d42c531 100644
--- a/container-search/src/main/java/com/yahoo/search/federation/FederationSearcher.java
+++ b/container-search/src/main/java/com/yahoo/search/federation/FederationSearcher.java
@@ -354,7 +354,7 @@ public class FederationSearcher extends ForkingSearcher {
}
private void warnIfUnresolvedSearchChains(List<UnresolvedSearchChainException> missingTargets,
- HitGroup errorHitGroup) {
+ HitGroup errorHitGroup) {
if (!missingTargets.isEmpty()) {
errorHitGroup.addError(missingSearchChainsErrorMessage(missingTargets));
}
@@ -492,9 +492,9 @@ public class FederationSearcher extends ForkingSearcher {
* TODO This is probably a dirty hack for bug 4711376. There are probably better ways.
* But I will leave that to trd-processing@
*
- * @param group The merging hitgroup to be updated if necessary
- * @param orderer The per provider hit orderer.
- * @return The hitorderer chosen
+ * @param group the merging hitgroup to be updated if necessary
+ * @param orderer the per provider hit orderer
+     * @return the hitorderer chosen
*/
private HitOrderer dirtyCopyIfModifiedOrderer(HitGroup group, HitOrderer orderer) {
if (orderer != null) {
diff --git a/defaults/src/main/java/com/yahoo/vespa/defaults/Defaults.java b/defaults/src/main/java/com/yahoo/vespa/defaults/Defaults.java
index 6fb6e4f0860..0565b1cff09 100644
--- a/defaults/src/main/java/com/yahoo/vespa/defaults/Defaults.java
+++ b/defaults/src/main/java/com/yahoo/vespa/defaults/Defaults.java
@@ -39,6 +39,7 @@ public class Defaults {
vespaPortConfigServerHttp = vespaPortConfigServerRpc + 1;
vespaPortConfigProxyRpc = findConfigProxyPort(vespaPortBase + 90);
}
+
static private String findVespaHome(String defHome) {
Optional<String> vespaHomeEnv = Optional.ofNullable(System.getenv("VESPA_HOME"));
if ( ! vespaHomeEnv.isPresent() || vespaHomeEnv.get().trim().isEmpty()) {
diff --git a/metrics-proxy/src/main/java/ai/vespa/metricsproxy/core/VespaMetrics.java b/metrics-proxy/src/main/java/ai/vespa/metricsproxy/core/VespaMetrics.java
index c9d7618b9d7..c04dca465a1 100644
--- a/metrics-proxy/src/main/java/ai/vespa/metricsproxy/core/VespaMetrics.java
+++ b/metrics-proxy/src/main/java/ai/vespa/metricsproxy/core/VespaMetrics.java
@@ -35,6 +35,7 @@ import static com.google.common.base.Strings.isNullOrEmpty;
* @author gjoranv
*/
public class VespaMetrics {
+
private static final Logger log = Logger.getLogger(VespaMetrics.class.getPackage().getName());
public static final ConsumerId VESPA_CONSUMER_ID = toConsumerId("Vespa");
diff --git a/metrics-proxy/src/main/java/ai/vespa/metricsproxy/http/ValuesFetcher.java b/metrics-proxy/src/main/java/ai/vespa/metricsproxy/http/ValuesFetcher.java
index ae0ef2fa57a..51bdae1aab3 100644
--- a/metrics-proxy/src/main/java/ai/vespa/metricsproxy/http/ValuesFetcher.java
+++ b/metrics-proxy/src/main/java/ai/vespa/metricsproxy/http/ValuesFetcher.java
@@ -21,6 +21,7 @@ import static ai.vespa.metricsproxy.metric.model.ConsumerId.toConsumerId;
* @author gjoranv
*/
public class ValuesFetcher {
+
private static final Logger log = Logger.getLogger(ValuesFetcher.class.getName());
public static final ConsumerId DEFAULT_PUBLIC_CONSUMER_ID = toConsumerId("default");
diff --git a/metrics-proxy/src/main/java/ai/vespa/metricsproxy/http/application/ApplicationMetricsRetriever.java b/metrics-proxy/src/main/java/ai/vespa/metricsproxy/http/application/ApplicationMetricsRetriever.java
index c51970ce3ae..9ddd7885fcb 100644
--- a/metrics-proxy/src/main/java/ai/vespa/metricsproxy/http/application/ApplicationMetricsRetriever.java
+++ b/metrics-proxy/src/main/java/ai/vespa/metricsproxy/http/application/ApplicationMetricsRetriever.java
@@ -33,6 +33,7 @@ import static java.util.stream.Collectors.toMap;
* @author gjoranv
*/
public class ApplicationMetricsRetriever extends AbstractComponent {
+
private static final Logger log = Logger.getLogger(ApplicationMetricsRetriever.class.getName());
private static final int PARALLELISM = 20;
diff --git a/metrics-proxy/src/main/java/ai/vespa/metricsproxy/http/application/NodeMetricsClient.java b/metrics-proxy/src/main/java/ai/vespa/metricsproxy/http/application/NodeMetricsClient.java
index f2ee326029a..01cf6b19836 100644
--- a/metrics-proxy/src/main/java/ai/vespa/metricsproxy/http/application/NodeMetricsClient.java
+++ b/metrics-proxy/src/main/java/ai/vespa/metricsproxy/http/application/NodeMetricsClient.java
@@ -32,6 +32,7 @@ import static java.util.Collections.emptyList;
* @author gjoranv
*/
public class NodeMetricsClient {
+
private static final Logger log = Logger.getLogger(NodeMetricsClient.class.getName());
static final Duration METRICS_TTL = Duration.ofSeconds(30);
@@ -80,7 +81,6 @@ public class NodeMetricsClient {
return snapshotsRetrieved;
}
-
/**
* Convenience class for storing a metrics snapshot with its timestamp.
*/
diff --git a/metrics-proxy/src/main/java/ai/vespa/metricsproxy/metric/model/ConsumerId.java b/metrics-proxy/src/main/java/ai/vespa/metricsproxy/metric/model/ConsumerId.java
index 62de9649bb0..795d1005b10 100644
--- a/metrics-proxy/src/main/java/ai/vespa/metricsproxy/metric/model/ConsumerId.java
+++ b/metrics-proxy/src/main/java/ai/vespa/metricsproxy/metric/model/ConsumerId.java
@@ -7,6 +7,7 @@ import java.util.Objects;
* @author gjoranv
*/
public class ConsumerId {
+
public final String id;
private ConsumerId(String id) { this.id = id; }
diff --git a/metrics-proxy/src/main/java/ai/vespa/metricsproxy/service/CpuJiffies.java b/metrics-proxy/src/main/java/ai/vespa/metricsproxy/service/CpuJiffies.java
index 481068f0df2..e07a67770bc 100644
--- a/metrics-proxy/src/main/java/ai/vespa/metricsproxy/service/CpuJiffies.java
+++ b/metrics-proxy/src/main/java/ai/vespa/metricsproxy/service/CpuJiffies.java
@@ -2,6 +2,7 @@
package ai.vespa.metricsproxy.service;
class CpuJiffies {
+
private int cpuId;
private long jiffies;
@@ -37,4 +38,5 @@ class CpuJiffies {
public long getTotalJiffies() {
return jiffies;
}
+
}
diff --git a/metrics-proxy/src/main/java/ai/vespa/metricsproxy/service/HttpMetricFetcher.java b/metrics-proxy/src/main/java/ai/vespa/metricsproxy/service/HttpMetricFetcher.java
index 922a2a15ffd..9068be81b65 100644
--- a/metrics-proxy/src/main/java/ai/vespa/metricsproxy/service/HttpMetricFetcher.java
+++ b/metrics-proxy/src/main/java/ai/vespa/metricsproxy/service/HttpMetricFetcher.java
@@ -20,6 +20,7 @@ import java.util.logging.Logger;
* @author bjorncs
*/
public abstract class HttpMetricFetcher {
+
private final static Logger log = Logger.getLogger(HttpMetricFetcher.class.getPackage().getName());
public final static String STATE_PATH = "/state/v1/";
// The call to apache will do 3 retries. As long as we check the services in series, we can't have this too high.
@@ -31,8 +32,8 @@ public abstract class HttpMetricFetcher {
/**
- * @param service The service to fetch metrics from
- * @param port The port to use
+ * @param service the service to fetch metrics from
+ * @param port the port to use
*/
HttpMetricFetcher(VespaService service, int port, String path) {
this.service = service;
@@ -86,4 +87,5 @@ public abstract class HttpMetricFetcher {
.build())
.build();
}
+
}
diff --git a/metrics-proxy/src/main/java/ai/vespa/metricsproxy/service/SystemPoller.java b/metrics-proxy/src/main/java/ai/vespa/metricsproxy/service/SystemPoller.java
index 379e5296bb8..c8fbc83eb59 100644
--- a/metrics-proxy/src/main/java/ai/vespa/metricsproxy/service/SystemPoller.java
+++ b/metrics-proxy/src/main/java/ai/vespa/metricsproxy/service/SystemPoller.java
@@ -24,6 +24,7 @@ import java.util.logging.Logger;
* @author Eirik Nygaard
*/
public class SystemPoller {
+
final private static Logger log = Logger.getLogger(SystemPoller.class.getName());
private final int pollingIntervalSecs;
diff --git a/node-repository/src/main/config/node-repository.xml b/node-repository/src/main/config/node-repository.xml
index 274be6d572a..186f052a274 100644
--- a/node-repository/src/main/config/node-repository.xml
+++ b/node-repository/src/main/config/node-repository.xml
@@ -1,6 +1,8 @@
<!-- services.xml snippet for the node repository. Included in config server services.xml if the package is installed-->
<!-- Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -->
<component id="com.yahoo.vespa.hosted.provision.provisioning.InfraDeployerImpl" bundle="node-repository"/>
+<component id="com.yahoo.vespa.hosted.provision.autoscale.NodeMetricsFetcher" bundle="node-repository"/>
+<component id="com.yahoo.vespa.hosted.provision.autoscale.NodeMetricsDb" bundle="node-repository"/>
<component id="com.yahoo.vespa.hosted.provision.provisioning.NodeRepositoryProvisioner" bundle="node-repository" />
<component id="NodeRepository" class="com.yahoo.vespa.hosted.provision.NodeRepository" bundle="node-repository"/>
<component id="com.yahoo.vespa.hosted.provision.maintenance.NodeRepositoryMaintenance" bundle="node-repository"/>
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/Node.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/Node.java
index efb2a71264a..f881f888752 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/Node.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/Node.java
@@ -83,7 +83,7 @@ public final class Node {
this.reservedTo = Objects.requireNonNull(reservedTo, "reservedTo cannot be null");
if (state == State.active)
- requireNonEmpty(ipConfig.primary(), "An active node must have at least one valid IP address");
+ requireNonEmpty(ipConfig.primary(), "Active node " + hostname + " must have at least one valid IP address");
if (parentHostname.isPresent()) {
if (!ipConfig.pool().asSet().isEmpty()) throw new IllegalArgumentException("A child node cannot have an IP address pool");
@@ -375,8 +375,6 @@ public final class Node {
.deviation();
}
-
-
@Override
public boolean equals(Object o) {
if (this == o) return true;
@@ -436,6 +434,7 @@ public final class Node {
public static Set<State> allocatedStates() {
return Set.of(reserved, active, inactive, failed, parked);
}
+
}
/** The mean and mean deviation (squared difference) of a bunch of numbers */
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/Autoscaler.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/Autoscaler.java
new file mode 100644
index 00000000000..71f7dc3701e
--- /dev/null
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/Autoscaler.java
@@ -0,0 +1,185 @@
+// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.hosted.provision.autoscale;
+
+import com.yahoo.config.provision.ApplicationId;
+import com.yahoo.config.provision.CloudName;
+import com.yahoo.config.provision.ClusterSpec;
+import com.yahoo.config.provision.Flavor;
+import com.yahoo.config.provision.NodeResources;
+import com.yahoo.vespa.hosted.provision.Node;
+import com.yahoo.vespa.hosted.provision.NodeRepository;
+import com.yahoo.vespa.hosted.provision.provisioning.HostResourcesCalculator;
+import com.yahoo.vespa.hosted.provision.provisioning.NodeResourceLimits;
+
+import java.time.Duration;
+import java.util.List;
+import java.util.Optional;
+import java.util.stream.Collectors;
+
+/**
+ * The autoscaler makes decisions about the flavor and node count that should be allocated to a cluster
+ * based on observed behavior.
+ *
+ * @author bratseth
+ */
+public class Autoscaler {
+
+    /*
+     TODO:
+     - Scale group size
+     - Have a better idea about whether we have sufficient information to make decisions
+     - Consider taking spikes/variance into account
+     - Take observed regulation lag (startup+redistribution) into account when deciding regulation observation window
+     - Test AutoscalingMaintainer
+     - Scale by performance not just load+cost
+     */
+
+    /** The minimum number of measurements a window must contain before we make decisions from it */
+    private static final int minimumMeasurements = 500; // TODO: Per node instead? Also say something about interval?
+
+    /** What cost difference factor warrants reallocation? */
+    private static final double costDifferenceRatioWorthReallocation = 0.1;
+    /** What difference factor from ideal (for any resource) warrants a change? */
+    private static final double idealDivergenceWorthReallocation = 0.1;
+
+    // We only depend on the ratios between these values
+    private static final double cpuUnitCost = 12.0;
+    private static final double memoryUnitCost = 1.2;
+    private static final double diskUnitCost = 0.045;
+
+    private final HostResourcesCalculator hostResourcesCalculator;
+    private final NodeMetricsDb metricsDb;
+    private final NodeRepository nodeRepository;
+    private final NodeResourceLimits nodeResourceLimits;
+
+    /**
+     * Creates an autoscaler.
+     *
+     * @param hostResourcesCalculator used to find the resources to charge for when costing a flavor
+     * @param metricsDb the source of load measurements
+     * @param nodeRepository provides the zone, the clock and the available flavors
+     */
+    public Autoscaler(HostResourcesCalculator hostResourcesCalculator,
+                      NodeMetricsDb metricsDb,
+                      NodeRepository nodeRepository) {
+        this.hostResourcesCalculator = hostResourcesCalculator;
+        this.metricsDb = metricsDb;
+        this.nodeRepository = nodeRepository;
+        this.nodeResourceLimits = new NodeResourceLimits(nodeRepository.zone());
+    }
+
+    /**
+     * Suggests new resources for a cluster based on the load measured on its nodes.
+     *
+     * @param applicationId the application owning the cluster (currently not used in the decision)
+     * @param cluster the cluster to consider
+     * @param clusterNodes the nodes of the cluster; each node is expected to have an allocation
+     * @return the suggested resources, or empty if the cluster is in flux, the measurements are insufficient,
+     *         no allocatable alternative is found, or the change would be too small to be worthwhile
+     */
+    public Optional<ClusterResources> autoscale(ApplicationId applicationId, ClusterSpec cluster, List<Node> clusterNodes) {
+        if (clusterNodes.stream().anyMatch(node -> node.status().wantToRetire() ||
+                                                   node.allocation().get().membership().retired() ||
+                                                   node.allocation().get().isRemovable()))
+            return Optional.empty(); // Don't autoscale clusters that are in flux
+        ClusterResources currentAllocation = new ClusterResources(clusterNodes);
+        Optional<Double> cpuLoad    = averageLoad(Resource.cpu, cluster, clusterNodes);
+        Optional<Double> memoryLoad = averageLoad(Resource.memory, cluster, clusterNodes);
+        Optional<Double> diskLoad   = averageLoad(Resource.disk, cluster, clusterNodes);
+        if (cpuLoad.isEmpty() || memoryLoad.isEmpty() || diskLoad.isEmpty()) return Optional.empty();
+
+        Optional<ClusterResourcesWithCost> bestAllocation = findBestAllocation(cpuLoad.get(),
+                                                                               memoryLoad.get(),
+                                                                               diskLoad.get(),
+                                                                               currentAllocation,
+                                                                               cluster);
+        if (bestAllocation.isEmpty()) return Optional.empty();
+
+        if (closeToIdeal(Resource.cpu, cpuLoad.get()) &&
+            closeToIdeal(Resource.memory, memoryLoad.get()) &&
+            closeToIdeal(Resource.disk, diskLoad.get()) &&
+            similarCost(bestAllocation.get().cost(), currentAllocation.nodes() * costOf(currentAllocation.nodeResources())))
+            return Optional.empty(); // Avoid small, unnecessary changes
+        return bestAllocation.map(ClusterResourcesWithCost::clusterResources);
+    }
+
+    /**
+     * Returns the lowest cost allocatable resources among the candidates produced by a ResourceIterator
+     * for the given loads and current allocation, or empty if no candidate can be allocated.
+     */
+    private Optional<ClusterResourcesWithCost> findBestAllocation(double cpuLoad, double memoryLoad, double diskLoad,
+                                                                  ClusterResources currentAllocation, ClusterSpec cluster) {
+        Optional<ClusterResourcesWithCost> bestAllocation = Optional.empty();
+        for (ResourceIterator i = new ResourceIterator(cpuLoad, memoryLoad, diskLoad, currentAllocation); i.hasNext(); ) {
+            ClusterResources allocation = i.next();
+            Optional<ClusterResourcesWithCost> allocatableResources = toAllocatableResources(allocation, cluster);
+            if (allocatableResources.isEmpty()) continue;
+            if (bestAllocation.isEmpty() || allocatableResources.get().cost() < bestAllocation.get().cost())
+                bestAllocation = allocatableResources;
+        }
+        return bestAllocation;
+    }
+
+    /** Returns whether the two costs differ by less than the ratio which warrants reallocation */
+    private boolean similarCost(double cost1, double cost2) {
+        return similar(cost1, cost2, costDifferenceRatioWorthReallocation);
+    }
+
+    /** Returns whether the given load is within the accepted divergence from the ideal average load of the resource */
+    private boolean closeToIdeal(Resource resource, double value) {
+        return similar(resource.idealAverageLoad(), value, idealDivergenceWorthReallocation);
+    }
+
+    /** Returns whether the difference between r1 and r2, relative to r1, is below the threshold */
+    private boolean similar(double r1, double r2, double threshold) {
+        return Math.abs(r1 - r2) / r1 < threshold;
+    }
+
+    /**
+     * Returns the smallest allocatable node resources larger than the given node resources,
+     * or empty if none available.
+     */
+    private Optional<ClusterResourcesWithCost> toAllocatableResources(ClusterResources resources, ClusterSpec cluster) {
+        if (allowsHostSharing(nodeRepository.zone().cloud())) {
+            // Return the requested resources, adjusted to be legal or empty if they cannot fit on existing hosts
+            NodeResources nodeResources = nodeResourceLimits.enlargeToLegal(resources.nodeResources(), cluster.type());
+            for (Flavor flavor : nodeRepository.getAvailableFlavors().getFlavors())
+                if (flavor.resources().satisfies(nodeResources))
+                    return Optional.of(new ClusterResourcesWithCost(resources.with(nodeResources),
+                                                                    costOf(nodeResources) * resources.nodes()));
+            return Optional.empty();
+        }
+        else {
+            // return the cheapest flavor satisfying the target resources, if any
+            double bestCost = Double.MAX_VALUE;
+            Optional<Flavor> bestFlavor = Optional.empty();
+            for (Flavor flavor : nodeRepository.getAvailableFlavors().getFlavors()) {
+                if ( ! flavor.resources().satisfies(resources.nodeResources())) continue;
+                // Compare and record the same cost measure, so that the chosen flavor really is the cheapest
+                double flavorCost = costOf(flavor);
+                if (bestFlavor.isEmpty() || flavorCost < bestCost) {
+                    bestFlavor = Optional.of(flavor);
+                    bestCost = flavorCost;
+                }
+            }
+            if (bestFlavor.isEmpty())
+                return Optional.empty();
+            else
+                return Optional.of(new ClusterResourcesWithCost(resources.with(bestFlavor.get().resources()),
+                                                                bestCost * resources.nodes()));
+        }
+    }
+
+    /**
+     * Returns the average load of this resource in the measurement window,
+     * or empty if we are not in a position to make decisions from these measurements at this time.
+     */
+    private Optional<Double> averageLoad(Resource resource, ClusterSpec cluster, List<Node> clusterNodes) {
+        NodeMetricsDb.Window window = metricsDb.getWindow(nodeRepository.clock().instant().minus(scalingWindow(cluster.type())),
+                                                          resource,
+                                                          clusterNodes.stream().map(Node::hostname).collect(Collectors.toList()));
+
+        if (window.measurementCount() < minimumMeasurements) return Optional.empty();
+        if (window.hostnames() != clusterNodes.size()) return Optional.empty(); // Regulate only when all nodes are measured
+
+        return Optional.of(window.average());
+    }
+
+    /** The duration of the window we need to consider to make a scaling decision */
+    private Duration scalingWindow(ClusterSpec.Type clusterType) {
+        if (clusterType.isContent()) return Duration.ofHours(12); // Ideally we should use observed redistribution time
+        return Duration.ofHours(12); // TODO: Measure much more often to get this down to minutes. And, ideally we should take node startup time into account
+    }
+
+    /** Returns whether nodes may share hosts in the given cloud: true for every cloud except "aws" */
+    // TODO: Put this in zone config instead?
+    private boolean allowsHostSharing(CloudName cloudName) {
+        return ! cloudName.value().equals("aws");
+    }
+
+    /** Returns the cost of the resources the host resources calculator reports as the available capacity of this flavor */
+    private double costOf(Flavor flavor) {
+        NodeResources chargedResources = hostResourcesCalculator.availableCapacityOf(flavor.name(), flavor.resources());
+        return costOf(chargedResources);
+    }
+
+    /** Returns the cost of these resources: the sum of vcpu, memory and disk weighted by their unit costs */
+    private double costOf(NodeResources resources) {
+        return resources.vcpu() * cpuUnitCost +
+               resources.memoryGb() * memoryUnitCost +
+               resources.diskGb() * diskUnitCost;
+    }
+
+}
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterResources.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterResources.java
new file mode 100644
index 00000000000..e068b4404d8
--- /dev/null
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterResources.java
@@ -0,0 +1,65 @@
+// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.hosted.provision.autoscale;
+
+import com.yahoo.config.provision.NodeResources;
+import com.yahoo.vespa.hosted.provision.Node;
+
+import java.util.List;
+import java.util.Objects;
+
+/**
+ * A description of the resources of a cluster: The number of nodes, the number of groups
+ * the nodes are divided into, and the resources of each node.
+ * Instances are immutable and compared by value.
+ */
+public class ClusterResources {
+
+    /** The node count in the cluster */
+    private final int nodes;
+
+    /** The number of node groups in the cluster */
+    private final int groups;
+
+    /** The resources of each node in the cluster */
+    private final NodeResources nodeResources;
+
+    /**
+     * Creates a description of the resources of the given nodes.
+     * The group count is the number of distinct groups among the cluster memberships of the nodes,
+     * and the node resources are taken from the flavor of the first node.
+     *
+     * @param nodes the nodes of the cluster. This must contain at least one node, and all nodes must be allocated
+     */
+    public ClusterResources(List<Node> nodes) {
+        this(nodes.size(),
+             (int)nodes.stream().map(node -> node.allocation().get().membership().cluster().group()).distinct().count(),
+             nodes.get(0).flavor().resources());
+    }
+
+    public ClusterResources(int nodes, int groups, NodeResources nodeResources) {
+        this.nodes = nodes;
+        this.groups = groups;
+        this.nodeResources = nodeResources;
+    }
+
+    /** Returns the total number of allocated nodes (over all groups) */
+    public int nodes() { return nodes; }
+
+    /** Returns the number of node groups in the cluster */
+    public int groups() { return groups; }
+
+    /** Returns the resources of each node in the cluster */
+    public NodeResources nodeResources() { return nodeResources; }
+
+    /** Returns a copy of this with the same node and group counts but the given node resources */
+    public ClusterResources with(NodeResources resources) {
+        return new ClusterResources(nodes, groups, resources);
+    }
+
+    @Override
+    public boolean equals(Object o) {
+        if (o == this) return true;
+        if ( ! (o instanceof ClusterResources)) return false;
+
+        ClusterResources other = (ClusterResources)o;
+        if (other.nodes != this.nodes) return false;
+        if (other.groups != this.groups) return false;
+        // Compare node resources by value, not identity, to be consistent with hashCode
+        if ( ! Objects.equals(other.nodeResources, this.nodeResources)) return false;
+        return true;
+    }
+
+    @Override
+    public int hashCode() {
+        return Objects.hash(nodes, groups, nodeResources);
+    }
+
+    @Override
+    public String toString() {
+        return "cluster resources: " + nodes + " * " + nodeResources + (groups > 1 ? " in " + groups + " groups" : "");
+    }
+
+}
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterResourcesWithCost.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterResourcesWithCost.java
new file mode 100644
index 00000000000..55b28ef3ce1
--- /dev/null
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterResourcesWithCost.java
@@ -0,0 +1,26 @@
+// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.hosted.provision.autoscale;
+
+/**
+ * A pairing of cluster resources with their cost.
+ *
+ * @author bratseth
+ */
+public class ClusterResourcesWithCost {
+
+    private final ClusterResources clusterResources;
+    private final double cost;
+
+    public ClusterResourcesWithCost(ClusterResources resources, double cost) {
+        this.clusterResources = resources;
+        this.cost = cost;
+    }
+
+    /** Returns the cluster resources the cost applies to */
+    public ClusterResources clusterResources() {
+        return clusterResources;
+    }
+
+    /** Returns the cost given at construction */
+    public double cost() {
+        return cost;
+    }
+
+    @Override
+    public String toString() {
+        StringBuilder description = new StringBuilder("$");
+        description.append(cost).append(": ").append(clusterResources());
+        return description.toString();
+    }
+
+}
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/MetricsResponse.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/MetricsResponse.java
new file mode 100644
index 00000000000..a599606c314
--- /dev/null
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/MetricsResponse.java
@@ -0,0 +1,74 @@
+// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.hosted.provision.autoscale;
+
+import com.yahoo.slime.ArrayTraverser;
+import com.yahoo.slime.Inspector;
+import com.yahoo.slime.ObjectTraverser;
+import com.yahoo.slime.Slime;
+import com.yahoo.slime.SlimeUtils;
+
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+/**
+ * Parses a response from the metrics/v2 API and collects the node level metric values
+ * which correspond to a {@link Resource}.
+ *
+ * @author bratseth
+ */
+public class MetricsResponse {
+
+    private final List<NodeMetrics.MetricValue> metricValues = new ArrayList<>();
+
+    public MetricsResponse(byte[] response) {
+        this(SlimeUtils.jsonToSlime(response));
+    }
+
+    public MetricsResponse(String response) {
+        this(SlimeUtils.jsonToSlime(response));
+    }
+
+    private MetricsResponse(Slime response) {
+        response.get().field("nodes").traverse((ArrayTraverser)(index, node) -> consumeNode(node));
+    }
+
+    /** Returns the metric values collected from the response */
+    public List<NodeMetrics.MetricValue> metrics() { return metricValues; }
+
+    /** Consumes one entry of the "nodes" array */
+    private void consumeNode(Inspector node) {
+        String hostname = node.field("hostname").asString();
+        consumeNodeMetrics(hostname, node.field("node"));
+        consumeServiceMetrics(hostname, node.field("services"));
+    }
+
+    /** Adds a metric value for each resource whose metric name is present among the metrics of this node */
+    private void consumeNodeMetrics(String hostname, Inspector node) {
+        Map<String, Double> valuesByName = consumeMetrics(node.field("metrics"));
+        // NOTE(review): The timestamp is passed on unconverted, while NodeMetricsDb documents measurement
+        // timestamps as epoch millis - confirm which unit the metrics API uses
+        long timestamp = node.field("timestamp").asLong();
+        for (Resource resource : Resource.values()) {
+            String metricName = resource.metricName();
+            Double value = valuesByName.get(metricName);
+            if (value != null)
+                metricValues.add(new NodeMetrics.MetricValue(hostname, metricName, timestamp, value.floatValue()));
+        }
+    }
+
+    /** Reads the service metrics fields. Nothing is recorded from them at present */
+    private void consumeServiceMetrics(String hostname, Inspector node) {
+        node.field("name").asString();
+        node.field("timestamp").asLong();
+        consumeMetrics(node.field("metrics"));
+    }
+
+    /** Returns the name to value entries of the "values" object of every item in the given metrics array */
+    private Map<String, Double> consumeMetrics(Inspector metrics) {
+        Map<String, Double> valuesByName = new HashMap<>();
+        metrics.traverse((ArrayTraverser)(index, item) ->
+                item.field("values").traverse((ObjectTraverser)(name, value) -> valuesByName.put(name, value.asDouble())));
+        return valuesByName;
+    }
+
+}
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/NodeMetrics.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/NodeMetrics.java
new file mode 100644
index 00000000000..97ac1e72be9
--- /dev/null
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/NodeMetrics.java
@@ -0,0 +1,48 @@
+// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.hosted.provision.autoscale;
+
+import com.yahoo.config.provision.ApplicationId;
+
+import java.util.Collection;
+
+/**
+ * Interface to retrieve metrics on (tenant) nodes.
+ *
+ * @author bratseth
+ */
+public interface NodeMetrics {
+
+    /**
+     * Fetches metrics for an application. This call may be expensive.
+     *
+     * @param application the application to fetch metrics from
+     * @return the metric values fetched for the application
+     */
+    Collection<MetricValue> fetchMetrics(ApplicationId application);
+
+    /** An immutable value of a named metric, measured on a host at a point in time */
+    final class MetricValue {
+
+        private final String hostname;
+        private final String name;
+        private final long timestamp; // final like the other fields: This is never reassigned
+        private final float value;
+
+        public MetricValue(String hostname, String name, long timestamp, float value) {
+            this.hostname = hostname;
+            this.name = name;
+            this.timestamp = timestamp;
+            this.value = value;
+        }
+
+        /** Returns the hostname this was measured on */
+        public String hostname() { return hostname; }
+
+        /** Returns the name of the metric */
+        public String name() { return name; }
+
+        /** Returns the timestamp this was given at construction */
+        public long timestamp() { return timestamp; }
+
+        /** Returns the measured value */
+        public float value() { return value; }
+
+        @Override
+        public String toString() {
+            return "metric value " + name + ": " + value + " at " + timestamp + " for " + hostname;
+        }
+
+    }
+
+}
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/NodeMetricsDb.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/NodeMetricsDb.java
new file mode 100644
index 00000000000..14a35e3efbc
--- /dev/null
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/NodeMetricsDb.java
@@ -0,0 +1,169 @@
+// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.hosted.provision.autoscale;
+
+import java.time.Clock;
+import java.time.Duration;
+import java.time.Instant;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.HashMap;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Map;
+import java.util.Objects;
+import java.util.stream.Collectors;
+import java.util.stream.Stream;
+
+/**
+ * An in-memory time-series "database" of node metrics.
+ * Thread model: One writer, many readers.
+ *
+ * @author bratseth
+ */
+public class NodeMetricsDb {
+
+    /** Measurements older than this are removed by gc */
+    private static final Duration dbWindow = Duration.ofHours(24);
+
+    /**
+     * Measurements by key. Each list of measurements is sorted by increasing timestamp.
+     * Note that add appends in arrival order, so this relies on values arriving in timestamp order for each key.
+     */
+    private final Map<MeasurementKey, List<Measurement>> db = new HashMap<>();
+
+    /** Lock all access for now since we modify lists inside a map */
+    private final Object lock = new Object();
+
+    /**
+     * Adds measurements to this
+     *
+     * @param metricValues the values to add
+     * @throws IllegalArgumentException if a metric value has a name which does not map to a resource
+     */
+    public void add(Collection<NodeMetrics.MetricValue> metricValues) {
+        synchronized (lock) {
+            for (var value : metricValues) {
+                List<Measurement> measurements = db.computeIfAbsent(new MeasurementKey(value.hostname(),
+                                                                                       Resource.fromMetric(value.name())),
+                                                                    (__) -> new ArrayList<>());
+                measurements.add(new Measurement(value.timestamp(), value.value()));
+            }
+        }
+    }
+
+    /** Must be called intermittently (as long as add is called) to gc old measurements */
+    public void gc(Clock clock) {
+        synchronized (lock) {
+            // TODO: We may need to do something more complicated to avoid spending too much memory to
+            // lower the measurement interval (see NodeRepositoryMaintenance)
+            // Each measurement is Object + long + float = 16 + 8 + 4 = 28 bytes
+            // 24 hours with 1k nodes and 3 resources and 1 measurement/sec is about 10Gb
+
+            long oldestTimestamp = clock.instant().minus(dbWindow).toEpochMilli();
+            for (Iterator<List<Measurement>> i = db.values().iterator(); i.hasNext(); ) {
+                List<Measurement> measurements = i.next();
+
+                // Find the leading run of expired measurements and remove it in one operation,
+                // rather than shifting the whole list once per removed measurement
+                int expired = 0;
+                while (expired < measurements.size() && measurements.get(expired).timestamp < oldestTimestamp)
+                    expired++;
+                measurements.subList(0, expired).clear();
+
+                if (measurements.isEmpty())
+                    i.remove();
+            }
+        }
+    }
+
+    /** Returns a window within which we can ask for specific information from this db */
+    public Window getWindow(Instant startTime, Resource resource, List<String> hostnames) {
+        return new Window(startTime, resource, hostnames);
+    }
+
+    /**
+     * A view of the measurements of one resource on a set of hosts, from a start time and onwards.
+     * Every query reads the current content of the db, under the db lock.
+     */
+    public class Window {
+
+        /** The start of this window, in epoch millis (inclusive) */
+        private final long startTime;
+        private final List<MeasurementKey> keys;
+
+        private Window(Instant startTime, Resource resource, List<String> hostnames) {
+            this.startTime = startTime.toEpochMilli();
+            keys = hostnames.stream().map(hostname -> new MeasurementKey(hostname, resource)).collect(Collectors.toList());
+        }
+
+        /** Returns the total number of measurements in this window, over all its hosts */
+        public int measurementCount() {
+            synchronized (lock) {
+                return (int) keys.stream()
+                                 .flatMap(key -> db.getOrDefault(key, List.of()).stream())
+                                 .filter(measurement -> measurement.timestamp >= startTime)
+                                 .count();
+            }
+        }
+
+        /** Returns the count of hostnames which have measurements in this window */
+        public int hostnames() {
+            synchronized (lock) {
+                int count = 0;
+                for (MeasurementKey key : keys) {
+                    List<Measurement> measurements = db.get(key);
+                    if (measurements == null || measurements.isEmpty()) continue;
+
+                    // The last measurement is the newest, so it is enough to check that one
+                    if (measurements.get(measurements.size() - 1).timestamp >= startTime)
+                        count++;
+                }
+                return count;
+            }
+        }
+
+        /**
+         * Returns the average of all the measured values in this window, over all its hosts.
+         * This is NaN if there are no measurements in this window.
+         */
+        public double average() {
+            synchronized (lock) {
+                double sum = 0;
+                int count = 0;
+                for (MeasurementKey key : keys) {
+                    List<Measurement> measurements = db.get(key);
+                    if (measurements == null) continue;
+
+                    // Walk backwards from the newest measurement until we leave the window
+                    int index = measurements.size() - 1;
+                    while (index >= 0 && measurements.get(index).timestamp >= startTime) {
+                        sum += measurements.get(index).value;
+                        count++;
+
+                        index--;
+                    }
+                }
+                return sum / count;
+            }
+        }
+
+    }
+
+    /** The key of a measurement series: A hostname and a resource */
+    private static class MeasurementKey {
+
+        private final String hostname;
+        private final Resource resource;
+
+        public MeasurementKey(String hostname, Resource resource) {
+            this.hostname = hostname;
+            this.resource = resource;
+        }
+
+        @Override
+        public int hashCode() {
+            return Objects.hash(hostname, resource);
+        }
+
+        @Override
+        public boolean equals(Object o) {
+            if (this == o) return true;
+            if ( ! (o instanceof MeasurementKey)) return false;
+            MeasurementKey other = (MeasurementKey)o;
+            if ( ! this.hostname.equals(other.hostname)) return false;
+            if ( ! this.resource.equals(other.resource)) return false;
+            return true;
+        }
+
+    }
+
+    /** A single measured value at a point in time */
+    private static class Measurement {
+
+        /** The time of this measurement in epoch millis */
+        private final long timestamp;
+
+        /** The measured value */
+        private final float value;
+
+        public Measurement(long timestamp, float value) {
+            this.timestamp = timestamp;
+            this.value = value;
+        }
+
+    }
+
+}
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/NodeMetricsFetcher.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/NodeMetricsFetcher.java
new file mode 100644
index 00000000000..54d8eac238f
--- /dev/null
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/NodeMetricsFetcher.java
@@ -0,0 +1,112 @@
+// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.hosted.provision.autoscale;
+
+import ai.vespa.util.http.VespaHttpClientBuilder;
+import com.google.inject.Inject;
+import com.yahoo.component.AbstractComponent;
+import com.yahoo.config.provision.ApplicationId;
+import com.yahoo.vespa.applicationmodel.HostName;
+import com.yahoo.vespa.hosted.provision.Node;
+import com.yahoo.vespa.hosted.provision.NodeRepository;
+import com.yahoo.vespa.orchestrator.HostNameNotFoundException;
+import com.yahoo.vespa.orchestrator.Orchestrator;
+import org.apache.http.client.methods.HttpGet;
+import org.apache.http.impl.client.BasicResponseHandler;
+import org.apache.http.impl.client.CloseableHttpClient;
+
+import java.io.IOException;
+import java.io.UncheckedIOException;
+import java.util.Collection;
+import java.util.logging.Level;
+import java.util.logging.Logger;
+
+/**
+ * Fetches node metrics over the metrics/v2 API
+ *
+ * @author bratseth
+ */
+public class NodeMetricsFetcher extends AbstractComponent implements NodeMetrics {
+
+    private static final Logger log = Logger.getLogger(NodeMetricsFetcher.class.getName());
+
+    private static final String apiPath = "/metrics/v2/values";
+
+    /** The port the metrics API is requested on */
+    private static final int apiPort = 4080;
+
+    private final NodeRepository nodeRepository;
+    private final Orchestrator orchestrator;
+    private final HttpClient httpClient;
+
+    @Inject
+    public NodeMetricsFetcher(NodeRepository nodeRepository, Orchestrator orchestrator) {
+        this(nodeRepository, orchestrator, new ApacheHttpClient());
+    }
+
+    NodeMetricsFetcher(NodeRepository nodeRepository, Orchestrator orchestrator, HttpClient httpClient) {
+        this.nodeRepository = nodeRepository;
+        this.orchestrator = orchestrator;
+        this.httpClient = httpClient;
+    }
+
+    /**
+     * Fetches the metrics of an application by asking the first of its active container nodes
+     * which is not suspended.
+     *
+     * @param application the application to fetch metrics from
+     * @return the metric values in the response
+     * @throws IllegalStateException if the application has no active container node which is not suspended
+     */
+    @Override
+    public Collection<MetricValue> fetchMetrics(ApplicationId application) {
+        Node metricsV2Container = nodeRepository.list()
+                                                .owner(application)
+                                                .state(Node.State.active)
+                                                .container()
+                                                .filter(node -> expectedUp(node))
+                                                .asList().stream()
+                                                .findFirst()
+                                                // Fail with a message naming the application instead of an index out of bounds
+                                                .orElseThrow(() -> new IllegalStateException("No active container node which is expected to be up in " +
+                                                                                             application));
+        String url = "http://" + metricsV2Container.hostname() + ":" + apiPort + apiPath + "?consumer=vespa-consumer-metrics";
+        String response = httpClient.get(url);
+        return new MetricsResponse(response).metrics();
+    }
+
+    @Override
+    public void deconstruct() {
+        httpClient.close();
+    }
+
+    /** Returns whether the orchestrator knows this node and does not report it as suspended */
+    private boolean expectedUp(Node node) {
+        try {
+            return ! orchestrator.getNodeStatus(new HostName(node.hostname())).isSuspended();
+        }
+        catch (HostNameNotFoundException e) {
+            return false;
+        }
+    }
+
+    /** The simplest possible http client interface */
+    public interface HttpClient {
+
+        /** Returns the response body of a GET request to the given url */
+        String get(String url);
+
+        void close();
+
+    }
+
+    /** Implements the HttpClient interface by delegating to an Apache HTTP client */
+    public static class ApacheHttpClient implements HttpClient {
+
+        private final CloseableHttpClient httpClient = VespaHttpClientBuilder.createWithBasicConnectionManager().build();
+
+        /**
+         * Returns the response body of a GET request to the given url
+         *
+         * @throws UncheckedIOException if the request fails with an IOException
+         */
+        @Override
+        public String get(String url) {
+            try {
+                return httpClient.execute(new HttpGet(url), new BasicResponseHandler());
+            }
+            catch (IOException e) {
+                throw new UncheckedIOException("Could not get " + url, e);
+            }
+        }
+
+        /** Closes the underlying client. A failure to close is logged, not thrown */
+        @Override
+        public void close() {
+            try {
+                httpClient.close();
+            }
+            catch (IOException e) {
+                log.log(Level.WARNING, "Exception deconstructing", e);
+            }
+        }
+
+    }
+
+}
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/Resource.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/Resource.java
new file mode 100644
index 00000000000..9c85ca870d5
--- /dev/null
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/Resource.java
@@ -0,0 +1,44 @@
+// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.hosted.provision.autoscale;
+
+import com.yahoo.config.provision.NodeResources;
+
+/**
+ * A resource subject to autoscaling. Each resource knows the name of the metric measuring it,
+ * its ideal average load, and how to read its amount from a set of node resources.
+ *
+ * @author bratseth
+ */
+public enum Resource {
+
+    cpu("cpu.util", 0.2) {
+        double valueFrom(NodeResources resources) { return resources.vcpu(); }
+    },
+
+    memory("memory.util", 0.7) {
+        double valueFrom(NodeResources resources) { return resources.memoryGb(); }
+    },
+
+    disk("disk.util", 0.7) {
+        double valueFrom(NodeResources resources) { return resources.diskGb(); }
+    };
+
+    private final String metricName;
+    private final double idealAverageLoad;
+
+    Resource(String metricName, double idealAverageLoad) {
+        this.metricName = metricName;
+        this.idealAverageLoad = idealAverageLoad;
+    }
+
+    /** The name of the metric measuring the load on this resource */
+    String metricName() { return metricName; }
+
+    /** The load we should have of this resource on average, when one node in the cluster is down */
+    double idealAverageLoad() { return idealAverageLoad; }
+
+    /** Returns the amount of this resource in the given node resources */
+    abstract double valueFrom(NodeResources resources);
+
+    /**
+     * Returns the resource measured by the metric with the given name
+     *
+     * @throws IllegalArgumentException if no resource has this metric name
+     */
+    public static Resource fromMetric(String metricName) {
+        for (Resource candidate : values()) {
+            if (candidate.metricName().equals(metricName))
+                return candidate;
+        }
+        throw new IllegalArgumentException("Metric '" + metricName + "' does not map to a resource");
+    }
+
+}
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ResourceIterator.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ResourceIterator.java
new file mode 100644
index 00000000000..464fe570b95
--- /dev/null
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ResourceIterator.java
@@ -0,0 +1,104 @@
+// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.hosted.provision.autoscale;
+
+import com.yahoo.config.provision.NodeResources;
+
+/**
+ * Provides iteration over possible cluster resource allocations given a target total load
+ * and current groups/nodes allocation.
+ *
+ * Iteration starts at the lowest node count considered and increases the node count by a fixed
+ * increment on each call to next(), until the count exceeds the maximum nodes per cluster.
+ * This exposes hasNext() and next() but does not implement java.util.Iterator.
+ */
+public class ResourceIterator {
+
+ // Configured min and max nodes TODO: These should come from the application package
+ private static final int minimumNodesPerCluster = 3; // Since this is with redundancy it cannot be lower than 2
+ private static final int maximumNodesPerCluster = 150;
+
+ // When a query is issued on a node the cost is the sum of a fixed cost component and a cost component
+ // proportional to document count. We must account for this when comparing configurations with more or fewer nodes.
+ // TODO: Measure this, and only take it into account with queries
+ private static final double fixedCpuCostFraction = 0.1;
+
+ // Describes the observed state
+ private final ClusterResources allocation;
+ private final double cpuLoad;
+ private final double memoryLoad;
+ private final double diskLoad;
+ private final int groupSize;
+
+ // Derived from the observed state
+ private final int nodeIncrement;
+ private final boolean singleGroupMode;
+
+ // Iterator state
+ private int currentNodes;
+
+ /**
+ * Creates an iterator positioned at the lowest node count to consider.
+ *
+ * @param cpuLoad the observed cpu load (presumably a fraction of the allocated cpu - TODO confirm)
+ * @param memoryLoad the observed memory load (same assumption)
+ * @param diskLoad the observed disk load (same assumption)
+ * @param currentAllocation the current allocation of the cluster, which the loads were observed on
+ */
+ public ResourceIterator(double cpuLoad, double memoryLoad, double diskLoad, ClusterResources currentAllocation) {
+ this.cpuLoad = cpuLoad;
+ this.memoryLoad = memoryLoad;
+ this.diskLoad = diskLoad;
+
+ // ceil: If the division does not produce a whole number we assume some node is missing
+ groupSize = (int)Math.ceil((double)currentAllocation.nodes() / currentAllocation.groups());
+ allocation = currentAllocation;
+
+ // What number of nodes is it effective to add or remove at the time from this cluster?
+ // This is the group size, since we (for now) assume the group size is decided by someone wiser than us
+ // and we decide the number of groups.
+ // The exception is when we only have one group, where we can add and remove single nodes in it.
+ singleGroupMode = currentAllocation.groups() == 1;
+ nodeIncrement = singleGroupMode ? 1 : groupSize;
+
+ // Step down from the current node count to the starting point: As low as we can get in whole increments
+ // without going below the minimum, and with multiple groups, while keeping more nodes than one group holds.
+ // A current node count already below the minimum is left unchanged.
+ currentNodes = currentAllocation.nodes();
+ while (currentNodes - nodeIncrement >= minimumNodesPerCluster
+ && (singleGroupMode || currentNodes - nodeIncrement > groupSize)) // group level redundancy
+ currentNodes -= nodeIncrement;
+ }
+
+ /**
+ * Returns the cluster resources for the current node count and advances to the next node count.
+ * The node resources are sized for the current node count minus one node (in single group mode)
+ * or minus one group (otherwise). This does not check hasNext().
+ */
+ public ClusterResources next() {
+ // NOTE(review): In single group mode with a current node count of 1 this becomes 0, which
+ // resourcesFor divides by - it looks like that yields infinite (or NaN) resources; confirm callers handle it
+ int nodesWithRedundancy = currentNodes - (singleGroupMode ? 1 : groupSize);
+ ClusterResources next = new ClusterResources(currentNodes,
+ singleGroupMode ? 1 : currentNodes / groupSize,
+ resourcesFor(nodesWithRedundancy));
+ currentNodes += nodeIncrement;
+ return next;
+ }
+
+ /** Returns whether the current node count is within the maximum nodes per cluster */
+ public boolean hasNext() {
+ return currentNodes <= maximumNodesPerCluster;
+ }
+
+ /**
+ * For the observed load this instance is initialized with, returns the resources needed per node to be at
+ * ideal load given a target node count
+ */
+ private NodeResources resourcesFor(int nodeCount) {
+ // Cpu: Scales with cluster size (TODO: Only reads, writes scales with group size)
+ // Memory and disk: Scales with group size
+
+ double cpu, memory, disk;
+ if (singleGroupMode) {
+ // The fixed cost portion of cpu does not scale with changes to the node count
+ // TODO: Only for the portion of cpu consumed by queries
+ double totalCpu = totalUsage(Resource.cpu, cpuLoad);
+ cpu = fixedCpuCostFraction * totalCpu / groupSize / Resource.cpu.idealAverageLoad() +
+ (1 - fixedCpuCostFraction) * totalCpu / nodeCount / Resource.cpu.idealAverageLoad();
+ memory = totalGroupUsage(Resource.memory, memoryLoad) / nodeCount / Resource.memory.idealAverageLoad();
+ disk = totalGroupUsage(Resource.disk, diskLoad) / nodeCount / Resource.disk.idealAverageLoad();
+ }
+ else {
+ // With multiple groups memory and disk are divided by the group size, so they do not vary with the node count
+ cpu = totalUsage(Resource.cpu, cpuLoad) / nodeCount / Resource.cpu.idealAverageLoad();
+ memory = totalGroupUsage(Resource.memory, memoryLoad) / groupSize / Resource.memory.idealAverageLoad();
+ disk = totalGroupUsage(Resource.disk, diskLoad) / groupSize / Resource.disk.idealAverageLoad();
+ }
+ // Start from the current node resources and replace only cpu, memory and disk
+ return allocation.nodeResources().withVcpu(cpu).withMemoryGb(memory).withDiskGb(disk);
+ }
+
+ /** Returns the load times the amount of the resource per node times the node count of the current allocation */
+ private double totalUsage(Resource resource, double load) {
+ return load * resource.valueFrom(allocation.nodeResources()) * allocation.nodes();
+ }
+
+ /** Returns the load times the amount of the resource per node times the group size of the current allocation */
+ private double totalGroupUsage(Resource resource, double load) {
+ return load * resource.valueFrom(allocation.nodeResources()) * groupSize;
+ }
+
+}
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/AutoscalingMaintainer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/AutoscalingMaintainer.java
new file mode 100644
index 00000000000..f3c1d8603b3
--- /dev/null
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/AutoscalingMaintainer.java
@@ -0,0 +1,62 @@
+// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.hosted.provision.maintenance;
+
+import com.yahoo.config.provision.ApplicationId;
+import com.yahoo.config.provision.ClusterSpec;
+import com.yahoo.config.provision.Deployer;
+import com.yahoo.vespa.hosted.provision.Node;
+import com.yahoo.vespa.hosted.provision.NodeRepository;
+import com.yahoo.vespa.hosted.provision.autoscale.Autoscaler;
+import com.yahoo.vespa.hosted.provision.autoscale.ClusterResources;
+import com.yahoo.vespa.hosted.provision.autoscale.NodeMetricsDb;
+import com.yahoo.vespa.hosted.provision.provisioning.HostResourcesCalculator;
+
+import java.time.Duration;
+import java.util.List;
+import java.util.Map;
+import java.util.Optional;
+import java.util.stream.Collectors;
+
+/**
+ * Maintainer making automatic scaling decisions
+ *
+ * @author bratseth
+ */
+public class AutoscalingMaintainer extends Maintainer {
+
+    private final Autoscaler autoscaler;
+    private final Deployer deployer;
+
+    public AutoscalingMaintainer(NodeRepository nodeRepository,
+                                 HostResourcesCalculator hostResourcesCalculator,
+                                 NodeMetricsDb metricsDb,
+                                 Deployer deployer,
+                                 Duration interval) {
+        super(nodeRepository, interval);
+        this.autoscaler = new Autoscaler(hostResourcesCalculator, metricsDb, nodeRepository);
+        this.deployer = deployer;
+    }
+
+    /** Considers autoscaling of each application with active nodes. This does nothing outside production environments */
+    @Override
+    protected void maintain() {
+        if ( ! nodeRepository().zone().environment().isProduction()) return;
+
+        activeNodesByApplication().forEach((applicationId, nodes) -> autoscale(applicationId, nodes));
+    }
+
+    /** Asks the autoscaler for a target for each cluster of the application, and logs each target suggested */
+    private void autoscale(ApplicationId application, List<Node> applicationNodes) {
+        MaintenanceDeployment deployment = new MaintenanceDeployment(application, deployer, nodeRepository());
+        if ( ! deployment.isValid()) return; // Another config server will consider this application
+        nodesByCluster(applicationNodes).forEach((clusterSpec, clusterNodes) -> {
+            Optional<ClusterResources> target = autoscaler.autoscale(application, clusterSpec, clusterNodes);
+            // Report the current size and resources of this cluster, not of the application as a whole
+            target.ifPresent(t -> log.info("Autoscale: Application " + application + " cluster " + clusterSpec +
+                                           " from " + clusterNodes.size() + " * " + clusterNodes.get(0).flavor().resources() +
+                                           " to " + t.nodes() + " * " + t.nodeResources()));
+        });
+    }
+
+    /** Groups the given nodes by the cluster they are allocated to. All the nodes must be allocated */
+    private Map<ClusterSpec, List<Node>> nodesByCluster(List<Node> applicationNodes) {
+        return applicationNodes.stream().collect(Collectors.groupingBy(n -> n.allocation().get().membership().cluster()));
+    }
+
+}
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/Maintainer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/Maintainer.java
index 0d5a8587902..27fba9e8f8e 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/Maintainer.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/Maintainer.java
@@ -2,17 +2,22 @@
package com.yahoo.vespa.hosted.provision.maintenance;
import com.yahoo.component.AbstractComponent;
+import com.yahoo.config.provision.ApplicationId;
import com.yahoo.config.provision.HostName;
+import com.yahoo.config.provision.NodeType;
+import com.yahoo.vespa.hosted.provision.Node;
import com.yahoo.vespa.hosted.provision.NodeRepository;
import java.time.Duration;
import java.time.Instant;
import java.util.List;
+import java.util.Map;
import java.util.concurrent.ScheduledExecutorService;
import java.util.concurrent.ScheduledThreadPoolExecutor;
import java.util.concurrent.TimeUnit;
import java.util.logging.Level;
import java.util.logging.Logger;
+import java.util.stream.Collectors;
/**
* A maintainer is some job which runs at a fixed rate to perform some maintenance task on the node repo.
@@ -75,6 +80,12 @@ public abstract class Maintainer extends AbstractComponent implements Runnable {
private String name() { return this.getClass().getSimpleName(); }
+ /** A utility to group the active tenant nodes by the application owning them */
+ protected Map<ApplicationId, List<Node>> activeNodesByApplication() {
+ return nodeRepository().list().nodeType(NodeType.tenant).state(Node.State.active).asList()
+ .stream().collect(Collectors.groupingBy(n -> n.allocation().get().owner()));
+ }
+
static long staggeredDelay(List<HostName> cluster, HostName host, Instant now, Duration interval) {
if ( ! cluster.contains(host))
return interval.toMillis();
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeMetricsDbMaintainer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeMetricsDbMaintainer.java
new file mode 100644
index 00000000000..178e8385008
--- /dev/null
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeMetricsDbMaintainer.java
@@ -0,0 +1,49 @@
+// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.hosted.provision.maintenance;
+
+import com.yahoo.config.provision.ApplicationId;
+import com.yahoo.vespa.hosted.provision.autoscale.NodeMetrics;
+import com.yahoo.vespa.hosted.provision.NodeRepository;
+import com.yahoo.vespa.hosted.provision.autoscale.NodeMetricsDb;
+
+import java.time.Duration;
+import java.util.logging.Level;
+
+/**
+ * Maintainer which keeps the node metric db up to date by periodically fetching metrics
+ * for every application with active nodes, and removing old measurements.
+ *
+ * @author bratseth
+ */
+public class NodeMetricsDbMaintainer extends Maintainer {
+
+    /** The max number of fetch failures logged as warnings in a single run */
+    private static final int maxWarningsPerInvocation = 2;
+
+    private final NodeMetrics nodeMetrics;
+    private final NodeMetricsDb nodeMetricsDb;
+
+    public NodeMetricsDbMaintainer(NodeRepository nodeRepository,
+                                   NodeMetrics nodeMetrics,
+                                   NodeMetricsDb nodeMetricsDb,
+                                   Duration interval) {
+        super(nodeRepository, interval);
+        this.nodeMetrics = nodeMetrics;
+        this.nodeMetricsDb = nodeMetricsDb;
+    }
+
+    /** Fetches and stores metrics for each application, continuing past failures, and then gc's the db */
+    @Override
+    protected void maintain() {
+        int failures = 0;
+        for (ApplicationId application : activeNodesByApplication().keySet()) {
+            try {
+                nodeMetricsDb.add(nodeMetrics.fetchMetrics(application));
+            }
+            catch (Exception e) {
+                // Every failure is counted, but only the first few are logged
+                boolean withinWarningLimit = failures < maxWarningsPerInvocation;
+                failures++;
+                if (withinWarningLimit)
+                    log.log(Level.WARNING, "Could not update metrics for " + application, e);
+            }
+        }
+        nodeMetricsDb.gc(nodeRepository().clock());
+    }
+
+}
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRepositoryMaintenance.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRepositoryMaintenance.java
index 37620e17a95..ecc550527fc 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRepositoryMaintenance.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRepositoryMaintenance.java
@@ -8,9 +8,11 @@ import com.yahoo.config.provision.Environment;
import com.yahoo.config.provision.HostLivenessTracker;
import com.yahoo.config.provision.InfraDeployer;
import com.yahoo.config.provision.Zone;
+import com.yahoo.vespa.hosted.provision.autoscale.NodeMetrics;
import com.yahoo.jdisc.Metric;
import com.yahoo.vespa.flags.FlagSource;
import com.yahoo.vespa.hosted.provision.NodeRepository;
+import com.yahoo.vespa.hosted.provision.autoscale.NodeMetricsDb;
import com.yahoo.vespa.hosted.provision.provisioning.ProvisionServiceProvider;
import com.yahoo.vespa.orchestrator.Orchestrator;
import com.yahoo.vespa.service.monitor.ServiceMonitor;
@@ -48,22 +50,25 @@ public class NodeRepositoryMaintenance extends AbstractComponent {
private final CapacityReportMaintainer capacityReportMaintainer;
private final OsUpgradeActivator osUpgradeActivator;
private final Rebalancer rebalancer;
+ private final NodeMetricsDbMaintainer nodeMetricsDbMaintainer;
+ private final AutoscalingMaintainer autoscalingMaintainer;
@SuppressWarnings("unused")
@Inject
public NodeRepositoryMaintenance(NodeRepository nodeRepository, Deployer deployer, InfraDeployer infraDeployer,
HostLivenessTracker hostLivenessTracker, ServiceMonitor serviceMonitor,
Zone zone, Orchestrator orchestrator, Metric metric,
- ProvisionServiceProvider provisionServiceProvider,
- FlagSource flagSource) {
+ ProvisionServiceProvider provisionServiceProvider, FlagSource flagSource,
+ NodeMetrics nodeMetrics, NodeMetricsDb nodeMetricsDb) {
this(nodeRepository, deployer, infraDeployer, hostLivenessTracker, serviceMonitor, zone, Clock.systemUTC(),
- orchestrator, metric, provisionServiceProvider, flagSource);
+ orchestrator, metric, provisionServiceProvider, flagSource, nodeMetrics, nodeMetricsDb);
}
public NodeRepositoryMaintenance(NodeRepository nodeRepository, Deployer deployer, InfraDeployer infraDeployer,
HostLivenessTracker hostLivenessTracker, ServiceMonitor serviceMonitor,
Zone zone, Clock clock, Orchestrator orchestrator, Metric metric,
- ProvisionServiceProvider provisionServiceProvider, FlagSource flagSource) {
+ ProvisionServiceProvider provisionServiceProvider, FlagSource flagSource,
+ NodeMetrics nodeMetrics, NodeMetricsDb nodeMetricsDb) {
DefaultTimes defaults = new DefaultTimes(zone);
nodeFailer = new NodeFailer(deployer, hostLivenessTracker, serviceMonitor, nodeRepository, defaults.failGrace, clock, orchestrator, throttlePolicyFromEnv().orElse(defaults.throttlePolicy), metric);
@@ -85,6 +90,8 @@ public class NodeRepositoryMaintenance extends AbstractComponent {
capacityReportMaintainer = new CapacityReportMaintainer(nodeRepository, metric, defaults.capacityReportInterval);
osUpgradeActivator = new OsUpgradeActivator(nodeRepository, defaults.osUpgradeActivatorInterval);
rebalancer = new Rebalancer(deployer, nodeRepository, provisionServiceProvider.getHostResourcesCalculator(), provisionServiceProvider.getHostProvisioner(), metric, clock, defaults.rebalancerInterval);
+ nodeMetricsDbMaintainer = new NodeMetricsDbMaintainer(nodeRepository, nodeMetrics, nodeMetricsDb, defaults.nodeMetricsCollectionInterval);
+ autoscalingMaintainer = new AutoscalingMaintainer(nodeRepository, provisionServiceProvider.getHostResourcesCalculator(), nodeMetricsDb, deployer, defaults.autoscalingInterval);
// The DuperModel is filled with infrastructure applications by the infrastructure provisioner, so explicitly run that now
infrastructureProvisioner.maintainButThrowOnException();
@@ -109,6 +116,8 @@ public class NodeRepositoryMaintenance extends AbstractComponent {
dynamicProvisioningMaintainer.ifPresent(Maintainer::deconstruct);
osUpgradeActivator.deconstruct();
rebalancer.deconstruct();
+ nodeMetricsDbMaintainer.deconstruct();
+ autoscalingMaintainer.deconstruct();
}
private static Optional<NodeFailer.ThrottlePolicy> throttlePolicyFromEnv() {
@@ -149,6 +158,8 @@ public class NodeRepositoryMaintenance extends AbstractComponent {
private final Duration dynamicProvisionerInterval;
private final Duration osUpgradeActivatorInterval;
private final Duration rebalancerInterval;
+ private final Duration nodeMetricsCollectionInterval;
+ private final Duration autoscalingInterval;
private final NodeFailer.ThrottlePolicy throttlePolicy;
@@ -169,6 +180,8 @@ public class NodeRepositoryMaintenance extends AbstractComponent {
dynamicProvisionerInterval = Duration.ofMinutes(5);
osUpgradeActivatorInterval = zone.system().isCd() ? Duration.ofSeconds(30) : Duration.ofMinutes(5);
rebalancerInterval = Duration.ofMinutes(40);
+ nodeMetricsCollectionInterval = Duration.ofMinutes(1);
+ autoscalingInterval = Duration.ofMinutes(5);
if (zone.environment().equals(Environment.prod) && ! zone.system().isCd()) {
inactiveExpiry = Duration.ofHours(4); // enough time for the application owner to discover and redeploy
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/CapacityPolicies.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/CapacityPolicies.java
index 7c5ff35878b..179d7f2703c 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/CapacityPolicies.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/CapacityPolicies.java
@@ -19,11 +19,15 @@ import java.util.Locale;
public class CapacityPolicies {
private final Zone zone;
+
+ private final NodeResourceLimits nodeResourceLimits;
+
/* Deployments must match 1-to-1 the advertised resources of a physical host */
private final boolean isUsingAdvertisedResources;
public CapacityPolicies(Zone zone) {
this.zone = zone;
+ this.nodeResourceLimits = new NodeResourceLimits(zone);
this.isUsingAdvertisedResources = zone.cloud().value().equals("aws");
}
@@ -64,7 +68,7 @@ public class CapacityPolicies {
}
private void ensureSufficientResources(NodeResources resources, ClusterSpec cluster) {
- double minMemoryGb = minMemoryGb(cluster.type());
+ double minMemoryGb = nodeResourceLimits.minMemoryGb(cluster.type());
if (resources.memoryGb() >= minMemoryGb) return;
throw new IllegalArgumentException(String.format(Locale.ENGLISH,
@@ -72,12 +76,6 @@ public class CapacityPolicies {
minMemoryGb, cluster.type().name(), cluster.id().value(), resources.memoryGb()));
}
- private int minMemoryGb(ClusterSpec.Type clusterType) {
- if (zone.system() == SystemName.dev) return 1; // Allow small containers in dev system
- if (clusterType == ClusterSpec.Type.admin) return 2;
- return 4;
- }
-
private NodeResources defaultNodeResources(ClusterSpec.Type clusterType) {
if (clusterType == ClusterSpec.Type.admin) {
if (zone.system() == SystemName.dev) {
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/GroupPreparer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/GroupPreparer.java
index 5753bbb3c5a..af6fa8edf64 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/GroupPreparer.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/GroupPreparer.java
@@ -64,7 +64,6 @@ public class GroupPreparer {
.with(FetchVector.Dimension.APPLICATION_ID, application.serializedForm())
.value();
boolean allocateFully = dynamicProvisioningEnabled && preprovisionCapacityFlag.value().isEmpty();
-
try (Mutex lock = nodeRepository.lock(application)) {
// Lock ready pool to ensure that the same nodes are not simultaneously allocated by others
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/HostProvisioner.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/HostProvisioner.java
index 394549e4141..0423f762f2b 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/HostProvisioner.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/HostProvisioner.java
@@ -18,7 +18,7 @@ public interface HostProvisioner {
/**
* Schedule provisioning of a given number of hosts.
*
- * @param provisionIndexes List of unique provision indexes which will be used to generate the node hostnames
+ * @param provisionIndexes list of unique provision indexes which will be used to generate the node hostnames
* on the form of <code>[prefix][index].[domain]</code>
* @param resources the resources needed per node
* @param applicationId id of the application that will own the provisioned host
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeAllocation.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeAllocation.java
index ebd6a01e61f..c92f7889496 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeAllocation.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeAllocation.java
@@ -390,4 +390,5 @@ class NodeAllocation {
return count;
}
}
+
}
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeResourceLimits.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeResourceLimits.java
new file mode 100644
index 00000000000..ca04bf66ce3
--- /dev/null
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeResourceLimits.java
@@ -0,0 +1,32 @@
+// Copyright 2019 Oath Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.hosted.provision.provisioning;
+
+import com.yahoo.config.provision.ClusterSpec;
+import com.yahoo.config.provision.NodeResources;
+import com.yahoo.config.provision.SystemName;
+import com.yahoo.config.provision.Zone;
+
+/**
+ * Defines the resource limits for nodes in various zones
+ *
+ * @author bratseth
+ */
+public class NodeResourceLimits {
+
+    private final Zone zone;
+
+    public NodeResourceLimits(Zone zone) {
+        this.zone = zone;
+    }
+
+    /**
+     * Returns the minimum memory, in Gb, a node of the given cluster type may have in this zone:
+     * 1 in the dev system, otherwise 2 for admin clusters and 4 for all other cluster types.
+     */
+    public int minMemoryGb(ClusterSpec.Type clusterType) {
+        boolean devSystem = zone.system() == SystemName.dev;
+        if (devSystem) return 1; // Allow small containers in dev system
+        return clusterType == ClusterSpec.Type.admin ? 2 : 4;
+    }
+
+    /**
+     * Returns the given resources with memory raised to the minimum for the cluster type
+     * if it is below it; the resources are otherwise returned with their memory unchanged.
+     */
+    public NodeResources enlargeToLegal(NodeResources resources, ClusterSpec.Type clusterType) {
+        double legalMemoryGb = Math.max(minMemoryGb(clusterType), resources.memoryGb());
+        return resources.withMemoryGb(legalMemoryGb);
+    }
+
+}
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/testutils/ContainerConfig.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/testutils/ContainerConfig.java
index 49d0ba5cf70..d26accd7a84 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/testutils/ContainerConfig.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/testutils/ContainerConfig.java
@@ -25,6 +25,8 @@ public class ContainerConfig {
" <component id='com.yahoo.vespa.hosted.provision.testutils.ServiceMonitorStub'/>\n" +
" <component id='com.yahoo.vespa.hosted.provision.testutils.MockDuperModel'/>\n" +
" <component id='com.yahoo.vespa.hosted.provision.testutils.MockNodeFlavors'/>\n" +
+ " <component id='com.yahoo.vespa.hosted.provision.autoscale.NodeMetricsDb'/>\n" +
+ " <component id='com.yahoo.vespa.hosted.provision.testutils.MockNodeMetrics'/>\n" +
" <component id='com.yahoo.vespa.hosted.provision.testutils.MockNodeRepository'/>\n" +
" <component id='com.yahoo.vespa.hosted.provision.testutils.MockProvisionServiceProvider'/>\n" +
" <component id='com.yahoo.vespa.hosted.provision.maintenance.NodeRepositoryMaintenance'/>\n" +
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/testutils/MockDuperModel.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/testutils/MockDuperModel.java
index 915ef0d9125..e7ebf049e51 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/testutils/MockDuperModel.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/testutils/MockDuperModel.java
@@ -18,6 +18,7 @@ import java.util.concurrent.ConcurrentHashMap;
* @author hakonhall
*/
public class MockDuperModel implements DuperModelInfraApi {
+
private final Map<ApplicationId, InfraApplicationApi> supportedInfraApps = new HashMap<>();
private final ConcurrentHashMap<ApplicationId, List<HostName>> activeApps = new ConcurrentHashMap<>();
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/testutils/MockNodeMetrics.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/testutils/MockNodeMetrics.java
new file mode 100644
index 00000000000..d5397aa421c
--- /dev/null
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/testutils/MockNodeMetrics.java
@@ -0,0 +1,20 @@
+// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.hosted.provision.testutils;
+
+import com.yahoo.config.provision.ApplicationId;
+import com.yahoo.vespa.hosted.provision.autoscale.NodeMetrics;
+
+import java.util.ArrayList;
+import java.util.Collection;
+
+/**
+ * A node metrics source which never has any metric values to report.
+ *
+ * @author bratseth
+ */
+public class MockNodeMetrics implements NodeMetrics {
+
+    /** Returns a new, empty, mutable collection regardless of which application is given */
+    @Override
+    public Collection<MetricValue> fetchMetrics(ApplicationId application) {
+        Collection<MetricValue> noValues = new ArrayList<>();
+        return noValues;
+    }
+
+}
diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/NodeRepositoryTester.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/NodeRepositoryTester.java
index ab813ddeb5a..95555185292 100644
--- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/NodeRepositoryTester.java
+++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/NodeRepositoryTester.java
@@ -59,7 +59,7 @@ public class NodeRepositoryTester {
public Node addNode(String id, String hostname, String parentHostname, String flavor, NodeType type) {
Node node = nodeRepository.createNode(id, hostname, Optional.of(parentHostname),
- nodeFlavors.getFlavorOrThrow(flavor), type);
+ nodeFlavors.getFlavorOrThrow(flavor), type);
return nodeRepository.addNodes(Collections.singletonList(node)).get(0);
}
diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTest.java
new file mode 100644
index 00000000000..fd0517a6e50
--- /dev/null
+++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTest.java
@@ -0,0 +1,147 @@
+// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.hosted.provision.autoscale;
+
+import com.yahoo.config.provision.ApplicationId;
+import com.yahoo.config.provision.CloudName;
+import com.yahoo.config.provision.ClusterSpec;
+import com.yahoo.config.provision.Environment;
+import com.yahoo.config.provision.Flavor;
+import com.yahoo.config.provision.NodeResources;
+import com.yahoo.config.provision.RegionName;
+import com.yahoo.config.provision.SystemName;
+import com.yahoo.config.provision.Zone;
+import org.junit.Test;
+
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Optional;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+
+/**
+ * Tests the cluster resources suggested by the autoscaler, driven through AutoscalingTester.
+ * Each test deploys a cluster, feeds load measurements and asserts on the suggested
+ * node count, group count and (rounded to one decimal) per-node cpu, memory and disk.
+ *
+ * @author bratseth
+ */
+public class AutoscalingTest {
+
+    /** Walks one cluster with a single group through scale-up, a period of no change, and scale-down */
+    @Test
+    public void testAutoscalingSingleGroup() {
+        NodeResources resources = new NodeResources(3, 100, 100, 1);
+        AutoscalingTester tester = new AutoscalingTester(resources);
+
+        ApplicationId application1 = tester.applicationId("application1");
+        ClusterSpec cluster1 = tester.clusterSpec(ClusterSpec.Type.container, "cluster1");
+
+        // deploy
+        tester.deploy(application1, cluster1, 5, 1, resources);
+
+        assertTrue("No measurements -> No change", tester.autoscale(application1, cluster1).isEmpty());
+
+        tester.addMeasurements(Resource.cpu, 0.25f, 1f, 60, application1);
+        assertTrue("Too few measurements -> No change", tester.autoscale(application1, cluster1).isEmpty());
+
+        tester.addMeasurements(Resource.cpu, 0.25f, 1f, 60, application1);
+        ClusterResources scaledResources = tester.assertResources("Scaling up since resource usage is too high",
+                                                                  15, 1, 1.3, 28.6, 28.6,
+                                                                  tester.autoscale(application1, cluster1));
+
+        tester.deploy(application1, cluster1, scaledResources);
+        assertTrue("Cluster in flux -> No further change", tester.autoscale(application1, cluster1).isEmpty());
+
+        tester.deactivateRetired(application1, cluster1, scaledResources);
+        tester.addMeasurements(Resource.cpu, 0.8f, 1f, 3, application1);
+        assertTrue("Load change is large, but insufficient measurements for new config -> No change",
+                   tester.autoscale(application1, cluster1).isEmpty());
+
+        tester.addMeasurements(Resource.cpu, 0.19f, 1f, 100, application1);
+        assertEquals("Load change is small -> No change", Optional.empty(), tester.autoscale(application1, cluster1));
+
+        tester.addMeasurements(Resource.cpu, 0.1f, 1f, 120, application1);
+        tester.assertResources("Scaling down since resource usage has gone down significantly",
+                               26, 1, 0.6, 16.0, 16.0,
+                               tester.autoscale(application1, cluster1));
+    }
+
+    /** Deploys 5 nodes in 5 groups (one node per group) and expects node and group count to grow together */
+    @Test
+    public void testAutoscalingGroupSize1() {
+        NodeResources resources = new NodeResources(3, 100, 100, 1);
+        AutoscalingTester tester = new AutoscalingTester(resources);
+
+        ApplicationId application1 = tester.applicationId("application1");
+        ClusterSpec cluster1 = tester.clusterSpec(ClusterSpec.Type.container, "cluster1");
+
+        // deploy
+        tester.deploy(application1, cluster1, 5, 5, resources);
+        tester.addMeasurements(Resource.cpu, 0.25f, 1f, 120, application1);
+        tester.assertResources("Scaling up since resource usage is too high",
+                               7, 7, 2.5, 80.0, 80.0,
+                               tester.autoscale(application1, cluster1));
+    }
+
+    /** Deploys 6 nodes in 2 groups (three nodes per group) and expects a whole group to be added */
+    @Test
+    public void testAutoscalingGroupSize3() {
+        NodeResources resources = new NodeResources(3, 100, 100, 1);
+        AutoscalingTester tester = new AutoscalingTester(resources);
+
+        ApplicationId application1 = tester.applicationId("application1");
+        ClusterSpec cluster1 = tester.clusterSpec(ClusterSpec.Type.container, "cluster1");
+
+        // deploy
+        tester.deploy(application1, cluster1, 6, 2, resources);
+        tester.addMeasurements(Resource.cpu, 0.22f, 1f, 120, application1);
+        tester.assertResources("Scaling up since resource usage is too high",
+                               9, 3, 2.7, 83.3, 83.3,
+                               tester.autoscale(application1, cluster1));
+    }
+
+    /**
+     * Feeds a very low memory load and expects the suggested memory to stop at 4 Gb.
+     * NOTE(review): 4 presumably comes from the minimum memory limit for non-admin clusters - confirm.
+     */
+    @Test
+    public void testAutoscalingAvoidsIllegalConfigurations() {
+        NodeResources resources = new NodeResources(3, 100, 100, 1);
+        AutoscalingTester tester = new AutoscalingTester(resources);
+
+        ApplicationId application1 = tester.applicationId("application1");
+        ClusterSpec cluster1 = tester.clusterSpec(ClusterSpec.Type.container, "cluster1");
+
+        // deploy
+        tester.deploy(application1, cluster1, 6, 1, resources);
+        tester.addMeasurements(Resource.memory, 0.02f, 1f, 120, application1);
+        tester.assertResources("Scaling down",
+                               6, 1, 3.0, 4.0, 100.0,
+                               tester.autoscale(application1, cluster1));
+    }
+
+    /** Autoscaling in an aws zone with dynamic provisioning, where suggestions must match one of the host flavors */
+    @Test
+    public void testAutoscalingAws() {
+        List<Flavor> flavors = new ArrayList<>();
+        flavors.add(new Flavor("aws-xlarge", new NodeResources(3, 200, 100, 1, NodeResources.DiskSpeed.fast, NodeResources.StorageType.remote)));
+        flavors.add(new Flavor("aws-large", new NodeResources(3, 150, 100, 1, NodeResources.DiskSpeed.fast, NodeResources.StorageType.remote)));
+        flavors.add(new Flavor("aws-medium", new NodeResources(3, 100, 100, 1, NodeResources.DiskSpeed.fast, NodeResources.StorageType.remote)));
+        flavors.add(new Flavor("aws-small", new NodeResources(3, 80, 100, 1, NodeResources.DiskSpeed.fast, NodeResources.StorageType.remote)));
+        AutoscalingTester tester = new AutoscalingTester(new Zone(CloudName.from("aws"), SystemName.main,
+                                                                  Environment.prod, RegionName.from("us-east")),
+                                                         flavors);
+
+        ApplicationId application1 = tester.applicationId("application1");
+        ClusterSpec cluster1 = tester.clusterSpec(ClusterSpec.Type.container, "cluster1");
+
+        // deploy
+        tester.deploy(application1, cluster1, 5, 1, new NodeResources(3, 100, 100, 1));
+
+        tester.addMeasurements(Resource.memory, 0.9f, 0.6f, 120, application1);
+        // The two sentences of the message are separated by a space so that a failure reads correctly
+        ClusterResources scaledResources = tester.assertResources("Scaling up since resource usage is too high. " +
+                                                                  "Scaling flavor not count since the latter is more expensive due to " +
+                                                                  "memory charged but taken by aws, see MockHostResourcesCalculator",
+                                                                  5, 1, 3, 150, 100,
+                                                                  tester.autoscale(application1, cluster1));
+
+        tester.deploy(application1, cluster1, scaledResources);
+        tester.deactivateRetired(application1, cluster1, scaledResources);
+
+        tester.addMeasurements(Resource.memory, 0.3f, 0.6f, 1000, application1);
+        tester.assertResources("Scaling down since resource usage has gone down",
+                               4, 1, 3, 100, 100,
+                               tester.autoscale(application1, cluster1));
+    }
+
+}
diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTester.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTester.java
new file mode 100644
index 00000000000..f15b7e4220b
--- /dev/null
+++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTester.java
@@ -0,0 +1,248 @@
+// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.hosted.provision.autoscale;
+
+import com.yahoo.component.Version;
+import com.yahoo.config.provision.ApplicationId;
+import com.yahoo.config.provision.Capacity;
+import com.yahoo.config.provision.ClusterSpec;
+import com.yahoo.config.provision.Environment;
+import com.yahoo.config.provision.Flavor;
+import com.yahoo.config.provision.HostSpec;
+import com.yahoo.config.provision.NodeResources;
+import com.yahoo.config.provision.NodeType;
+import com.yahoo.config.provision.RegionName;
+import com.yahoo.config.provision.Zone;
+import com.yahoo.config.provisioning.FlavorsConfig;
+import com.yahoo.test.ManualClock;
+import com.yahoo.transaction.Mutex;
+import com.yahoo.vespa.flags.FlagSource;
+import com.yahoo.vespa.flags.Flags;
+import com.yahoo.vespa.flags.InMemoryFlagSource;
+import com.yahoo.vespa.hosted.provision.Node;
+import com.yahoo.vespa.hosted.provision.NodeRepository;
+import com.yahoo.vespa.hosted.provision.node.Agent;
+import com.yahoo.vespa.hosted.provision.node.IP;
+import com.yahoo.vespa.hosted.provision.provisioning.FatalProvisioningException;
+import com.yahoo.vespa.hosted.provision.provisioning.HostProvisioner;
+import com.yahoo.vespa.hosted.provision.provisioning.HostResourcesCalculator;
+import com.yahoo.vespa.hosted.provision.provisioning.ProvisionedHost;
+import com.yahoo.vespa.hosted.provision.provisioning.ProvisioningTester;
+
+import java.time.Duration;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Optional;
+import java.util.Set;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+
+/**
+ * Test fixture for autoscaling tests: wraps a ProvisioningTester, a NodeMetricsDb and an Autoscaler,
+ * and offers helpers to deploy clusters, feed load measurements and assert on autoscaling suggestions.
+ */
+class AutoscalingTester {
+
+    private final ProvisioningTester provisioningTester;
+    private final Autoscaler autoscaler;
+    private final NodeMetricsDb db;
+    private final MockHostResourcesCalculator hostResourcesCalculator;
+
+    /** Creates an autoscaling tester with a single host type ready */
+    public AutoscalingTester(NodeResources hostResources) {
+        // No host provisioner or flag source is supplied (both null) in this configuration
+        this(new Zone(Environment.prod, RegionName.from("us-east")), null, null, asConfig(hostResources));
+        provisioningTester.makeReadyNodes(20, "hostFlavor", NodeType.host, 8); // "hostFlavor" generated by asConfig
+        provisioningTester.deployZoneApp();
+    }
+
+    /** Creates an autoscaling tester with dynamic provisioning enabled, provisioning hosts of the given flavors */
+    public AutoscalingTester(Zone zone, List<Flavor> flavors) {
+        this(zone,
+             new MockHostProvisioner(flavors),
+             new InMemoryFlagSource().withBooleanFlag(Flags.ENABLE_DYNAMIC_PROVISIONING.id(), true),
+             asConfig(flavors));
+    }
+
+    private AutoscalingTester(Zone zone, MockHostProvisioner hostProvisioner, FlagSource flagSource, FlavorsConfig flavorsConfig) {
+        provisioningTester = new ProvisioningTester.Builder().zone(zone)
+                                                             .flavorsConfig(flavorsConfig)
+                                                             .hostProvisioner(hostProvisioner)
+                                                             .flagSource(flagSource)
+                                                             .build();
+
+        hostResourcesCalculator = new MockHostResourcesCalculator(zone);
+        db = new NodeMetricsDb();
+        autoscaler = new Autoscaler(hostResourcesCalculator, db, nodeRepository());
+    }
+
+    /** Returns the id of the application with the given name in tenant "tenant1", instance "instance1" */
+    public ApplicationId applicationId(String applicationName) {
+        return ApplicationId.from("tenant1", applicationName, "instance1");
+    }
+
+    /** Returns a cluster spec request of the given type and id, at version 7 */
+    public ClusterSpec clusterSpec(ClusterSpec.Type type, String clusterId) {
+        return ClusterSpec.request(type,
+                                   ClusterSpec.Id.from(clusterId),
+                                   Version.fromString("7"),
+                                   false);
+    }
+
+    /** Deploys the cluster with the node count, group count and node resources of the given cluster resources */
+    public void deploy(ApplicationId application, ClusterSpec cluster, ClusterResources resources) {
+        deploy(application, cluster, resources.nodes(), resources.groups(), resources.nodeResources());
+    }
+
+    /** Prepares the cluster with the given capacity, makes each prepared host ready, and activates the result */
+    public void deploy(ApplicationId application, ClusterSpec cluster, int nodes, int groups, NodeResources resources) {
+        List<HostSpec> hosts = provisioningTester.prepare(application, cluster, Capacity.fromCount(nodes, resources), groups);
+        for (HostSpec host : hosts)
+            makeReady(host.hostname());
+        provisioningTester.deployZoneApp();
+        provisioningTester.activate(application, hosts);
+    }
+
+    /**
+     * Assigns an IP config to the node with the given hostname, and sets its parent host ready
+     * if the host is in state provisioned.
+     *
+     * @param hostname the hostname of a node which exists in the node repository and has a parent host
+     */
+    public void makeReady(String hostname) {
+        Node node = nodeRepository().getNode(hostname).get();
+        // Release the node lock as soon as the write is done rather than leaving it held
+        try (Mutex lock = nodeRepository().lock(node)) {
+            nodeRepository().write(node.with(new IP.Config(Set.of("::" + 0 + ":0"), Set.of())), lock);
+        }
+        Node host = nodeRepository().getNode(node.parentHostname().get()).get();
+        host = host.with(new IP.Config(Set.of("::" + 0 + ":0"), Set.of("::" + 0 + ":2")));
+        if (host.state() == Node.State.provisioned)
+            nodeRepository().setReady(List.of(host), Agent.system, getClass().getSimpleName());
+    }
+
+    /** Marks the active nodes of the application which are retired as removable, then redeploys the cluster */
+    public void deactivateRetired(ApplicationId application, ClusterSpec cluster, ClusterResources resources) {
+        try (Mutex lock = nodeRepository().lock(application)) {
+            for (Node node : nodeRepository().getNodes(application, Node.State.active)) {
+                if (node.allocation().get().membership().retired())
+                    nodeRepository().write(node.with(node.allocation().get().removable()), lock);
+            }
+        }
+        deploy(application, cluster, resources);
+    }
+
+    /**
+     * Adds measurements with the given resource value and ideal values for the other resources,
+     * scaled to take one node redundancy into account.
+     * (I.e. we adjust to measure a bit lower load than "naively" wanted to offset for the autoscaler
+     * wanting to see the ideal load with one node missing.)
+     * The clock is advanced by one minute before each round of measurements.
+     *
+     * @param resource the resource we are explicitly setting the value of
+     * @param value the load value to use for the given resource, before scaling
+     * @param otherResourcesLoad the load factor relative to ideal to use for other resources
+     * @param count the number of measurements
+     * @param applicationId the application we're adding measurements for all nodes of
+     */
+    public void addMeasurements(Resource resource, float value, float otherResourcesLoad,
+                                int count, ApplicationId applicationId) {
+        List<Node> nodes = nodeRepository().getNodes(applicationId, Node.State.active);
+        // (n - 1) / n: all recorded values are scaled down by this; note it is 0 for a single node
+        float oneExtraNodeFactor = (float)(nodes.size() - 1.0) / (nodes.size());
+        for (int i = 0; i < count; i++) {
+            clock().advance(Duration.ofMinutes(1));
+            for (Node node : nodes) {
+                for (Resource r : Resource.values()) {
+                    float effectiveValue = (r == resource ? value : (float) r.idealAverageLoad() * otherResourcesLoad)
+                                           * oneExtraNodeFactor;
+                    db.add(List.of(new NodeMetrics.MetricValue(node.hostname(),
+                                                               r.metricName(),
+                                                               clock().instant().toEpochMilli(),
+                                                               effectiveValue)));
+                }
+            }
+        }
+    }
+
+    /** Returns the autoscaler's suggestion for the cluster given the currently active nodes of the application */
+    public Optional<ClusterResources> autoscale(ApplicationId application, ClusterSpec cluster) {
+        return autoscaler.autoscale(application, cluster, nodeRepository().getNodes(application, Node.State.active));
+    }
+
+    /**
+     * Asserts that the given resources are present and have the given node and group count,
+     * and cpu, memory and disk equal to the given values after rounding to one decimal.
+     *
+     * @return the asserted resources
+     */
+    public ClusterResources assertResources(String message,
+                                            int nodeCount, int groupCount,
+                                            double approxCpu, double approxMemory, double approxDisk,
+                                            Optional<ClusterResources> actualResources) {
+        double delta = 0.0000000001;
+        assertTrue(message, actualResources.isPresent());
+        assertEquals("Node count: " + message, nodeCount, actualResources.get().nodes());
+        assertEquals("Group count: " + message, groupCount, actualResources.get().groups());
+        assertEquals("Cpu: " + message, approxCpu, Math.round(actualResources.get().nodeResources().vcpu() * 10) / 10.0, delta);
+        assertEquals("Memory: " + message, approxMemory, Math.round(actualResources.get().nodeResources().memoryGb() * 10) / 10.0, delta);
+        assertEquals("Disk: " + message, approxDisk, Math.round(actualResources.get().nodeResources().diskGb() * 10) / 10.0, delta);
+        return actualResources.get();
+    }
+
+    public ManualClock clock() {
+        return provisioningTester.clock();
+    }
+
+    public NodeRepository nodeRepository() {
+        return provisioningTester.nodeRepository();
+    }
+
+    /** Returns a flavors config with a single flavor named "hostFlavor" having the given resources */
+    private static FlavorsConfig asConfig(NodeResources hostResources) {
+        FlavorsConfig.Builder b = new FlavorsConfig.Builder();
+        b.flavor(asFlavorConfig("hostFlavor", hostResources));
+        return b.build();
+    }
+
+    /** Returns a flavors config with one entry per given flavor, keeping its name and resources */
+    private static FlavorsConfig asConfig(List<Flavor> flavors) {
+        FlavorsConfig.Builder b = new FlavorsConfig.Builder();
+        for (Flavor flavor : flavors)
+            b.flavor(asFlavorConfig(flavor.name(), flavor.resources()));
+        return b.build();
+    }
+
+    private static FlavorsConfig.Flavor.Builder asFlavorConfig(String flavorName, NodeResources resources) {
+        FlavorsConfig.Flavor.Builder flavor = new FlavorsConfig.Flavor.Builder();
+        flavor.name(flavorName);
+        flavor.minCpuCores(resources.vcpu());
+        flavor.minMainMemoryAvailableGb(resources.memoryGb());
+        flavor.minDiskAvailableGb(resources.diskGb());
+        flavor.bandwidth(resources.bandwidthGbps() * 1000); // Gbps -> the config's unit, presumably Mbps
+        return flavor;
+    }
+
+    /** A calculator which adds 3 Gb of memory to the host resources in aws, and returns them unchanged elsewhere */
+    private static class MockHostResourcesCalculator implements HostResourcesCalculator {
+
+        private final Zone zone;
+
+        public MockHostResourcesCalculator(Zone zone) {
+            this.zone = zone;
+        }
+
+        @Override
+        public NodeResources availableCapacityOf(String flavorName, NodeResources hostResources) {
+            if (zone.cloud().value().equals("aws"))
+                return hostResources.withMemoryGb(hostResources.memoryGb() + 3);
+            else
+                return hostResources;
+        }
+
+    }
+
+    /** A host provisioner which creates hosts of the flavor whose resource numbers equal those requested */
+    private static class MockHostProvisioner implements HostProvisioner {
+
+        private final List<Flavor> hostFlavors;
+
+        public MockHostProvisioner(List<Flavor> hostFlavors) {
+            this.hostFlavors = hostFlavors;
+        }
+
+        /**
+         * Returns one provisioned host per provision index.
+         *
+         * @throws RuntimeException if no flavor has the same resource numbers as the requested resources
+         */
+        @Override
+        public List<ProvisionedHost> provisionHosts(List<Integer> provisionIndexes, NodeResources resources, ApplicationId applicationId) {
+            Flavor hostFlavor = hostFlavors.stream().filter(f -> f.resources().justNumbers().equals(resources.justNumbers())).findAny()
+                                           .orElseThrow(() -> new RuntimeException("No flavor matching " + resources + ". Flavors: " + hostFlavors));
+
+            List<ProvisionedHost> hosts = new ArrayList<>();
+            for (int index : provisionIndexes) {
+                hosts.add(new ProvisionedHost("host" + index,
+                                              "hostname" + index,
+                                              hostFlavor,
+                                              "nodename" + index,
+                                              resources));
+            }
+            return hosts;
+        }
+
+        @Override
+        public List<Node> provision(Node host, Set<Node> children) throws FatalProvisioningException {
+            throw new RuntimeException("Not implemented");
+        }
+
+        @Override
+        public void deprovision(Node host) {
+            throw new RuntimeException("Not implemented");
+        }
+
+    }
+
+}
diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/NodeMetricsDbTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/NodeMetricsDbTest.java
new file mode 100644
index 00000000000..519235857f1
--- /dev/null
+++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/NodeMetricsDbTest.java
@@ -0,0 +1,33 @@
+// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.hosted.provision.autoscale;
+
+import com.yahoo.test.ManualClock;
+import org.junit.Test;
+
+import java.time.Duration;
+import java.util.ArrayList;
+import java.util.List;
+
+import static org.junit.Assert.assertEquals;
+
+/** Tests adding measurements to a NodeMetricsDb, reading measurement windows from it, and running gc on it. */
+public class NodeMetricsDbTest {
+
+ @Test
+ public void testNodeMetricsDb() {
+ ManualClock clock = new ManualClock();
+ NodeMetricsDb db = new NodeMetricsDb();
+ List<NodeMetrics.MetricValue> values = new ArrayList<>();
+ // 40 "cpu.util" values for host0, timestamped one hour apart; the clock ends one hour after the last value
+ for (int i = 0; i < 40; i++) {
+ values.add(new NodeMetrics.MetricValue("host0", "cpu.util", clock.instant().toEpochMilli(), 0.9f));
+ clock.advance(Duration.ofHours(1));
+ }
+ db.add(values);
+
+ // A window starting 30 hours before now is expected to hold the 30 latest cpu values,
+ // and no memory values, since none were added
+ assertEquals(30, db.getWindow(clock.instant().minus(Duration.ofHours(30)), Resource.cpu, List.of("host0")).measurementCount());
+ assertEquals( 0, db.getWindow(clock.instant().minus(Duration.ofHours(30)), Resource.memory, List.of("host0")).measurementCount());
+ // After gc only 24 cpu values are expected in the same window
+ // NOTE(review): presumably gc discards values older than 24 hours - confirm against NodeMetricsDb.gc
+ db.gc(clock);
+ assertEquals(24, db.getWindow(clock.instant().minus(Duration.ofHours(30)), Resource.cpu, List.of("host0")).measurementCount());
+ assertEquals( 0, db.getWindow(clock.instant().minus(Duration.ofHours(30)), Resource.memory, List.of("host0")).measurementCount());
+ }
+
+}
diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/NodeMetricsFetcherTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/NodeMetricsFetcherTest.java
new file mode 100644
index 00000000000..4376bfd38b0
--- /dev/null
+++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/NodeMetricsFetcherTest.java
@@ -0,0 +1,147 @@
+// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.hosted.provision.autoscale;
+
+import com.yahoo.config.provision.ApplicationId;
+import com.yahoo.config.provision.Capacity;
+import com.yahoo.config.provision.NodeResources;
+import com.yahoo.vespa.hosted.provision.provisioning.ProvisioningTester;
+import com.yahoo.vespa.hosted.provision.testutils.OrchestratorMock;
+import com.yahoo.vespa.applicationmodel.HostName;
+import org.junit.Test;
+
+import java.util.ArrayList;
+import java.util.List;
+
+import static org.junit.Assert.assertEquals;
+
+public class NodeMetricsFetcherTest {
+ // Verifies the URL NodeMetricsFetcher requests for an application and the metric values it returns from canned JSON responses.
+ @Test
+ public void testMetricsFetch() {
+ NodeResources resources = new NodeResources(1, 10, 100, 1);
+ ProvisioningTester tester = new ProvisioningTester.Builder().build();
+ OrchestratorMock orchestrator = new OrchestratorMock();
+ MockHttpClient httpClient = new MockHttpClient();
+ NodeMetricsFetcher fetcher = new NodeMetricsFetcher(tester.nodeRepository(), orchestrator, httpClient);
+
+ tester.makeReadyNodes(4, resources); // Creates (in order) host-1.yahoo.com, host-2.yahoo.com, host-3.yahoo.com, host-4.yahoo.com
+ tester.deployZoneApp();
+
+ ApplicationId application1 = tester.makeApplicationId();
+ ApplicationId application2 = tester.makeApplicationId();
+ tester.deploy(application1, Capacity.fromCount(2, resources)); // host-1.yahoo.com, host-2.yahoo.com
+ tester.deploy(application2, Capacity.fromCount(2, resources)); // host-4.yahoo.com, host-3.yahoo.com
+ // NOTE(review): host-4 is suspended here, presumably so the fetcher skips it; the application2 request below is asserted to go to host-3 - confirm in NodeMetricsFetcher
+ orchestrator.suspend(new HostName("host-4.yahoo.com"));
+ // Application 1: first request recorded by the mock; the canned response carries 3 values for host-1 and 2 for host-2
+ {
+ httpClient.cannedResponse = cannedResponseForApplication1;
+ List<NodeMetrics.MetricValue> values = new ArrayList<>(fetcher.fetchMetrics(application1));
+ assertEquals("http://host-1.yahoo.com:4080/metrics/v2/values?consumer=vespa-consumer-metrics",
+ httpClient.requestsReceived.get(0));
+ assertEquals(5, values.size());
+ assertEquals("metric value cpu.util: 16.2 at 1234 for host-1.yahoo.com", values.get(0).toString());
+ assertEquals("metric value memory.util: 23.1 at 1234 for host-1.yahoo.com", values.get(1).toString());
+ assertEquals("metric value disk.util: 82.0 at 1234 for host-1.yahoo.com", values.get(2).toString());
+ assertEquals("metric value cpu.util: 20.0 at 1200 for host-2.yahoo.com", values.get(3).toString());
+ assertEquals("metric value disk.util: 40.0 at 1200 for host-2.yahoo.com", values.get(4).toString());
+ }
+ // Application 2: second request recorded by the mock (index 1); the canned response carries 3 values for host-3
+ {
+ httpClient.cannedResponse = cannedResponseForApplication2;
+ List<NodeMetrics.MetricValue> values = new ArrayList<>(fetcher.fetchMetrics(application2));
+ assertEquals("http://host-3.yahoo.com:4080/metrics/v2/values?consumer=vespa-consumer-metrics",
+ httpClient.requestsReceived.get(1));
+ assertEquals(3, values.size());
+ assertEquals("metric value cpu.util: 10.0 at 1300 for host-3.yahoo.com", values.get(0).toString());
+ assertEquals("metric value memory.util: 15.0 at 1300 for host-3.yahoo.com", values.get(1).toString());
+ assertEquals("metric value disk.util: 20.0 at 1300 for host-3.yahoo.com", values.get(2).toString());
+ }
+ }
+ // HttpClient stub: records every requested URL in order and answers each request with the currently set canned response.
+ private static class MockHttpClient implements NodeMetricsFetcher.HttpClient {
+
+ List<String> requestsReceived = new ArrayList<>();
+
+ String cannedResponse = null;
+ @Override
+ public String get(String url) {
+ requestsReceived.add(url);
+ return cannedResponse;
+ }
+
+ @Override
+ public void close() { }
+
+ }
+ // Canned response with two nodes: host-1 (cpu, memory and disk values) and host-2 (cpu and disk values only).
+ final String cannedResponseForApplication1 =
+ "{\n" +
+ " \"nodes\": [\n" +
+ " {\n" +
+ " \"hostname\": \"host-1.yahoo.com\",\n" +
+ " \"role\": \"role0\",\n" +
+ " \"node\": {\n" +
+ " \"timestamp\": 1234,\n" +
+ " \"metrics\": [\n" +
+ " {\n" +
+ " \"values\": {\n" +
+ " \"cpu.util\": 16.2,\n" +
+ " \"memory.util\": 23.1,\n" +
+ " \"disk.util\": 82\n" +
+ " },\n" +
+ " \"dimensions\": {\n" +
+ " \"state\": \"active\"\n" +
+ " }\n" +
+ " }\n" +
+ " ]\n" +
+ " }\n" +
+ " },\n" +
+ " {\n" +
+ " \"hostname\": \"host-2.yahoo.com\",\n" +
+ " \"role\": \"role1\",\n" +
+ " \"node\": {\n" +
+ " \"timestamp\": 1200,\n" +
+ " \"metrics\": [\n" +
+ " {\n" +
+ " \"values\": {\n" +
+ " \"cpu.util\": 20,\n" +
+ " \"disk.util\": 40\n" +
+ " },\n" +
+ " \"dimensions\": {\n" +
+ " \"state\": \"active\"\n" +
+ " }\n" +
+ " }\n" +
+ " ]\n" +
+ " }\n" +
+ " }\n" +
+ " ]\n" +
+ "}\n";
+
+ // Canned response with a single node, host-3 (cpu, memory and disk values).
+ final String cannedResponseForApplication2 =
+ "{\n" +
+ " \"nodes\": [\n" +
+ " {\n" +
+ " \"hostname\": \"host-3.yahoo.com\",\n" +
+ " \"role\": \"role0\",\n" +
+ " \"node\": {\n" +
+ " \"timestamp\": 1300,\n" +
+ " \"metrics\": [\n" +
+ " {\n" +
+ " \"values\": {\n" +
+ " \"cpu.util\": 10,\n" +
+ " \"memory.util\": 15,\n" +
+ " \"disk.util\": 20\n" +
+ " },\n" +
+ " \"dimensions\": {\n" +
+ " \"state\": \"active\"\n" +
+ " }\n" +
+ " }\n" +
+ " ]\n" +
+ " }\n" +
+ " }\n" +
+ " ]\n" +
+ "}\n";
+
+}
diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/DynamicDockerProvisionTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/DynamicDockerProvisionTest.java
index 677aaf93336..8706661f261 100644
--- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/DynamicDockerProvisionTest.java
+++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/DynamicDockerProvisionTest.java
@@ -69,7 +69,7 @@ public class DynamicDockerProvisionTest {
@Test
public void does_not_allocate_to_available_empty_hosts() {
tester.makeReadyNodes(3, "small", NodeType.host, 10);
- deployZoneApp(tester);
+ tester.deployZoneApp();
ApplicationId application = tester.makeApplicationId();
NodeResources flavor = new NodeResources(1, 4, 10, 1);
@@ -89,7 +89,7 @@ public class DynamicDockerProvisionTest {
tester.prepare(application, clusterSpec("myContent.t2.a2"), 2, 1, flavor);
verify(hostProvisioner).provisionHosts(expectedProvisionIndexes, flavor, application);
- // Ready the provisioned hosts, add an IP addreses to pool and activate them
+ // Ready the provisioned hosts, add IP addresses to pool and activate them
for (Integer i : expectedProvisionIndexes) {
String hostname = "host-" + i;
var ipConfig = new IP.Config(Set.of("::" + i + ":0"), Set.of("::" + i + ":2"));
@@ -97,7 +97,7 @@ public class DynamicDockerProvisionTest {
tester.nodeRepository().setReady(List.of(host), Agent.system, getClass().getSimpleName());
nameResolver.addRecord(hostname + "-2", "::" + i + ":2");
}
- deployZoneApp(tester);
+ tester.deployZoneApp();
mockHostProvisioner(hostProvisioner, tester.nodeRepository().getAvailableFlavors().getFlavorOrThrow("small"));
tester.prepare(application, clusterSpec("another-id"), 2, 1, flavor);
@@ -145,19 +145,6 @@ public class DynamicDockerProvisionTest {
assertTrue(indices.containsAll(IntStream.range(0, 10).boxed().collect(Collectors.toList())));
}
- private static void deployZoneApp(ProvisioningTester tester) {
- ApplicationId applicationId = tester.makeApplicationId();
- List<HostSpec> list = tester.prepare(applicationId,
- ClusterSpec.request(ClusterSpec.Type.container,
- ClusterSpec.Id.from("node-admin"),
- Version.fromString("6.42"),
- false),
- Capacity.fromRequiredNodeType(NodeType.host),
- 1);
- tester.activate(applicationId, ImmutableSet.copyOf(list));
- }
-
-
private static ClusterSpec clusterSpec(String clusterId) {
return ClusterSpec.request(ClusterSpec.Type.content, ClusterSpec.Id.from(clusterId), Version.fromString("6.42"), false);
}
@@ -172,4 +159,5 @@ public class DynamicDockerProvisionTest {
.collect(Collectors.toList());
}).when(hostProvisioner).provisionHosts(any(), any(), any());
}
+
}
diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/ProvisioningTester.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/ProvisioningTester.java
index e464ed07472..85a6ed31073 100644
--- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/ProvisioningTester.java
+++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/ProvisioningTester.java
@@ -447,6 +447,7 @@ public class ProvisioningTester {
}
public static final class Builder {
+
private Curator curator;
private FlavorsConfig flavorsConfig;
private Zone zone;
diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/restapi/v2/responses/maintenance.json b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/restapi/v2/responses/maintenance.json
index 02746f1c79a..ab608bac2b4 100644
--- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/restapi/v2/responses/maintenance.json
+++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/restapi/v2/responses/maintenance.json
@@ -1,6 +1,9 @@
{
"jobs": [
{
+ "name": "AutoscalingMaintainer"
+ },
+ {
"name": "CapacityReportMaintainer"
},
{
@@ -25,6 +28,9 @@
"name": "NodeFailer"
},
{
+ "name": "NodeMetricsDbMaintainer"
+ },
+ {
"name": "NodeRebooter"
},
{