about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Jon Bratseth <bratseth@gmail.com> 2021-03-11 18:40:06 +0100
committer: Jon Bratseth <bratseth@gmail.com> 2021-03-11 18:40:06 +0100
commit: d3c80d38bab48b77b64cb5529e08136fe796aca6 (patch)
tree: a00b3199541d8ae474d0be4c970217d4b4900435
parent: cb3f517a6d0e6aeb4433552cbb9d2c0b6c3c935c (diff)
Expose more cluster data in nodes/v2
-rw-r--r-- node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/Autoscaler.java 7
-rw-r--r-- node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterNodesTimeseries.java 8
-rw-r--r-- node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterTimeseries.java 8
-rw-r--r-- node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ResourceTarget.java 16
-rw-r--r-- node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/SpareCapacityMaintainer.java 4
-rw-r--r-- node-repository/src/main/java/com/yahoo/vespa/hosted/provision/restapi/ApplicationSerializer.java 50
-rw-r--r-- node-repository/src/main/java/com/yahoo/vespa/hosted/provision/restapi/NodesV2ApiHandler.java 2
-rw-r--r-- node-repository/src/test/java/com/yahoo/vespa/hosted/provision/restapi/responses/application1.json 11
-rw-r--r-- node-repository/src/test/java/com/yahoo/vespa/hosted/provision/restapi/responses/application2.json 11
9 files changed, 79 insertions, 38 deletions
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/Autoscaler.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/Autoscaler.java
index 5d5c6fdac5a..c7549a5ddee 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/Autoscaler.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/Autoscaler.java
@@ -80,8 +80,13 @@ public class Autoscaler {
"Have measurements from " + nodesMeasured + " nodes, but require from " + clusterNodes.size());
+ var scalingDuration = cluster.scalingDuration(clusterNodes.clusterSpec());
var clusterTimeseries = metricsDb.getClusterTimeseries(application.id(), cluster.id());
- var target = ResourceTarget.idealLoad(clusterTimeseries, clusterNodesTimeseries, currentAllocation, application);
+ var target = ResourceTarget.idealLoad(scalingDuration,
+ clusterTimeseries,
+ clusterNodesTimeseries,
+ currentAllocation,
+ application);
Optional<AllocatableClusterResources> bestAllocation =
allocationOptimizer.findBestAllocation(target, currentAllocation, limits);
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterNodesTimeseries.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterNodesTimeseries.java
index 173d76e4c26..2d0e77742ec 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterNodesTimeseries.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterNodesTimeseries.java
@@ -16,14 +16,12 @@ import java.util.stream.Collectors;
*/
public class ClusterNodesTimeseries {
- private final Cluster cluster;
private final NodeList clusterNodes;
/** The measurements for all nodes in this snapshot */
private final List<NodeTimeseries> timeseries;
public ClusterNodesTimeseries(Duration period, Cluster cluster, NodeList clusterNodes, MetricsDb db) {
- this.cluster = cluster;
this.clusterNodes = clusterNodes;
var timeseries = db.getNodeTimeseries(period, clusterNodes);
@@ -35,12 +33,6 @@ public class ClusterNodesTimeseries {
this.timeseries = timeseries;
}
- /** The cluster this is a timeseries for */
- public Cluster cluster() { return cluster; }
-
- /** The nodes of the cluster this is a timeseries for */
- public NodeList clusterNodes() { return clusterNodes; }
-
/** Returns the average number of measurements per node */
public int measurementsPerNode() {
int measurementCount = timeseries.stream().mapToInt(m -> m.size()).sum();
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterTimeseries.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterTimeseries.java
index 5b6ed43b713..a435814c21e 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterTimeseries.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterTimeseries.java
@@ -23,6 +23,8 @@ public class ClusterTimeseries {
private final ClusterSpec.Id cluster;
private final List<ClusterMetricSnapshot> snapshots;
+ private Double cachedMaxQueryGrowthRate = null;
+
ClusterTimeseries(ClusterSpec.Id cluster, List<ClusterMetricSnapshot> snapshots) {
this.cluster = cluster;
List<ClusterMetricSnapshot> sortedSnapshots = new ArrayList<>(snapshots);
@@ -48,6 +50,12 @@ public class ClusterTimeseries {
/** The max query growth rate we can predict from this time-series as a fraction of the current traffic per minute */
public double maxQueryGrowthRate() {
+ if (cachedMaxQueryGrowthRate != null)
+ return cachedMaxQueryGrowthRate;
+ return cachedMaxQueryGrowthRate = computeMaxQueryGrowthRate();
+ }
+
+ private double computeMaxQueryGrowthRate() {
if (snapshots.isEmpty()) return 0.1;
// Find the period having the highest growth rate, where total growth exceeds 30% increase
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ResourceTarget.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ResourceTarget.java
index f29181b8343..ab6a6d548e9 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ResourceTarget.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ResourceTarget.java
@@ -49,12 +49,13 @@ public class ResourceTarget {
}
/** Create a target of achieving ideal load given a current load */
- public static ResourceTarget idealLoad(ClusterTimeseries clusterTimeseries,
+ public static ResourceTarget idealLoad(Duration scalingDuration,
+ ClusterTimeseries clusterTimeseries,
ClusterNodesTimeseries clusterNodesTimeseries,
AllocatableClusterResources current,
Application application) {
return new ResourceTarget(nodeUsage(Resource.cpu, clusterNodesTimeseries.averageLoad(Resource.cpu), current)
- / idealCpuLoad(clusterTimeseries, clusterNodesTimeseries, application),
+ / idealCpuLoad(scalingDuration, clusterTimeseries, application),
nodeUsage(Resource.memory, clusterNodesTimeseries.averageLoad(Resource.memory), current)
/ Resource.memory.idealAverageLoad(),
nodeUsage(Resource.disk, clusterNodesTimeseries.averageLoad(Resource.disk), current)
@@ -71,12 +72,11 @@ public class ResourceTarget {
}
/** Ideal cpu load must take the application traffic fraction into account */
- private static double idealCpuLoad(ClusterTimeseries clusterTimeseries,
- ClusterNodesTimeseries clusterNodesTimeseries,
- Application application) {
+ public static double idealCpuLoad(Duration scalingDuration,
+ ClusterTimeseries clusterTimeseries,
+ Application application) {
// What's needed to have headroom for growth during scale-up as a fraction of current resources?
double maxGrowthRate = clusterTimeseries.maxQueryGrowthRate(); // in fraction per minute of the current traffic
- Duration scalingDuration = clusterNodesTimeseries.cluster().scalingDuration(clusterNodesTimeseries.clusterNodes().clusterSpec());
double growthRateHeadroom = 1 + maxGrowthRate * scalingDuration.toMinutes();
// Cap headroom at 10% above the historical observed peak
double fractionOfMax = clusterTimeseries.currentQueryFractionOfMax();
@@ -96,4 +96,8 @@ public class ResourceTarget {
return 1 / growthRateHeadroom * 1 / trafficShiftHeadroom * Resource.cpu.idealAverageLoad();
}
+ public static double idealMemoryLoad() { return Resource.memory.idealAverageLoad(); }
+
+ public static double idealDiskLoad() { return Resource.disk.idealAverageLoad(); }
+
}
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/SpareCapacityMaintainer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/SpareCapacityMaintainer.java
index 79f3dad75d3..0307ae13b24 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/SpareCapacityMaintainer.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/SpareCapacityMaintainer.java
@@ -163,7 +163,7 @@ public class SpareCapacityMaintainer extends NodeRepositoryMaintainer {
ApplicationId application = nodeToRetire.get().allocation().get().owner();
try (MaintenanceDeployment deployment = new MaintenanceDeployment(application, deployer, metric, nodeRepository())) {
- if ( ! deployment.isValid()) return; // this will be done at another config server
+ if ( ! deployment.isValid()) return;
Optional<Node> nodeWithWantToRetire = nodeRepository().nodes().node(nodeToRetire.get().hostname())
.map(node -> node.withWantToRetire(true, Agent.SpareCapacityMaintainer, nodeRepository().clock().instant()));
@@ -171,7 +171,7 @@ public class SpareCapacityMaintainer extends NodeRepositoryMaintainer {
nodeRepository().nodes().write(nodeWithWantToRetire.get(), deployment.applicationLock().get());
log.log(Level.INFO, String.format("Redeploying %s to move %s from overcommitted host",
- application, nodeToRetire.get().hostname()));
+ application, nodeToRetire.get().hostname()));
deployment.activate();
}
}
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/restapi/ApplicationSerializer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/restapi/ApplicationSerializer.java
index 4235bae6850..4d1c963d8ea 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/restapi/ApplicationSerializer.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/restapi/ApplicationSerializer.java
@@ -1,6 +1,7 @@
// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.vespa.hosted.provision.restapi;
+import com.yahoo.config.provision.ApplicationId;
import com.yahoo.config.provision.ClusterResources;
import com.yahoo.slime.Cursor;
import com.yahoo.slime.Slime;
@@ -10,8 +11,10 @@ import com.yahoo.vespa.hosted.provision.applications.Application;
import com.yahoo.vespa.hosted.provision.applications.Cluster;
import com.yahoo.vespa.hosted.provision.applications.ScalingEvent;
import com.yahoo.vespa.hosted.provision.autoscale.ClusterNodesTimeseries;
+import com.yahoo.vespa.hosted.provision.autoscale.ClusterTimeseries;
import com.yahoo.vespa.hosted.provision.autoscale.MetricsDb;
import com.yahoo.vespa.hosted.provision.autoscale.Resource;
+import com.yahoo.vespa.hosted.provision.autoscale.ResourceTarget;
import java.net.URI;
import java.time.Duration;
@@ -20,50 +23,60 @@ import java.util.List;
/**
* Serializes application information for nodes/v2/application responses
+ *
+ * @author bratseth
*/
public class ApplicationSerializer {
- public static Slime toSlime(Application application, List<Node> applicationNodes, MetricsDb metricsDb, URI applicationUri) {
+ public static Slime toSlime(Application application, NodeList applicationNodes, MetricsDb metricsDb, URI applicationUri) {
Slime slime = new Slime();
toSlime(application, applicationNodes, metricsDb, slime.setObject(), applicationUri);
return slime;
}
private static void toSlime(Application application,
- List<Node> applicationNodes,
+ NodeList applicationNodes,
MetricsDb metricsDb,
Cursor object,
URI applicationUri) {
object.setString("url", applicationUri.toString());
object.setString("id", application.id().toFullString());
- clustersToSlime(application.clusters().values(), applicationNodes, metricsDb, object.setObject("clusters"));
+ clustersToSlime(application, applicationNodes, metricsDb, object.setObject("clusters"));
}
- private static void clustersToSlime(Collection<Cluster> clusters,
- List<Node> applicationNodes,
+ private static void clustersToSlime(Application application,
+ NodeList applicationNodes,
MetricsDb metricsDb,
Cursor clustersObject) {
- clusters.forEach(cluster -> toSlime(cluster, applicationNodes, metricsDb, clustersObject));
+ application.clusters().values().forEach(cluster -> toSlime(application, cluster, applicationNodes, metricsDb, clustersObject));
}
- private static void toSlime(Cluster cluster,
- List<Node> applicationNodes,
+ private static void toSlime(Application application,
+ Cluster cluster,
+ NodeList applicationNodes,
MetricsDb metricsDb,
Cursor clustersObject) {
- NodeList nodes = NodeList.copyOf(applicationNodes).not().retired().cluster(cluster.id());
+ NodeList nodes = applicationNodes.not().retired().cluster(cluster.id());
if (nodes.isEmpty()) return;
ClusterResources currentResources = nodes.toResources();
+ Duration scalingDuration = cluster.scalingDuration(nodes.clusterSpec());
+ var clusterNodesTimeseries = new ClusterNodesTimeseries(Duration.ofHours(1), cluster, nodes, metricsDb);
+ var clusterTimeseries = metricsDb.getClusterTimeseries(application.id(), cluster.id());
Cursor clusterObject = clustersObject.setObject(cluster.id().value());
+ clusterObject.setString("type", nodes.clusterSpec().type().name());
toSlime(cluster.minResources(), clusterObject.setObject("min"));
toSlime(cluster.maxResources(), clusterObject.setObject("max"));
toSlime(currentResources, clusterObject.setObject("current"));
if (cluster.shouldSuggestResources(currentResources))
cluster.suggestedResources().ifPresent(suggested -> toSlime(suggested.resources(), clusterObject.setObject("suggested")));
cluster.targetResources().ifPresent(target -> toSlime(target, clusterObject.setObject("target")));
- clusterUtilizationToSlime(cluster, NodeList.copyOf(applicationNodes), metricsDb, clusterObject.setObject("utilization"));
+ clusterUtilizationToSlime(application, scalingDuration, clusterTimeseries, clusterNodesTimeseries, clusterObject.setObject("utilization"));
scalingEventsToSlime(cluster.scalingEvents(), clusterObject.setArray("scalingEvents"));
clusterObject.setString("autoscalingStatus", cluster.autoscalingStatus());
+ clusterObject.setLong("scalingDuration", scalingDuration.toMillis());
+ clusterObject.setDouble("maxQueryGrowthRate", clusterTimeseries.maxQueryGrowthRate());
+ clusterObject.setDouble("currentQueryFractionOfMax", clusterTimeseries.currentQueryFractionOfMax());
}
private static void toSlime(ClusterResources resources, Cursor clusterResourcesObject) {
@@ -72,12 +85,17 @@ public class ApplicationSerializer {
NodeResourcesSerializer.toSlime(resources.nodeResources(), clusterResourcesObject.setObject("resources"));
}
- private static void clusterUtilizationToSlime(Cluster cluster, NodeList nodes, MetricsDb metricsDb, Cursor utilizationObject) {
- var timeseries = new ClusterNodesTimeseries(Duration.ofHours(1), cluster, nodes, metricsDb);
-
- utilizationObject.setDouble("cpu", timeseries.averageLoad(Resource.cpu));
- utilizationObject.setDouble("memory", timeseries.averageLoad(Resource.memory));
- utilizationObject.setDouble("disk", timeseries.averageLoad(Resource.disk));
+ private static void clusterUtilizationToSlime(Application application,
+ Duration scalingDuration,
+ ClusterTimeseries clusterTimeseries,
+ ClusterNodesTimeseries clusterNodesTimeseries,
+ Cursor utilizationObject) {
+ utilizationObject.setDouble("cpu", clusterNodesTimeseries.averageLoad(Resource.cpu));
+ utilizationObject.setDouble("idealCpu", ResourceTarget.idealCpuLoad(scalingDuration, clusterTimeseries, application));
+ utilizationObject.setDouble("memory", clusterNodesTimeseries.averageLoad(Resource.memory));
+ utilizationObject.setDouble("idealMemory", ResourceTarget.idealMemoryLoad());
+ utilizationObject.setDouble("disk", clusterNodesTimeseries.averageLoad(Resource.disk));
+ utilizationObject.setDouble("idealDisk", ResourceTarget.idealDiskLoad());
}
private static void scalingEventsToSlime(List<ScalingEvent> scalingEvents, Cursor scalingEventsArray) {
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/restapi/NodesV2ApiHandler.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/restapi/NodesV2ApiHandler.java
index 62c7f40f7da..2442ff9d565 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/restapi/NodesV2ApiHandler.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/restapi/NodesV2ApiHandler.java
@@ -445,7 +445,7 @@ public class NodesV2ApiHandler extends LoggingRequestHandler {
if (application.isEmpty())
return ErrorResponse.notFoundError("No application '" + id + "'");
Slime slime = ApplicationSerializer.toSlime(application.get(),
- nodeRepository.nodes().list(Node.State.active).owner(id).asList(),
+ nodeRepository.nodes().list(Node.State.active).owner(id),
metricsDb,
withPath("/nodes/v2/applications/" + id, uri));
return new SlimeJsonResponse(slime);
diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/restapi/responses/application1.json b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/restapi/responses/application1.json
index a35c742bc2a..1e9a2d60837 100644
--- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/restapi/responses/application1.json
+++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/restapi/responses/application1.json
@@ -3,6 +3,7 @@
"id" : "tenant1.application1.instance1",
"clusters" : {
"id1" : {
+ "type": "container",
"min" : {
"nodes" : 2,
"groups" : 1,
@@ -65,8 +66,11 @@
},
"utilization" : {
"cpu" : 0.0,
+ "idealCpu": 0.2,
"memory" : 0.0,
- "disk" : 0.0
+ "idealMemory": 0.7,
+ "disk" : 0.0,
+ "idealDisk": 0.6
},
"scalingEvents" : [
{
@@ -97,7 +101,10 @@
"at" : 123
}
],
- "autoscalingStatus" : ""
+ "autoscalingStatus": "",
+ "scalingDuration": 600000,
+ "maxQueryGrowthRate": 0.1,
+ "currentQueryFractionOfMax": 0.5
}
}
}
diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/restapi/responses/application2.json b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/restapi/responses/application2.json
index baf2528c74a..376b748ff8e 100644
--- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/restapi/responses/application2.json
+++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/restapi/responses/application2.json
@@ -3,6 +3,7 @@
"id": "tenant2.application2.instance2",
"clusters": {
"id2": {
+ "type": "content",
"min": {
"nodes": 2,
"groups": 1,
@@ -41,8 +42,11 @@
},
"utilization" : {
"cpu" : 0.0,
+ "idealCpu": 0.19047619047619047,
"memory" : 0.0,
- "disk" : 0.0
+ "idealMemory": 0.7,
+ "disk" : 0.0,
+ "idealDisk": 0.6
},
"scalingEvents" : [
{
@@ -73,7 +77,10 @@
"at" : 123
}
],
- "autoscalingStatus" : ""
+ "autoscalingStatus" : "",
+ "scalingDuration": 43200000,
+ "maxQueryGrowthRate": 0.1,
+ "currentQueryFractionOfMax": 0.5
}
}
}