diff options
author | Jon Bratseth <bratseth@gmail.com> | 2023-03-13 15:16:10 +0100 |
---|---|---|
committer | Jon Bratseth <bratseth@gmail.com> | 2023-03-13 15:16:10 +0100 |
commit | 756a5944e6455f9c0379f37a53e18221ac9f0343 (patch) | |
tree | 1f813f52cf396b43e48ccae43171c68bd74dc925 /node-repository/src/main | |
parent | 3afdc7b7bb0f4535c80c7b9ea802bd4d10ac5774 (diff) |
Autoscale by GPU signals too
Just take the max of CPU and GPU load.
Diffstat (limited to 'node-repository/src/main')
-rw-r--r-- | node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/MetricsResponse.java | 20 |
1 file changed, 17 insertions, 3 deletions
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/MetricsResponse.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/MetricsResponse.java index 8b7a2bafc40..fa9cfff0d68 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/MetricsResponse.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/MetricsResponse.java @@ -120,11 +120,13 @@ public class MetricsResponse { cpu { // a node resource @Override - public List<String> metricResponseNames() { return List.of(HostedNodeAdminMetrics.CPU_UTIL.baseName()); } + public List<String> metricResponseNames() { + return List.of(HostedNodeAdminMetrics.CPU_UTIL.baseName(), HostedNodeAdminMetrics.GPU_UTIL.baseName()); + } @Override double computeFinal(ListMap<String, Double> values) { - return values.values().stream().flatMap(List::stream).mapToDouble(v -> v).average().orElse(0) / 100; // % to ratio + return values.values().stream().flatMap(List::stream).mapToDouble(v -> v).max().orElse(0) / 100; // % to ratio } }, @@ -133,11 +135,17 @@ public class MetricsResponse { @Override public List<String> metricResponseNames() { return List.of(HostedNodeAdminMetrics.MEM_UTIL.baseName(), - SearchNodeMetrics.CONTENT_PROTON_RESOURCE_USAGE_MEMORY.average()); + SearchNodeMetrics.CONTENT_PROTON_RESOURCE_USAGE_MEMORY.average(), + HostedNodeAdminMetrics.GPU_MEM_USED.baseName(), + HostedNodeAdminMetrics.GPU_MEM_TOTAL.baseName()); } @Override double computeFinal(ListMap<String, Double> values) { + return Math.max(gpuMemUtil(values), cpuMemUtil(values)); + } + + private double cpuMemUtil(ListMap<String, Double> values) { var valueList = values.get(SearchNodeMetrics.CONTENT_PROTON_RESOURCE_USAGE_MEMORY.average()); // prefer over mem.util if ( ! 
valueList.isEmpty()) return valueList.get(0); @@ -147,6 +155,12 @@ public class MetricsResponse { return 0; } + private double gpuMemUtil(ListMap<String, Double> values) { + var usedGpuMemory = values.get(HostedNodeAdminMetrics.GPU_MEM_USED.baseName()).stream().mapToDouble(v -> v).sum(); + var totalGpuMemory = values.get(HostedNodeAdminMetrics.GPU_MEM_TOTAL.baseName()).stream().mapToDouble(v -> v).sum(); + return totalGpuMemory > 0 ? usedGpuMemory / totalGpuMemory : 0; + } + }, disk { // a node resource |