summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- config-model/src/main/java/com/yahoo/vespa/model/admin/monitoring/SystemMetrics.java | 4
-rw-r--r-- docker-api/src/main/java/com/yahoo/vespa/hosted/dockerapi/ContainerStats.java | 12
-rw-r--r-- node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImpl.java | 41
-rw-r--r-- node-admin/src/test/resources/docker.stats.json | 12
-rw-r--r-- node-admin/src/test/resources/expected.container.system.metrics.txt | 3
5 files changed, 51 insertions, 21 deletions
diff --git a/config-model/src/main/java/com/yahoo/vespa/model/admin/monitoring/SystemMetrics.java b/config-model/src/main/java/com/yahoo/vespa/model/admin/monitoring/SystemMetrics.java
index 6faceee3a13..711fedcfef6 100644
--- a/config-model/src/main/java/com/yahoo/vespa/model/admin/monitoring/SystemMetrics.java
+++ b/config-model/src/main/java/com/yahoo/vespa/model/admin/monitoring/SystemMetrics.java
@@ -12,7 +12,8 @@ import java.util.Set;
public class SystemMetrics {
public static final String CPU_UTIL = "cpu.util";
public static final String CPU_SYS_UTIL = "cpu.sys.util";
- public static final String CPU_THROTTLED_TIME = "cpu.throttled_time.ns";
+ public static final String CPU_THROTTLED_TIME = "cpu.throttled_time";
+ public static final String CPU_THROTTLED_CPU_TIME = "cpu.throttled_cpu_time";
public static final String CPU_VCPUS = "cpu.vcpus";
public static final String DISK_LIMIT = "disk.limit";
public static final String DISK_USED = "disk.used";
@@ -30,6 +31,7 @@ public class SystemMetrics {
ImmutableSet.of(new Metric(CPU_UTIL),
new Metric(CPU_SYS_UTIL),
new Metric(CPU_THROTTLED_TIME),
+ new Metric(CPU_THROTTLED_CPU_TIME),
new Metric(CPU_VCPUS),
new Metric(DISK_LIMIT),
new Metric(DISK_USED),
diff --git a/docker-api/src/main/java/com/yahoo/vespa/hosted/dockerapi/ContainerStats.java b/docker-api/src/main/java/com/yahoo/vespa/hosted/dockerapi/ContainerStats.java
index abb89c3baf9..7fbe47b011c 100644
--- a/docker-api/src/main/java/com/yahoo/vespa/hosted/dockerapi/ContainerStats.java
+++ b/docker-api/src/main/java/com/yahoo/vespa/hosted/dockerapi/ContainerStats.java
@@ -96,6 +96,8 @@ public class ContainerStats {
private final long totalUsage;
private final long usageInKernelMode;
private final long throttledTime;
+ private final long throttlingActivePeriods;
+ private final long throttledPeriods;
public CpuStats(CpuStatsConfig cpuStats) {
// Added in 1.27
@@ -104,13 +106,23 @@ public class ContainerStats {
this.totalUsage = cpuStats.getCpuUsage().getTotalUsage();
this.usageInKernelMode = cpuStats.getCpuUsage().getUsageInKernelmode();
this.throttledTime = cpuStats.getThrottlingData().getThrottledTime();
+ this.throttlingActivePeriods = cpuStats.getThrottlingData().getPeriods();
+ this.throttledPeriods = cpuStats.getThrottlingData().getThrottledPeriods();
}
public int getOnlineCpus() { return this.onlineCpus; }
public long getSystemCpuUsage() { return this.systemCpuUsage; }
public long getTotalUsage() { return totalUsage; }
public long getUsageInKernelMode() { return usageInKernelMode; }
+
+ /** Total CPU time, in nanoseconds, that processes in this container were throttled for */
public long getThrottledTime() { return throttledTime; }
+
+ /** Number of periods when throttling was enabled for this container */
+ public long getThrottlingActivePeriods() { return throttlingActivePeriods; }
+
+ /** Number of periods this container hit the throttling limit */
+ public long getThrottledPeriods() { return throttledPeriods; }
}
// For testing only, create ContainerStats from JSON returned by docker daemon stats API
diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImpl.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImpl.java
index 5ca02f82aa5..3a9eae45607 100644
--- a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImpl.java
+++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImpl.java
@@ -546,7 +546,8 @@ public class NodeAgentImpl implements NodeAgent {
final double allocatedCpuRatio = node.vcpus() / totalNumCpuCores;
double cpuUsageRatioOfAllocated = lastCpuMetric.getCpuUsageRatio() / allocatedCpuRatio;
double cpuKernelUsageRatioOfAllocated = lastCpuMetric.getCpuKernelUsageRatio() / allocatedCpuRatio;
- Long cpuThrottledTime = lastCpuMetric.getThrottledTime();
+ double cpuThrottledTime = lastCpuMetric.getThrottledTime();
+ double cpuThrottledCpuTime = lastCpuMetric.getThrottledCpuTime();
long memoryTotalBytesUsed = memoryTotalBytesUsage - memoryTotalBytesCache;
double memoryUsageRatio = (double) memoryTotalBytesUsed / memoryTotalBytes;
@@ -562,7 +563,8 @@ public class NodeAgentImpl implements NodeAgent {
.withMetric("mem_total.util", 100 * memoryTotalUsageRatio)
.withMetric("cpu.util", 100 * cpuUsageRatioOfAllocated)
.withMetric("cpu.sys.util", 100 * cpuKernelUsageRatioOfAllocated)
- .withMetric("cpu.throttled_time.ns", cpuThrottledTime)
+ .withMetric("cpu.throttled_time", cpuThrottledTime)
+ .withMetric("cpu.throttled_cpu_time", cpuThrottledCpuTime)
.withMetric("cpu.vcpus", node.vcpus())
.withMetric("disk.limit", diskTotalBytes);
@@ -620,26 +622,35 @@ public class NodeAgentImpl implements NodeAgent {
}
class CpuUsageReporter {
+ private static final double BILLION = 1_000_000_000d;
private long containerKernelUsage = 0;
private long totalContainerUsage = 0;
private long totalSystemUsage = 0;
private long throttledTime = 0;
+ private long throttlingActivePeriods = 0;
+ private long throttledPeriods = 0;
private long deltaContainerKernelUsage;
private long deltaContainerUsage;
private long deltaSystemUsage;
private long deltaThrottledTime;
+ private long deltaThrottlingActivePeriods;
+ private long deltaThrottledPeriods;
private void updateCpuDeltas(ContainerStats.CpuStats cpuStats) {
- deltaSystemUsage = this.totalSystemUsage == 0 ? 0 : (cpuStats.getSystemCpuUsage() - this.totalSystemUsage);
- deltaContainerUsage = cpuStats.getTotalUsage() - this.totalContainerUsage;
- deltaContainerKernelUsage = cpuStats.getUsageInKernelMode() - this.containerKernelUsage;
- deltaThrottledTime = cpuStats.getThrottledTime() - this.throttledTime;
-
- this.totalSystemUsage = cpuStats.getSystemCpuUsage();
- this.totalContainerUsage = cpuStats.getTotalUsage();
- this.containerKernelUsage = cpuStats.getUsageInKernelMode();
- this.throttledTime = cpuStats.getThrottledTime();
+ deltaSystemUsage = totalSystemUsage == 0 ? 0 : (cpuStats.getSystemCpuUsage() - totalSystemUsage);
+ deltaContainerUsage = cpuStats.getTotalUsage() - totalContainerUsage;
+ deltaContainerKernelUsage = cpuStats.getUsageInKernelMode() - containerKernelUsage;
+ deltaThrottledTime = cpuStats.getThrottledTime() - throttledTime;
+ deltaThrottlingActivePeriods = cpuStats.getThrottlingActivePeriods() - throttlingActivePeriods;
+ deltaThrottledPeriods = cpuStats.getThrottledPeriods() - throttledPeriods;
+
+ totalSystemUsage = cpuStats.getSystemCpuUsage();
+ totalContainerUsage = cpuStats.getTotalUsage();
+ containerKernelUsage = cpuStats.getUsageInKernelMode();
+ throttledTime = cpuStats.getThrottledTime();
+ throttlingActivePeriods = cpuStats.getThrottlingActivePeriods();
+ throttledPeriods = cpuStats.getThrottledPeriods();
}
/**
@@ -655,8 +666,12 @@ public class NodeAgentImpl implements NodeAgent {
return deltaSystemUsage == 0 ? Double.NaN : (double) deltaContainerKernelUsage / deltaSystemUsage;
}
- Long getThrottledTime() {
- return deltaSystemUsage == 0 ? null : deltaThrottledTime;
+ double getThrottledTime() {
+ return deltaSystemUsage == 0 ? Double.NaN : 60d * deltaThrottledPeriods / deltaThrottlingActivePeriods;
+ }
+
+ double getThrottledCpuTime() {
+ return deltaSystemUsage == 0 ? Double.NaN : deltaThrottledTime / BILLION;
}
}
diff --git a/node-admin/src/test/resources/docker.stats.json b/node-admin/src/test/resources/docker.stats.json
index ff4a2fde943..5b42d9a2428 100644
--- a/node-admin/src/test/resources/docker.stats.json
+++ b/node-admin/src/test/resources/docker.stats.json
@@ -18,9 +18,9 @@
},
"system_cpu_usage":5876874910000000,
"throttling_data":{
- "periods":3212,
- "throttled_periods":322,
- "throttled_time":4490
+ "periods":820694,
+ "throttled_periods":177731,
+ "throttled_time":81891944744550
}
},
"cpu_stats":{
@@ -41,9 +41,9 @@
},
"system_cpu_usage":5876882680000000,
"throttling_data":{
- "periods":3242,
- "throttled_periods":332,
- "throttled_time":4523
+ "periods":821264,
+ "throttled_periods":178201,
+ "throttled_time":82181944744550
}
},
"memory_stats":{
diff --git a/node-admin/src/test/resources/expected.container.system.metrics.txt b/node-admin/src/test/resources/expected.container.system.metrics.txt
index 5400edfec65..4876466d8ca 100644
--- a/node-admin/src/test/resources/expected.container.system.metrics.txt
+++ b/node-admin/src/test/resources/expected.container.system.metrics.txt
@@ -10,7 +10,8 @@ s:
},
"metrics": {
"cpu.sys.util": 3.402,
- "cpu.throttled_time.ns": 33,
+ "cpu.throttled_cpu_time": 290.0,
+ "cpu.throttled_time": 49.473,
"cpu.util": 5.4,
"cpu.vcpus": 2.0,
"disk.limit": 250000000000,