diff options
author | Valerij Fredriksen <valerijf@verizonmedia.com> | 2019-06-21 14:58:20 +0200 |
---|---|---|
committer | Valerij Fredriksen <valerijf@verizonmedia.com> | 2019-06-21 14:58:20 +0200 |
commit | 05b97d110229d5633b733b29fb93c785c8256346 (patch) | |
tree | 8889ec6a189e0e014f54ccc7384b7b91761eeaec | |
parent | ecf4464a633104d180fad9ca5da15baae36a8514 (diff) | |
Rename throttled_time to throttled_cpu_time and add new throttled_time
5 files changed, 51 insertions, 21 deletions
diff --git a/config-model/src/main/java/com/yahoo/vespa/model/admin/monitoring/SystemMetrics.java b/config-model/src/main/java/com/yahoo/vespa/model/admin/monitoring/SystemMetrics.java index 6faceee3a13..711fedcfef6 100644 --- a/config-model/src/main/java/com/yahoo/vespa/model/admin/monitoring/SystemMetrics.java +++ b/config-model/src/main/java/com/yahoo/vespa/model/admin/monitoring/SystemMetrics.java @@ -12,7 +12,8 @@ import java.util.Set; public class SystemMetrics { public static final String CPU_UTIL = "cpu.util"; public static final String CPU_SYS_UTIL = "cpu.sys.util"; - public static final String CPU_THROTTLED_TIME = "cpu.throttled_time.ns"; + public static final String CPU_THROTTLED_TIME = "cpu.throttled_time"; + public static final String CPU_THROTTLED_CPU_TIME = "cpu.throttled_cpu_time"; public static final String CPU_VCPUS = "cpu.vcpus"; public static final String DISK_LIMIT = "disk.limit"; public static final String DISK_USED = "disk.used"; @@ -30,6 +31,7 @@ public class SystemMetrics { ImmutableSet.of(new Metric(CPU_UTIL), new Metric(CPU_SYS_UTIL), new Metric(CPU_THROTTLED_TIME), + new Metric(CPU_THROTTLED_CPU_TIME), new Metric(CPU_VCPUS), new Metric(DISK_LIMIT), new Metric(DISK_USED), diff --git a/docker-api/src/main/java/com/yahoo/vespa/hosted/dockerapi/ContainerStats.java b/docker-api/src/main/java/com/yahoo/vespa/hosted/dockerapi/ContainerStats.java index abb89c3baf9..7fbe47b011c 100644 --- a/docker-api/src/main/java/com/yahoo/vespa/hosted/dockerapi/ContainerStats.java +++ b/docker-api/src/main/java/com/yahoo/vespa/hosted/dockerapi/ContainerStats.java @@ -96,6 +96,8 @@ public class ContainerStats { private final long totalUsage; private final long usageInKernelMode; private final long throttledTime; + private final long throttlingActivePeriods; + private final long throttledPeriods; public CpuStats(CpuStatsConfig cpuStats) { // Added in 1.27 @@ -104,13 +106,23 @@ public class ContainerStats { this.totalUsage = 
cpuStats.getCpuUsage().getTotalUsage(); this.usageInKernelMode = cpuStats.getCpuUsage().getUsageInKernelmode(); this.throttledTime = cpuStats.getThrottlingData().getThrottledTime(); + this.throttlingActivePeriods = cpuStats.getThrottlingData().getPeriods(); + this.throttledPeriods = cpuStats.getThrottlingData().getThrottledPeriods(); } public int getOnlineCpus() { return this.onlineCpus; } public long getSystemCpuUsage() { return this.systemCpuUsage; } public long getTotalUsage() { return totalUsage; } public long getUsageInKernelMode() { return usageInKernelMode; } + + /** Total CPU time processes in this container were throttled for */ public long getThrottledTime() { return throttledTime; } + + /** Number of periods when throttling enabled for this container */ + public long getThrottlingActivePeriods() { return throttlingActivePeriods; } + + /** Number of periods this container hit the throttling limit */ + public long getThrottledPeriods() { return throttledPeriods; } } // For testing only, create ContainerStats from JSON returned by docker daemon stats API diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImpl.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImpl.java index 5ca02f82aa5..3a9eae45607 100644 --- a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImpl.java +++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImpl.java @@ -546,7 +546,8 @@ public class NodeAgentImpl implements NodeAgent { final double allocatedCpuRatio = node.vcpus() / totalNumCpuCores; double cpuUsageRatioOfAllocated = lastCpuMetric.getCpuUsageRatio() / allocatedCpuRatio; double cpuKernelUsageRatioOfAllocated = lastCpuMetric.getCpuKernelUsageRatio() / allocatedCpuRatio; - Long cpuThrottledTime = lastCpuMetric.getThrottledTime(); + double cpuThrottledTime = lastCpuMetric.getThrottledTime(); + double cpuThrottledCpuTime = 
lastCpuMetric.getThrottledCpuTime(); long memoryTotalBytesUsed = memoryTotalBytesUsage - memoryTotalBytesCache; double memoryUsageRatio = (double) memoryTotalBytesUsed / memoryTotalBytes; @@ -562,7 +563,8 @@ public class NodeAgentImpl implements NodeAgent { .withMetric("mem_total.util", 100 * memoryTotalUsageRatio) .withMetric("cpu.util", 100 * cpuUsageRatioOfAllocated) .withMetric("cpu.sys.util", 100 * cpuKernelUsageRatioOfAllocated) - .withMetric("cpu.throttled_time.ns", cpuThrottledTime) + .withMetric("cpu.throttled_time", cpuThrottledTime) + .withMetric("cpu.throttled_cpu_time", cpuThrottledCpuTime) .withMetric("cpu.vcpus", node.vcpus()) .withMetric("disk.limit", diskTotalBytes); @@ -620,26 +622,35 @@ public class NodeAgentImpl implements NodeAgent { } class CpuUsageReporter { + private static final double BILLION = 1_000_000_000d; private long containerKernelUsage = 0; private long totalContainerUsage = 0; private long totalSystemUsage = 0; private long throttledTime = 0; + private long throttlingActivePeriods = 0; + private long throttledPeriods = 0; private long deltaContainerKernelUsage; private long deltaContainerUsage; private long deltaSystemUsage; private long deltaThrottledTime; + private long deltaThrottlingActivePeriods; + private long deltaThrottledPeriods; private void updateCpuDeltas(ContainerStats.CpuStats cpuStats) { - deltaSystemUsage = this.totalSystemUsage == 0 ? 0 : (cpuStats.getSystemCpuUsage() - this.totalSystemUsage); - deltaContainerUsage = cpuStats.getTotalUsage() - this.totalContainerUsage; - deltaContainerKernelUsage = cpuStats.getUsageInKernelMode() - this.containerKernelUsage; - deltaThrottledTime = cpuStats.getThrottledTime() - this.throttledTime; - - this.totalSystemUsage = cpuStats.getSystemCpuUsage(); - this.totalContainerUsage = cpuStats.getTotalUsage(); - this.containerKernelUsage = cpuStats.getUsageInKernelMode(); - this.throttledTime = cpuStats.getThrottledTime(); + deltaSystemUsage = totalSystemUsage == 0 ? 
0 : (cpuStats.getSystemCpuUsage() - totalSystemUsage); + deltaContainerUsage = cpuStats.getTotalUsage() - totalContainerUsage; + deltaContainerKernelUsage = cpuStats.getUsageInKernelMode() - containerKernelUsage; + deltaThrottledTime = cpuStats.getThrottledTime() - throttledTime; + deltaThrottlingActivePeriods = cpuStats.getThrottlingActivePeriods() - throttlingActivePeriods; + deltaThrottledPeriods = cpuStats.getThrottledPeriods() - throttledPeriods; + + totalSystemUsage = cpuStats.getSystemCpuUsage(); + totalContainerUsage = cpuStats.getTotalUsage(); + containerKernelUsage = cpuStats.getUsageInKernelMode(); + throttledTime = cpuStats.getThrottledTime(); + throttlingActivePeriods = cpuStats.getThrottlingActivePeriods(); + throttledPeriods = cpuStats.getThrottledPeriods(); } /** @@ -655,8 +666,12 @@ public class NodeAgentImpl implements NodeAgent { return deltaSystemUsage == 0 ? Double.NaN : (double) deltaContainerKernelUsage / deltaSystemUsage; } - Long getThrottledTime() { - return deltaSystemUsage == 0 ? null : deltaThrottledTime; + double getThrottledTime() { + return deltaSystemUsage == 0 ? Double.NaN : 60d * deltaThrottledPeriods / deltaThrottlingActivePeriods; + } + + double getThrottledCpuTime() { + return deltaSystemUsage == 0 ? 
Double.NaN : deltaThrottledTime / BILLION; } } diff --git a/node-admin/src/test/resources/docker.stats.json b/node-admin/src/test/resources/docker.stats.json index ff4a2fde943..5b42d9a2428 100644 --- a/node-admin/src/test/resources/docker.stats.json +++ b/node-admin/src/test/resources/docker.stats.json @@ -18,9 +18,9 @@ }, "system_cpu_usage":5876874910000000, "throttling_data":{ - "periods":3212, - "throttled_periods":322, - "throttled_time":4490 + "periods":820694, + "throttled_periods":177731, + "throttled_time":81891944744550 } }, "cpu_stats":{ @@ -41,9 +41,9 @@ }, "system_cpu_usage":5876882680000000, "throttling_data":{ - "periods":3242, - "throttled_periods":332, - "throttled_time":4523 + "periods":821264, + "throttled_periods":178201, + "throttled_time":82181944744550 } }, "memory_stats":{ diff --git a/node-admin/src/test/resources/expected.container.system.metrics.txt b/node-admin/src/test/resources/expected.container.system.metrics.txt index 5400edfec65..4876466d8ca 100644 --- a/node-admin/src/test/resources/expected.container.system.metrics.txt +++ b/node-admin/src/test/resources/expected.container.system.metrics.txt @@ -10,7 +10,8 @@ s: }, "metrics": { "cpu.sys.util": 3.402, - "cpu.throttled_time.ns": 33, + "cpu.throttled_cpu_time": 290.0, + "cpu.throttled_time": 49.473, "cpu.util": 5.4, "cpu.vcpus": 2.0, "disk.limit": 250000000000, |