summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Valerij Fredriksen <valerijf@oath.com> 2017-11-23 14:38:03 +0100
committer Valerij Fredriksen <valerijf@oath.com> 2017-11-24 15:20:17 +0100
commit 090b2faec4fe68689521872db90043665cdf1018 (patch)
tree 0c2eb9f2f2c4af3f87bfc2f8e3509d92b50d0238
parent 8407446fe4e8df5fa16ef3b019ccb1bea2f87099 (diff)
Add metric for kernel cpu usage in docker container
-rw-r--r-- node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImpl.java 50
-rw-r--r-- node-admin/src/test/resources/docker.stats.json 2
-rw-r--r-- node-admin/src/test/resources/expected.container.system.metrics.txt 1
3 files changed, 37 insertions, 16 deletions
diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImpl.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImpl.java
index 09eb14039e8..b66ef50236c 100644
--- a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImpl.java
+++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImpl.java
@@ -524,6 +524,7 @@ public class NodeAgentImpl implements NodeAgent {
Docker.ContainerStats stats = containerStats.get();
final String APP = MetricReceiverWrapper.APPLICATION_NODE;
final int totalNumCpuCores = ((List<Number>) ((Map) stats.getCpuStats().get("cpu_usage")).get("percpu_usage")).size();
+ final long cpuContainerKernelTime = ((Number) ((Map) stats.getCpuStats().get("cpu_usage")).get("usage_in_kernelmode")).longValue();
final long cpuContainerTotalTime = ((Number) ((Map) stats.getCpuStats().get("cpu_usage")).get("total_usage")).longValue();
final long cpuSystemTotalTime = ((Number) stats.getCpuStats().get("system_cpu_usage")).longValue();
final long memoryTotalBytes = ((Number) stats.getMemoryStats().get("limit")).longValue();
@@ -532,26 +533,32 @@ public class NodeAgentImpl implements NodeAgent {
final long diskTotalBytes = (long) (nodeSpec.minDiskAvailableGb * BYTES_IN_GB);
final Optional<Long> diskTotalBytesUsed = storageMaintainer.getDiskUsageFor(containerName);
+ lastCpuMetric.updateCpuDeltas(cpuSystemTotalTime, cpuContainerTotalTime, cpuContainerKernelTime);
+
// CPU usage by a container as percentage of total host CPU, cpuPercentageOfHost, is given by dividing used
- // CPU time by the container with CPU time used by the entire system.
+ // CPU time of the container by the CPU time used by the entire system.
+ double cpuUsageRatioOfHost = lastCpuMetric.getCpuUsageRatio();
+
// CPU usage by a container as percentage of total CPU allocated to it is given by dividing the
// cpuPercentageOfHost with the ratio of container minCpuCores by total number of CPU cores.
- double cpuPercentageOfHost = lastCpuMetric.getCpuUsagePercentage(cpuContainerTotalTime, cpuSystemTotalTime);
- double cpuPercentageOfAllocated = totalNumCpuCores * cpuPercentageOfHost / nodeSpec.minCpuCores;
+ double cpuUsageRatioOfAllocated = totalNumCpuCores * cpuUsageRatioOfHost / nodeSpec.minCpuCores;
+ double cpuKernelUsageRatioOfAllocated = cpuUsageRatioOfAllocated * lastCpuMetric.getCpuKernelUsageRatio();
+
long memoryTotalBytesUsed = memoryTotalBytesUsage - memoryTotalBytesCache;
- double memoryPercentUsed = 100.0 * memoryTotalBytesUsed / memoryTotalBytes;
- Optional<Double> diskPercentUsed = diskTotalBytesUsed.map(used -> 100.0 * used / diskTotalBytes);
+ double memoryUsageRatio = (double) memoryTotalBytesUsed / memoryTotalBytes;
+ Optional<Double> diskUsageRatio = diskTotalBytesUsed.map(used -> (double) used / diskTotalBytes);
List<DimensionMetrics> metrics = new ArrayList<>();
DimensionMetrics.Builder systemMetricsBuilder = new DimensionMetrics.Builder(APP, dimensions)
.withMetric("mem.limit", memoryTotalBytes)
.withMetric("mem.used", memoryTotalBytesUsed)
- .withMetric("mem.util", memoryPercentUsed)
- .withMetric("cpu.util", cpuPercentageOfAllocated)
+ .withMetric("mem.util", 100 * memoryUsageRatio)
+ .withMetric("cpu.util", 100 * cpuUsageRatioOfAllocated)
+ .withMetric("cpu.sys.util", 100 * cpuKernelUsageRatioOfAllocated)
.withMetric("disk.limit", diskTotalBytes);
diskTotalBytesUsed.ifPresent(diskUsed -> systemMetricsBuilder.withMetric("disk.used", diskUsed));
- diskPercentUsed.ifPresent(diskUtil -> systemMetricsBuilder.withMetric("disk.util", diskUtil));
+ diskUsageRatio.ifPresent(diskRatio -> systemMetricsBuilder.withMetric("disk.util", 100 * diskRatio));
metrics.add(systemMetricsBuilder.build());
stats.getNetworks().forEach((interfaceName, interfaceStats) -> {
@@ -612,17 +619,30 @@ public class NodeAgentImpl implements NodeAgent {
}
class CpuUsageReporter {
+ private long containerKernelUsage = 0;
private long totalContainerUsage = 0;
private long totalSystemUsage = 0;
- double getCpuUsagePercentage(long currentContainerUsage, long currentSystemUsage) {
- long deltaSystemUsage = currentSystemUsage - totalSystemUsage;
- double cpuUsagePct = (deltaSystemUsage == 0 || totalSystemUsage == 0) ?
- 0 : 100.0 * (currentContainerUsage - totalContainerUsage) / deltaSystemUsage;
+ private long deltaContainerKernelUsage;
+ private long deltaContainerUsage;
+ private long deltaSystemUsage;
+
+ private void updateCpuDeltas(long totalSystemUsage, long totalContainerUsage, long containerKernelUsage) {
+ deltaSystemUsage = totalSystemUsage - this.totalSystemUsage;
+ deltaContainerUsage = totalContainerUsage - this.totalContainerUsage;
+ deltaContainerKernelUsage = containerKernelUsage - this.containerKernelUsage;
+
+ this.totalSystemUsage = totalSystemUsage;
+ this.totalContainerUsage = totalContainerUsage;
+ this.containerKernelUsage = containerKernelUsage;
+ }
+
+ double getCpuKernelUsageRatio() {
+ return deltaContainerUsage == 0 ? 0 : (double) deltaContainerKernelUsage / deltaContainerUsage;
+ }
- totalContainerUsage = currentContainerUsage;
- totalSystemUsage = currentSystemUsage;
- return cpuUsagePct;
+ double getCpuUsageRatio() {
+ return deltaSystemUsage == 0 ? 0 : (double) deltaContainerUsage / deltaSystemUsage;
}
}
diff --git a/node-admin/src/test/resources/docker.stats.json b/node-admin/src/test/resources/docker.stats.json
index 3b1087b9202..ff4a2fde943 100644
--- a/node-admin/src/test/resources/docker.stats.json
+++ b/node-admin/src/test/resources/docker.stats.json
@@ -36,7 +36,7 @@
44567860460,
39049895962
],
- "usage_in_kernelmode":44050000000,
+ "usage_in_kernelmode":44106083850,
"usage_in_usermode":158950000000
},
"system_cpu_usage":5876882680000000,
diff --git a/node-admin/src/test/resources/expected.container.system.metrics.txt b/node-admin/src/test/resources/expected.container.system.metrics.txt
index 8a4d696b08e..023d3958c60 100644
--- a/node-admin/src/test/resources/expected.container.system.metrics.txt
+++ b/node-admin/src/test/resources/expected.container.system.metrics.txt
@@ -11,6 +11,7 @@ s:
"mem.limit": 4294967296,
"mem.used": 1073741824,
"disk.used": 39625000000,
+ "cpu.sys.util": 3.402,
"disk.util": 15.85,
"cpu.util": 5.4,
"mem.util": 25.0,