summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Valerij Fredriksen <freva@users.noreply.github.com> 2017-11-27 12:41:56 +0100
committer GitHub <noreply@github.com> 2017-11-27 12:41:56 +0100
commit 3d3d44e357246d9f23c3face130729a52c0fb37b (patch)
tree fbb03b0f6087a475959af810c6fc90c4216c047e
parent 0b029516b14ccf7a8c15579117b78bdab8ba43a8 (diff)
parent 5cc155fbac9306527b2451836ffad1ac74555046 (diff)
Merge pull request #4277 from vespa-engine/freva/docker-container-kernel-usage
Freva/docker container kernel usage
-rw-r--r-- config-model/src/main/java/com/yahoo/vespa/model/admin/monitoring/SystemMetrics.java 2
-rw-r--r-- node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImpl.java 57
-rw-r--r-- node-admin/src/test/resources/docker.stats.json 2
-rw-r--r-- node-admin/src/test/resources/expected.container.system.metrics.txt 1
4 files changed, 43 insertions, 19 deletions
diff --git a/config-model/src/main/java/com/yahoo/vespa/model/admin/monitoring/SystemMetrics.java b/config-model/src/main/java/com/yahoo/vespa/model/admin/monitoring/SystemMetrics.java
index cafdf83608f..3db09f6b566 100644
--- a/config-model/src/main/java/com/yahoo/vespa/model/admin/monitoring/SystemMetrics.java
+++ b/config-model/src/main/java/com/yahoo/vespa/model/admin/monitoring/SystemMetrics.java
@@ -11,6 +11,7 @@ import java.util.Set;
@SuppressWarnings("UnusedDeclaration") // Used by model amenders
public class SystemMetrics {
public static final String CPU_UTIL = "cpu.util";
+ public static final String CPU_SYS_UTIL = "cpu.sys.util";
public static final String DISK_LIMIT = "disk.limit";
public static final String DISK_USED = "disk.used";
public static final String DISK_UTIL = "disk.util";
@@ -23,6 +24,7 @@ public class SystemMetrics {
private static MetricSet createSystemMetricSet() {
Set<Metric> dockerNodeMetrics =
ImmutableSet.of(new Metric(CPU_UTIL),
+ new Metric(CPU_SYS_UTIL),
new Metric(DISK_LIMIT),
new Metric(DISK_USED),
new Metric(DISK_UTIL),
diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImpl.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImpl.java
index 09eb14039e8..d01692f1f05 100644
--- a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImpl.java
+++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImpl.java
@@ -524,6 +524,7 @@ public class NodeAgentImpl implements NodeAgent {
Docker.ContainerStats stats = containerStats.get();
final String APP = MetricReceiverWrapper.APPLICATION_NODE;
final int totalNumCpuCores = ((List<Number>) ((Map) stats.getCpuStats().get("cpu_usage")).get("percpu_usage")).size();
+ final long cpuContainerKernelTime = ((Number) ((Map) stats.getCpuStats().get("cpu_usage")).get("usage_in_kernelmode")).longValue();
final long cpuContainerTotalTime = ((Number) ((Map) stats.getCpuStats().get("cpu_usage")).get("total_usage")).longValue();
final long cpuSystemTotalTime = ((Number) stats.getCpuStats().get("system_cpu_usage")).longValue();
final long memoryTotalBytes = ((Number) stats.getMemoryStats().get("limit")).longValue();
@@ -532,26 +533,28 @@ public class NodeAgentImpl implements NodeAgent {
final long diskTotalBytes = (long) (nodeSpec.minDiskAvailableGb * BYTES_IN_GB);
final Optional<Long> diskTotalBytesUsed = storageMaintainer.getDiskUsageFor(containerName);
- // CPU usage by a container as percentage of total host CPU, cpuPercentageOfHost, is given by dividing used
- // CPU time by the container with CPU time used by the entire system.
- // CPU usage by a container as percentage of total CPU allocated to it is given by dividing the
- // cpuPercentageOfHost with the ratio of container minCpuCores by total number of CPU cores.
- double cpuPercentageOfHost = lastCpuMetric.getCpuUsagePercentage(cpuContainerTotalTime, cpuSystemTotalTime);
- double cpuPercentageOfAllocated = totalNumCpuCores * cpuPercentageOfHost / nodeSpec.minCpuCores;
+ lastCpuMetric.updateCpuDeltas(cpuSystemTotalTime, cpuContainerTotalTime, cpuContainerKernelTime);
+
+ // Ratio of CPU cores allocated to this container to total number of CPU cores on this host
+ final double allocatedCpuRatio = nodeSpec.minCpuCores / totalNumCpuCores;
+ double cpuUsageRatioOfAllocated = lastCpuMetric.getCpuUsageRatio() / allocatedCpuRatio;
+ double cpuKernelUsageRatioOfAllocated = lastCpuMetric.getCpuKernelUsageRatio() / allocatedCpuRatio;
+
long memoryTotalBytesUsed = memoryTotalBytesUsage - memoryTotalBytesCache;
- double memoryPercentUsed = 100.0 * memoryTotalBytesUsed / memoryTotalBytes;
- Optional<Double> diskPercentUsed = diskTotalBytesUsed.map(used -> 100.0 * used / diskTotalBytes);
+ double memoryUsageRatio = (double) memoryTotalBytesUsed / memoryTotalBytes;
+ Optional<Double> diskUsageRatio = diskTotalBytesUsed.map(used -> (double) used / diskTotalBytes);
List<DimensionMetrics> metrics = new ArrayList<>();
DimensionMetrics.Builder systemMetricsBuilder = new DimensionMetrics.Builder(APP, dimensions)
.withMetric("mem.limit", memoryTotalBytes)
.withMetric("mem.used", memoryTotalBytesUsed)
- .withMetric("mem.util", memoryPercentUsed)
- .withMetric("cpu.util", cpuPercentageOfAllocated)
+ .withMetric("mem.util", 100 * memoryUsageRatio)
+ .withMetric("cpu.util", 100 * cpuUsageRatioOfAllocated)
+ .withMetric("cpu.sys.util", 100 * cpuKernelUsageRatioOfAllocated)
.withMetric("disk.limit", diskTotalBytes);
diskTotalBytesUsed.ifPresent(diskUsed -> systemMetricsBuilder.withMetric("disk.used", diskUsed));
- diskPercentUsed.ifPresent(diskUtil -> systemMetricsBuilder.withMetric("disk.util", diskUtil));
+ diskUsageRatio.ifPresent(diskRatio -> systemMetricsBuilder.withMetric("disk.util", 100 * diskRatio));
metrics.add(systemMetricsBuilder.build());
stats.getNetworks().forEach((interfaceName, interfaceStats) -> {
@@ -612,17 +615,35 @@ public class NodeAgentImpl implements NodeAgent {
}
class CpuUsageReporter {
+ private long containerKernelUsage = 0;
private long totalContainerUsage = 0;
private long totalSystemUsage = 0;
- double getCpuUsagePercentage(long currentContainerUsage, long currentSystemUsage) {
- long deltaSystemUsage = currentSystemUsage - totalSystemUsage;
- double cpuUsagePct = (deltaSystemUsage == 0 || totalSystemUsage == 0) ?
- 0 : 100.0 * (currentContainerUsage - totalContainerUsage) / deltaSystemUsage;
+ private long deltaContainerKernelUsage;
+ private long deltaContainerUsage;
+ private long deltaSystemUsage;
+
+ private void updateCpuDeltas(long totalSystemUsage, long totalContainerUsage, long containerKernelUsage) {
+ deltaSystemUsage = totalSystemUsage - this.totalSystemUsage;
+ deltaContainerUsage = totalContainerUsage - this.totalContainerUsage;
+ deltaContainerKernelUsage = containerKernelUsage - this.containerKernelUsage;
+
+ this.totalSystemUsage = totalSystemUsage;
+ this.totalContainerUsage = totalContainerUsage;
+ this.containerKernelUsage = containerKernelUsage;
+ }
+
+ /**
+ * Returns the CPU usage ratio for the docker container that this NodeAgent is managing
+ * in the time between the last two times updateCpuDeltas() was called. This is calculated
+ * by dividing the CPU time used by the container by the CPU time used by the entire system.
+ */
+ double getCpuUsageRatio() {
+ return deltaSystemUsage == 0 ? 0 : (double) deltaContainerUsage / deltaSystemUsage;
+ }
- totalContainerUsage = currentContainerUsage;
- totalSystemUsage = currentSystemUsage;
- return cpuUsagePct;
+ double getCpuKernelUsageRatio() {
+ return deltaSystemUsage == 0 ? 0 : (double) deltaContainerKernelUsage / deltaSystemUsage;
}
}
diff --git a/node-admin/src/test/resources/docker.stats.json b/node-admin/src/test/resources/docker.stats.json
index 3b1087b9202..ff4a2fde943 100644
--- a/node-admin/src/test/resources/docker.stats.json
+++ b/node-admin/src/test/resources/docker.stats.json
@@ -36,7 +36,7 @@
44567860460,
39049895962
],
- "usage_in_kernelmode":44050000000,
+ "usage_in_kernelmode":44106083850,
"usage_in_usermode":158950000000
},
"system_cpu_usage":5876882680000000,
diff --git a/node-admin/src/test/resources/expected.container.system.metrics.txt b/node-admin/src/test/resources/expected.container.system.metrics.txt
index 8a4d696b08e..023d3958c60 100644
--- a/node-admin/src/test/resources/expected.container.system.metrics.txt
+++ b/node-admin/src/test/resources/expected.container.system.metrics.txt
@@ -11,6 +11,7 @@ s:
"mem.limit": 4294967296,
"mem.used": 1073741824,
"disk.used": 39625000000,
+ "cpu.sys.util": 3.402,
"disk.util": 15.85,
"cpu.util": 5.4,
"mem.util": 25.0,