summaryrefslogtreecommitdiffstats
path: root/node-admin
diff options
context:
space:
mode:
author valerijf <valerijf@yahoo-inc.com> 2017-03-15 13:24:48 +0100
committer valerijf <valerijf@yahoo-inc.com> 2017-03-15 13:24:48 +0100
commit 4edb40a2037bdc770026372279661c66ba3748bc (patch)
tree 4be3ef3aad993c1386b29e64920684c909857149 /node-admin
parent 910907eb531275a5009dac4aebf6e4b4ef2934dc (diff)
Update CPU usage metric
Diffstat (limited to 'node-admin')
-rw-r--r-- node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeadmin/NodeAdminImpl.java 2
-rw-r--r-- node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgent.java 2
-rw-r--r-- node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImpl.java 10
-rw-r--r-- node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImplTest.java 12
-rw-r--r-- node-admin/src/test/resources/docker.stats.metrics.expected.json 2
5 files changed, 18 insertions, 10 deletions
diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeadmin/NodeAdminImpl.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeadmin/NodeAdminImpl.java
index 35ef4072d10..ad9352896d6 100644
--- a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeadmin/NodeAdminImpl.java
+++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeadmin/NodeAdminImpl.java
@@ -74,7 +74,7 @@ public class NodeAdminImpl implements NodeAdmin {
scheduler.scheduleWithFixedDelay(() -> {
try {
- nodeAgents.values().forEach(NodeAgent::updateContainerNodeMetrics);
+ nodeAgents.values().forEach(nodeAgent -> nodeAgent.updateContainerNodeMetrics(nodeAgents.size()));
} catch (Throwable e) {
logger.warning("Metric fetcher scheduler failed", e);
}
diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgent.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgent.java
index e4ea3acef11..dcaa82db81f 100644
--- a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgent.java
+++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgent.java
@@ -64,7 +64,7 @@ public interface NodeAgent {
/**
* Updates metric receiver with the latest node-agent stats
*/
- void updateContainerNodeMetrics();
+ void updateContainerNodeMetrics(int numAllocatedContainersOnHost);
ContainerName getContainerName();
diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImpl.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImpl.java
index 7a88bcad024..f91b8d2ad47 100644
--- a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImpl.java
+++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImpl.java
@@ -503,7 +503,7 @@ public class NodeAgentImpl implements NodeAgent {
}
@SuppressWarnings("unchecked")
- public void updateContainerNodeMetrics() {
+ public void updateContainerNodeMetrics(int numAllocatedContainersOnHost) {
ContainerNodeSpec nodeSpec;
synchronized (monitor) {
nodeSpec = lastNodeSpec;
@@ -541,8 +541,12 @@ public class NodeAgentImpl implements NodeAgent {
long currentCpuContainerTotalTime = ((Number) ((Map) stats.getCpuStats().get("cpu_usage")).get("total_usage")).longValue();
long currentCpuSystemTotalTime = ((Number) stats.getCpuStats().get("system_cpu_usage")).longValue();
- double cpuPercentage = lastCpuMetric.getCpuUsagePercentage(currentCpuContainerTotalTime, currentCpuSystemTotalTime);
- metricReceiver.declareGauge(MetricReceiverWrapper.APPLICATION_DOCKER, dimensions, "node.cpu.busy.pct").sample(cpuPercentage);
+ // The CPU usage of a container is found by dividing the CPU time used by the container by the CPU time used by
+ // the entire system. Because each container is allocated the same amount of CPU shares, no container should use
+ // more than 1/n of the total CPU time, where n is the number of running containers.
+ double cpuPercentageOfHost = lastCpuMetric.getCpuUsagePercentage(currentCpuContainerTotalTime, currentCpuSystemTotalTime);
+ double cpuPercentageOfAllocated = numAllocatedContainersOnHost * cpuPercentageOfHost;
+ metricReceiver.declareGauge(MetricReceiverWrapper.APPLICATION_DOCKER, dimensions, "node.cpu.busy.pct").sample(cpuPercentageOfAllocated);
addIfNotNull(dimensions, "node.cpu.throttled_time", stats.getCpuStats().get("throttling_data"), "throttled_time");
addIfNotNull(dimensions, "node.memory.limit", stats.getMemoryStats(), "limit");
diff --git a/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImplTest.java b/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImplTest.java
index 47e59aca4c7..cdb415de945 100644
--- a/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImplTest.java
+++ b/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImplTest.java
@@ -497,10 +497,14 @@ public class NodeAgentImplTest {
long totalContainerCpuTime = (long) ((Map) cpu_stats.get("cpu_usage")).get("total_usage");
long totalSystemCpuTime = (long) cpu_stats.get("system_cpu_usage");
- nodeAgent.lastCpuMetric.getCpuUsagePercentage(totalContainerCpuTime - 456_789_123, (long) (totalSystemCpuTime - 1e9));
- // During the last 10^9 total cpu ns, 456,789,123ns were spent on running the container. That means the expected
- // cpu usage percentage is 100 * (456,789,123 / 10^9) = 45.6789123%
- nodeAgent.updateContainerNodeMetrics();
+ nodeAgent.lastCpuMetric.getCpuUsagePercentage(totalContainerCpuTime - 123_456_789, (long) (totalSystemCpuTime - 1e9));
+ int numAllocatedContainersOnHost = 4;
+ // During the last 10^9 total CPU ns, 123,456,789ns were spent on running the container. That means the container
+ // used 100 * (123,456,789 / 10^9) = 12.3456789% of total system CPU time.
+ // There are a total of 4 allocated nodes on this host, which means that the container only has 100 / 4 = 25%
+ // of total system CPU time at its disposal. Therefore, the expected CPU usage by this container is:
+ // 12.3456789% / 25% = 49.3827156%
+ nodeAgent.updateContainerNodeMetrics(4);
Set<Map<String, Object>> actualMetrics = new HashSet<>();
for (MetricReceiverWrapper.DimensionMetrics dimensionMetrics : metricReceiver.getMetrics(MetricReceiverWrapper.APPLICATION_DOCKER)) {
diff --git a/node-admin/src/test/resources/docker.stats.metrics.expected.json b/node-admin/src/test/resources/docker.stats.metrics.expected.json
index 0845f5acb82..8222c6beca0 100644
--- a/node-admin/src/test/resources/docker.stats.metrics.expected.json
+++ b/node-admin/src/test/resources/docker.stats.metrics.expected.json
@@ -76,7 +76,7 @@
"zone":"dev.us-east-1"
},
"metrics":{
- "node.cpu.busy.pct": 45.6789123,
+ "node.cpu.busy.pct": 49.3827156,
"node.cpu.throttled_time": 4523.0,
"node.memory.usage":1.326026752E9,
"node.memory.limit":4.294967296E9,