summaryrefslogtreecommitdiffstats
path: root/node-admin
diff options
context:
space:
mode:
authorvalerijf <valerijf@yahoo-inc.com>2017-06-20 13:41:01 +0200
committervalerijf <valerijf@yahoo-inc.com>2017-06-20 13:42:33 +0200
commit5ed3a292967e03d32cb9e4d59803b169deb0d253 (patch)
treee8e5e1d0df3930f7f37706426ec60774739e4c9b /node-admin
parente1204bb76fab8e181d49a1d69d8247d319be253d (diff)
Fix cpu util calculation to work with multi-flavors and without ready nodes
Diffstat (limited to 'node-admin')
-rw-r--r--node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeadmin/NodeAdminImpl.java2
-rw-r--r--node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgent.java2
-rw-r--r--node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImpl.java21
-rw-r--r--node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImplTest.java8
4 files changed, 23 insertions, 10 deletions
diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeadmin/NodeAdminImpl.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeadmin/NodeAdminImpl.java
index eaae5030b50..84679d1dadd 100644
--- a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeadmin/NodeAdminImpl.java
+++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeadmin/NodeAdminImpl.java
@@ -83,7 +83,7 @@ public class NodeAdminImpl implements NodeAdmin {
metricsScheduler.scheduleAtFixedRate(() -> {
try {
- nodeAgents.values().forEach(nodeAgent -> nodeAgent.updateContainerNodeMetrics(nodeAgents.size()));
+ nodeAgents.values().forEach(NodeAgent::updateContainerNodeMetrics);
} catch (Throwable e) {
logger.warning("Metric fetcher scheduler failed", e);
}
diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgent.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgent.java
index b0facdec09d..5d31c10fcc1 100644
--- a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgent.java
+++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgent.java
@@ -41,7 +41,7 @@ public interface NodeAgent {
/**
* Updates metric receiver with the latest node-agent stats
*/
- void updateContainerNodeMetrics(int numAllocatedContainersOnHost);
+ void updateContainerNodeMetrics();
String getHostname();
diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImpl.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImpl.java
index 08fae2b707a..6145e2e372e 100644
--- a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImpl.java
+++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImpl.java
@@ -501,7 +501,7 @@ public class NodeAgentImpl implements NodeAgent {
}
@SuppressWarnings("unchecked")
- public void updateContainerNodeMetrics(int numAllocatedContainersOnHost) {
+ public void updateContainerNodeMetrics() {
final ContainerNodeSpec nodeSpec = lastNodeSpec;
if (nodeSpec == null || containerState == ABSENT) return;
@@ -527,11 +527,14 @@ public class NodeAgentImpl implements NodeAgent {
final Optional<Long> diskTotalBytesUsed = storageMaintainer.flatMap(maintainer -> maintainer
.updateIfNeededAndGetDiskMetricsFor(containerName));
- // CPU usage by a container is given by dividing used CPU time by the container with CPU time used by the entire
- // system. Because each container is allocated same amount of CPU shares, no container should use more than 1/n
- // of the total CPU time, where n is the number of running containers.
+ // CPU usage by a container as a percentage of total host CPU, cpuPercentageOfHost, is given by dividing the
+ // CPU time used by the container by the CPU time used by the entire system.
+ // CPU usage by a container as a percentage of the CPU allocated to it is given by dividing
+ // cpuPercentageOfHost by the ratio of container resources to total host resources. This calculation
+ // assumes that the ratio between container and host resources is roughly equal for disk, memory, and CPU,
+ // and therefore uses only the ratio of container memory to host memory.
double cpuPercentageOfHost = lastCpuMetric.getCpuUsagePercentage(cpuContainerTotalTime, cpuSystemTotalTime);
- double cpuPercentageOfAllocated = numAllocatedContainersOnHost * cpuPercentageOfHost;
+ double cpuPercentageOfAllocated = getInverseContainerShareOfHost(nodeSpec) * cpuPercentageOfHost;
long memoryTotalBytesUsed = memoryTotalBytesUsage - memoryTotalBytesCache;
double memoryPercentUsed = 100.0 * memoryTotalBytesUsed / memoryTotalBytes;
Optional<Double> diskPercentUsed = diskTotalBytes.flatMap(total -> diskTotalBytesUsed.map(used -> 100.0 * used / total));
@@ -603,6 +606,14 @@ public class NodeAgentImpl implements NodeAgent {
return temp;
}
+ private double getInverseContainerShareOfHost(ContainerNodeSpec nodeSpec) {
+ return nodeSpec.minMainMemoryAvailableGb
+ .map(memory -> {
+ double hostMemory = storageMaintainer.map(StorageMaintainer::getHostTotalMemoryGb).orElse(0d);
+ return hostMemory / memory;
+ }).orElse(0d);
+ }
+
class CpuUsageReporter {
private long totalContainerUsage = 0;
private long totalSystemUsage = 0;
diff --git a/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImplTest.java b/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImplTest.java
index 9114561346f..d2a90abaffb 100644
--- a/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImplTest.java
+++ b/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImplTest.java
@@ -518,18 +518,20 @@ public class NodeAgentImplTest {
.vespaVersion(vespaVersion)
.owner(owner)
.membership(membership)
+ .minMainMemoryAvailableGb(2)
.build();
NodeAgentImpl nodeAgent = makeNodeAgent(dockerImage, true);
when(nodeRepository.getContainerNodeSpec(eq(hostName))).thenReturn(Optional.of(nodeSpec));
when(storageMaintainer.updateIfNeededAndGetDiskMetricsFor(eq(containerName))).thenReturn(Optional.of(42547019776L));
+ when(storageMaintainer.getHostTotalMemoryGb()).thenReturn(10d);
when(dockerOperations.getContainerStats(eq(containerName)))
.thenReturn(Optional.of(stats1))
.thenReturn(Optional.of(stats2));
nodeAgent.converge(); // Run the converge loop once to initialize lastNodeSpec
- nodeAgent.updateContainerNodeMetrics(5); // Update metrics once to init and lastCpuMetric
+ nodeAgent.updateContainerNodeMetrics(); // Update metrics once to init and lastCpuMetric
clock.advance(Duration.ofSeconds(1234));
@@ -552,7 +554,7 @@ public class NodeAgentImplTest {
return null;
}).when(dockerOperations).executeCommandInContainerAsRoot(any(), any(), anyVararg());
- nodeAgent.updateContainerNodeMetrics(5);
+ nodeAgent.updateContainerNodeMetrics();
}
@Test
@@ -568,7 +570,7 @@ public class NodeAgentImplTest {
nodeAgent.converge(); // Run the converge loop once to initialize lastNodeSpec
- nodeAgent.updateContainerNodeMetrics(5);
+ nodeAgent.updateContainerNodeMetrics();
Set<Map<String, Object>> actualMetrics = metricReceiver.getAllMetricsRaw();
assertEquals(Collections.emptySet(), actualMetrics);