diff options
author | valerijf <valerijf@yahoo-inc.com> | 2017-06-20 13:41:01 +0200 |
---|---|---|
committer | valerijf <valerijf@yahoo-inc.com> | 2017-06-20 13:42:33 +0200 |
commit | 5ed3a292967e03d32cb9e4d59803b169deb0d253 (patch) | |
tree | e8e5e1d0df3930f7f37706426ec60774739e4c9b /node-admin | |
parent | e1204bb76fab8e181d49a1d69d8247d319be253d (diff) |
Fix cpu util calculation to work with multi-flavors and without ready nodes
Diffstat (limited to 'node-admin')
4 files changed, 23 insertions, 10 deletions
diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeadmin/NodeAdminImpl.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeadmin/NodeAdminImpl.java index eaae5030b50..84679d1dadd 100644 --- a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeadmin/NodeAdminImpl.java +++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeadmin/NodeAdminImpl.java @@ -83,7 +83,7 @@ public class NodeAdminImpl implements NodeAdmin { metricsScheduler.scheduleAtFixedRate(() -> { try { - nodeAgents.values().forEach(nodeAgent -> nodeAgent.updateContainerNodeMetrics(nodeAgents.size())); + nodeAgents.values().forEach(NodeAgent::updateContainerNodeMetrics); } catch (Throwable e) { logger.warning("Metric fetcher scheduler failed", e); } diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgent.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgent.java index b0facdec09d..5d31c10fcc1 100644 --- a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgent.java +++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgent.java @@ -41,7 +41,7 @@ public interface NodeAgent { /** * Updates metric receiver with the latest node-agent stats */ - void updateContainerNodeMetrics(int numAllocatedContainersOnHost); + void updateContainerNodeMetrics(); String getHostname(); diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImpl.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImpl.java index 08fae2b707a..6145e2e372e 100644 --- a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImpl.java +++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImpl.java @@ -501,7 +501,7 @@ public class NodeAgentImpl implements NodeAgent { } @SuppressWarnings("unchecked") - public void updateContainerNodeMetrics(int numAllocatedContainersOnHost) { + public void updateContainerNodeMetrics() { final ContainerNodeSpec nodeSpec = lastNodeSpec; if (nodeSpec == null || containerState == ABSENT) return; @@ -527,11 +527,14 @@ public class NodeAgentImpl implements NodeAgent { final Optional<Long> diskTotalBytesUsed = storageMaintainer.flatMap(maintainer -> maintainer .updateIfNeededAndGetDiskMetricsFor(containerName)); - // CPU usage by a container is given by dividing used CPU time by the container with CPU time used by the entire - // system. Because each container is allocated same amount of CPU shares, no container should use more than 1/n - // of the total CPU time, where n is the number of running containers. + // CPU usage by a container as percentage of total host CPU, cpuPercentageOfHost, is given by dividing used + // CPU time by the container with CPU time used by the entire system. + // CPU usage by a container as percentage of total CPU allocated to it is given by dividing the + // cpuPercentageOfHost with the ratio of container resources over total host resources. This calculation + // assumes that the ratio between container and host resources for disk, memory, and cpu is roughly equal + // and therefore only calculates the ratio of container memory against host memory. double cpuPercentageOfHost = lastCpuMetric.getCpuUsagePercentage(cpuContainerTotalTime, cpuSystemTotalTime); - double cpuPercentageOfAllocated = numAllocatedContainersOnHost * cpuPercentageOfHost; + double cpuPercentageOfAllocated = getInverseContainerShareOfHost(nodeSpec) * cpuPercentageOfHost; long memoryTotalBytesUsed = memoryTotalBytesUsage - memoryTotalBytesCache; double memoryPercentUsed = 100.0 * memoryTotalBytesUsed / memoryTotalBytes; Optional<Double> diskPercentUsed = diskTotalBytes.flatMap(total -> diskTotalBytesUsed.map(used -> 100.0 * used / total)); @@ -603,6 +606,14 @@ public class NodeAgentImpl implements NodeAgent { return temp; } + private double getInverseContainerShareOfHost(ContainerNodeSpec nodeSpec) { + return nodeSpec.minMainMemoryAvailableGb + .map(memory -> { + double hostMemory = storageMaintainer.map(StorageMaintainer::getHostTotalMemoryGb).orElse(0d); + return hostMemory / memory; + }).orElse(0d); + } + class CpuUsageReporter { private long totalContainerUsage = 0; private long totalSystemUsage = 0; diff --git a/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImplTest.java b/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImplTest.java index 9114561346f..d2a90abaffb 100644 --- a/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImplTest.java +++ b/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImplTest.java @@ -518,18 +518,20 @@ public class NodeAgentImplTest { .vespaVersion(vespaVersion) .owner(owner) .membership(membership) + .minMainMemoryAvailableGb(2) .build(); NodeAgentImpl nodeAgent = makeNodeAgent(dockerImage, true); when(nodeRepository.getContainerNodeSpec(eq(hostName))).thenReturn(Optional.of(nodeSpec)); when(storageMaintainer.updateIfNeededAndGetDiskMetricsFor(eq(containerName))).thenReturn(Optional.of(42547019776L)); + when(storageMaintainer.getHostTotalMemoryGb()).thenReturn(10d); when(dockerOperations.getContainerStats(eq(containerName))) .thenReturn(Optional.of(stats1)) .thenReturn(Optional.of(stats2)); nodeAgent.converge(); // Run the converge loop once to initialize lastNodeSpec - nodeAgent.updateContainerNodeMetrics(5); // Update metrics once to init and lastCpuMetric + nodeAgent.updateContainerNodeMetrics(); // Update metrics once to init and lastCpuMetric clock.advance(Duration.ofSeconds(1234)); @@ -552,7 +554,7 @@ public class NodeAgentImplTest { return null; }).when(dockerOperations).executeCommandInContainerAsRoot(any(), any(), anyVararg()); - nodeAgent.updateContainerNodeMetrics(5); + nodeAgent.updateContainerNodeMetrics(); } @Test @@ -568,7 +570,7 @@ public class NodeAgentImplTest { nodeAgent.converge(); // Run the converge loop once to initialize lastNodeSpec - nodeAgent.updateContainerNodeMetrics(5); + nodeAgent.updateContainerNodeMetrics(); Set<Map<String, Object>> actualMetrics = metricReceiver.getAllMetricsRaw(); assertEquals(Collections.emptySet(), actualMetrics); |