summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author Bjørn Christian Seime <bjorncs@yahooinc.com> 2023-08-24 09:45:00 +0200
committer Bjørn Christian Seime <bjorncs@yahooinc.com> 2023-08-24 09:50:11 +0200
commit 65655cbcadc890bcad907d6cc34995a926c29183 (patch)
tree d0a80d02f648ca6981bea39c70e4073c2672fd5a
parent a5b9150a448ed1c30d13c64055a0bc534a1b4a95 (diff)
Add additional metrics for container memory
-rw-r--r-- metrics/src/main/java/ai/vespa/metrics/HostedNodeAdminMetrics.java 4
-rw-r--r-- metrics/src/main/java/ai/vespa/metrics/set/SystemMetrics.java 4
-rw-r--r-- node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/cgroup/MemoryController.java 17
-rw-r--r-- node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/container/ContainerStats.java 8
-rw-r--r-- node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/container/ContainerStatsCollector.java 6
-rw-r--r-- node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/cgroup/CgroupTest.java 8
-rw-r--r-- node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/container/ContainerStatsCollectorTest.java 4
7 files changed, 43 insertions, 8 deletions
diff --git a/metrics/src/main/java/ai/vespa/metrics/HostedNodeAdminMetrics.java b/metrics/src/main/java/ai/vespa/metrics/HostedNodeAdminMetrics.java
index 97185e9c703..a5f21eeba44 100644
--- a/metrics/src/main/java/ai/vespa/metrics/HostedNodeAdminMetrics.java
+++ b/metrics/src/main/java/ai/vespa/metrics/HostedNodeAdminMetrics.java
@@ -21,6 +21,10 @@ public enum HostedNodeAdminMetrics implements VespaMetrics {
MEM_UTIL("mem.util", Unit.PERCENTAGE, "Memory utilisation"),
MEM_TOTAL_USED("mem_total.used", Unit.BYTE, "Total amount of memory used by the node, including OS buffer caches"),
MEM_TOTAL_UTIL("mem_total.util", Unit.PERCENTAGE, "Total memory utilisation"),
+ MEM_SOCK("mem.sock", Unit.BYTE, "Amount of memory used in network transmission buffers"),
+ MEM_SLAB_RECLAIMABLE("mem.slab_reclaimable", Unit.BYTE, "Amount of 'slab' that might be reclaimed"),
+ MEM_SLAB("mem.slab", Unit.BYTE, "Amount of memory used for storing in-kernel data structures"),
+ MEM_ANON("mem.anon", Unit.BYTE, "Amount of memory used in anonymous mappings"),
GPU_UTIL("gpu.util", Unit.PERCENTAGE, "GPU utilisation"),
GPU_MEM_USED("gpu.memory.used", Unit.BYTE, "GPU memory used"),
GPU_MEM_TOTAL("gpu.memory.total", Unit.BYTE, "GPU memory available"),
diff --git a/metrics/src/main/java/ai/vespa/metrics/set/SystemMetrics.java b/metrics/src/main/java/ai/vespa/metrics/set/SystemMetrics.java
index 0560daebc43..a86deb3830b 100644
--- a/metrics/src/main/java/ai/vespa/metrics/set/SystemMetrics.java
+++ b/metrics/src/main/java/ai/vespa/metrics/set/SystemMetrics.java
@@ -30,6 +30,10 @@ public class SystemMetrics {
new Metric(HostedNodeAdminMetrics.MEM_UTIL.baseName()),
new Metric(HostedNodeAdminMetrics.MEM_TOTAL_USED.baseName()),
new Metric(HostedNodeAdminMetrics.MEM_TOTAL_UTIL.baseName()),
+ new Metric(HostedNodeAdminMetrics.MEM_SOCK.baseName()),
+ new Metric(HostedNodeAdminMetrics.MEM_SLAB_RECLAIMABLE.baseName()),
+ new Metric(HostedNodeAdminMetrics.MEM_SLAB.baseName()),
+ new Metric(HostedNodeAdminMetrics.MEM_ANON.baseName()),
new Metric(HostedNodeAdminMetrics.GPU_UTIL.baseName()),
new Metric(HostedNodeAdminMetrics.GPU_MEM_USED.baseName()),
new Metric(HostedNodeAdminMetrics.GPU_MEM_TOTAL.baseName())
diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/cgroup/MemoryController.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/cgroup/MemoryController.java
index 840cd025917..3dc32e14a9d 100644
--- a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/cgroup/MemoryController.java
+++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/cgroup/MemoryController.java
@@ -31,9 +31,11 @@ public class MemoryController {
return cgroup.readIfExists("memory.current").map(Size::from);
}
- /** @return Number of bytes used to cache filesystem data, including tmpfs and shared memory. */
- public Size readFileSystemCache() {
- return Size.from(readField(cgroup.readLines("memory.stat"), "file"));
+ public Stats readStat() {
+ var lines = cgroup.readLines("memory.stat");
+ return new Stats(
+ Size.from(readField(lines, "file")), Size.from(readField(lines, "sock")), Size.from(readField(lines, "slab")),
+ Size.from(readField(lines, "slab_reclaimable")), Size.from(readField(lines, "anon")));
}
private static String readField(List<String> lines, String fieldName) {
@@ -45,4 +47,13 @@ public class MemoryController {
.findFirst()
.orElseThrow(() -> new IllegalArgumentException("No such field: " + fieldName));
}
+
+ /**
+ * @param file Number of bytes used to cache filesystem data, including tmpfs and shared memory.
+ * @param sock Amount of memory used in network transmission buffers.
+ * @param slab Amount of memory used for storing in-kernel data structures.
+ * @param slabReclaimable Part of "slab" that might be reclaimed, such as dentries and inodes.
+ * @param anon Amount of memory used in anonymous mappings such as brk(), sbrk(), and mmap(MAP_ANONYMOUS).
+ */
+ public record Stats(Size file, Size sock, Size slab, Size slabReclaimable, Size anon) {}
}
diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/container/ContainerStats.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/container/ContainerStats.java
index a5606784c12..1c02072ed2b 100644
--- a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/container/ContainerStats.java
+++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/container/ContainerStats.java
@@ -44,8 +44,14 @@ public record ContainerStats(Map<String, NetworkStats> networks,
* @param cache memory used by cache in bytes
* @param usage memory usage in bytes
* @param limit memory limit in bytes
+ * @param sock network transmission buffers in bytes
+ * @param slab in-kernel data structures in bytes
+ * @param slabReclaimable part of "slab" that might be reclaimed in bytes
+ * @param anon anonymous mappings in bytes
*/
- public record MemoryStats(long cache, long usage, long limit) {}
+ public record MemoryStats(long cache, long usage, long limit, long sock, long slab, long slabReclaimable, long anon) {
+ public MemoryStats(long cache, long usage, long limit) { this(cache, usage, limit, 0, 0, 0, 0); }
+ }
/**
* Statistics for CPU usage
diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/container/ContainerStatsCollector.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/container/ContainerStatsCollector.java
index 8244666f9e0..0e16e2cabf6 100644
--- a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/container/ContainerStatsCollector.java
+++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/container/ContainerStatsCollector.java
@@ -105,8 +105,10 @@ class ContainerStatsCollector {
MemoryController memoryController = rootCgroup.resolveContainer(containerId).memory();
Size max = memoryController.readMax();
long memoryUsageInBytes = memoryController.readCurrent().value();
- long cachedInBytes = memoryController.readFileSystemCache().value();
- return new ContainerStats.MemoryStats(cachedInBytes, memoryUsageInBytes, max.isMax() ? -1 : max.value());
+ var stats = memoryController.readStat();
+ return new ContainerStats.MemoryStats(
+ stats.file().value(), memoryUsageInBytes, max.isMax() ? -1 : max.value(),
+ stats.sock().value(), stats.slab().value(), stats.slabReclaimable().value(), stats.anon().value());
}
private ContainerStats.NetworkStats collectNetworkStats(String iface, int containerPid) throws IOException {
diff --git a/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/cgroup/CgroupTest.java b/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/cgroup/CgroupTest.java
index c93d90329f7..dd81ea8e76a 100644
--- a/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/cgroup/CgroupTest.java
+++ b/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/cgroup/CgroupTest.java
@@ -106,8 +106,14 @@ public class CgroupTest {
shmem 8380416
file_mapped 1081344
file_dirty 135168
+ slab_reclaimable 1424320
""");
- assertEquals(69206016L, containerCgroup.memory().readFileSystemCache().value());
+ var stats = containerCgroup.memory().readStat();
+ assertEquals(69206016L, stats.file().value());
+ assertEquals(3481600L, stats.anon().value());
+ assertEquals(3552304L, stats.slab().value());
+ assertEquals(73728L, stats.sock().value());
+ assertEquals(1424320L, stats.slabReclaimable().value());
}
@Test
diff --git a/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/container/ContainerStatsCollectorTest.java b/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/container/ContainerStatsCollectorTest.java
index d4598c8923f..2990e881640 100644
--- a/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/container/ContainerStatsCollectorTest.java
+++ b/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/container/ContainerStatsCollectorTest.java
@@ -2,6 +2,7 @@
package com.yahoo.vespa.hosted.node.admin.container;
import com.yahoo.vespa.hosted.node.admin.cgroup.Cgroup;
+import com.yahoo.vespa.hosted.node.admin.cgroup.MemoryController;
import com.yahoo.vespa.hosted.node.admin.cgroup.Size;
import com.yahoo.vespa.hosted.node.admin.configserver.noderepository.NodeSpec;
import com.yahoo.vespa.hosted.node.admin.nodeagent.NodeAgentContext;
@@ -97,7 +98,8 @@ public class ContainerStatsCollectorTest {
private void mockMemoryStats(ContainerId containerId) {
when(cgroup.resolveContainer(eq(containerId)).memory().readCurrent()).thenReturn(Size.from(1228017664L));
when(cgroup.resolveContainer(eq(containerId)).memory().readMax()).thenReturn(Size.from(2147483648L));
- when(cgroup.resolveContainer(eq(containerId)).memory().readFileSystemCache()).thenReturn(Size.from(470790144L));
+ when(cgroup.resolveContainer(eq(containerId)).memory().readStat()).thenReturn(
+ new MemoryController.Stats(Size.from(470790144L), Size.from(0), Size.from(0), Size.from(0), Size.from(0)));
}
private void mockCpuStats(ContainerId containerId) throws IOException {