diff options
author | Valerij Fredriksen <valerijf@yahooinc.com> | 2021-10-28 14:39:12 +0200 |
---|---|---|
committer | Valerij Fredriksen <valerijf@yahooinc.com> | 2021-10-28 14:39:12 +0200 |
commit | 2db9b00be157a7d4fdc29937cd0f3c5485c9128c (patch) | |
tree | 5b1f7f4fb409429f7cf8ccc463db76fc8b927e61 /node-admin/src/main | |
parent | 9a5d47e2098a595dc81c013930fb9fe8089bd9e1 (diff) |
Support CGroups v2
Diffstat (limited to 'node-admin/src/main')
8 files changed, 388 insertions, 186 deletions
diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/container/CGroup.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/container/CGroup.java new file mode 100644 index 00000000000..d194198b2d7 --- /dev/null +++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/container/CGroup.java @@ -0,0 +1,83 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.vespa.hosted.node.admin.container; + +import com.yahoo.collections.Pair; +import com.yahoo.vespa.hosted.node.admin.nodeagent.NodeAgentContext; + +import java.io.IOException; +import java.util.Arrays; +import java.util.Map; +import java.util.Optional; +import java.util.OptionalInt; + +import static com.yahoo.vespa.hosted.node.admin.container.ContainerStatsCollector.userHzToMicroSeconds; + +/** + * Read and write interface to the CGroup of a podman container. + * + * @author freva + */ +public interface CGroup { + + Optional<Pair<Integer, Integer>> cpuQuotaPeriod(ContainerId containerId); + OptionalInt cpuShares(ContainerId containerId); + + boolean updateCpuQuotaPeriod(NodeAgentContext context, ContainerId containerId, int cpuQuotaUs, int periodUs); + boolean updateCpuShares(NodeAgentContext context, ContainerId containerId, int shares); + + Map<CpuStatField, Long> cpuStats(ContainerId containerId) throws IOException; + + /** @return Maximum amount of memory that can be used by the cgroup and its descendants. */ + long memoryLimitInBytes(ContainerId containerId) throws IOException; + + /** @return The total amount of memory currently being used by the cgroup and its descendants. */ + long memoryUsageInBytes(ContainerId containerId) throws IOException; + + /** @return Number of bytes used to cache filesystem data, including tmpfs and shared memory. 
*/ + long memoryCacheInBytes(ContainerId containerId) throws IOException; + + enum CpuStatField { + TOTAL_USAGE_USEC(null/* in a dedicated file */, "usage_usec"), + USER_USAGE_USEC("user", "user_usec"), + SYSTEM_USAGE_USEC("system", "system_usec"), + TOTAL_PERIODS("nr_periods", "nr_periods"), + THROTTLED_PERIODS("nr_throttled", "nr_throttled"), + THROTTLED_TIME_USEC("throttled_time", "throttled_usec"); + + private final String v1Name; + private final String v2Name; + CpuStatField(String v1Name, String v2Name) { + this.v1Name = v1Name; + this.v2Name = v2Name; + } + + long parseValueV1(String value) { + long longValue = Long.parseLong(value); + switch (this) { + case THROTTLED_TIME_USEC: + case TOTAL_USAGE_USEC: + return longValue / 1000; // Value in ns + case USER_USAGE_USEC: + case SYSTEM_USAGE_USEC: + return userHzToMicroSeconds(longValue); + default: return longValue; + } + } + + long parseValueV2(String value) { + return Long.parseLong(value); + } + + static Optional<CpuStatField> fromV1Field(String name) { + return Arrays.stream(values()) + .filter(field -> name.equals(field.v1Name)) + .findFirst(); + } + + static Optional<CpuStatField> fromV2Field(String name) { + return Arrays.stream(values()) + .filter(field -> name.equals(field.v2Name)) + .findFirst(); + } + } +} diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/container/CGroupV1.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/container/CGroupV1.java new file mode 100644 index 00000000000..62e70ca26aa --- /dev/null +++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/container/CGroupV1.java @@ -0,0 +1,129 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.vespa.hosted.node.admin.container; + +import com.yahoo.collections.Pair; +import com.yahoo.vespa.hosted.node.admin.nodeagent.NodeAgentContext; +import com.yahoo.vespa.hosted.node.admin.task.util.file.UnixPath; + +import java.io.IOException; +import java.nio.file.FileSystem; +import java.nio.file.Files; +import java.nio.file.Path; +import java.util.HashMap; +import java.util.Map; +import java.util.Optional; +import java.util.OptionalInt; +import java.util.logging.Logger; +import java.util.stream.Stream; + +import static com.yahoo.vespa.hosted.node.admin.container.CGroupV2.parseLong; + +/** + * Read and write interface to the CGroup V1 of a Podman container. + * + * @see <a href="https://www.kernel.org/doc/html/latest/admin-guide/cgroup-v1/index.html">CGroups V1</a> + * @author freva + */ +public class CGroupV1 implements CGroup { + + private static final Logger logger = Logger.getLogger(CGroupV1.class.getName()); + + private final FileSystem fileSystem; + + public CGroupV1(FileSystem fileSystem) { + this.fileSystem = fileSystem; + } + + @Override + public Optional<Pair<Integer, Integer>> cpuQuotaPeriod(ContainerId containerId) { + OptionalInt quota = readCgroupsCpuInt(cfsQuotaPath(containerId)); + if (quota.isEmpty() || quota.getAsInt() < 0) return Optional.empty(); + OptionalInt period = readCgroupsCpuInt(cfsPeriodPath(containerId)); + if (period.isEmpty()) return Optional.empty(); + return Optional.of(new Pair<>(quota.getAsInt(), period.getAsInt())); + } + + @Override + public OptionalInt cpuShares(ContainerId containerId) { + return readCgroupsCpuInt(sharesPath(containerId)); + } + + @Override + public boolean updateCpuQuotaPeriod(NodeAgentContext context, ContainerId containerId, int cpuQuotaUs, int periodUs) { + return writeCgroupsCpuInt(context, cfsQuotaPath(containerId), cpuQuotaUs) | + writeCgroupsCpuInt(context, cfsPeriodPath(containerId), periodUs); + } + + @Override + public boolean updateCpuShares(NodeAgentContext context, ContainerId 
containerId, int shares) { + return writeCgroupsCpuInt(context, sharesPath(containerId), shares); + } + + @Override + public Map<CpuStatField, Long> cpuStats(ContainerId containerId) throws IOException { + Map<CpuStatField, Long> stats = new HashMap<>(); + stats.put(CpuStatField.TOTAL_USAGE_USEC, parseLong(cpuacctPath(containerId).resolve("cpuacct.usage")) / 1000); + Stream.concat(Files.readAllLines(cpuacctPath(containerId).resolve("cpuacct.stat")).stream(), + Files.readAllLines(cpuacctPath(containerId).resolve("cpu.stat")).stream()) + .forEach(line -> { + String[] parts = line.split("\\s+"); + if (parts.length != 2) return; + CpuStatField.fromV1Field(parts[0]).ifPresent(field -> stats.put(field, field.parseValueV1(parts[1]))); + }); + return stats; + } + + @Override + public long memoryLimitInBytes(ContainerId containerId) throws IOException { + return parseLong(memoryPath(containerId).resolve("memory.limit_in_bytes")); + } + + @Override + public long memoryUsageInBytes(ContainerId containerId) throws IOException { + return parseLong(memoryPath(containerId).resolve("memory.usage_in_bytes")); + } + + @Override + public long memoryCacheInBytes(ContainerId containerId) throws IOException { + return parseLong(memoryPath(containerId).resolve("memory.stat"), "cache"); + } + + private Path cpuacctPath(ContainerId containerId) { + return fileSystem.getPath("/sys/fs/cgroup/cpuacct/machine.slice/libpod-" + containerId + ".scope"); + } + + private Path cpuPath(ContainerId containerId) { + return fileSystem.getPath("/sys/fs/cgroup/cpu/machine.slice/libpod-" + containerId + ".scope"); + } + + private Path memoryPath(ContainerId containerId) { + return fileSystem.getPath("/sys/fs/cgroup/memory/machine.slice/libpod-" + containerId + ".scope"); + } + + private UnixPath cfsQuotaPath(ContainerId containerId) { + return new UnixPath(cpuPath(containerId).resolve("cpu.cfs_quota_us")); + } + + private UnixPath cfsPeriodPath(ContainerId containerId) { + return new 
UnixPath(cpuPath(containerId).resolve("cpu.cfs_period_us")); + } + + private UnixPath sharesPath(ContainerId containerId) { + return new UnixPath(cpuPath(containerId).resolve("cpu.shares")); + } + + private static OptionalInt readCgroupsCpuInt(UnixPath unixPath) { + return unixPath.readUtf8FileIfExists() + .map(s -> OptionalInt.of(Integer.parseInt(s.strip()))) + .orElseGet(OptionalInt::empty); + } + + private static boolean writeCgroupsCpuInt(NodeAgentContext context, UnixPath unixPath, int value) { + int currentValue = readCgroupsCpuInt(unixPath).orElseThrow(); + if (currentValue == value) return false; + + context.recordSystemModification(logger, "Updating " + unixPath + " from " + currentValue + " to " + value); + unixPath.writeUtf8File(Integer.toString(value)); + return true; + } +} diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/container/CGroupV2.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/container/CGroupV2.java new file mode 100644 index 00000000000..6b2d98a682a --- /dev/null +++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/container/CGroupV2.java @@ -0,0 +1,133 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.vespa.hosted.node.admin.container; + +import com.yahoo.collections.Pair; +import com.yahoo.vespa.hosted.node.admin.nodeagent.NodeAgentContext; +import com.yahoo.vespa.hosted.node.admin.task.util.file.UnixPath; + +import java.io.IOException; +import java.nio.file.FileSystem; +import java.nio.file.Files; +import java.nio.file.Path; +import java.util.List; +import java.util.Map; +import java.util.Optional; +import java.util.OptionalInt; +import java.util.logging.Logger; +import java.util.stream.Collectors; + +/** + * Read and write interface to the CGroup V2 of a Podman container. 
+ * + * @see <a href="https://www.kernel.org/doc/html/latest/admin-guide/cgroup-v2.html">CGroups V2</a> + * @author freva + */ +public class CGroupV2 implements CGroup { + + private static final Logger logger = Logger.getLogger(CGroupV2.class.getName()); + private static final String MAX = "max"; + + private final FileSystem fileSystem; + + public CGroupV2(FileSystem fileSystem) { + this.fileSystem = fileSystem; + } + + @Override + public Optional<Pair<Integer, Integer>> cpuQuotaPeriod(ContainerId containerId) { + return cpuMaxPath(containerId).readUtf8FileIfExists() + .filter(s -> !s.startsWith(MAX)) + .map(s -> { + String[] parts = s.strip().split(" "); + return new Pair<>(Integer.parseInt(parts[0]), Integer.parseInt(parts[1])); + }); + } + + @Override + public OptionalInt cpuShares(ContainerId containerId) { + return cpuWeightPath(containerId).readUtf8FileIfExists() + .map(s -> OptionalInt.of(weightToShares(Integer.parseInt(s.strip())))) + .orElseGet(OptionalInt::empty); + } + + @Override + public boolean updateCpuQuotaPeriod(NodeAgentContext context, ContainerId containerId, int cpuQuotaUs, int periodUs) { + String wanted = String.format("%s %d", cpuQuotaUs < 0 ? 
MAX : cpuQuotaUs, periodUs); + return writeCGroupsValue(context, cpuMaxPath(containerId), wanted); + } + + @Override + public boolean updateCpuShares(NodeAgentContext context, ContainerId containerId, int shares) { + return writeCGroupsValue(context, cpuWeightPath(containerId), Integer.toString(sharesToWeight(shares))); + } + + @Override + public Map<CpuStatField, Long> cpuStats(ContainerId containerId) throws IOException { + return Files.readAllLines(cgroupRoot(containerId).resolve("cpu.stat")).stream() + .map(line -> line.split("\\s+")) + .filter(parts -> parts.length == 2) + .flatMap(parts -> CpuStatField.fromV2Field(parts[0]).stream().map(field -> new Pair<>(field, field.parseValueV2(parts[1])))) + .collect(Collectors.toMap(Pair::getFirst, Pair::getSecond)); + } + + @Override + public long memoryLimitInBytes(ContainerId containerId) throws IOException { + String limit = Files.readString(cgroupRoot(containerId).resolve("memory.max")).strip(); + return MAX.equals(limit) ? -1L : Long.parseLong(limit); + } + + @Override + public long memoryUsageInBytes(ContainerId containerId) throws IOException { + return parseLong(cgroupRoot(containerId).resolve("memory.current")); + } + + @Override + public long memoryCacheInBytes(ContainerId containerId) throws IOException { + return parseLong(cgroupRoot(containerId).resolve("memory.stat"), "file"); + } + + private Path cgroupRoot(ContainerId containerId) { + // crun path, runc path is without the 'container' directory + return fileSystem.getPath("/sys/fs/cgroup/machine.slice/libpod-" + containerId + ".scope/container"); + } + + private UnixPath cpuMaxPath(ContainerId containerId) { + return new UnixPath(cgroupRoot(containerId).resolve("cpu.max")); + } + + private UnixPath cpuWeightPath(ContainerId containerId) { + return new UnixPath(cgroupRoot(containerId).resolve("cpu.weight")); + } + + private static boolean writeCGroupsValue(NodeAgentContext context, UnixPath unixPath, String value) { + String currentValue = 
unixPath.readUtf8File().strip(); + if (currentValue.equals(value)) return false; + + context.recordSystemModification(logger, "Updating " + unixPath + " from " + currentValue + " to " + value); + unixPath.writeUtf8File(value); + return true; + } + + // Must be same as in crun: https://github.com/containers/crun/blob/72c6e60ade0e4716fe2d8353f0d97d72cc8d1510/src/libcrun/cgroup.c#L3061 + static int sharesToWeight(int shares) { return (int) (1 + ((shares - 2L) * 9999) / 262142); } + static int weightToShares(int weight) { return (int) (2 + ((weight - 1L) * 262142) / 9999); } + + static long parseLong(Path path) throws IOException { + return Long.parseLong(Files.readString(path).trim()); + } + + static long parseLong(Path path, String fieldName) throws IOException { + return parseLong(Files.readAllLines(path), fieldName); + } + + static long parseLong(List<String> lines, String fieldName) { + for (String line : lines) { + String[] fields = line.split("\\s+"); + if (fields.length != 2) + throw new IllegalArgumentException("Expected line on the format 'key value', got: '" + line + "'"); + + if (fieldName.equals(fields[0])) return Long.parseLong(fields[1]); + } + throw new IllegalArgumentException("No such field: " + fieldName); + } +} diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/container/Cgroup.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/container/Cgroup.java deleted file mode 100644 index 1d87415b78e..00000000000 --- a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/container/Cgroup.java +++ /dev/null @@ -1,98 +0,0 @@ -// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
-// -package com.yahoo.vespa.hosted.node.admin.container; - -import com.yahoo.vespa.hosted.node.admin.nodeagent.NodeAgentContext; -import com.yahoo.vespa.hosted.node.admin.task.util.file.UnixPath; - -import java.nio.file.FileSystem; -import java.nio.file.Path; -import java.util.OptionalInt; -import java.util.logging.Logger; - -/** - * Read and write interface to the v1 cgroup of a podman container. - * See <a href="https://man7.org/linux/man-pages/man7/cgroups.7.html">cgroups(7)</a> for background. - * - * @author hakon - */ -public class Cgroup { - - private static final Logger logger = Logger.getLogger(Cgroup.class.getName()); - - private final FileSystem fileSystem; - private final ContainerId containerId; - - public Cgroup(FileSystem fileSystem, ContainerId containerId) { - this.fileSystem = fileSystem; - this.containerId = containerId; - } - - public OptionalInt readCpuQuota() { - return readCgroupsCpuInt(cfsQuotaPath()); - } - - public OptionalInt readCpuPeriod() { - return readCgroupsCpuInt(cfsPeriodPath()); - } - - public OptionalInt readCpuShares() { - return readCgroupsCpuInt(sharesPath()); - } - - public boolean updateCpuQuota(NodeAgentContext context, int cpuQuotaUs) { - return writeCgroupsCpuInt(context, cfsQuotaPath(), cpuQuotaUs); - } - - public boolean updateCpuPeriod(NodeAgentContext context, int periodUs) { - return writeCgroupsCpuInt(context, cfsPeriodPath(), periodUs); - } - - public boolean updateCpuShares(NodeAgentContext context, int shares) { - return writeCgroupsCpuInt(context, sharesPath(), shares); - } - - /** Returns the path to the podman container's scope directory for the cpuacct controller. */ - public Path cpuacctPath() { - return fileSystem.getPath("/sys/fs/cgroup/cpuacct/machine.slice/libpod-" + containerId + ".scope"); - } - - /** Returns the path to the podman container's scope directory for the cpu controller. 
*/ - public Path cpuPath() { - return fileSystem.getPath("/sys/fs/cgroup/cpu/machine.slice/libpod-" + containerId + ".scope"); - } - - /** Returns the path to the podman container's scope directory for the memory controller. */ - public Path memoryPath() { - return fileSystem.getPath("/sys/fs/cgroup/memory/machine.slice/libpod-" + containerId + ".scope"); - } - - private UnixPath cfsQuotaPath() { - return new UnixPath(cpuPath().resolve("cpu.cfs_quota_us")); - } - - private UnixPath cfsPeriodPath() { - return new UnixPath(cpuPath().resolve("cpu.cfs_period_us")); - } - - private UnixPath sharesPath() { - return new UnixPath(cpuPath().resolve("cpu.shares")); - } - - private OptionalInt readCgroupsCpuInt(UnixPath unixPath) { - return unixPath.readUtf8FileIfExists() - .map(s -> OptionalInt.of(Integer.parseInt(s.strip()))) - .orElseGet(OptionalInt::empty); - } - - private boolean writeCgroupsCpuInt(NodeAgentContext context, UnixPath unixPath, int value) { - int currentValue = readCgroupsCpuInt(unixPath).orElseThrow(); - if (currentValue == value) { - return false; - } - - context.recordSystemModification(logger, "Updating " + unixPath + " from " + currentValue + " to " + value); - unixPath.writeUtf8File(Integer.toString(value)); - return true; - } -} diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/container/ContainerOperations.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/container/ContainerOperations.java index fd38d38b381..af12a6201d3 100644 --- a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/container/ContainerOperations.java +++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/container/ContainerOperations.java @@ -33,11 +33,11 @@ public class ContainerOperations { private final ContainerImagePruner imagePruner; private final ContainerStatsCollector containerStatsCollector; - public ContainerOperations(ContainerEngine containerEngine, FileSystem fileSystem) { + public 
ContainerOperations(ContainerEngine containerEngine, CGroup cgroup, FileSystem fileSystem) { this.containerEngine = Objects.requireNonNull(containerEngine); this.imageDownloader = new ContainerImageDownloader(containerEngine); this.imagePruner = new ContainerImagePruner(containerEngine, Clock.systemUTC()); - this.containerStatsCollector = new ContainerStatsCollector(Objects.requireNonNull(fileSystem)); + this.containerStatsCollector = new ContainerStatsCollector(cgroup, fileSystem); } public void createContainer(NodeAgentContext context, ContainerData containerData, ContainerResources containerResources) { @@ -86,7 +86,7 @@ public class ContainerOperations { } /** - * Suspend node and return output. Suspending a node means the node should be taken temporarly offline, + * Suspend node and return output. Suspending a node means the node should be taken temporarily offline, * such that maintenance of the node can be done (upgrading, rebooting, etc). */ public String suspendNode(NodeAgentContext context) { diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/container/ContainerResources.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/container/ContainerResources.java index a7f6430035d..1838d8a8ac0 100644 --- a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/container/ContainerResources.java +++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/container/ContainerResources.java @@ -87,7 +87,12 @@ public class ContainerResources { /** Returns true iff the CPU component(s) of between <code>this</code> and <code>other</code> are equal */ public boolean equalsCpu(ContainerResources other) { - return Math.abs(other.cpus - cpus) < 0.0001 && cpuShares == other.cpuShares; + return Math.abs(other.cpus - cpus) < 0.0001 && + // When using CGroups V2, CPU shares (range [2, 262144]) is mapped to CPU weight (range [1, 10000]), + // because there are ~26.2 shares/weight, we must allow for small deviation in cpuShares + // when 
comparing ContainerResources created from NodeResources vs one created from reading the + // CGroups weight file + Math.abs(cpuShares - other.cpuShares) < 28; } @Override diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/container/ContainerStats.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/container/ContainerStats.java index fae4fc72145..168f319febd 100644 --- a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/container/ContainerStats.java +++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/container/ContainerStats.java @@ -182,16 +182,16 @@ public class ContainerStats { public int getOnlineCpus() { return this.onlineCpus; } - /** Total CPU time (in ns) spent executing all the processes on this host */ + /** Total CPU time (in µs) spent executing all the processes on this host */ public long getSystemCpuUsage() { return this.systemCpuUsage; } - /** Total CPU time (in ns) spent running all the processes in this container */ + /** Total CPU time (in µs) spent running all the processes in this container */ public long getTotalUsage() { return totalUsage; } - /** Total CPU time (in ns) spent in kernel mode while executing processes in this container */ + /** Total CPU time (in µs) spent in kernel mode while executing processes in this container */ public long getUsageInKernelMode() { return usageInKernelMode; } - /** Total CPU time (in ns) processes in this container were throttled for */ + /** Total CPU time (in µs) processes in this container were throttled for */ public long getThrottledTime() { return throttledTime; } /** Number of periods with throttling enabled for this container */ diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/container/ContainerStatsCollector.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/container/ContainerStatsCollector.java index e2176341bc0..67956892898 100644 --- 
a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/container/ContainerStatsCollector.java +++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/container/ContainerStatsCollector.java @@ -7,7 +7,6 @@ import java.nio.file.FileSystem; import java.nio.file.Files; import java.nio.file.NoSuchFileException; import java.nio.file.Path; -import java.time.Duration; import java.util.List; import java.util.Map; import java.util.Objects; @@ -22,18 +21,25 @@ import java.util.Optional; */ class ContainerStatsCollector { + private final CGroup cgroup; private final FileSystem fileSystem; + private final int onlineCpus; - public ContainerStatsCollector(FileSystem fileSystem) { + ContainerStatsCollector(CGroup cgroup, FileSystem fileSystem) { + this(cgroup, fileSystem, Runtime.getRuntime().availableProcessors()); + } + + ContainerStatsCollector(CGroup cgroup, FileSystem fileSystem, int onlineCpus) { + this.cgroup = Objects.requireNonNull(cgroup); this.fileSystem = Objects.requireNonNull(fileSystem); + this.onlineCpus = onlineCpus; } /** Collect statistics for given container ID and PID */ public Optional<ContainerStats> collect(ContainerId containerId, int pid, String iface) { - Cgroup cgroup = new Cgroup(fileSystem, containerId); try { - ContainerStats.CpuStats cpuStats = collectCpuStats(cgroup); - ContainerStats.MemoryStats memoryStats = collectMemoryStats(cgroup); + ContainerStats.CpuStats cpuStats = collectCpuStats(containerId); + ContainerStats.MemoryStats memoryStats = collectMemoryStats(containerId); Map<String, ContainerStats.NetworkStats> networkStats = Map.of(iface, collectNetworkStats(iface, pid)); return Optional.of(new ContainerStats(networkStats, memoryStats, cpuStats)); } catch (NoSuchFileException ignored) { @@ -43,24 +49,21 @@ class ContainerStatsCollector { } } - private ContainerStats.CpuStats collectCpuStats(Cgroup cgroup) throws IOException { - List<String> cpuStatLines = Files.readAllLines(cpuStatPath(cgroup)); - long throttledActivePeriods 
= parseLong(cpuStatLines, "nr_periods"); - long throttledPeriods = parseLong(cpuStatLines, "nr_throttled"); - long throttledTime = parseLong(cpuStatLines, "throttled_time"); - return new ContainerStats.CpuStats(cpuCount(cgroup), - systemCpuUsage().toNanos(), - containerCpuUsage(cgroup).toNanos(), - containerCpuUsageSystem(cgroup).toNanos(), - throttledTime, - throttledActivePeriods, - throttledPeriods); + private ContainerStats.CpuStats collectCpuStats(ContainerId containerId) throws IOException { + Map<CGroup.CpuStatField, Long> cpuStats = cgroup.cpuStats(containerId); + return new ContainerStats.CpuStats(onlineCpus, + systemCpuUsage(), + cpuStats.get(CGroup.CpuStatField.TOTAL_USAGE_USEC), + cpuStats.get(CGroup.CpuStatField.SYSTEM_USAGE_USEC), + cpuStats.get(CGroup.CpuStatField.THROTTLED_TIME_USEC), + cpuStats.get(CGroup.CpuStatField.TOTAL_PERIODS), + cpuStats.get(CGroup.CpuStatField.THROTTLED_PERIODS)); } - private ContainerStats.MemoryStats collectMemoryStats(Cgroup cgroup) throws IOException { - long memoryLimitInBytes = parseLong(memoryLimitPath(cgroup)); - long memoryUsageInBytes = parseLong(memoryUsagePath(cgroup)); - long cachedInBytes = parseLong(memoryStatPath(cgroup), "cache"); + private ContainerStats.MemoryStats collectMemoryStats(ContainerId containerId) throws IOException { + long memoryLimitInBytes = cgroup.memoryLimitInBytes(containerId); + long memoryUsageInBytes = cgroup.memoryUsageInBytes(containerId); + long cachedInBytes = cgroup.memoryCacheInBytes(containerId); return new ContainerStats.MemoryStats(cachedInBytes, memoryUsageInBytes, memoryLimitInBytes); } @@ -82,34 +85,10 @@ class ContainerStatsCollector { throw new IllegalArgumentException("No statistics found for interface " + iface); } - /** Number of CPUs seen by given container */ - private int cpuCount(Cgroup cgroup) throws IOException { - return fields(Files.readString(perCpuUsagePath(cgroup))).length; - } - - /** Returns total CPU time spent executing all the processes on this host */ 
- private Duration systemCpuUsage() throws IOException { - long ticks = parseLong(fileSystem.getPath("/proc/stat"), "cpu"); - return ticksToDuration(ticks); - } - - /** Returns total CPU time spent running all processes inside given container */ - private Duration containerCpuUsage(Cgroup cgroup) throws IOException { - return Duration.ofNanos(parseLong(cpuUsagePath(cgroup))); - } - - /** Returns total CPU time spent in kernel/system mode while executing processes inside given container */ - private Duration containerCpuUsageSystem(Cgroup cgroup) throws IOException { - long ticks = parseLong(cpuacctStatPath(cgroup), "system"); - return ticksToDuration(ticks); - } - - private long parseLong(Path path) throws IOException { - return Long.parseLong(Files.readString(path).trim()); - } - - private long parseLong(Path path, String fieldName) throws IOException { - return parseLong(Files.readAllLines(path), fieldName); + /** Returns total CPU time in µs spent executing all the processes on this host */ + private long systemCpuUsage() throws IOException { + long ticks = parseLong(Files.readAllLines(fileSystem.getPath("/proc/stat")), "cpu"); + return userHzToMicroSeconds(ticks); } private long parseLong(List<String> lines, String fieldName) { @@ -129,38 +108,9 @@ class ContainerStatsCollector { return fileSystem.getPath("/proc/" + containerPid + "/net/dev"); } - private Path cpuacctStatPath(Cgroup cgroup) { - return cgroup.cpuacctPath().resolve("cpuacct.stat"); - } - - private Path cpuUsagePath(Cgroup cgroup) { - return cgroup.cpuacctPath().resolve("cpuacct.usage"); - } - - private Path perCpuUsagePath(Cgroup cgroup) { - return cgroup.cpuacctPath().resolve("cpuacct.usage_percpu"); - } - - private Path cpuStatPath(Cgroup cgroup) { - return cgroup.cpuacctPath().resolve("cpu.stat"); - } - - private Path memoryStatPath(Cgroup cgroup) { - return cgroup.memoryPath().resolve("memory.stat"); - } - - private Path memoryUsagePath(Cgroup cgroup) { - return 
cgroup.memoryPath().resolve("memory.usage_in_bytes"); - } - - private Path memoryLimitPath(Cgroup cgroup) { - return cgroup.memoryPath().resolve("memory.limit_in_bytes"); - } - - private static Duration ticksToDuration(long ticks) { + static long userHzToMicroSeconds(long ticks) { // Ideally we would read this from _SC_CLK_TCK, but then we need JNI. However, in practice this is always 100 on x86 Linux - long ticksPerSecond = 100; - return Duration.ofNanos((ticks * Duration.ofSeconds(1).toNanos()) / ticksPerSecond); + return ticks * 10_000; } private static String[] fields(String s) { |