diff options
author | Håkon Hallingstad <hakon@yahooinc.com> | 2023-04-21 13:40:10 +0200 |
---|---|---|
committer | Håkon Hallingstad <hakon@yahooinc.com> | 2023-04-21 13:40:10 +0200 |
commit | 24d2edf2efd509e1755da6cee746e8085b9df4e2 (patch) | |
tree | 62cf6f086719723857192f869d8829f6c95adb8a /node-admin/src/main | |
parent | 16665469cad3ff8c9415168bbecd27ae297da0dc (diff) |
Remove CGroupV1
Diffstat (limited to 'node-admin/src/main')
6 files changed, 62 insertions, 240 deletions
diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/container/CGroup.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/container/CGroup.java deleted file mode 100644 index b98ad7a11bc..00000000000 --- a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/container/CGroup.java +++ /dev/null @@ -1,90 +0,0 @@ -// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -package com.yahoo.vespa.hosted.node.admin.container; - -import com.yahoo.collections.Pair; -import com.yahoo.vespa.hosted.node.admin.nodeagent.NodeAgentContext; - -import java.io.IOException; -import java.util.Arrays; -import java.util.Map; -import java.util.Optional; -import java.util.OptionalInt; - -import static com.yahoo.vespa.hosted.node.admin.container.ContainerStatsCollector.userHzToMicroSeconds; - -/** - * Read and write interface to the CGroup of a podman container. - * - * @author freva - */ -public interface CGroup { - - /** - * Returns quota and period values used for CPU scheduling. This serves as hard cap on CPU usage by allowing - * the CGroup to use up to {@code quota} each {@code period}. If uncapped, quota will be negative. - * - * @param containerId full container ID. - * @return CPU quota and period for the given container. Empty if CGroup for this container is not found. - */ - Optional<Pair<Integer, Integer>> cpuQuotaPeriod(ContainerId containerId); - - /** @return number of shares allocated to this CGroup for purposes of CPU time scheduling, empty if CGroup not found */ - OptionalInt cpuShares(ContainerId containerId); - - /** Update CPU quota and period for the given container ID, set quota to -1 value for unlimited */ - boolean updateCpuQuotaPeriod(NodeAgentContext context, ContainerId containerId, int cpuQuotaUs, int periodUs); - - boolean updateCpuShares(NodeAgentContext context, ContainerId containerId, int shares); - - Map<CpuStatField, Long> cpuStats(ContainerId containerId) throws IOException; - - /** @return Maximum amount of memory that can be used by the cgroup and its descendants. */ - long memoryLimitInBytes(ContainerId containerId) throws IOException; - - /** @return The total amount of memory currently being used by the cgroup and its descendants. */ - long memoryUsageInBytes(ContainerId containerId) throws IOException; - - /** @return Number of bytes used to cache filesystem data, including tmpfs and shared memory. */ - long memoryCacheInBytes(ContainerId containerId) throws IOException; - - enum CpuStatField { - TOTAL_USAGE_USEC(null/* in a dedicated file */, "usage_usec"), - USER_USAGE_USEC("user", "user_usec"), - SYSTEM_USAGE_USEC("system", "system_usec"), - TOTAL_PERIODS("nr_periods", "nr_periods"), - THROTTLED_PERIODS("nr_throttled", "nr_throttled"), - THROTTLED_TIME_USEC("throttled_time", "throttled_usec"); - - private final String v1Name; - private final String v2Name; - CpuStatField(String v1Name, String v2Name) { - this.v1Name = v1Name; - this.v2Name = v2Name; - } - - long parseValueV1(String value) { - long longValue = Long.parseLong(value); - return switch (this) { - case THROTTLED_TIME_USEC, TOTAL_USAGE_USEC -> longValue / 1000; // Value in ns - case USER_USAGE_USEC, SYSTEM_USAGE_USEC -> userHzToMicroSeconds(longValue); - default -> longValue; - }; - } - - long parseValueV2(String value) { - return Long.parseLong(value); - } - - static Optional<CpuStatField> fromV1Field(String name) { - return Arrays.stream(values()) - .filter(field -> name.equals(field.v1Name)) - .findFirst(); - } - - static Optional<CpuStatField> fromV2Field(String name) { - return Arrays.stream(values()) - .filter(field -> name.equals(field.v2Name)) - .findFirst(); - } - } -} diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/container/CGroupV1.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/container/CGroupV1.java deleted file mode 100644 index 7607858ec85..00000000000 --- a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/container/CGroupV1.java +++ /dev/null @@ -1,129 +0,0 @@ -// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -package com.yahoo.vespa.hosted.node.admin.container; - -import com.yahoo.collections.Pair; -import com.yahoo.vespa.hosted.node.admin.nodeagent.NodeAgentContext; -import com.yahoo.vespa.hosted.node.admin.task.util.file.UnixPath; - -import java.io.IOException; -import java.nio.file.FileSystem; -import java.nio.file.Files; -import java.nio.file.Path; -import java.util.HashMap; -import java.util.Map; -import java.util.Optional; -import java.util.OptionalInt; -import java.util.logging.Logger; -import java.util.stream.Stream; - -import static com.yahoo.vespa.hosted.node.admin.container.CGroupV2.parseLong; - -/** - * Read and write interface to the CGroup V1 of a Podman container. - * - * @see <a href="https://www.kernel.org/doc/html/latest/admin-guide/cgroup-v1/index.html">CGroups V1</a> - * @author freva - */ -public class CGroupV1 implements CGroup { - - private static final Logger logger = Logger.getLogger(CGroupV1.class.getName()); - - private final FileSystem fileSystem; - - public CGroupV1(FileSystem fileSystem) { - this.fileSystem = fileSystem; - } - - @Override - public Optional<Pair<Integer, Integer>> cpuQuotaPeriod(ContainerId containerId) { - OptionalInt quota = readCgroupsCpuInt(cfsQuotaPath(containerId)); - if (quota.isEmpty()) return Optional.empty(); - OptionalInt period = readCgroupsCpuInt(cfsPeriodPath(containerId)); - if (period.isEmpty()) return Optional.empty(); - return Optional.of(new Pair<>(quota.getAsInt(), period.getAsInt())); - } - - @Override - public OptionalInt cpuShares(ContainerId containerId) { - return readCgroupsCpuInt(sharesPath(containerId)); - } - - @Override - public boolean updateCpuQuotaPeriod(NodeAgentContext context, ContainerId containerId, int cpuQuotaUs, int periodUs) { - return writeCgroupsCpuInt(context, cfsQuotaPath(containerId), cpuQuotaUs) | - writeCgroupsCpuInt(context, cfsPeriodPath(containerId), periodUs); - } - - @Override - public boolean updateCpuShares(NodeAgentContext context, ContainerId containerId, int shares) { - return writeCgroupsCpuInt(context, sharesPath(containerId), shares); - } - - @Override - public Map<CpuStatField, Long> cpuStats(ContainerId containerId) throws IOException { - Map<CpuStatField, Long> stats = new HashMap<>(); - stats.put(CpuStatField.TOTAL_USAGE_USEC, parseLong(cpuacctPath(containerId).resolve("cpuacct.usage")) / 1000); - Stream.concat(Files.readAllLines(cpuacctPath(containerId).resolve("cpuacct.stat")).stream(), - Files.readAllLines(cpuacctPath(containerId).resolve("cpu.stat")).stream()) - .forEach(line -> { - String[] parts = line.split("\\s+"); - if (parts.length != 2) return; - CpuStatField.fromV1Field(parts[0]).ifPresent(field -> stats.put(field, field.parseValueV1(parts[1]))); - }); - return stats; - } - - @Override - public long memoryLimitInBytes(ContainerId containerId) throws IOException { - return parseLong(memoryPath(containerId).resolve("memory.limit_in_bytes")); - } - - @Override - public long memoryUsageInBytes(ContainerId containerId) throws IOException { - return parseLong(memoryPath(containerId).resolve("memory.usage_in_bytes")); - } - - @Override - public long memoryCacheInBytes(ContainerId containerId) throws IOException { - return parseLong(memoryPath(containerId).resolve("memory.stat"), "cache"); - } - - private Path cpuacctPath(ContainerId containerId) { - return fileSystem.getPath("/sys/fs/cgroup/cpuacct/machine.slice/libpod-" + containerId + ".scope"); - } - - private Path cpuPath(ContainerId containerId) { - return fileSystem.getPath("/sys/fs/cgroup/cpu/machine.slice/libpod-" + containerId + ".scope"); - } - - private Path memoryPath(ContainerId containerId) { - return fileSystem.getPath("/sys/fs/cgroup/memory/machine.slice/libpod-" + containerId + ".scope"); - } - - private UnixPath cfsQuotaPath(ContainerId containerId) { - return new UnixPath(cpuPath(containerId).resolve("cpu.cfs_quota_us")); - } - - private UnixPath cfsPeriodPath(ContainerId containerId) { - return new UnixPath(cpuPath(containerId).resolve("cpu.cfs_period_us")); - } - - private UnixPath sharesPath(ContainerId containerId) { - return new UnixPath(cpuPath(containerId).resolve("cpu.shares")); - } - - private static OptionalInt readCgroupsCpuInt(UnixPath unixPath) { - return unixPath.readUtf8FileIfExists() - .map(s -> OptionalInt.of(Integer.parseInt(s.strip()))) - .orElseGet(OptionalInt::empty); - } - - private static boolean writeCgroupsCpuInt(NodeAgentContext context, UnixPath unixPath, int value) { - int currentValue = readCgroupsCpuInt(unixPath).orElseThrow(); - if (currentValue == value) return false; - - context.recordSystemModification(logger, "Updating " + unixPath + " from " + currentValue + " to " + value); - unixPath.writeUtf8File(Integer.toString(value)); - return true; - } -} diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/container/CGroupV2.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/container/CGroupV2.java index 0c86829b96d..fef7f4bbd4c 100644 --- a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/container/CGroupV2.java +++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/container/CGroupV2.java @@ -9,6 +9,7 @@ import java.io.IOException; import java.nio.file.FileSystem; import java.nio.file.Files; import java.nio.file.Path; +import java.util.Arrays; import java.util.List; import java.util.Map; import java.util.Optional; @@ -17,15 +18,16 @@ import java.util.logging.Logger; import java.util.stream.Collectors; /** - * Read and write interface to the CGroup V2 of a Podman container. + * Read and write interface to the cgroup v2 of a Podman container. * * @see <a href="https://www.kernel.org/doc/html/latest/admin-guide/cgroup-v2.html">CGroups V2</a> * @author freva */ -public class CGroupV2 implements CGroup { +public class CGroupV2 { private static final Logger logger = Logger.getLogger(CGroupV2.class.getName()); private static final String MAX = "max"; + public static final String VESPA_CGEXEC_PATH = "/opt/vespa/bin/vespa-cgexec"; private final FileSystem fileSystem; @@ -33,7 +35,13 @@ public class CGroupV2 implements CGroup { this.fileSystem = fileSystem; } - @Override + /** + * Returns quota and period values used for CPU scheduling. This serves as hard cap on CPU usage by allowing + * the CGroupV2 to use up to {@code quota} each {@code period}. If uncapped, quota will be negative. + * + * @param containerId full container ID. + * @return CPU quota and period for the given container. Empty if CGroupV2 for this container is not found. + */ public Optional<Pair<Integer, Integer>> cpuQuotaPeriod(ContainerId containerId) { return cpuMaxPath(containerId).readUtf8FileIfExists() .map(s -> { @@ -42,45 +50,68 @@ public class CGroupV2 implements CGroup { }); } - @Override + /** @return number of shares allocated to this CGroupV2 for purposes of CPU time scheduling, empty if CGroupV2 not found */ public OptionalInt cpuShares(ContainerId containerId) { return cpuWeightPath(containerId).readUtf8FileIfExists() .map(s -> OptionalInt.of(weightToShares(Integer.parseInt(s.strip())))) .orElseGet(OptionalInt::empty); } - @Override + /** Update CPU quota and period for the given container ID, set quota to -1 value for unlimited */ public boolean updateCpuQuotaPeriod(NodeAgentContext context, ContainerId containerId, int cpuQuotaUs, int periodUs) { String wanted = String.format("%s %d", cpuQuotaUs < 0 ? MAX : cpuQuotaUs, periodUs); return writeCGroupsValue(context, cpuMaxPath(containerId), wanted); } - @Override public boolean updateCpuShares(NodeAgentContext context, ContainerId containerId, int shares) { return writeCGroupsValue(context, cpuWeightPath(containerId), Integer.toString(sharesToWeight(shares))); } - @Override + enum CpuStatField { + TOTAL_USAGE_USEC("usage_usec"), + USER_USAGE_USEC("user_usec"), + SYSTEM_USAGE_USEC("system_usec"), + TOTAL_PERIODS("nr_periods"), + THROTTLED_PERIODS("nr_throttled"), + THROTTLED_TIME_USEC("throttled_usec"); + + private final String name; + + CpuStatField(String name) { + this.name = name; + } + + long parseValue(String value) { + return Long.parseLong(value); + } + + static Optional<CpuStatField> fromField(String fieldName) { + return Arrays.stream(values()) + .filter(field -> fieldName.equals(field.name)) + .findFirst(); + } + } + public Map<CpuStatField, Long> cpuStats(ContainerId containerId) throws IOException { return Files.readAllLines(cgroupRoot(containerId).resolve("cpu.stat")).stream() .map(line -> line.split("\\s+")) .filter(parts -> parts.length == 2) - .flatMap(parts -> CpuStatField.fromV2Field(parts[0]).stream().map(field -> new Pair<>(field, field.parseValueV2(parts[1])))) + .flatMap(parts -> CpuStatField.fromField(parts[0]).stream().map(field -> new Pair<>(field, field.parseValue(parts[1])))) .collect(Collectors.toMap(Pair::getFirst, Pair::getSecond)); } - @Override + /** @return Maximum amount of memory that can be used by the cgroup and its descendants. */ public long memoryLimitInBytes(ContainerId containerId) throws IOException { String limit = Files.readString(cgroupRoot(containerId).resolve("memory.max")).strip(); return MAX.equals(limit) ? -1L : Long.parseLong(limit); } - @Override + /** @return The total amount of memory currently being used by the cgroup and its descendants. */ public long memoryUsageInBytes(ContainerId containerId) throws IOException { return parseLong(cgroupRoot(containerId).resolve("memory.current")); } - @Override + /** @return Number of bytes used to cache filesystem data, including tmpfs and shared memory. */ public long memoryCacheInBytes(ContainerId containerId) throws IOException { return parseLong(cgroupRoot(containerId).resolve("memory.stat"), "file"); } diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/container/Container.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/container/Container.java index fb789874acf..e76a46b1c3b 100644 --- a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/container/Container.java +++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/container/Container.java @@ -51,6 +51,16 @@ public class Container extends PartialContainer { } @Override + public String toString() { + return "Container{" + + "hostname='" + hostname + '\'' + + ", resources=" + resources + + ", conmonPid=" + conmonPid + + ", networks=" + networks + + '}'; + } + + @Override public boolean equals(Object o) { if (this == o) return true; if (o == null || getClass() != o.getClass()) return false; diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/container/ContainerOperations.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/container/ContainerOperations.java index f131aca2db0..ce2a6bb22ac 100644 --- a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/container/ContainerOperations.java +++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/container/ContainerOperations.java @@ -33,7 +33,7 @@ public class ContainerOperations { private final ContainerImagePruner imagePruner; private final ContainerStatsCollector containerStatsCollector; - public ContainerOperations(ContainerEngine containerEngine, CGroup cgroup, FileSystem fileSystem) { + public ContainerOperations(ContainerEngine containerEngine, CGroupV2 cgroup, FileSystem fileSystem) { this.containerEngine = Objects.requireNonNull(containerEngine); this.imageDownloader = new ContainerImageDownloader(containerEngine); this.imagePruner = new ContainerImagePruner(containerEngine, Clock.systemUTC()); diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/container/ContainerStatsCollector.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/container/ContainerStatsCollector.java index c17f98b9c9d..870809123a9 100644 --- a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/container/ContainerStatsCollector.java +++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/container/ContainerStatsCollector.java @@ -27,15 +27,15 @@ import java.util.stream.Stream; class ContainerStatsCollector { private final ContainerEngine containerEngine; - private final CGroup cgroup; + private final CGroupV2 cgroup; private final FileSystem fileSystem; private final int onlineCpus; - ContainerStatsCollector(ContainerEngine containerEngine, CGroup cgroup, FileSystem fileSystem) { + ContainerStatsCollector(ContainerEngine containerEngine, CGroupV2 cgroup, FileSystem fileSystem) { this(containerEngine, cgroup, fileSystem, Runtime.getRuntime().availableProcessors()); } - ContainerStatsCollector(ContainerEngine containerEngine, CGroup cgroup, FileSystem fileSystem, int onlineCpus) { + ContainerStatsCollector(ContainerEngine containerEngine, CGroupV2 cgroup, FileSystem fileSystem, int onlineCpus) { this.containerEngine = Objects.requireNonNull(containerEngine); this.cgroup = Objects.requireNonNull(cgroup); this.fileSystem = Objects.requireNonNull(fileSystem); @@ -83,14 +83,14 @@ class ContainerStatsCollector { } private ContainerStats.CpuStats collectCpuStats(ContainerId containerId) throws IOException { - Map<CGroup.CpuStatField, Long> cpuStats = cgroup.cpuStats(containerId); + Map<CGroupV2.CpuStatField, Long> cpuStats = cgroup.cpuStats(containerId); return new ContainerStats.CpuStats(onlineCpus, systemCpuUsage(), - cpuStats.get(CGroup.CpuStatField.TOTAL_USAGE_USEC), - cpuStats.get(CGroup.CpuStatField.SYSTEM_USAGE_USEC), - cpuStats.get(CGroup.CpuStatField.THROTTLED_TIME_USEC), - cpuStats.get(CGroup.CpuStatField.TOTAL_PERIODS), - cpuStats.get(CGroup.CpuStatField.THROTTLED_PERIODS)); + cpuStats.get(CGroupV2.CpuStatField.TOTAL_USAGE_USEC), + cpuStats.get(CGroupV2.CpuStatField.SYSTEM_USAGE_USEC), + cpuStats.get(CGroupV2.CpuStatField.THROTTLED_TIME_USEC), + cpuStats.get(CGroupV2.CpuStatField.TOTAL_PERIODS), + cpuStats.get(CGroupV2.CpuStatField.THROTTLED_PERIODS)); } private ContainerStats.MemoryStats collectMemoryStats(ContainerId containerId) throws IOException { |