about | summary | refs | log | tree | commit | diff | stats
path: root/node-admin/src/main
diff options
context:
space:
mode:
author Håkon Hallingstad <hakon@yahooinc.com> 2023-04-21 13:40:10 +0200
committer Håkon Hallingstad <hakon@yahooinc.com> 2023-04-21 13:40:10 +0200
commit 24d2edf2efd509e1755da6cee746e8085b9df4e2 (patch)
tree 62cf6f086719723857192f869d8829f6c95adb8a /node-admin/src/main
parent 16665469cad3ff8c9415168bbecd27ae297da0dc (diff)
Remove CGroupV1
Diffstat (limited to 'node-admin/src/main')
-rw-r--r-- node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/container/CGroup.java 90
-rw-r--r-- node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/container/CGroupV1.java 129
-rw-r--r-- node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/container/CGroupV2.java 53
-rw-r--r-- node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/container/Container.java 10
-rw-r--r-- node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/container/ContainerOperations.java 2
-rw-r--r-- node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/container/ContainerStatsCollector.java 18
6 files changed, 62 insertions, 240 deletions
diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/container/CGroup.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/container/CGroup.java
deleted file mode 100644
index b98ad7a11bc..00000000000
--- a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/container/CGroup.java
+++ /dev/null
@@ -1,90 +0,0 @@
-// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
-package com.yahoo.vespa.hosted.node.admin.container;
-
-import com.yahoo.collections.Pair;
-import com.yahoo.vespa.hosted.node.admin.nodeagent.NodeAgentContext;
-
-import java.io.IOException;
-import java.util.Arrays;
-import java.util.Map;
-import java.util.Optional;
-import java.util.OptionalInt;
-
-import static com.yahoo.vespa.hosted.node.admin.container.ContainerStatsCollector.userHzToMicroSeconds;
-
-/**
- * Read and write interface to the CGroup of a podman container.
- *
- * @author freva
- */
-public interface CGroup {
-
- /**
- * Returns quota and period values used for CPU scheduling. This serves as hard cap on CPU usage by allowing
- * the CGroup to use up to {@code quota} each {@code period}. If uncapped, quota will be negative.
- *
- * @param containerId full container ID.
- * @return CPU quota and period for the given container. Empty if CGroup for this container is not found.
- */
- Optional<Pair<Integer, Integer>> cpuQuotaPeriod(ContainerId containerId);
-
- /** @return number of shares allocated to this CGroup for purposes of CPU time scheduling, empty if CGroup not found */
- OptionalInt cpuShares(ContainerId containerId);
-
- /** Update CPU quota and period for the given container ID, set quota to -1 value for unlimited */
- boolean updateCpuQuotaPeriod(NodeAgentContext context, ContainerId containerId, int cpuQuotaUs, int periodUs);
-
- boolean updateCpuShares(NodeAgentContext context, ContainerId containerId, int shares);
-
- Map<CpuStatField, Long> cpuStats(ContainerId containerId) throws IOException;
-
- /** @return Maximum amount of memory that can be used by the cgroup and its descendants. */
- long memoryLimitInBytes(ContainerId containerId) throws IOException;
-
- /** @return The total amount of memory currently being used by the cgroup and its descendants. */
- long memoryUsageInBytes(ContainerId containerId) throws IOException;
-
- /** @return Number of bytes used to cache filesystem data, including tmpfs and shared memory. */
- long memoryCacheInBytes(ContainerId containerId) throws IOException;
-
- enum CpuStatField {
- TOTAL_USAGE_USEC(null/* in a dedicated file */, "usage_usec"),
- USER_USAGE_USEC("user", "user_usec"),
- SYSTEM_USAGE_USEC("system", "system_usec"),
- TOTAL_PERIODS("nr_periods", "nr_periods"),
- THROTTLED_PERIODS("nr_throttled", "nr_throttled"),
- THROTTLED_TIME_USEC("throttled_time", "throttled_usec");
-
- private final String v1Name;
- private final String v2Name;
- CpuStatField(String v1Name, String v2Name) {
- this.v1Name = v1Name;
- this.v2Name = v2Name;
- }
-
- long parseValueV1(String value) {
- long longValue = Long.parseLong(value);
- return switch (this) {
- case THROTTLED_TIME_USEC, TOTAL_USAGE_USEC -> longValue / 1000; // Value in ns
- case USER_USAGE_USEC, SYSTEM_USAGE_USEC -> userHzToMicroSeconds(longValue);
- default -> longValue;
- };
- }
-
- long parseValueV2(String value) {
- return Long.parseLong(value);
- }
-
- static Optional<CpuStatField> fromV1Field(String name) {
- return Arrays.stream(values())
- .filter(field -> name.equals(field.v1Name))
- .findFirst();
- }
-
- static Optional<CpuStatField> fromV2Field(String name) {
- return Arrays.stream(values())
- .filter(field -> name.equals(field.v2Name))
- .findFirst();
- }
- }
-}
diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/container/CGroupV1.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/container/CGroupV1.java
deleted file mode 100644
index 7607858ec85..00000000000
--- a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/container/CGroupV1.java
+++ /dev/null
@@ -1,129 +0,0 @@
-// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
-package com.yahoo.vespa.hosted.node.admin.container;
-
-import com.yahoo.collections.Pair;
-import com.yahoo.vespa.hosted.node.admin.nodeagent.NodeAgentContext;
-import com.yahoo.vespa.hosted.node.admin.task.util.file.UnixPath;
-
-import java.io.IOException;
-import java.nio.file.FileSystem;
-import java.nio.file.Files;
-import java.nio.file.Path;
-import java.util.HashMap;
-import java.util.Map;
-import java.util.Optional;
-import java.util.OptionalInt;
-import java.util.logging.Logger;
-import java.util.stream.Stream;
-
-import static com.yahoo.vespa.hosted.node.admin.container.CGroupV2.parseLong;
-
-/**
- * Read and write interface to the CGroup V1 of a Podman container.
- *
- * @see <a href="https://www.kernel.org/doc/html/latest/admin-guide/cgroup-v1/index.html">CGroups V1</a>
- * @author freva
- */
-public class CGroupV1 implements CGroup {
-
- private static final Logger logger = Logger.getLogger(CGroupV1.class.getName());
-
- private final FileSystem fileSystem;
-
- public CGroupV1(FileSystem fileSystem) {
- this.fileSystem = fileSystem;
- }
-
- @Override
- public Optional<Pair<Integer, Integer>> cpuQuotaPeriod(ContainerId containerId) {
- OptionalInt quota = readCgroupsCpuInt(cfsQuotaPath(containerId));
- if (quota.isEmpty()) return Optional.empty();
- OptionalInt period = readCgroupsCpuInt(cfsPeriodPath(containerId));
- if (period.isEmpty()) return Optional.empty();
- return Optional.of(new Pair<>(quota.getAsInt(), period.getAsInt()));
- }
-
- @Override
- public OptionalInt cpuShares(ContainerId containerId) {
- return readCgroupsCpuInt(sharesPath(containerId));
- }
-
- @Override
- public boolean updateCpuQuotaPeriod(NodeAgentContext context, ContainerId containerId, int cpuQuotaUs, int periodUs) {
- return writeCgroupsCpuInt(context, cfsQuotaPath(containerId), cpuQuotaUs) |
- writeCgroupsCpuInt(context, cfsPeriodPath(containerId), periodUs);
- }
-
- @Override
- public boolean updateCpuShares(NodeAgentContext context, ContainerId containerId, int shares) {
- return writeCgroupsCpuInt(context, sharesPath(containerId), shares);
- }
-
- @Override
- public Map<CpuStatField, Long> cpuStats(ContainerId containerId) throws IOException {
- Map<CpuStatField, Long> stats = new HashMap<>();
- stats.put(CpuStatField.TOTAL_USAGE_USEC, parseLong(cpuacctPath(containerId).resolve("cpuacct.usage")) / 1000);
- Stream.concat(Files.readAllLines(cpuacctPath(containerId).resolve("cpuacct.stat")).stream(),
- Files.readAllLines(cpuacctPath(containerId).resolve("cpu.stat")).stream())
- .forEach(line -> {
- String[] parts = line.split("\\s+");
- if (parts.length != 2) return;
- CpuStatField.fromV1Field(parts[0]).ifPresent(field -> stats.put(field, field.parseValueV1(parts[1])));
- });
- return stats;
- }
-
- @Override
- public long memoryLimitInBytes(ContainerId containerId) throws IOException {
- return parseLong(memoryPath(containerId).resolve("memory.limit_in_bytes"));
- }
-
- @Override
- public long memoryUsageInBytes(ContainerId containerId) throws IOException {
- return parseLong(memoryPath(containerId).resolve("memory.usage_in_bytes"));
- }
-
- @Override
- public long memoryCacheInBytes(ContainerId containerId) throws IOException {
- return parseLong(memoryPath(containerId).resolve("memory.stat"), "cache");
- }
-
- private Path cpuacctPath(ContainerId containerId) {
- return fileSystem.getPath("/sys/fs/cgroup/cpuacct/machine.slice/libpod-" + containerId + ".scope");
- }
-
- private Path cpuPath(ContainerId containerId) {
- return fileSystem.getPath("/sys/fs/cgroup/cpu/machine.slice/libpod-" + containerId + ".scope");
- }
-
- private Path memoryPath(ContainerId containerId) {
- return fileSystem.getPath("/sys/fs/cgroup/memory/machine.slice/libpod-" + containerId + ".scope");
- }
-
- private UnixPath cfsQuotaPath(ContainerId containerId) {
- return new UnixPath(cpuPath(containerId).resolve("cpu.cfs_quota_us"));
- }
-
- private UnixPath cfsPeriodPath(ContainerId containerId) {
- return new UnixPath(cpuPath(containerId).resolve("cpu.cfs_period_us"));
- }
-
- private UnixPath sharesPath(ContainerId containerId) {
- return new UnixPath(cpuPath(containerId).resolve("cpu.shares"));
- }
-
- private static OptionalInt readCgroupsCpuInt(UnixPath unixPath) {
- return unixPath.readUtf8FileIfExists()
- .map(s -> OptionalInt.of(Integer.parseInt(s.strip())))
- .orElseGet(OptionalInt::empty);
- }
-
- private static boolean writeCgroupsCpuInt(NodeAgentContext context, UnixPath unixPath, int value) {
- int currentValue = readCgroupsCpuInt(unixPath).orElseThrow();
- if (currentValue == value) return false;
-
- context.recordSystemModification(logger, "Updating " + unixPath + " from " + currentValue + " to " + value);
- unixPath.writeUtf8File(Integer.toString(value));
- return true;
- }
-}
diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/container/CGroupV2.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/container/CGroupV2.java
index 0c86829b96d..fef7f4bbd4c 100644
--- a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/container/CGroupV2.java
+++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/container/CGroupV2.java
@@ -9,6 +9,7 @@ import java.io.IOException;
import java.nio.file.FileSystem;
import java.nio.file.Files;
import java.nio.file.Path;
+import java.util.Arrays;
import java.util.List;
import java.util.Map;
import java.util.Optional;
@@ -17,15 +18,16 @@ import java.util.logging.Logger;
import java.util.stream.Collectors;
/**
- * Read and write interface to the CGroup V2 of a Podman container.
+ * Read and write interface to the cgroup v2 of a Podman container.
*
* @see <a href="https://www.kernel.org/doc/html/latest/admin-guide/cgroup-v2.html">CGroups V2</a>
* @author freva
*/
-public class CGroupV2 implements CGroup {
+public class CGroupV2 {
private static final Logger logger = Logger.getLogger(CGroupV2.class.getName());
private static final String MAX = "max";
+ public static final String VESPA_CGEXEC_PATH = "/opt/vespa/bin/vespa-cgexec";
private final FileSystem fileSystem;
@@ -33,7 +35,13 @@ public class CGroupV2 implements CGroup {
this.fileSystem = fileSystem;
}
- @Override
+ /**
+ * Returns quota and period values used for CPU scheduling. This serves as hard cap on CPU usage by allowing
+ * the CGroupV2 to use up to {@code quota} each {@code period}. If uncapped, quota will be negative.
+ *
+ * @param containerId full container ID.
+ * @return CPU quota and period for the given container. Empty if CGroupV2 for this container is not found.
+ */
public Optional<Pair<Integer, Integer>> cpuQuotaPeriod(ContainerId containerId) {
return cpuMaxPath(containerId).readUtf8FileIfExists()
.map(s -> {
@@ -42,45 +50,68 @@ public class CGroupV2 implements CGroup {
});
}
- @Override
+ /** @return number of shares allocated to this CGroupV2 for purposes of CPU time scheduling, empty if CGroupV2 not found */
public OptionalInt cpuShares(ContainerId containerId) {
return cpuWeightPath(containerId).readUtf8FileIfExists()
.map(s -> OptionalInt.of(weightToShares(Integer.parseInt(s.strip()))))
.orElseGet(OptionalInt::empty);
}
- @Override
+ /** Update CPU quota and period for the given container ID, set quota to -1 value for unlimited */
public boolean updateCpuQuotaPeriod(NodeAgentContext context, ContainerId containerId, int cpuQuotaUs, int periodUs) {
String wanted = String.format("%s %d", cpuQuotaUs < 0 ? MAX : cpuQuotaUs, periodUs);
return writeCGroupsValue(context, cpuMaxPath(containerId), wanted);
}
- @Override
public boolean updateCpuShares(NodeAgentContext context, ContainerId containerId, int shares) {
return writeCGroupsValue(context, cpuWeightPath(containerId), Integer.toString(sharesToWeight(shares)));
}
- @Override
+ enum CpuStatField {
+ TOTAL_USAGE_USEC("usage_usec"),
+ USER_USAGE_USEC("user_usec"),
+ SYSTEM_USAGE_USEC("system_usec"),
+ TOTAL_PERIODS("nr_periods"),
+ THROTTLED_PERIODS("nr_throttled"),
+ THROTTLED_TIME_USEC("throttled_usec");
+
+ private final String name;
+
+ CpuStatField(String name) {
+ this.name = name;
+ }
+
+ long parseValue(String value) {
+ return Long.parseLong(value);
+ }
+
+ static Optional<CpuStatField> fromField(String fieldName) {
+ return Arrays.stream(values())
+ .filter(field -> fieldName.equals(field.name))
+ .findFirst();
+ }
+ }
+
public Map<CpuStatField, Long> cpuStats(ContainerId containerId) throws IOException {
return Files.readAllLines(cgroupRoot(containerId).resolve("cpu.stat")).stream()
.map(line -> line.split("\\s+"))
.filter(parts -> parts.length == 2)
- .flatMap(parts -> CpuStatField.fromV2Field(parts[0]).stream().map(field -> new Pair<>(field, field.parseValueV2(parts[1]))))
+ .flatMap(parts -> CpuStatField.fromField(parts[0]).stream().map(field -> new Pair<>(field, field.parseValue(parts[1]))))
.collect(Collectors.toMap(Pair::getFirst, Pair::getSecond));
}
- @Override
+ /** @return Maximum amount of memory that can be used by the cgroup and its descendants. */
public long memoryLimitInBytes(ContainerId containerId) throws IOException {
String limit = Files.readString(cgroupRoot(containerId).resolve("memory.max")).strip();
return MAX.equals(limit) ? -1L : Long.parseLong(limit);
}
- @Override
+ /** @return The total amount of memory currently being used by the cgroup and its descendants. */
public long memoryUsageInBytes(ContainerId containerId) throws IOException {
return parseLong(cgroupRoot(containerId).resolve("memory.current"));
}
- @Override
+ /** @return Number of bytes used to cache filesystem data, including tmpfs and shared memory. */
public long memoryCacheInBytes(ContainerId containerId) throws IOException {
return parseLong(cgroupRoot(containerId).resolve("memory.stat"), "file");
}
diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/container/Container.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/container/Container.java
index fb789874acf..e76a46b1c3b 100644
--- a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/container/Container.java
+++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/container/Container.java
@@ -51,6 +51,16 @@ public class Container extends PartialContainer {
}
@Override
+ public String toString() {
+ return "Container{" +
+ "hostname='" + hostname + '\'' +
+ ", resources=" + resources +
+ ", conmonPid=" + conmonPid +
+ ", networks=" + networks +
+ '}';
+ }
+
+ @Override
public boolean equals(Object o) {
if (this == o) return true;
if (o == null || getClass() != o.getClass()) return false;
diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/container/ContainerOperations.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/container/ContainerOperations.java
index f131aca2db0..ce2a6bb22ac 100644
--- a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/container/ContainerOperations.java
+++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/container/ContainerOperations.java
@@ -33,7 +33,7 @@ public class ContainerOperations {
private final ContainerImagePruner imagePruner;
private final ContainerStatsCollector containerStatsCollector;
- public ContainerOperations(ContainerEngine containerEngine, CGroup cgroup, FileSystem fileSystem) {
+ public ContainerOperations(ContainerEngine containerEngine, CGroupV2 cgroup, FileSystem fileSystem) {
this.containerEngine = Objects.requireNonNull(containerEngine);
this.imageDownloader = new ContainerImageDownloader(containerEngine);
this.imagePruner = new ContainerImagePruner(containerEngine, Clock.systemUTC());
diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/container/ContainerStatsCollector.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/container/ContainerStatsCollector.java
index c17f98b9c9d..870809123a9 100644
--- a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/container/ContainerStatsCollector.java
+++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/container/ContainerStatsCollector.java
@@ -27,15 +27,15 @@ import java.util.stream.Stream;
class ContainerStatsCollector {
private final ContainerEngine containerEngine;
- private final CGroup cgroup;
+ private final CGroupV2 cgroup;
private final FileSystem fileSystem;
private final int onlineCpus;
- ContainerStatsCollector(ContainerEngine containerEngine, CGroup cgroup, FileSystem fileSystem) {
+ ContainerStatsCollector(ContainerEngine containerEngine, CGroupV2 cgroup, FileSystem fileSystem) {
this(containerEngine, cgroup, fileSystem, Runtime.getRuntime().availableProcessors());
}
- ContainerStatsCollector(ContainerEngine containerEngine, CGroup cgroup, FileSystem fileSystem, int onlineCpus) {
+ ContainerStatsCollector(ContainerEngine containerEngine, CGroupV2 cgroup, FileSystem fileSystem, int onlineCpus) {
this.containerEngine = Objects.requireNonNull(containerEngine);
this.cgroup = Objects.requireNonNull(cgroup);
this.fileSystem = Objects.requireNonNull(fileSystem);
@@ -83,14 +83,14 @@ class ContainerStatsCollector {
}
private ContainerStats.CpuStats collectCpuStats(ContainerId containerId) throws IOException {
- Map<CGroup.CpuStatField, Long> cpuStats = cgroup.cpuStats(containerId);
+ Map<CGroupV2.CpuStatField, Long> cpuStats = cgroup.cpuStats(containerId);
return new ContainerStats.CpuStats(onlineCpus,
systemCpuUsage(),
- cpuStats.get(CGroup.CpuStatField.TOTAL_USAGE_USEC),
- cpuStats.get(CGroup.CpuStatField.SYSTEM_USAGE_USEC),
- cpuStats.get(CGroup.CpuStatField.THROTTLED_TIME_USEC),
- cpuStats.get(CGroup.CpuStatField.TOTAL_PERIODS),
- cpuStats.get(CGroup.CpuStatField.THROTTLED_PERIODS));
+ cpuStats.get(CGroupV2.CpuStatField.TOTAL_USAGE_USEC),
+ cpuStats.get(CGroupV2.CpuStatField.SYSTEM_USAGE_USEC),
+ cpuStats.get(CGroupV2.CpuStatField.THROTTLED_TIME_USEC),
+ cpuStats.get(CGroupV2.CpuStatField.TOTAL_PERIODS),
+ cpuStats.get(CGroupV2.CpuStatField.THROTTLED_PERIODS));
}
private ContainerStats.MemoryStats collectMemoryStats(ContainerId containerId) throws IOException {