diff options
author | Ola Aunronning <olaa@yahooinc.com> | 2023-04-26 09:36:43 +0200 |
---|---|---|
committer | Ola Aunronning <olaa@yahooinc.com> | 2023-04-26 09:36:43 +0200 |
commit | 48b7ffe757ffa25bc3ca1eeaab8153db30623fa3 (patch) | |
tree | b41007ada3c803cc3282cc0556acbb13f3642e6a /node-admin | |
parent | d46b56f3f8577d8c4cc4b08f5f13b3b983ef7d2a (diff) | |
parent | 9a4376dae10e986c7061633e5a02f18c24a951da (diff) |
Merge branch 'master' into olaa/versioned-feature-flag
Diffstat (limited to 'node-admin')
12 files changed, 155 insertions, 383 deletions
diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/container/CGroup.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/container/CGroup.java deleted file mode 100644 index b98ad7a11bc..00000000000 --- a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/container/CGroup.java +++ /dev/null @@ -1,90 +0,0 @@ -// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -package com.yahoo.vespa.hosted.node.admin.container; - -import com.yahoo.collections.Pair; -import com.yahoo.vespa.hosted.node.admin.nodeagent.NodeAgentContext; - -import java.io.IOException; -import java.util.Arrays; -import java.util.Map; -import java.util.Optional; -import java.util.OptionalInt; - -import static com.yahoo.vespa.hosted.node.admin.container.ContainerStatsCollector.userHzToMicroSeconds; - -/** - * Read and write interface to the CGroup of a podman container. - * - * @author freva - */ -public interface CGroup { - - /** - * Returns quota and period values used for CPU scheduling. This serves as hard cap on CPU usage by allowing - * the CGroup to use up to {@code quota} each {@code period}. If uncapped, quota will be negative. - * - * @param containerId full container ID. - * @return CPU quota and period for the given container. Empty if CGroup for this container is not found. - */ - Optional<Pair<Integer, Integer>> cpuQuotaPeriod(ContainerId containerId); - - /** @return number of shares allocated to this CGroup for purposes of CPU time scheduling, empty if CGroup not found */ - OptionalInt cpuShares(ContainerId containerId); - - /** Update CPU quota and period for the given container ID, set quota to -1 value for unlimited */ - boolean updateCpuQuotaPeriod(NodeAgentContext context, ContainerId containerId, int cpuQuotaUs, int periodUs); - - boolean updateCpuShares(NodeAgentContext context, ContainerId containerId, int shares); - - Map<CpuStatField, Long> cpuStats(ContainerId containerId) throws IOException; - - /** @return Maximum amount of memory that can be used by the cgroup and its descendants. */ - long memoryLimitInBytes(ContainerId containerId) throws IOException; - - /** @return The total amount of memory currently being used by the cgroup and its descendants. */ - long memoryUsageInBytes(ContainerId containerId) throws IOException; - - /** @return Number of bytes used to cache filesystem data, including tmpfs and shared memory. */ - long memoryCacheInBytes(ContainerId containerId) throws IOException; - - enum CpuStatField { - TOTAL_USAGE_USEC(null/* in a dedicated file */, "usage_usec"), - USER_USAGE_USEC("user", "user_usec"), - SYSTEM_USAGE_USEC("system", "system_usec"), - TOTAL_PERIODS("nr_periods", "nr_periods"), - THROTTLED_PERIODS("nr_throttled", "nr_throttled"), - THROTTLED_TIME_USEC("throttled_time", "throttled_usec"); - - private final String v1Name; - private final String v2Name; - CpuStatField(String v1Name, String v2Name) { - this.v1Name = v1Name; - this.v2Name = v2Name; - } - - long parseValueV1(String value) { - long longValue = Long.parseLong(value); - return switch (this) { - case THROTTLED_TIME_USEC, TOTAL_USAGE_USEC -> longValue / 1000; // Value in ns - case USER_USAGE_USEC, SYSTEM_USAGE_USEC -> userHzToMicroSeconds(longValue); - default -> longValue; - }; - } - - long parseValueV2(String value) { - return Long.parseLong(value); - } - - static Optional<CpuStatField> fromV1Field(String name) { - return Arrays.stream(values()) - .filter(field -> name.equals(field.v1Name)) - .findFirst(); - } - - static Optional<CpuStatField> fromV2Field(String name) { - return Arrays.stream(values()) - .filter(field -> name.equals(field.v2Name)) - .findFirst(); - } - } -} diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/container/CGroupV1.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/container/CGroupV1.java deleted file mode 100644 index 7607858ec85..00000000000 --- a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/container/CGroupV1.java +++ /dev/null @@ -1,129 +0,0 @@ -// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -package com.yahoo.vespa.hosted.node.admin.container; - -import com.yahoo.collections.Pair; -import com.yahoo.vespa.hosted.node.admin.nodeagent.NodeAgentContext; -import com.yahoo.vespa.hosted.node.admin.task.util.file.UnixPath; - -import java.io.IOException; -import java.nio.file.FileSystem; -import java.nio.file.Files; -import java.nio.file.Path; -import java.util.HashMap; -import java.util.Map; -import java.util.Optional; -import java.util.OptionalInt; -import java.util.logging.Logger; -import java.util.stream.Stream; - -import static com.yahoo.vespa.hosted.node.admin.container.CGroupV2.parseLong; - -/** - * Read and write interface to the CGroup V1 of a Podman container. - * - * @see <a href="https://www.kernel.org/doc/html/latest/admin-guide/cgroup-v1/index.html">CGroups V1</a> - * @author freva - */ -public class CGroupV1 implements CGroup { - - private static final Logger logger = Logger.getLogger(CGroupV1.class.getName()); - - private final FileSystem fileSystem; - - public CGroupV1(FileSystem fileSystem) { - this.fileSystem = fileSystem; - } - - @Override - public Optional<Pair<Integer, Integer>> cpuQuotaPeriod(ContainerId containerId) { - OptionalInt quota = readCgroupsCpuInt(cfsQuotaPath(containerId)); - if (quota.isEmpty()) return Optional.empty(); - OptionalInt period = readCgroupsCpuInt(cfsPeriodPath(containerId)); - if (period.isEmpty()) return Optional.empty(); - return Optional.of(new Pair<>(quota.getAsInt(), period.getAsInt())); - } - - @Override - public OptionalInt cpuShares(ContainerId containerId) { - return readCgroupsCpuInt(sharesPath(containerId)); - } - - @Override - public boolean updateCpuQuotaPeriod(NodeAgentContext context, ContainerId containerId, int cpuQuotaUs, int periodUs) { - return writeCgroupsCpuInt(context, cfsQuotaPath(containerId), cpuQuotaUs) | - writeCgroupsCpuInt(context, cfsPeriodPath(containerId), periodUs); - } - - @Override - public boolean updateCpuShares(NodeAgentContext context, ContainerId containerId, int shares) { - return writeCgroupsCpuInt(context, sharesPath(containerId), shares); - } - - @Override - public Map<CpuStatField, Long> cpuStats(ContainerId containerId) throws IOException { - Map<CpuStatField, Long> stats = new HashMap<>(); - stats.put(CpuStatField.TOTAL_USAGE_USEC, parseLong(cpuacctPath(containerId).resolve("cpuacct.usage")) / 1000); - Stream.concat(Files.readAllLines(cpuacctPath(containerId).resolve("cpuacct.stat")).stream(), - Files.readAllLines(cpuacctPath(containerId).resolve("cpu.stat")).stream()) - .forEach(line -> { - String[] parts = line.split("\\s+"); - if (parts.length != 2) return; - CpuStatField.fromV1Field(parts[0]).ifPresent(field -> stats.put(field, field.parseValueV1(parts[1]))); - }); - return stats; - } - - @Override - public long memoryLimitInBytes(ContainerId containerId) throws IOException { - return parseLong(memoryPath(containerId).resolve("memory.limit_in_bytes")); - } - - @Override - public long memoryUsageInBytes(ContainerId containerId) throws IOException { - return parseLong(memoryPath(containerId).resolve("memory.usage_in_bytes")); - } - - @Override - public long memoryCacheInBytes(ContainerId containerId) throws IOException { - return parseLong(memoryPath(containerId).resolve("memory.stat"), "cache"); - } - - private Path cpuacctPath(ContainerId containerId) { - return fileSystem.getPath("/sys/fs/cgroup/cpuacct/machine.slice/libpod-" + containerId + ".scope"); - } - - private Path cpuPath(ContainerId containerId) { - return fileSystem.getPath("/sys/fs/cgroup/cpu/machine.slice/libpod-" + containerId + ".scope"); - } - - private Path memoryPath(ContainerId containerId) { - return fileSystem.getPath("/sys/fs/cgroup/memory/machine.slice/libpod-" + containerId + ".scope"); - } - - private UnixPath cfsQuotaPath(ContainerId containerId) { - return new UnixPath(cpuPath(containerId).resolve("cpu.cfs_quota_us")); - } - - private UnixPath cfsPeriodPath(ContainerId containerId) { - return new UnixPath(cpuPath(containerId).resolve("cpu.cfs_period_us")); - } - - private UnixPath sharesPath(ContainerId containerId) { - return new UnixPath(cpuPath(containerId).resolve("cpu.shares")); - } - - private static OptionalInt readCgroupsCpuInt(UnixPath unixPath) { - return unixPath.readUtf8FileIfExists() - .map(s -> OptionalInt.of(Integer.parseInt(s.strip()))) - .orElseGet(OptionalInt::empty); - } - - private static boolean writeCgroupsCpuInt(NodeAgentContext context, UnixPath unixPath, int value) { - int currentValue = readCgroupsCpuInt(unixPath).orElseThrow(); - if (currentValue == value) return false; - - context.recordSystemModification(logger, "Updating " + unixPath + " from " + currentValue + " to " + value); - unixPath.writeUtf8File(Integer.toString(value)); - return true; - } -} diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/container/CGroupV2.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/container/CGroupV2.java index 0c86829b96d..3cb34e066ff 100644 --- a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/container/CGroupV2.java +++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/container/CGroupV2.java @@ -2,6 +2,7 @@ package com.yahoo.vespa.hosted.node.admin.container; import com.yahoo.collections.Pair; +import com.yahoo.vespa.defaults.Defaults; import com.yahoo.vespa.hosted.node.admin.nodeagent.NodeAgentContext; import com.yahoo.vespa.hosted.node.admin.task.util.file.UnixPath; @@ -9,6 +10,7 @@ import java.io.IOException; import java.nio.file.FileSystem; import java.nio.file.Files; import java.nio.file.Path; +import java.util.Arrays; import java.util.List; import java.util.Map; import java.util.Optional; @@ -17,23 +19,48 @@ import java.util.logging.Logger; import java.util.stream.Collectors; /** - * Read and write interface to the CGroup V2 of a Podman container. + * Read and write interface to the cgroup v2 of a Podman container. * * @see <a href="https://www.kernel.org/doc/html/latest/admin-guide/cgroup-v2.html">CGroups V2</a> * @author freva */ -public class CGroupV2 implements CGroup { +public class CGroupV2 { private static final Logger logger = Logger.getLogger(CGroupV2.class.getName()); private static final String MAX = "max"; - private final FileSystem fileSystem; + private final Path rootCgroupPath; public CGroupV2(FileSystem fileSystem) { - this.fileSystem = fileSystem; - } - - @Override + this.rootCgroupPath = fileSystem.getPath("/sys/fs/cgroup"); + } + + /** + * Wraps {@code command} to ensure it is executed in the given cgroup. + * + * <p>WARNING: This method must be called only after vespa-cgexec has been installed.</p> + * + * @param cgroup The cgroup to execute the command in, e.g. /sys/fs/cgroup/system.slice/wireguard.scope. + * @param command The command to execute in the cgroup. + * @see #cgroupRootPath() + * @see #cgroupPath(ContainerId) + */ + public String[] wrapForExecutionIn(Path cgroup, String... command) { + String[] fullCommand = new String[3 + command.length]; + fullCommand[0] = Defaults.getDefaults().vespaHome() + "/bin/vespa-cgexec"; + fullCommand[1] = "-g"; + fullCommand[2] = cgroup.toString(); + System.arraycopy(command, 0, fullCommand, 3, command.length); + return fullCommand; + } + + /** + * Returns quota and period values used for CPU scheduling. This serves as hard cap on CPU usage by allowing + * the CGroupV2 to use up to {@code quota} each {@code period}. If uncapped, quota will be negative. + * + * @param containerId full container ID. + * @return CPU quota and period for the given container. Empty if CGroupV2 for this container is not found. + */ public Optional<Pair<Integer, Integer>> cpuQuotaPeriod(ContainerId containerId) { return cpuMaxPath(containerId).readUtf8FileIfExists() .map(s -> { @@ -42,60 +69,89 @@ public class CGroupV2 implements CGroup { }); } - @Override + /** @return number of shares allocated to this CGroupV2 for purposes of CPU time scheduling, empty if CGroupV2 not found */ public OptionalInt cpuShares(ContainerId containerId) { return cpuWeightPath(containerId).readUtf8FileIfExists() .map(s -> OptionalInt.of(weightToShares(Integer.parseInt(s.strip())))) .orElseGet(OptionalInt::empty); } - @Override + /** Update CPU quota and period for the given container ID, set quota to -1 value for unlimited */ public boolean updateCpuQuotaPeriod(NodeAgentContext context, ContainerId containerId, int cpuQuotaUs, int periodUs) { String wanted = String.format("%s %d", cpuQuotaUs < 0 ? MAX : cpuQuotaUs, periodUs); return writeCGroupsValue(context, cpuMaxPath(containerId), wanted); } - @Override public boolean updateCpuShares(NodeAgentContext context, ContainerId containerId, int shares) { return writeCGroupsValue(context, cpuWeightPath(containerId), Integer.toString(sharesToWeight(shares))); } - @Override + enum CpuStatField { + TOTAL_USAGE_USEC("usage_usec"), + USER_USAGE_USEC("user_usec"), + SYSTEM_USAGE_USEC("system_usec"), + TOTAL_PERIODS("nr_periods"), + THROTTLED_PERIODS("nr_throttled"), + THROTTLED_TIME_USEC("throttled_usec"); + + private final String name; + + CpuStatField(String name) { + this.name = name; + } + + long parseValue(String value) { + return Long.parseLong(value); + } + + static Optional<CpuStatField> fromField(String fieldName) { + return Arrays.stream(values()) + .filter(field -> fieldName.equals(field.name)) + .findFirst(); + } + } + public Map<CpuStatField, Long> cpuStats(ContainerId containerId) throws IOException { - return Files.readAllLines(cgroupRoot(containerId).resolve("cpu.stat")).stream() - .map(line -> line.split("\\s+")) - .filter(parts -> parts.length == 2) - .flatMap(parts -> CpuStatField.fromV2Field(parts[0]).stream().map(field -> new Pair<>(field, field.parseValueV2(parts[1])))) - .collect(Collectors.toMap(Pair::getFirst, Pair::getSecond)); + return Files.readAllLines(cgroupPath(containerId).resolve("cpu.stat")).stream() + .map(line -> line.split("\\s+")) + .filter(parts -> parts.length == 2) + .flatMap(parts -> CpuStatField.fromField(parts[0]).stream().map(field -> new Pair<>(field, field.parseValue(parts[1])))) + .collect(Collectors.toMap(Pair::getFirst, Pair::getSecond)); } - @Override + /** @return Maximum amount of memory that can be used by the cgroup and its descendants. */ public long memoryLimitInBytes(ContainerId containerId) throws IOException { - String limit = Files.readString(cgroupRoot(containerId).resolve("memory.max")).strip(); + String limit = Files.readString(cgroupPath(containerId).resolve("memory.max")).strip(); return MAX.equals(limit) ? -1L : Long.parseLong(limit); } - @Override + /** @return The total amount of memory currently being used by the cgroup and its descendants. */ public long memoryUsageInBytes(ContainerId containerId) throws IOException { - return parseLong(cgroupRoot(containerId).resolve("memory.current")); + return parseLong(cgroupPath(containerId).resolve("memory.current")); } - @Override + /** @return Number of bytes used to cache filesystem data, including tmpfs and shared memory. */ public long memoryCacheInBytes(ContainerId containerId) throws IOException { - return parseLong(cgroupRoot(containerId).resolve("memory.stat"), "file"); + return parseLong(cgroupPath(containerId).resolve("memory.stat"), "file"); + } + + /** Returns the cgroup v2 mount point path (/sys/fs/cgroup). */ + public Path cgroupRootPath() { + return rootCgroupPath; } - private Path cgroupRoot(ContainerId containerId) { + /** Returns the cgroup directory of the Podman container, and which appears as the root cgroup within the container. */ + public Path cgroupPath(ContainerId containerId) { // crun path, runc path is without the 'container' directory - return fileSystem.getPath("/sys/fs/cgroup/machine.slice/libpod-" + containerId + ".scope/container"); + return rootCgroupPath.resolve("machine.slice/libpod-" + containerId + ".scope/container"); } private UnixPath cpuMaxPath(ContainerId containerId) { - return new UnixPath(cgroupRoot(containerId).resolve("cpu.max")); + return new UnixPath(cgroupPath(containerId).resolve("cpu.max")); } private UnixPath cpuWeightPath(ContainerId containerId) { - return new UnixPath(cgroupRoot(containerId).resolve("cpu.weight")); + return new UnixPath(cgroupPath(containerId).resolve("cpu.weight")); } private static boolean writeCGroupsValue(NodeAgentContext context, UnixPath unixPath, String value) { diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/container/Container.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/container/Container.java index fb789874acf..e76a46b1c3b 100644 --- a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/container/Container.java +++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/container/Container.java @@ -51,6 +51,16 @@ public class Container extends PartialContainer { } @Override + public String toString() { + return "Container{" + + "hostname='" + hostname + '\'' + + ", resources=" + resources + + ", conmonPid=" + conmonPid + + ", networks=" + networks + + '}'; + } + + @Override public boolean equals(Object o) { if (this == o) return true; if (o == null || getClass() != o.getClass()) return false; diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/container/ContainerOperations.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/container/ContainerOperations.java index f131aca2db0..ce2a6bb22ac 100644 --- a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/container/ContainerOperations.java +++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/container/ContainerOperations.java @@ -33,7 +33,7 @@ public class ContainerOperations { private final ContainerImagePruner imagePruner; private final ContainerStatsCollector containerStatsCollector; - public ContainerOperations(ContainerEngine containerEngine, CGroup cgroup, FileSystem fileSystem) { + public ContainerOperations(ContainerEngine containerEngine, CGroupV2 cgroup, FileSystem fileSystem) { this.containerEngine = Objects.requireNonNull(containerEngine); this.imageDownloader = new ContainerImageDownloader(containerEngine); this.imagePruner = new ContainerImagePruner(containerEngine, Clock.systemUTC()); diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/container/ContainerStatsCollector.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/container/ContainerStatsCollector.java index c17f98b9c9d..870809123a9 100644 --- a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/container/ContainerStatsCollector.java +++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/container/ContainerStatsCollector.java @@ -27,15 +27,15 @@ import java.util.stream.Stream; class ContainerStatsCollector { private final ContainerEngine containerEngine; - private final CGroup cgroup; + private final CGroupV2 cgroup; private final FileSystem fileSystem; private final int onlineCpus; - ContainerStatsCollector(ContainerEngine containerEngine, CGroup cgroup, FileSystem fileSystem) { + ContainerStatsCollector(ContainerEngine containerEngine, CGroupV2 cgroup, FileSystem fileSystem) { this(containerEngine, cgroup, fileSystem, Runtime.getRuntime().availableProcessors()); } - ContainerStatsCollector(ContainerEngine containerEngine, CGroup cgroup, FileSystem fileSystem, int onlineCpus) { + ContainerStatsCollector(ContainerEngine containerEngine, CGroupV2 cgroup, FileSystem fileSystem, int onlineCpus) { this.containerEngine = Objects.requireNonNull(containerEngine); this.cgroup = Objects.requireNonNull(cgroup); this.fileSystem = Objects.requireNonNull(fileSystem); @@ -83,14 +83,14 @@ class ContainerStatsCollector { } private ContainerStats.CpuStats collectCpuStats(ContainerId containerId) throws IOException { - Map<CGroup.CpuStatField, Long> cpuStats = cgroup.cpuStats(containerId); + Map<CGroupV2.CpuStatField, Long> cpuStats = cgroup.cpuStats(containerId); return new ContainerStats.CpuStats(onlineCpus, systemCpuUsage(), - cpuStats.get(CGroup.CpuStatField.TOTAL_USAGE_USEC), - cpuStats.get(CGroup.CpuStatField.SYSTEM_USAGE_USEC), - cpuStats.get(CGroup.CpuStatField.THROTTLED_TIME_USEC), - cpuStats.get(CGroup.CpuStatField.TOTAL_PERIODS), - cpuStats.get(CGroup.CpuStatField.THROTTLED_PERIODS)); + cpuStats.get(CGroupV2.CpuStatField.TOTAL_USAGE_USEC), + cpuStats.get(CGroupV2.CpuStatField.SYSTEM_USAGE_USEC), + cpuStats.get(CGroupV2.CpuStatField.THROTTLED_TIME_USEC), + cpuStats.get(CGroupV2.CpuStatField.TOTAL_PERIODS), + cpuStats.get(CGroupV2.CpuStatField.THROTTLED_PERIODS)); } private ContainerStats.MemoryStats collectMemoryStats(ContainerId containerId) throws IOException { diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/ContainerWireguardTask.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/ContainerWireguardTask.java index 073e4263492..858b3d647fc 100644 --- a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/ContainerWireguardTask.java +++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/ContainerWireguardTask.java @@ -1,5 +1,6 @@ package com.yahoo.vespa.hosted.node.admin.maintenance; +import com.yahoo.vespa.hosted.node.admin.container.ContainerId; import com.yahoo.vespa.hosted.node.admin.nodeagent.NodeAgentContext; /** @@ -9,6 +10,6 @@ import com.yahoo.vespa.hosted.node.admin.nodeagent.NodeAgentContext; */ public interface ContainerWireguardTask { - void converge(NodeAgentContext context); + void converge(NodeAgentContext context, ContainerId containerId); } diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/identity/AthenzCredentialsMaintainer.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/identity/AthenzCredentialsMaintainer.java index 45973ee6784..9e295b6a8e6 100644 --- a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/identity/AthenzCredentialsMaintainer.java +++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/identity/AthenzCredentialsMaintainer.java @@ -15,6 +15,7 @@ import com.yahoo.vespa.athenz.client.zts.ZtsClient; import com.yahoo.vespa.athenz.client.zts.ZtsClientException; import com.yahoo.vespa.athenz.identity.ServiceIdentityProvider; import com.yahoo.vespa.athenz.identityprovider.api.EntityBindingsMapper; +import com.yahoo.vespa.athenz.identityprovider.api.IdentityDocument; import com.yahoo.vespa.athenz.identityprovider.api.IdentityDocumentClient; import com.yahoo.vespa.athenz.identityprovider.api.SignedIdentityDocument; import com.yahoo.vespa.athenz.identityprovider.client.CsrGenerator; @@ -78,6 +79,7 @@ public class AthenzCredentialsMaintainer implements CredentialsMaintainer { private final ServiceIdentityProvider hostIdentityProvider; private final IdentityDocumentClient identityDocumentClient; private final BooleanFlag tenantServiceIdentityFlag; + private final BooleanFlag useNewIdentityDocumentLayout; // Used as an optimization to ensure ZTS is not DDoS'ed on continuously failing refresh attempts private final Map<ContainerName, Instant> lastRefreshAttempt = new ConcurrentHashMap<>(); @@ -99,6 +101,7 @@ public class AthenzCredentialsMaintainer implements CredentialsMaintainer { new AthenzIdentityVerifier(Set.of(configServerInfo.getConfigServerIdentity()))); this.clock = clock; this.tenantServiceIdentityFlag = Flags.NODE_ADMIN_TENANT_SERVICE_REGISTRY.bindTo(flagSource); + this.useNewIdentityDocumentLayout = Flags.NEW_IDDOC_LAYOUT.bindTo(flagSource); } public boolean converge(NodeAgentContext context) { @@ -134,7 +137,7 @@ public class AthenzCredentialsMaintainer implements CredentialsMaintainer { Instant now = clock.instant(); Instant expiry = certificate.getNotAfter().toInstant(); var doc = EntityBindingsMapper.readSignedIdentityDocumentFromFile(identityDocumentFile); - if (doc.outdated()) { + if (refreshIdentityDocument(doc, context)) { context.log(logger, "Identity document is outdated (version=%d)", doc.documentVersion()); registerIdentity(context, privateKeyFile, certificateFile, identityDocumentFile, identityType, athenzIdentity); return true; @@ -154,7 +157,7 @@ public class AthenzCredentialsMaintainer implements CredentialsMaintainer { return false; } else { lastRefreshAttempt.put(context.containerName(), now); - refreshIdentity(context, privateKeyFile, certificateFile, identityDocumentFile, doc, identityType, athenzIdentity); + refreshIdentity(context, privateKeyFile, certificateFile, identityDocumentFile, doc.identityDocument(), identityType, athenzIdentity); return true; } } @@ -165,6 +168,11 @@ public class AthenzCredentialsMaintainer implements CredentialsMaintainer { } } + private boolean refreshIdentityDocument(SignedIdentityDocument signedIdentityDocument, NodeAgentContext context) { + int expectedVersion = documentVersion(context); + return signedIdentityDocument.outdated() || signedIdentityDocument.documentVersion() != expectedVersion; + } + public void clearCredentials(NodeAgentContext context) { FileFinder.files(context.paths().of(CONTAINER_SIA_DIRECTORY)) .deleteRecursively(context); @@ -219,7 +227,8 @@ public class AthenzCredentialsMaintainer implements CredentialsMaintainer { private void registerIdentity(NodeAgentContext context, ContainerPath privateKeyFile, ContainerPath certificateFile, ContainerPath identityDocumentFile, IdentityType identityType, AthenzIdentity identity) { KeyPair keyPair = KeyUtils.generateKeypair(KeyAlgorithm.RSA); - SignedIdentityDocument doc = signedIdentityDocument(context, identityType); + SignedIdentityDocument signedDoc = signedIdentityDocument(context, identityType); + IdentityDocument doc = signedDoc.identityDocument(); CsrGenerator csrGenerator = new CsrGenerator(certificateDnsSuffix, doc.providerService().getFullName()); Pkcs10Csr csr = csrGenerator.generateInstanceCsr( identity, doc.providerUniqueId(), doc.ipAddresses(), doc.clusterType(), keyPair); @@ -231,9 +240,9 @@ public class AthenzCredentialsMaintainer implements CredentialsMaintainer { ztsClient.registerInstance( doc.providerService(), identity, - EntityBindingsMapper.toAttestationData(doc), + EntityBindingsMapper.toAttestationData(signedDoc), csr); - EntityBindingsMapper.writeSignedIdentityDocumentToFile(identityDocumentFile, doc); + EntityBindingsMapper.writeSignedIdentityDocumentToFile(identityDocumentFile, signedDoc); writePrivateKeyAndCertificate(privateKeyFile, keyPair.getPrivate(), certificateFile, instanceIdentity.certificate()); context.log(logger, "Instance successfully registered and credentials written to file"); } @@ -242,14 +251,14 @@ public class AthenzCredentialsMaintainer implements CredentialsMaintainer { /** * Return zts url from identity document, fallback to ztsEndpoint */ - private URI ztsEndpoint(SignedIdentityDocument doc) { + private URI ztsEndpoint(IdentityDocument doc) { return Optional.ofNullable(doc.ztsUrl()) .filter(s -> !s.isBlank()) .map(URI::create) .orElse(ztsEndpoint); } private void refreshIdentity(NodeAgentContext context, ContainerPath privateKeyFile, ContainerPath certificateFile, - ContainerPath identityDocumentFile, SignedIdentityDocument doc, IdentityType identityType, AthenzIdentity identity) { + ContainerPath identityDocumentFile, IdentityDocument doc, IdentityType identityType, AthenzIdentity identity) { KeyPair keyPair = KeyUtils.generateKeypair(KeyAlgorithm.RSA); CsrGenerator csrGenerator = new CsrGenerator(certificateDnsSuffix, doc.providerService().getFullName()); Pkcs10Csr csr = csrGenerator.generateInstanceCsr( @@ -310,8 +319,8 @@ public class AthenzCredentialsMaintainer implements CredentialsMaintainer { private SignedIdentityDocument signedIdentityDocument(NodeAgentContext context, IdentityType identityType) { return switch (identityType) { - case NODE -> identityDocumentClient.getNodeIdentityDocument(context.hostname().value()); - case TENANT -> identityDocumentClient.getTenantIdentityDocument(context.hostname().value()); + case NODE -> identityDocumentClient.getNodeIdentityDocument(context.hostname().value(), documentVersion(context)); + case TENANT -> identityDocumentClient.getTenantIdentityDocument(context.hostname().value(), documentVersion(context)); }; } @@ -324,9 +333,9 @@ public class AthenzCredentialsMaintainer implements CredentialsMaintainer { private AthenzIdentity getTenantIdentity(NodeAgentContext context, ContainerPath identityDocumentFile) { if (Files.exists(identityDocumentFile)) { - return EntityBindingsMapper.readSignedIdentityDocumentFromFile(identityDocumentFile).serviceIdentity(); + return EntityBindingsMapper.readSignedIdentityDocumentFromFile(identityDocumentFile).identityDocument().serviceIdentity(); } else { - return identityDocumentClient.getTenantIdentityDocument(context.hostname().value()).serviceIdentity(); + return identityDocumentClient.getTenantIdentityDocument(context.hostname().value(), documentVersion(context)).identityDocument().serviceIdentity(); } } @@ -340,6 +349,17 @@ public class AthenzCredentialsMaintainer implements CredentialsMaintainer { .value(); } + /* + Get the document version to ask for + */ + private int documentVersion(NodeAgentContext context) { + return useNewIdentityDocumentLayout + .with(FetchVector.Dimension.HOSTNAME, context.hostname().value()) + .value() + ? SignedIdentityDocument.DEFAULT_DOCUMENT_VERSION + : SignedIdentityDocument.LEGACY_DEFAULT_DOCUMENT_VERSION; + } + enum IdentityType { NODE("vespa-node-identity-document.json"), TENANT("vespa-tenant-identity-document.json"); diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImpl.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImpl.java index f2f690106fa..7c84afc8397 100644 --- a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImpl.java +++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImpl.java @@ -509,7 +509,8 @@ public class NodeAgentImpl implements NodeAgent { // TODO: this is a workaround for restarting wireguard as early as possible after host-admin has been down. var runOrdinaryWireguardTasks = true; if (container.isPresent() && container.get().state().isRunning()) { - wireguardTasks.forEach(task -> task.converge(context)); + Optional<Container> finalContainer = container; + wireguardTasks.forEach(task -> task.converge(context, finalContainer.get().id())); runOrdinaryWireguardTasks = false; } @@ -530,7 +531,10 @@ public class NodeAgentImpl implements NodeAgent { } aclMaintainer.ifPresent(maintainer -> maintainer.converge(context)); - if (runOrdinaryWireguardTasks) wireguardTasks.forEach(task -> task.converge(context)); + if (runOrdinaryWireguardTasks) { + Optional<Container> finalContainer = container; + wireguardTasks.forEach(task -> task.converge(context, finalContainer.get().id())); + } startServicesIfNeeded(context); resumeNodeIfNeeded(context); if (healthChecker.isPresent()) { diff --git a/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/container/CGroupV1Test.java b/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/container/CGroupV1Test.java deleted file mode 100644 index f25001d77cd..00000000000 --- a/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/container/CGroupV1Test.java +++ /dev/null @@ -1,100 +0,0 @@ -// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -package com.yahoo.vespa.hosted.node.admin.container; - -import com.yahoo.collections.Pair; -import com.yahoo.vespa.hosted.node.admin.nodeagent.NodeAgentContext; -import com.yahoo.vespa.hosted.node.admin.nodeagent.NodeAgentContextImpl; -import com.yahoo.vespa.hosted.node.admin.task.util.file.UnixPath; -import com.yahoo.vespa.test.file.TestFileSystem; -import org.junit.jupiter.api.Test; - -import java.io.IOException; -import java.nio.file.FileSystem; -import java.util.Map; -import java.util.Optional; -import java.util.OptionalInt; - -import static com.yahoo.vespa.hosted.node.admin.container.CGroup.CpuStatField.SYSTEM_USAGE_USEC; -import static com.yahoo.vespa.hosted.node.admin.container.CGroup.CpuStatField.THROTTLED_PERIODS; -import static com.yahoo.vespa.hosted.node.admin.container.CGroup.CpuStatField.THROTTLED_TIME_USEC; -import static com.yahoo.vespa.hosted.node.admin.container.CGroup.CpuStatField.TOTAL_PERIODS; -import static com.yahoo.vespa.hosted.node.admin.container.CGroup.CpuStatField.TOTAL_USAGE_USEC; -import static com.yahoo.vespa.hosted.node.admin.container.CGroup.CpuStatField.USER_USAGE_USEC; -import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assertions.assertFalse; -import static org.junit.jupiter.api.Assertions.assertTrue; - -/** - * @author freva - */ -public class CGroupV1Test { - - private static final ContainerId containerId = new ContainerId("4aec78cc"); - - private final FileSystem fileSystem = TestFileSystem.create(); - private final CGroup cgroup = new CGroupV1(fileSystem); - private final NodeAgentContext context = NodeAgentContextImpl.builder("node123.yahoo.com").fileSystem(fileSystem).build(); - - @Test - public void updates_cpu_quota_and_period() { - assertEquals(Optional.empty(), cgroup.cpuQuotaPeriod(containerId)); - - UnixPath cpu = new UnixPath(fileSystem.getPath("/sys/fs/cgroup/cpu/machine.slice/libpod-4aec78cc.scope")).createDirectories(); - cpu.resolve("cpu.cfs_period_us").writeUtf8File("123456\n"); - cpu.resolve("cpu.cfs_quota_us").writeUtf8File("-1\n"); - assertEquals(Optional.of(new Pair<>(-1, 123456)), cgroup.cpuQuotaPeriod(containerId)); - - cpu.resolve("cpu.cfs_quota_us").writeUtf8File("456\n"); - assertEquals(Optional.of(new Pair<>(456, 123456)), cgroup.cpuQuotaPeriod(containerId)); - - assertFalse(cgroup.updateCpuQuotaPeriod(context, containerId, 456, 123456)); - - assertTrue(cgroup.updateCpuQuotaPeriod(context, containerId, 654, 123456)); - assertEquals(Optional.of(new Pair<>(654, 123456)), cgroup.cpuQuotaPeriod(containerId)); - } - - @Test - public void updates_cpu_shares() { - assertEquals(OptionalInt.empty(), cgroup.cpuShares(containerId)); - - UnixPath cpuPath = new UnixPath(fileSystem.getPath("/sys/fs/cgroup/cpu/machine.slice/libpod-4aec78cc.scope")).createDirectories(); - cpuPath.resolve("cpu.shares").writeUtf8File("987\n"); - assertEquals(OptionalInt.of(987), cgroup.cpuShares(containerId)); - - assertFalse(cgroup.updateCpuShares(context, containerId, 987)); - - assertTrue(cgroup.updateCpuShares(context, containerId, 789)); - assertEquals(OptionalInt.of(789), cgroup.cpuShares(containerId)); - } - - @Test - public void reads_cpu_stats() throws IOException { - UnixPath cpuacctPath = new UnixPath(fileSystem.getPath("/sys/fs/cgroup/cpuacct/machine.slice/libpod-4aec78cc.scope")).createDirectories(); - cpuacctPath.resolve("cpuacct.usage").writeUtf8File("91623711445\n"); - cpuacctPath.resolve("cpuacct.stat").writeUtf8File("user 7463\n" + - "system 1741\n"); - cpuacctPath.resolve("cpu.stat").writeUtf8File("nr_periods 2361\n" + - "nr_throttled 342\n" + - "throttled_time 131033468519\n"); - - assertEquals(Map.of(TOTAL_USAGE_USEC, 91623711L, SYSTEM_USAGE_USEC, 17410000L, USER_USAGE_USEC, 74630000L, - TOTAL_PERIODS, 2361L, THROTTLED_PERIODS, 342L, THROTTLED_TIME_USEC, 131033468L), cgroup.cpuStats(containerId)); - } - - @Test - public void reads_memory_metrics() throws IOException { - UnixPath memoryPath = new UnixPath(fileSystem.getPath("/sys/fs/cgroup/memory/machine.slice/libpod-4aec78cc.scope")).createDirectories(); - memoryPath.resolve("memory.usage_in_bytes").writeUtf8File("2525093888\n"); - assertEquals(2525093888L, cgroup.memoryUsageInBytes(containerId)); - - memoryPath.resolve("memory.limit_in_bytes").writeUtf8File("4322885632\n"); - assertEquals(4322885632L, cgroup.memoryLimitInBytes(containerId)); - - memoryPath.resolve("memory.stat").writeUtf8File("cache 296828928\n" + - "rss 2152587264\n" + - "rss_huge 1107296256\n" + - "shmem 135168\n" + - "mapped_file 270336\n"); - assertEquals(296828928L, cgroup.memoryCacheInBytes(containerId)); - } -} diff --git a/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/container/CGroupV2Test.java b/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/container/CGroupV2Test.java index 909979342ea..789f31f75c6 100644 --- a/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/container/CGroupV2Test.java +++ b/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/container/CGroupV2Test.java @@ -14,12 +14,12 @@ import java.util.Map; import java.util.Optional; import java.util.OptionalInt; -import static com.yahoo.vespa.hosted.node.admin.container.CGroup.CpuStatField.SYSTEM_USAGE_USEC; -import static com.yahoo.vespa.hosted.node.admin.container.CGroup.CpuStatField.THROTTLED_PERIODS; -import static com.yahoo.vespa.hosted.node.admin.container.CGroup.CpuStatField.THROTTLED_TIME_USEC; -import static com.yahoo.vespa.hosted.node.admin.container.CGroup.CpuStatField.TOTAL_PERIODS; -import static com.yahoo.vespa.hosted.node.admin.container.CGroup.CpuStatField.TOTAL_USAGE_USEC; -import static com.yahoo.vespa.hosted.node.admin.container.CGroup.CpuStatField.USER_USAGE_USEC; +import static com.yahoo.vespa.hosted.node.admin.container.CGroupV2.CpuStatField.SYSTEM_USAGE_USEC; +import static com.yahoo.vespa.hosted.node.admin.container.CGroupV2.CpuStatField.THROTTLED_PERIODS; +import static com.yahoo.vespa.hosted.node.admin.container.CGroupV2.CpuStatField.THROTTLED_TIME_USEC; +import static com.yahoo.vespa.hosted.node.admin.container.CGroupV2.CpuStatField.TOTAL_PERIODS; +import static com.yahoo.vespa.hosted.node.admin.container.CGroupV2.CpuStatField.TOTAL_USAGE_USEC; +import static com.yahoo.vespa.hosted.node.admin.container.CGroupV2.CpuStatField.USER_USAGE_USEC; import static com.yahoo.vespa.hosted.node.admin.container.CGroupV2.sharesToWeight; import static com.yahoo.vespa.hosted.node.admin.container.CGroupV2.weightToShares; import static org.junit.jupiter.api.Assertions.assertEquals; @@ -34,7 +34,7 @@ public class CGroupV2Test { private static final ContainerId containerId = new ContainerId("4aec78cc"); private final FileSystem fileSystem = TestFileSystem.create(); - private final CGroup cgroup = new CGroupV2(fileSystem); + private final CGroupV2 cgroup = new CGroupV2(fileSystem); private final NodeAgentContext context = NodeAgentContextImpl.builder("node123.yahoo.com").fileSystem(fileSystem).build(); private final UnixPath cgroupRoot = new UnixPath(fileSystem.getPath("/sys/fs/cgroup/machine.slice/libpod-4aec78cc.scope/container")).createDirectories(); diff --git a/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/container/ContainerStatsCollectorTest.java b/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/container/ContainerStatsCollectorTest.java index f852eb6235d..72c5d016a47 100644 --- a/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/container/ContainerStatsCollectorTest.java +++ b/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/container/ContainerStatsCollectorTest.java @@ -17,12 +17,12 @@ import java.util.List; import java.util.Map; import java.util.Optional; -import static com.yahoo.vespa.hosted.node.admin.container.CGroup.CpuStatField.SYSTEM_USAGE_USEC; -import static com.yahoo.vespa.hosted.node.admin.container.CGroup.CpuStatField.THROTTLED_PERIODS; -import static com.yahoo.vespa.hosted.node.admin.container.CGroup.CpuStatField.THROTTLED_TIME_USEC; -import static com.yahoo.vespa.hosted.node.admin.container.CGroup.CpuStatField.TOTAL_PERIODS; -import static com.yahoo.vespa.hosted.node.admin.container.CGroup.CpuStatField.TOTAL_USAGE_USEC; -import static com.yahoo.vespa.hosted.node.admin.container.CGroup.CpuStatField.USER_USAGE_USEC; +import static com.yahoo.vespa.hosted.node.admin.container.CGroupV2.CpuStatField.SYSTEM_USAGE_USEC; +import static com.yahoo.vespa.hosted.node.admin.container.CGroupV2.CpuStatField.THROTTLED_PERIODS; +import static com.yahoo.vespa.hosted.node.admin.container.CGroupV2.CpuStatField.THROTTLED_TIME_USEC; +import static com.yahoo.vespa.hosted.node.admin.container.CGroupV2.CpuStatField.TOTAL_PERIODS; +import static com.yahoo.vespa.hosted.node.admin.container.CGroupV2.CpuStatField.TOTAL_USAGE_USEC; +import static com.yahoo.vespa.hosted.node.admin.container.CGroupV2.CpuStatField.USER_USAGE_USEC; import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertTrue; import static org.mockito.ArgumentMatchers.eq; @@ -37,7 +37,7 @@ public class ContainerStatsCollectorTest { private final TestTerminal testTerminal = new TestTerminal(); private final ContainerEngineMock containerEngine = new ContainerEngineMock(testTerminal); private final FileSystem fileSystem = TestFileSystem.create(); - private final CGroup cgroup = mock(CGroup.class); + private final CGroupV2 cgroup = mock(CGroupV2.class); private final NodeAgentContext context = NodeAgentContextImpl.builder(NodeSpec.Builder.testSpec("c1").build()) .fileSystem(TestFileSystem.create()) .build(); |