diff options
Diffstat (limited to 'node-admin')
25 files changed, 1735 insertions, 362 deletions
diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/container/Cgroup.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/container/Cgroup.java new file mode 100644 index 00000000000..3efadda183b --- /dev/null +++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/container/Cgroup.java @@ -0,0 +1,111 @@ +// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +// +package com.yahoo.vespa.hosted.node.admin.container; + +import com.yahoo.vespa.hosted.node.admin.nodeagent.NodeAgentContext; +import com.yahoo.vespa.hosted.node.admin.task.util.file.UnixPath; + +import java.io.IOException; +import java.io.UncheckedIOException; +import java.nio.charset.StandardCharsets; +import java.nio.file.FileSystem; +import java.nio.file.Files; +import java.nio.file.NoSuchFileException; +import java.nio.file.Path; +import java.util.OptionalInt; +import java.util.logging.Logger; + +/** + * Read and write interface to the v1 cgroup of a podman container. + * See <a href="https://man7.org/linux/man-pages/man7/cgroups.7.html">cgroups(7)</a> for background. + * + * @author hakon + */ +public class Cgroup { + + private static final Logger logger = Logger.getLogger(Cgroup.class.getName()); + + private final FileSystem fileSystem; + private final ContainerId containerId; + + public Cgroup(FileSystem fileSystem, ContainerId containerId) { + this.fileSystem = fileSystem; + this.containerId = containerId; + } + + public OptionalInt readCpuQuota() { + return readCgroupsCpuInt(cfsQuotaPath()); + } + + public OptionalInt readCpuPeriod() { + return readCgroupsCpuInt(cfsPeriodPath()); + } + + public OptionalInt readCpuShares() { + return readCgroupsCpuInt(sharesPath()); + } + + public boolean updateCpuQuota(NodeAgentContext context, int cpuQuotaUs) { + return writeCgroupsCpuInt(context, cfsQuotaPath(), cpuQuotaUs); + } + + public boolean updateCpuPeriod(NodeAgentContext context, int periodUs) { + return writeCgroupsCpuInt(context, cfsPeriodPath(), periodUs); + } + + public boolean updateCpuShares(NodeAgentContext context, int shares) { + return writeCgroupsCpuInt(context, sharesPath(), shares); + } + + /** Returns the path to the podman container's scope directory for the cpuacct controller. */ + public Path cpuacctPath() { + return fileSystem.getPath("/sys/fs/cgroup/cpuacct/machine.slice/libpod-" + containerId + ".scope"); + } + + /** Returns the path to the podman container's scope directory for the cpu controller. */ + public Path cpuPath() { + return fileSystem.getPath("/sys/fs/cgroup/cpu/machine.slice/libpod-" + containerId + ".scope"); + } + + /** Returns the path to the podman container's scope directory for the memory controller. */ + public Path memoryPath() { + return fileSystem.getPath("/sys/fs/cgroup/memory/machine.slice/libpod-" + containerId + ".scope"); + } + + private UnixPath cfsQuotaPath() { + return new UnixPath(cpuPath().resolve("cpu.cfs_quota_us")); + } + + private UnixPath cfsPeriodPath() { + return new UnixPath(cpuPath().resolve("cpu.cfs_period_us")); + } + + private UnixPath sharesPath() { + return new UnixPath(cpuPath().resolve("cpu.shares")); + } + + private OptionalInt readCgroupsCpuInt(UnixPath unixPath) { + final byte[] currentContentBytes; + try { + currentContentBytes = Files.readAllBytes(unixPath.toPath()); + } catch (NoSuchFileException e) { + return OptionalInt.empty(); + } catch (IOException e) { + throw new UncheckedIOException(e); + } + + String currentContent = new String(currentContentBytes, StandardCharsets.UTF_8).strip(); + return OptionalInt.of(Integer.parseInt(currentContent)); + } + + private boolean writeCgroupsCpuInt(NodeAgentContext context, UnixPath unixPath, int value) { + int currentValue = readCgroupsCpuInt(unixPath).orElseThrow(); + if (currentValue == value) { + return false; + } + + context.recordSystemModification(logger, "Updating " + unixPath + " from " + currentValue + " to " + value); + unixPath.writeUtf8File(Integer.toString(value)); + return true; + } +} diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/container/Container.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/container/Container.java index 7aeb43e44f4..24b77e52a02 100644 --- a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/container/Container.java +++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/container/Container.java @@ -1,80 +1,100 @@ -// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. package com.yahoo.vespa.hosted.node.admin.container; import com.yahoo.config.provision.DockerImage; +import java.util.List; +import java.util.Map; import java.util.Objects; /** - * @author stiankri + * A Podman container. + * + * @author mpolden */ -public class Container { - private final ContainerId id; - public final String hostname; - public final DockerImage image; - public final ContainerResources resources; - public final ContainerName name; - public final State state; - public final int pid; - - public Container( - final ContainerId id, - final String hostname, - final DockerImage image, - final ContainerResources resources, - final ContainerName containerName, - final State state, - final int pid) { - this.id = id; - this.hostname = hostname; - this.image = image; - this.resources = resources; - this.name = containerName; - this.state = state; - this.pid = pid; +public class Container extends PartialContainer { + + private final String hostname; + private final ContainerResources resources; + private final int conmonPid; + private final List<Network> networks; + + public Container(ContainerId id, ContainerName name, State state, String imageId, DockerImage image, + Map<String, String> labels, int pid, int conmonPid, String hostname, + ContainerResources resources, List<Network> networks, boolean managed) { + super(id, name, state, imageId, image, labels, pid, managed); + this.hostname = Objects.requireNonNull(hostname); + this.resources = Objects.requireNonNull(resources); + this.conmonPid = conmonPid; + this.networks = List.copyOf(Objects.requireNonNull(networks)); } - public ContainerId id() { - return id; + /** The hostname of this, if any */ + public String hostname() { + return hostname; } - @Override - public boolean equals(final Object obj) { - if (!(obj instanceof Container)) { - return false; - } - final Container other = (Container) obj; - return Objects.equals(id, other.id) - && Objects.equals(hostname, other.hostname) - && Objects.equals(image, other.image) - && Objects.equals(resources, other.resources) - && Objects.equals(name, other.name) - && Objects.equals(pid, other.pid); + /** Resource limits for this*/ + public ContainerResources resources() { + return resources; + } + + /** Pid of the conmon process for this container */ + public int conmonPid() { + return conmonPid; + } + + /** The networks used by this */ + public List<Network> networks() { + return networks; } @Override - public int hashCode() { - return Objects.hash(hostname, image, resources, name, pid); + public boolean equals(Object o) { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + if (!super.equals(o)) return false; + Container that = (Container) o; + return conmonPid == that.conmonPid && hostname.equals(that.hostname) && resources.equals(that.resources) && networks.equals(that.networks); } @Override - public String toString() { - return "Container {" - + " id=" + id - + " hostname=" + hostname - + " image=" + image - + " resources=" + resources - + " name=" + name - + " state=" + state - + " pid=" + pid - + "}"; + public int hashCode() { + return Objects.hash(super.hashCode(), hostname, resources, conmonPid, networks); } - public enum State { - CREATED, RESTARTING, RUNNING, REMOVING, PAUSED, EXITED, DEAD, UNKNOWN, CONFIGURED, STOPPED; + /** The network of a container */ + public static class Network { + + private final String name; + private final String ipv4Address; - public boolean isRunning() { - return this == RUNNING; + public Network(String name, String ipv4Address) { + this.name = Objects.requireNonNull(name); + this.ipv4Address = Objects.requireNonNull(ipv4Address); } + + public String name() { + return name; + } + + public String ipv4Address() { + return ipv4Address; + } + + @Override + public boolean equals(Object o) { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + Network network = (Network) o; + return name.equals(network.name) && ipv4Address.equals(network.ipv4Address); + } + + @Override + public int hashCode() { + return Objects.hash(name, ipv4Address); + } + } + } diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/container/ContainerEngine.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/container/ContainerEngine.java new file mode 100644 index 00000000000..58ff1af4681 --- /dev/null +++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/container/ContainerEngine.java @@ -0,0 +1,61 @@ +// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.vespa.hosted.node.admin.container; + +import com.yahoo.config.provision.DockerImage; +import com.yahoo.vespa.hosted.node.admin.component.TaskContext; +import com.yahoo.vespa.hosted.node.admin.container.image.Image; +import com.yahoo.vespa.hosted.node.admin.nodeagent.ContainerData; +import com.yahoo.vespa.hosted.node.admin.nodeagent.NodeAgentContext; +import com.yahoo.vespa.hosted.node.admin.task.util.process.CommandResult; + +import java.time.Duration; +import java.util.List; +import java.util.Optional; + +/** + * Interface for a container engine, such as Docker or Podman. + * + * @author mpolden + */ +public interface ContainerEngine { + + /** Create a new container */ + void createContainer(NodeAgentContext context, ContainerData containerData, ContainerResources containerResources); + + /** Start a created container */ + void startContainer(NodeAgentContext context); + + /** Update an existing container */ + void updateContainer(NodeAgentContext context, ContainerId containerId, ContainerResources containerResources); + + /** Remove given container. The container will be stopped if necessary */ + void removeContainer(TaskContext context, PartialContainer container); + + /** Get container for given context */ + Optional<Container> getContainer(NodeAgentContext context); + + /** List containers managed by this */ + List<PartialContainer> listContainers(TaskContext context); + + /** Returns the network interface used by container in given context */ + String networkInterface(NodeAgentContext context); + + /** Executes a command in inside container as root user, throws on non-zero exit code */ + CommandResult executeAsRoot(NodeAgentContext context, Duration timeout, String... command); + + /** Executes a command in inside containers network namespace, throws on non-zero exit code */ + CommandResult executeInNetworkNamespace(NodeAgentContext context, String... command); + + /** Download giving image */ + void pullImage(TaskContext context, DockerImage image, RegistryCredentials registryCredentials); + + /** Returns whether given image is already downloaded */ + boolean hasImage(TaskContext context, DockerImage image); + + /** Remove given image */ + void removeImage(TaskContext context, String id); + + /** Returns images available in this */ + List<Image> listImages(TaskContext context); + +} diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/container/ContainerName.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/container/ContainerName.java index 49dacc44335..a81404902bc 100644 --- a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/container/ContainerName.java +++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/container/ContainerName.java @@ -9,7 +9,8 @@ import java.util.regex.Pattern; * * @author bakksjo */ -public class ContainerName { +public class ContainerName implements Comparable<ContainerName> { + private static final Pattern LEGAL_CONTAINER_NAME_PATTERN = Pattern.compile("^[a-zA-Z0-9-]+$"); private final String name; @@ -51,4 +52,10 @@ public class ContainerName { + " name=" + name + " }"; } + + @Override + public int compareTo(ContainerName o) { + return name.compareTo(o.name); + } + } diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/container/ContainerOperations.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/container/ContainerOperations.java index e48927279a4..444e92e793f 100644 --- a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/container/ContainerOperations.java +++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/container/ContainerOperations.java @@ -3,69 +3,147 @@ package com.yahoo.vespa.hosted.node.admin.container; import com.yahoo.config.provision.DockerImage; import com.yahoo.vespa.hosted.node.admin.component.TaskContext; +import com.yahoo.vespa.hosted.node.admin.container.image.ContainerImageDownloader; +import com.yahoo.vespa.hosted.node.admin.container.image.ContainerImagePruner; import com.yahoo.vespa.hosted.node.admin.nodeagent.ContainerData; import com.yahoo.vespa.hosted.node.admin.nodeagent.NodeAgentContext; +import com.yahoo.vespa.hosted.node.admin.task.util.process.CommandLine; import com.yahoo.vespa.hosted.node.admin.task.util.process.CommandResult; +import java.nio.file.FileSystem; +import java.time.Clock; import java.time.Duration; import java.util.List; +import java.util.Objects; import java.util.Optional; import java.util.Set; +import java.util.stream.Collectors; /** + * High-level interface for container operations. Code managing containers should use this and not + * {@link ContainerEngine} directly. + * * @author hakonhall + * @author mpolden */ -// TODO(mpolden): Clean up this interface when ContainerOperationsImpl, DockerEngine and friends can be removed -public interface ContainerOperations { - - void createContainer(NodeAgentContext context, ContainerData containerData, ContainerResources containerResources); - - void startContainer(NodeAgentContext context); - - void removeContainer(NodeAgentContext context, Container container); - - void updateContainer(NodeAgentContext context, ContainerId containerId, ContainerResources containerResources); - - Optional<Container> getContainer(NodeAgentContext context); - - boolean pullImageAsyncIfNeeded(TaskContext context, DockerImage dockerImage, RegistryCredentials registryCredentials); - - ProcessResult executeCommandInContainerAsRoot(NodeAgentContext context, String... command); - - ProcessResult executeCommandInContainerAsRoot(NodeAgentContext context, Long timeoutSeconds, String... command); +public class ContainerOperations { + + private final ContainerEngine containerEngine; + private final ContainerImageDownloader imageDownloader; + private final ContainerImagePruner imagePruner; + private final ContainerStatsCollector containerStatsCollector; + + public ContainerOperations(ContainerEngine containerEngine, FileSystem fileSystem) { + this.containerEngine = Objects.requireNonNull(containerEngine); + this.imageDownloader = new ContainerImageDownloader(containerEngine); + this.imagePruner = new ContainerImagePruner(containerEngine, Clock.systemUTC()); + this.containerStatsCollector = new ContainerStatsCollector(Objects.requireNonNull(fileSystem)); + } + + public void createContainer(NodeAgentContext context, ContainerData containerData, ContainerResources containerResources) { + containerEngine.createContainer(context, containerData, containerResources); + } + + public void startContainer(NodeAgentContext context) { + containerEngine.startContainer(context); + } + + public void removeContainer(NodeAgentContext context, Container container) { + containerEngine.removeContainer(context, container); + } + + public void updateContainer(NodeAgentContext context, ContainerId containerId, ContainerResources containerResources) { + containerEngine.updateContainer(context, containerId, containerResources); + } + + public Optional<Container> getContainer(NodeAgentContext context) { + return containerEngine.getContainer(context); + } + + /** Pull image asynchronously. Returns true if image is still downloading and false if download is complete */ + public boolean pullImageAsyncIfNeeded(TaskContext context, DockerImage dockerImage, RegistryCredentials registryCredentials) { + return !imageDownloader.get(context, dockerImage, registryCredentials); + } + + /** Executes a command inside container identified by given context. Does NOT throw on non-zero exit code */ + public CommandResult executeCommandInContainerAsRoot(NodeAgentContext context, String... command) { + return executeCommandInContainerAsRoot(context, CommandLine.DEFAULT_TIMEOUT.toSeconds(), command); + } + + /** Executes a command inside container identified by given context. Does NOT throw on non-zero exit code */ + public CommandResult executeCommandInContainerAsRoot(NodeAgentContext context, Long timeoutSeconds, String... command) { + return containerEngine.executeAsRoot(context, Duration.ofSeconds(timeoutSeconds), command); + } /** Executes a command in inside containers network namespace, throws on non-zero exit code */ - CommandResult executeCommandInNetworkNamespace(NodeAgentContext context, String... command); + public CommandResult executeCommandInNetworkNamespace(NodeAgentContext context, String... command) { + return containerEngine.executeInNetworkNamespace(context, command); + } - /** Resume node. Resuming a node means that it is ready to take on traffic. - * @return*/ - String resumeNode(NodeAgentContext context); + /** Resume node. Resuming a node means that it is ready to receive traffic */ + public String resumeNode(NodeAgentContext context) { + return executeNodeCtlInContainer(context, "resume"); + } /** * Suspend node and return output. Suspending a node means the node should be taken temporarly offline, * such that maintenance of the node can be done (upgrading, rebooting, etc). */ - String suspendNode(NodeAgentContext context); - - String restartVespa(NodeAgentContext context); - - String startServices(NodeAgentContext context); - - String stopServices(NodeAgentContext context); - - Optional<ContainerStats> getContainerStats(NodeAgentContext context); - - boolean noManagedContainersRunning(TaskContext context); + public String suspendNode(NodeAgentContext context) { + return executeNodeCtlInContainer(context, "suspend"); + } + + /** Restart Vespa inside container. Same as running suspend, stop, start and resume */ + public String restartVespa(NodeAgentContext context) { + return executeNodeCtlInContainer(context, "restart-vespa"); + } + + /** Start Vespa inside container */ + public String startServices(NodeAgentContext context) { + return executeNodeCtlInContainer(context, "start"); + } + + /** Stop Vespa inside container */ + public String stopServices(NodeAgentContext context) { + return executeNodeCtlInContainer(context, "stop"); + } + + /** Get container statistics */ + public Optional<ContainerStats> getContainerStats(NodeAgentContext context) { + String iface = containerEngine.networkInterface(context); + return getContainer(context).flatMap(container -> containerStatsCollector.collect(container.id(), container.pid(), iface)); + } + + /** Returns true if no containers managed by node-admin are running */ + public boolean noManagedContainersRunning(TaskContext context) { + return containerEngine.listContainers(context).stream() + .filter(c -> c.managed()) + .noneMatch(container -> container.state() == Container.State.running); + } /** - * Stops and removes all managed containers except the ones given in {@code containerNames}. + * Stop and remove all managed containers except the given ones * * @return true if any containers were removed */ - boolean retainManagedContainers(TaskContext context, Set<ContainerName> containerNames); + public boolean retainManagedContainers(TaskContext context, Set<ContainerName> containerNames) { + return containerEngine.listContainers(context).stream() + .filter(c -> c.managed()) + .filter(container -> !containerNames.contains(container.name())) + .peek(container -> containerEngine.removeContainer(context, container)) + .count() > 0; + } /** Deletes the local images that are currently not in use by any container and not recently used. */ - boolean deleteUnusedContainerImages(TaskContext context, List<DockerImage> excludes, Duration minImageAgeToDelete); + public boolean deleteUnusedContainerImages(TaskContext context, List<DockerImage> excludes, Duration minImageAgeToDelete) { + List<String> excludedRefs = excludes.stream().map(DockerImage::asString).collect(Collectors.toList()); + return imagePruner.removeUnusedImages(context, excludedRefs, minImageAgeToDelete); + } + + private String executeNodeCtlInContainer(NodeAgentContext context, String program) { + String[] command = new String[] {context.pathInNodeUnderVespaHome("bin/vespa-nodectl").toString(), program}; + return executeCommandInContainerAsRoot(context, command).getOutput(); + } } diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/container/ContainerStatsCollector.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/container/ContainerStatsCollector.java new file mode 100644 index 00000000000..91466efd43e --- /dev/null +++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/container/ContainerStatsCollector.java @@ -0,0 +1,170 @@ +// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.vespa.hosted.node.admin.container; + +import java.io.IOException; +import java.io.UncheckedIOException; +import java.nio.file.FileSystem; +import java.nio.file.Files; +import java.nio.file.NoSuchFileException; +import java.nio.file.Path; +import java.time.Duration; +import java.util.List; +import java.util.Map; +import java.util.Objects; +import java.util.Optional; + +/** + * Collects CPU, memory and network statistics for a container. + * + * Uses same approach as runc: https://github.com/opencontainers/runc/tree/master/libcontainer/cgroups/fs + * + * @author mpolden + */ +class ContainerStatsCollector { + + private final FileSystem fileSystem; + + public ContainerStatsCollector(FileSystem fileSystem) { + this.fileSystem = Objects.requireNonNull(fileSystem); + } + + /** Collect statistics for given container ID and PID */ + public Optional<ContainerStats> collect(ContainerId containerId, int pid, String iface) { + Cgroup cgroup = new Cgroup(fileSystem, containerId); + try { + ContainerStats.CpuStats cpuStats = collectCpuStats(cgroup); + ContainerStats.MemoryStats memoryStats = collectMemoryStats(cgroup); + Map<String, ContainerStats.NetworkStats> networkStats = Map.of(iface, collectNetworkStats(iface, pid)); + return Optional.of(new ContainerStats(networkStats, memoryStats, cpuStats)); + } catch (NoSuchFileException ignored) { + return Optional.empty(); // Container disappeared while we collected stats + } catch (IOException e) { + throw new UncheckedIOException(e); + } + } + + private ContainerStats.CpuStats collectCpuStats(Cgroup cgroup) throws IOException { + List<String> cpuStatLines = Files.readAllLines(cpuStatPath(cgroup)); + long throttledActivePeriods = parseLong(cpuStatLines, "nr_periods"); + long throttledPeriods = parseLong(cpuStatLines, "nr_throttled"); + long throttledTime = parseLong(cpuStatLines, "throttled_time"); + return new ContainerStats.CpuStats(cpuCount(cgroup), + systemCpuUsage().toNanos(), + containerCpuUsage(cgroup).toNanos(), + containerCpuUsageSystem(cgroup).toNanos(), + throttledTime, + throttledActivePeriods, + throttledPeriods); + } + + private ContainerStats.MemoryStats collectMemoryStats(Cgroup cgroup) throws IOException { + long memoryLimitInBytes = parseLong(memoryLimitPath(cgroup)); + long memoryUsageInBytes = parseLong(memoryUsagePath(cgroup)); + long cachedInBytes = parseLong(memoryStatPath(cgroup), "cache"); + return new ContainerStats.MemoryStats(cachedInBytes, memoryUsageInBytes, memoryLimitInBytes); + } + + private ContainerStats.NetworkStats collectNetworkStats(String iface, int containerPid) throws IOException { + for (var line : Files.readAllLines(netDevPath(containerPid))) { + String[] fields = fields(line.trim()); + if (fields.length < 17 || !fields[0].equals(iface + ":")) continue; + + long rxBytes = Long.parseLong(fields[1]); + long rxErrors = Long.parseLong(fields[3]); + long rxDropped = Long.parseLong(fields[4]); + + long txBytes = Long.parseLong(fields[9]); + long txErrors = Long.parseLong(fields[11]); + long txDropped = Long.parseLong(fields[12]); + + return new ContainerStats.NetworkStats(rxBytes, rxDropped, rxErrors, txBytes, txDropped, txErrors); + } + throw new IllegalArgumentException("No statistics found for interface " + iface); + } + + /** Number of CPUs seen by given container */ + private int cpuCount(Cgroup cgroup) throws IOException { + return fields(Files.readString(perCpuUsagePath(cgroup))).length; + } + + /** Returns total CPU time spent executing all the processes on this host */ + private Duration systemCpuUsage() throws IOException { + long ticks = parseLong(fileSystem.getPath("/proc/stat"), "cpu"); + return ticksToDuration(ticks); + } + + /** Returns total CPU time spent running all processes inside given container */ + private Duration containerCpuUsage(Cgroup cgroup) throws IOException { + return Duration.ofNanos(parseLong(cpuUsagePath(cgroup))); + } + + /** Returns total CPU time spent in kernel/system mode while executing processes inside given container */ + private Duration containerCpuUsageSystem(Cgroup cgroup) throws IOException { + long ticks = parseLong(cpuacctStatPath(cgroup), "system"); + return ticksToDuration(ticks); + } + + private long parseLong(Path path) throws IOException { + return Long.parseLong(Files.readString(path).trim()); + } + + private long parseLong(Path path, String fieldName) throws IOException { + return parseLong(Files.readAllLines(path), fieldName); + } + + private long parseLong(List<String> lines, String fieldName) { + long value = 0; + for (var line : lines) { + String[] fields = fields(line); + if (fields.length < 2 || !fields[0].equals(fieldName)) continue; + for (int i = 1; i < fields.length; i++) { + value += Long.parseLong(fields[i]); + } + break; + } + return value; + } + + private Path netDevPath(int containerPid) { + return fileSystem.getPath("/proc/" + containerPid + "/net/dev"); + } + + private Path cpuacctStatPath(Cgroup cgroup) { + return cgroup.cpuacctPath().resolve("cpuacct.stat"); + } + + private Path cpuUsagePath(Cgroup cgroup) { + return cgroup.cpuacctPath().resolve("cpuacct.usage"); + } + + private Path perCpuUsagePath(Cgroup cgroup) { + return cgroup.cpuacctPath().resolve("cpuacct.usage_percpu"); + } + + private Path cpuStatPath(Cgroup cgroup) { + return cgroup.cpuacctPath().resolve("cpu.stat"); + } + + private Path memoryStatPath(Cgroup cgroup) { + return cgroup.memoryPath().resolve("memory.stat"); + } + + private Path memoryUsagePath(Cgroup cgroup) { + return cgroup.memoryPath().resolve("memory.usage_in_bytes"); + } + + private Path memoryLimitPath(Cgroup cgroup) { + return cgroup.memoryPath().resolve("memory.limit_in_bytes"); + } + + private static Duration ticksToDuration(long ticks) { + // Ideally we would read this from _SC_CLK_TCK, but then we need JNI. However, in practice this is always 100 on x86 Linux + long ticksPerSecond = 100; + return Duration.ofNanos((ticks * Duration.ofSeconds(1).toNanos()) / ticksPerSecond); + } + + private static String[] fields(String s) { + return s.split("\\s+"); + } + +} diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/container/PartialContainer.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/container/PartialContainer.java new file mode 100644 index 00000000000..c3be670c009 --- /dev/null +++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/container/PartialContainer.java @@ -0,0 +1,130 @@ +// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.vespa.hosted.node.admin.container; + +import com.yahoo.config.provision.DockerImage; + +import java.util.Map; +import java.util.Objects; + +/** + * A partial container, containing only fields returned by a container list command such as 'podman ps'. + * + * @author mpolden + */ +public class PartialContainer { + + private final ContainerId id; + private final ContainerName name; + private final State state; + private final String imageId; + private final DockerImage image; + private final Map<String, String> labels; + private final int pid; + private final boolean managed; + + public PartialContainer(ContainerId id, ContainerName name, State state, String imageId, + DockerImage image, Map<String, String> labels, int pid, boolean managed) { + this.id = Objects.requireNonNull(id); + this.name = Objects.requireNonNull(name); + this.state = Objects.requireNonNull(state); + this.imageId = Objects.requireNonNull(imageId); + this.image = Objects.requireNonNull(image); + this.labels = Map.copyOf(Objects.requireNonNull(labels)); + this.pid = pid; + this.managed = managed; + } + + /** A unique identifier for this. Typically generated by the container engine */ + public ContainerId id() { + return id; + } + + /** The given name of this */ + public ContainerName name() { + return name; + } + + /** Current state of this */ + public State state() { + return state; + } + + /** A unique identifier for the image in use by this */ + public String imageId() { + return imageId; + } + + /** The image in use by this */ + public DockerImage image() { + return image; + } + + /** The labels set on this */ + public Map<String, String> labels() { + return labels; + } + + /** The PID of this */ + public int pid() { + return pid; + } + + /** Returns whether this container is managed by node-admin */ + public boolean managed() { + return managed; + } + + /** Returns the value of given label key */ + public String label(String key) { + String labelValue = labels.get(key); + if (labelValue == null) throw new IllegalArgumentException("No such label '" + key + "'"); + return labelValue; + } + + @Override + public boolean equals(Object o) { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + PartialContainer that = (PartialContainer) o; + return pid == that.pid && managed == that.managed && id.equals(that.id) && name.equals(that.name) && state == that.state && imageId.equals(that.imageId) && image.equals(that.image) && labels.equals(that.labels); + } + + @Override + public int hashCode() { + return Objects.hash(id, name, state, imageId, image, labels, pid, managed); + } + + /** The state of a container */ + public enum State { + + unknown, + configured, + created, + running, + stopped, + paused, + exited, + removing; + + + public boolean isRunning() { + return this == running; + } + + public static Container.State from(String state) { + switch (state) { + case "unknown": return unknown; + case "configured": return configured; + case "created": return created; + case "running": return running; + case "stopped": return stopped; + case "paused": return paused; + case "exited": return exited; + case "removing": return removing; + } + throw new IllegalArgumentException("Invalid state '" + state + "'"); + } + + } + +} diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/container/ProcessResult.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/container/ProcessResult.java deleted file mode 100644 index 066a65eb409..00000000000 --- a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/container/ProcessResult.java +++ /dev/null @@ -1,48 +0,0 @@ -// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -package com.yahoo.vespa.hosted.node.admin.container; - -import java.util.Objects; - -// TODO: Replace usages of this with CommandResult -public class ProcessResult { - private final int exitStatus; - private final String output; - private final String errors; - - public ProcessResult(int exitStatus, String output, String errors) { - this.exitStatus = exitStatus; - this.output = output; - this.errors = errors; - } - - public boolean isSuccess() { return exitStatus == 0; } - public int getExitStatus() { return exitStatus; } - - public String getOutput() { return output; } - - public String getErrors() { return errors; } - - @Override - public boolean equals(Object o) { - if (!(o instanceof ProcessResult)) return false; - ProcessResult other = (ProcessResult) o; - return Objects.equals(exitStatus, other.exitStatus) - && Objects.equals(output, other.output) - && Objects.equals(errors, other.errors); - } - - @Override - public int hashCode() { - return Objects.hash(exitStatus, output, errors); - } - - @Override - public String toString() { - return "ProcessResult {" - + " exitStatus=" + exitStatus - + " output=" + output - + " errors=" + errors - + " }"; - } - -} diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/container/image/ContainerImageDownloader.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/container/image/ContainerImageDownloader.java new file mode 100644 index 00000000000..c65209698f6 --- /dev/null +++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/container/image/ContainerImageDownloader.java @@ -0,0 +1,53 @@ +// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.vespa.hosted.node.admin.container.image; + +import com.yahoo.concurrent.DaemonThreadFactory; +import com.yahoo.config.provision.DockerImage; +import com.yahoo.vespa.hosted.node.admin.component.TaskContext; +import com.yahoo.vespa.hosted.node.admin.container.ContainerEngine; +import com.yahoo.vespa.hosted.node.admin.container.RegistryCredentials; + +import java.util.Collections; +import java.util.HashSet; +import java.util.Objects; +import java.util.Set; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; + +/** + * Download a container image asynchronously. + * + * @author mpolden + */ +public class ContainerImageDownloader { + + private final ContainerEngine containerEngine; + + private final ExecutorService executorService = Executors.newSingleThreadExecutor( + new DaemonThreadFactory("container-image-downloader")); // Download one image at a time + private final Set<DockerImage> pendingDownloads = Collections.synchronizedSet(new HashSet<>()); + + public ContainerImageDownloader(ContainerEngine containerEngine) { + this.containerEngine = Objects.requireNonNull(containerEngine); + } + + /** + * Download given container image. + * + * @return true if the image download has completed. + */ + public boolean get(TaskContext context, DockerImage image, RegistryCredentials registryCredentials) { + if (pendingDownloads.contains(image)) return false; + if (containerEngine.hasImage(context, image)) return true; + executorService.submit(() -> { + try { + containerEngine.pullImage(context, image, registryCredentials); + } finally { + pendingDownloads.remove(image); + } + }); + pendingDownloads.add(image); + return false; + } + +} diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/container/image/ContainerImagePruner.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/container/image/ContainerImagePruner.java new file mode 100644 index 00000000000..bb33dfee663 --- /dev/null +++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/container/image/ContainerImagePruner.java @@ -0,0 +1,201 @@ +// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.vespa.hosted.node.admin.container.image; + +import com.google.common.base.Strings; +import com.yahoo.collections.Pair; +import com.yahoo.vespa.hosted.node.admin.component.TaskContext; +import com.yahoo.vespa.hosted.node.admin.container.ContainerEngine; +import com.yahoo.vespa.hosted.node.admin.container.PartialContainer; + +import java.time.Clock; +import java.time.Duration; +import java.time.Instant; +import java.util.Collections; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Objects; +import java.util.Optional; +import java.util.Set; +import java.util.concurrent.ConcurrentHashMap; +import java.util.function.Function; +import java.util.logging.Logger; +import java.util.stream.Collectors; +import java.util.stream.Stream; + +/** + * This class removes container images that have not been recently used by any containers. + * + * <p>Definitions: + * <ul> + * <li>Every image has exactly 1 id</li> + * <li>Every image has between 0..n tags, see + * <a href="https://docs.docker.com/engine/reference/commandline/tag/">docker tag</a> for more</li> + * <li>Every image has 0..1 parent ids</li> + * </ul> + * + * <p>Limitations: + * <ol> + * <li>Image that has more than 1 tag cannot be deleted by ID</li> + * <li>Deleting a tag of an image with multiple tags will only remove the tag, the image with the + * remaining tags will remain</li> + * <li>Deleting the last tag of an image will delete the entire image.</li> + * <li>Image cannot be deleted if:</li> + * <ol> + * <li>It has 1 or more children</li> + * <li>A container uses it</li> + * </ol> + * </ol> + * + * @author freva + * @author mpolden + */ +public class ContainerImagePruner { + + private static final Logger LOG = Logger.getLogger(ContainerImagePruner.class.getName()); + + private final Clock clock; + private final ContainerEngine containerEngine; + + private final Map<String, Instant> lastTimeUsedByImageId = new ConcurrentHashMap<>(); + + public ContainerImagePruner(ContainerEngine containerEngine, Clock clock) { + this.containerEngine = Objects.requireNonNull(containerEngine); + this.clock = Objects.requireNonNull(clock); + } + + /** + * Remove unused container images. + * + * Note: This method must be called frequently enough to see all containers to know which images are being used. + * + * @param excludedRefs List of image references (tag or id) to keep, regardless of their status + * @param minAge Minimum age of for image to be removed + * @return true if any image was remove + */ + public boolean removeUnusedImages(TaskContext context, List<String> excludedRefs, Duration minAge) { + List<Image> images = containerEngine.listImages(context); + List<PartialContainer> containers = containerEngine.listContainers(context); + + Map<String, Image> imageByImageId = images.stream().collect(Collectors.toMap(Image::id, Function.identity())); + + // Find all the ancestors for every local image id, this includes the image id itself + Map<String, Set<String>> ancestorsByImageId = images.stream() + .map(Image::id) + .collect(Collectors.toMap( + Function.identity(), + imageId -> { + Set<String> ancestors = new HashSet<>(); + while (!Strings.isNullOrEmpty(imageId)) { + ancestors.add(imageId); + imageId = Optional.of(imageId).map(imageByImageId::get).flatMap(Image::parentId).orElse(null); + } + return ancestors; + } + )); + + // The set of images that we want to keep is: + // 1. The images that were recently used + // 2. The images that were explicitly excluded + // 3. All of the ancestors of from images in 1 & 2 + Set<String> imagesToKeep = Stream + .concat( + updateRecentlyUsedImageIds(images, containers, minAge).stream(), // 1 + referencesToImages(excludedRefs, images).stream()) // 2 + .flatMap(imageId -> ancestorsByImageId.getOrDefault(imageId, Collections.emptySet()).stream()) // 3 + .collect(Collectors.toSet()); + + // Now take all the images we have locally + return imageByImageId.keySet().stream() + + // filter out images we want to keep + .filter(imageId -> !imagesToKeep.contains(imageId)) + + // Sort images in an order is safe to delete (children before parents) + .sorted((o1, o2) -> { + // If image2 is parent of image1, image1 comes before image2 + if (imageIsDescendantOf(imageByImageId, o1, o2)) return -1; + // If image1 is parent of image2, image2 comes before image1 + else if (imageIsDescendantOf(imageByImageId, o2, o1)) return 1; + // Otherwise, sort lexicographically by image name (For testing) + else return o1.compareTo(o2); + }) + + // Map back to image + .map(imageByImageId::get) + + // Delete image, if successful also remove last usage time to prevent re-download being instantly deleted + .peek(image -> { + // Deleting an image by image ID with multiple tags will fail -> delete by tags instead + referencesOf(image).forEach(imageReference -> { + LOG.info("Deleting unused image " + imageReference); + containerEngine.removeImage(context, imageReference); + }); + lastTimeUsedByImageId.remove(image.id()); + }) + .count() > 0; + } + + private Set<String> updateRecentlyUsedImageIds(List<Image> images, List<PartialContainer> containers, Duration minImageAgeToDelete) { + final Instant now = clock.instant(); + + // Add any already downloaded image to the list once + images.forEach(image -> lastTimeUsedByImageId.putIfAbsent(image.id(), now)); + + // Update last used time for all current containers + containers.forEach(container -> lastTimeUsedByImageId.put(container.imageId(), now)); + + // Return list of images that have been used within minImageAgeToDelete + return lastTimeUsedByImageId.entrySet().stream() + .filter(entry -> Duration.between(entry.getValue(), now).minus(minImageAgeToDelete).isNegative()) + .map(Map.Entry::getKey) + .collect(Collectors.toSet()); + } + + /** + * Map given references (image tags or ids) to images. + * + * This only works if the given tag is actually present locally. This is fine, because if it isn't - we can't delete + * it, so no harm done. + */ + private Set<String> referencesToImages(List<String> references, List<Image> images) { + Map<String, String> imageIdByImageTag = images.stream() + .flatMap(image -> referencesOf(image).stream() + .map(repoTag -> new Pair<>(repoTag, image.id()))) + .collect(Collectors.toMap(Pair::getFirst, Pair::getSecond)); + + return references.stream() + .map(ref -> imageIdByImageTag.getOrDefault(ref, ref)) + .collect(Collectors.toUnmodifiableSet()); + } + + /** + * @return true if ancestor is a parent or grand-parent or grand-grand-parent, etc. of img + */ + private boolean imageIsDescendantOf(Map<String, Image> imageIdToImage, String img, String ancestor) { + while (imageIdToImage.containsKey(img)) { + img = imageIdToImage.get(img).parentId().orElse(null); + if (img == null) return false; + if (ancestor.equals(img)) return true; + } + return false; + } + + /** + * Returns list of references to given image, preferring image tag(s), if any exist. + * + * If image is untagged, its ID is returned instead. + */ + private static List<String> referencesOf(Image image) { + if (image.names().isEmpty()) { + return List.of(image.id()); + } + return image.names().stream() + .map(tag -> { + if ("<none>:<none>".equals(tag)) return image.id(); + return tag; + }) + .collect(Collectors.toUnmodifiableList()); + } + +} diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/container/image/Image.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/container/image/Image.java new file mode 100644 index 00000000000..a1b0ae0d809 --- /dev/null +++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/container/image/Image.java @@ -0,0 +1,58 @@ +// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.vespa.hosted.node.admin.container.image; + +import java.util.List; +import java.util.Objects; +import java.util.Optional; + +/** + * This represents a container image that exists locally. + * + * @author mpolden + */ +public class Image { + + private final String id; + private final Optional<String> parentId; + private final List<String> names; + + public Image(String id, Optional<String> parentId, List<String> names) { + this.id = Objects.requireNonNull(id); + this.parentId = Objects.requireNonNull(parentId); + this.names = List.copyOf(Objects.requireNonNull(names)); + } + + /** The identifier of this image */ + public String id() { + return id; + } + + /** ID of the parent image of this, if any */ + public Optional<String> parentId() { + return parentId; + } + + /** Names for this image, such as tags or digests */ + public List<String> names() { + return names; + } + + @Override + public boolean equals(Object o) { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + Image image = (Image) o; + return id.equals(image.id) && parentId.equals(image.parentId) && names.equals(image.names); + } + + @Override + public int hashCode() { + return Objects.hash(id, parentId, names); + } + + @Override + public String toString() { + return "image " + id; + } + +} diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/container/image/package-info.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/container/image/package-info.java new file mode 100644 index 00000000000..9ba1d7d6e7f --- /dev/null +++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/container/image/package-info.java @@ -0,0 +1,8 @@ +// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +/** + * @author mpolden + */ +@ExportPackage +package com.yahoo.vespa.hosted.node.admin.container.image; + +import com.yahoo.osgi.annotation.ExportPackage; diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/StorageMaintainer.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/StorageMaintainer.java index 15705557447..5fd7d2e4119 100644 --- a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/StorageMaintainer.java +++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/StorageMaintainer.java @@ -227,7 +227,7 @@ public class StorageMaintainer { } private String getDockerImage(NodeAgentContext context, Optional<Container> container) { - return container.map(c -> c.image.asString()) + return container.map(c -> c.image().asString()) .orElse(context.node().currentDockerImage() .map(DockerImage::asString) .orElse("<none>") diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/coredump/CoreCollector.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/coredump/CoreCollector.java index 16b7e462ad3..12dcc04b952 100644 --- a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/coredump/CoreCollector.java +++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/coredump/CoreCollector.java @@ -1,10 +1,10 @@ // Copyright 2018 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. package com.yahoo.vespa.hosted.node.admin.maintenance.coredump; -import com.yahoo.vespa.hosted.node.admin.container.ProcessResult; import com.yahoo.vespa.hosted.node.admin.container.ContainerOperations; import com.yahoo.vespa.hosted.node.admin.nodeadmin.ConvergenceException; import com.yahoo.vespa.hosted.node.admin.nodeagent.NodeAgentContext; +import com.yahoo.vespa.hosted.node.admin.task.util.process.CommandResult; import java.nio.file.Path; import java.nio.file.Paths; @@ -45,12 +45,12 @@ public class CoreCollector { String getGdbPath(NodeAgentContext context) { // TODO: Remove when we do not have any devtoolset-9 installs left String[] command_rhel7_dt9 = {"stat", GDB_PATH_RHEL7_DT9}; - if (docker.executeCommandInContainerAsRoot(context, command_rhel7_dt9).getExitStatus() == 0) { + if (docker.executeCommandInContainerAsRoot(context, command_rhel7_dt9).getExitCode() == 0) { return GDB_PATH_RHEL7_DT9; } String[] command_rhel7_dt10 = {"stat", GDB_PATH_RHEL7_DT10}; - if (docker.executeCommandInContainerAsRoot(context, command_rhel7_dt10).getExitStatus() == 0) { + if (docker.executeCommandInContainerAsRoot(context, command_rhel7_dt10).getExitCode() == 0) { return GDB_PATH_RHEL7_DT10; } @@ -60,12 +60,12 @@ public class CoreCollector { Path readBinPathFallback(NodeAgentContext context, Path coredumpPath) { String command = getGdbPath(context) + " -n -batch -core " + coredumpPath + " | grep \'^Core was generated by\'"; String[] wrappedCommand = {"/bin/sh", "-c", command}; - ProcessResult result = docker.executeCommandInContainerAsRoot(context, wrappedCommand); + CommandResult result = docker.executeCommandInContainerAsRoot(context, wrappedCommand); Matcher matcher = CORE_GENERATOR_PATH_PATTERN.matcher(result.getOutput()); if (! matcher.find()) { throw new ConvergenceException(String.format("Failed to extract binary path from GDB, result: %s, command: %s", - result, Arrays.toString(wrappedCommand))); + asString(result), Arrays.toString(wrappedCommand))); } return Paths.get(matcher.group("path").split(" ")[0]); } @@ -73,9 +73,9 @@ public class CoreCollector { Path readBinPath(NodeAgentContext context, Path coredumpPath) { String[] command = {"file", coredumpPath.toString()}; try { - ProcessResult result = docker.executeCommandInContainerAsRoot(context, command); - if (result.getExitStatus() != 0) { - throw new ConvergenceException("file command failed with " + result); + CommandResult result = docker.executeCommandInContainerAsRoot(context, command); + if (result.getExitCode() != 0) { + throw new ConvergenceException("file command failed with " + asString(result)); } Matcher execfnMatcher = EXECFN_PATH_PATTERN.matcher(result.getOutput()); @@ -99,9 +99,9 @@ public class CoreCollector { String threads = allThreads ? "thread apply all bt" : "bt"; String[] command = {getGdbPath(context), "-n", "-ex", threads, "-batch", binPath.toString(), coredumpPath.toString()}; - ProcessResult result = docker.executeCommandInContainerAsRoot(context, command); - if (result.getExitStatus() != 0) - throw new ConvergenceException("Failed to read backtrace " + result + ", Command: " + Arrays.toString(command)); + CommandResult result = docker.executeCommandInContainerAsRoot(context, command); + if (result.getExitCode() != 0) + throw new ConvergenceException("Failed to read backtrace " + asString(result) + ", Command: " + Arrays.toString(command)); return List.of(result.getOutput().split("\n")); } @@ -109,9 +109,9 @@ public class CoreCollector { List<String> readJstack(NodeAgentContext context, Path coredumpPath, Path binPath) { String[] command = {"jhsdb", "jstack", "--exe", binPath.toString(), "--core", coredumpPath.toString()}; - ProcessResult result = docker.executeCommandInContainerAsRoot(context, command); - if (result.getExitStatus() != 0) - throw new ConvergenceException("Failed to read jstack " + result + ", Command: " + Arrays.toString(command)); + CommandResult result = docker.executeCommandInContainerAsRoot(context, command); + if (result.getExitCode() != 0) + throw new ConvergenceException("Failed to read jstack " + asString(result) + ", Command: " + Arrays.toString(command)); return List.of(result.getOutput().split("\n")); } @@ -144,4 +144,9 @@ public class CoreCollector { } return data; } + + private String asString(CommandResult result) { + return "exit status " + result.getExitCode() + ", output '" + result.getOutput() + "'"; + } + } diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImpl.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImpl.java index a41bfac57c0..b9855bf3bfc 100644 --- a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImpl.java +++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImpl.java @@ -263,7 +263,7 @@ public class NodeAgentImpl implements NodeAgent { private Optional<String> shouldRestartServices( NodeAgentContext context, Container existingContainer) { NodeSpec node = context.node(); - if (!existingContainer.state.isRunning() || node.state() != NodeState.active) return Optional.empty(); + if (!existingContainer.state().isRunning() || node.state() != NodeState.active) return Optional.empty(); // Restart generation is only optional because it does not exist for unallocated nodes if (currentRestartGeneration.get() < node.wantedRestartGeneration().get()) { @@ -315,12 +315,12 @@ public class NodeAgentImpl implements NodeAgent { reasons.add("Node in state " + nodeState + ", container should no longer be running"); if (context.node().wantedDockerImage().isPresent() && - !context.node().wantedDockerImage().get().equals(existingContainer.image)) { + !context.node().wantedDockerImage().get().equals(existingContainer.image())) { reasons.add("The node is supposed to run a new Docker image: " - + existingContainer.image.asString() + " -> " + context.node().wantedDockerImage().get().asString()); + + existingContainer.image().asString() + " -> " + context.node().wantedDockerImage().get().asString()); } - if (!existingContainer.state.isRunning()) + if (!existingContainer.state().isRunning()) reasons.add("Container no longer running"); if (currentRebootGeneration < context.node().wantedRebootGeneration()) { @@ -329,9 +329,9 @@ public class NodeAgentImpl implements NodeAgent { } ContainerResources wantedContainerResources = getContainerResources(context); - if (!wantedContainerResources.equalsMemory(existingContainer.resources)) { + if (!wantedContainerResources.equalsMemory(existingContainer.resources())) { reasons.add("Container should be running with different memory allocation, wanted: " + - wantedContainerResources.toStringMemory() + ", actual: " + existingContainer.resources.toStringMemory()); + wantedContainerResources.toStringMemory() + ", actual: " + existingContainer.resources().toStringMemory()); } if (containerState == STARTING) @@ -343,7 +343,7 @@ public class NodeAgentImpl implements NodeAgent { private void removeContainer(NodeAgentContext context, Container existingContainer, List<String> reasons, boolean alreadySuspended) { context.log(logger, "Will remove container: " + String.join(", ", reasons)); - if (existingContainer.state.isRunning()) { + if (existingContainer.state().isRunning()) { if (!alreadySuspended) { orchestratorSuspendNode(context); } @@ -373,12 +373,12 @@ public class NodeAgentImpl implements NodeAgent { .orElse(true)) return existingContainer; - if (wantedContainerResources.equalsCpu(existingContainer.resources)) return existingContainer; + if (wantedContainerResources.equalsCpu(existingContainer.resources())) return existingContainer; context.log(logger, "Container should be running with different CPU allocation, wanted: %s, current: %s", - wantedContainerResources.toStringCpu(), existingContainer.resources.toStringCpu()); + wantedContainerResources.toStringCpu(), existingContainer.resources().toStringCpu()); // Only update CPU resources - containerOperations.updateContainer(context, existingContainer.id(), wantedContainerResources.withMemoryBytes(existingContainer.resources.memoryBytes())); + containerOperations.updateContainer(context, existingContainer.id(), wantedContainerResources.withMemoryBytes(existingContainer.resources().memoryBytes())); return containerOperations.getContainer(context).orElseThrow(() -> new ConvergenceException("Did not find container that was just updated")); } @@ -402,7 +402,7 @@ public class NodeAgentImpl implements NodeAgent { private boolean downloadImageIfNeeded(NodeAgentContext context, Optional<Container> container) { NodeSpec node = context.node(); - if (node.wantedDockerImage().equals(container.map(c -> c.image))) return false; + if (node.wantedDockerImage().equals(container.map(c -> c.image()))) return false; RegistryCredentials credentials = registryCredentialsProvider.get(); return node.wantedDockerImage() @@ -480,7 +480,7 @@ public class NodeAgentImpl implements NodeAgent { firstSuccessfulHealthCheckInstant = Optional.of(clock.instant()); Duration timeLeft = Duration.between(clock.instant(), firstSuccessfulHealthCheckInstant.get().plus(warmUpDuration(context))); - if (!container.get().resources.equalsCpu(getContainerResources(context))) + if (!container.get().resources().equalsCpu(getContainerResources(context))) throw new ConvergenceException("Refusing to resume until warm up period ends (" + (timeLeft.isNegative() ? "next tick" : "in " + timeLeft) + ")"); } diff --git a/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/container/ContainerEngineMock.java b/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/container/ContainerEngineMock.java new file mode 100644 index 00000000000..1d077449ed6 --- /dev/null +++ b/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/container/ContainerEngineMock.java @@ -0,0 +1,206 @@ +// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.vespa.hosted.node.admin.container; + +import com.yahoo.config.provision.DockerImage; +import com.yahoo.vespa.hosted.node.admin.component.TaskContext; +import com.yahoo.vespa.hosted.node.admin.container.image.Image; +import com.yahoo.vespa.hosted.node.admin.nodeagent.ContainerData; +import com.yahoo.vespa.hosted.node.admin.nodeagent.NodeAgentContext; +import com.yahoo.vespa.hosted.node.admin.task.util.process.CommandResult; + +import java.time.Duration; +import java.util.List; +import java.util.Map; +import java.util.Objects; +import java.util.Optional; +import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.CountDownLatch; +import java.util.stream.Collectors; + +/** + * @author mpolden + */ +public class ContainerEngineMock implements ContainerEngine { + + private final Map<ContainerName, Container> containers = new ConcurrentHashMap<>(); + private final Map<String, ImageDownload> images = new ConcurrentHashMap<>(); + private boolean asyncImageDownload = false; + + public ContainerEngineMock asyncImageDownload(boolean enabled) { + this.asyncImageDownload = enabled; + return this; + } + + public ContainerEngineMock completeDownloadOf(DockerImage image) { + String imageId = image.asString(); + ImageDownload download; + while ((download = images.get(imageId)) == null); + download.complete(); + return this; + } + + public ContainerEngineMock setImages(List<Image> images) { + this.images.clear(); + for (var image : images) { + ImageDownload imageDownload = new ImageDownload(image); + imageDownload.complete(); + this.images.put(image.id(), imageDownload); + } + return this; + } + + public ContainerEngineMock addContainers(List<Container> containers) { + for (var container : containers) { + if (this.containers.containsKey(container.name())) { + throw new IllegalArgumentException("Container " + container.name() + " already exists"); + } + this.containers.put(container.name(), container); + } + return this; + } + + public ContainerEngineMock addContainer(Container container) { + return addContainers(List.of(container)); + } + + @Override + public void createContainer(NodeAgentContext context, ContainerData containerData, ContainerResources containerResources) { + addContainer(createContainer(context, PartialContainer.State.created, containerResources)); + } + + @Override + public void startContainer(NodeAgentContext context) { + Container container = requireContainer(context.containerName(), PartialContainer.State.created); + Container newContainer = createContainer(context, PartialContainer.State.running, container.resources()); + containers.put(newContainer.name(), newContainer); + } + + @Override + public void removeContainer(TaskContext context, PartialContainer container) { + requireContainer(container.name()); + containers.remove(container.name()); + } + + @Override + public void updateContainer(NodeAgentContext context, ContainerId containerId, ContainerResources containerResources) { + Container container = requireContainer(context.containerName()); + containers.put(container.name(), new Container(containerId, container.name(), container.state(), + container.imageId(), container.image(), + container.labels(), container.pid(), + container.conmonPid(), container.hostname(), + containerResources, container.networks(), + container.managed())); + } + + @Override + public Optional<Container> getContainer(NodeAgentContext context) { + return Optional.ofNullable(containers.get(context.containerName())); + } + + @Override + public List<PartialContainer> listContainers(TaskContext context) { + return List.copyOf(containers.values()); + } + + @Override + public String networkInterface(NodeAgentContext context) { + return "eth0"; + } + + @Override + public CommandResult executeAsRoot(NodeAgentContext context, Duration timeout, String... command) { + return new CommandResult(null, 0, ""); + } + + @Override + public CommandResult executeInNetworkNamespace(NodeAgentContext context, String... command) { + return new CommandResult(null, 0, ""); + } + + @Override + public void pullImage(TaskContext context, DockerImage image, RegistryCredentials registryCredentials) { + String imageId = image.asString(); + ImageDownload imageDownload = images.computeIfAbsent(imageId, (ignored) -> new ImageDownload(new Image(imageId, Optional.empty(), List.of(imageId)))); + if (!asyncImageDownload) { + imageDownload.complete(); + } + imageDownload.awaitCompletion(); + } + + @Override + public boolean hasImage(TaskContext context, DockerImage image) { + ImageDownload download = images.get(image.asString()); + return download != null && download.isComplete(); + } + + @Override + public void removeImage(TaskContext context, String id) { + images.remove(id); + } + + @Override + public List<Image> listImages(TaskContext context) { + return images.values().stream() + .filter(ImageDownload::isComplete) + .map(ImageDownload::image) + .collect(Collectors.toUnmodifiableList()); + } + + private Container requireContainer(ContainerName name) { + return requireContainer(name, null); + } + + private Container requireContainer(ContainerName name, PartialContainer.State wantedState) { + Container container = containers.get(name); + if (container == null) throw new IllegalArgumentException("No such container: " + name); + if (wantedState != null && container.state() != wantedState) throw new IllegalArgumentException("Container is " + container.state() + ", wanted " + wantedState); + return container; + } + + public Container createContainer(NodeAgentContext context, PartialContainer.State state, ContainerResources containerResources) { + return new Container(new ContainerId("id-of-" + context.containerName()), + context.containerName(), + state, + "image-id", + context.node().wantedDockerImage().get(), + Map.of(), + 41, + 42, + context.hostname().value(), + containerResources, + List.of(), + true); + } + + private static class ImageDownload { + + private final Image image; + private final CountDownLatch done = new CountDownLatch(1); + + ImageDownload(Image image) { + this.image = Objects.requireNonNull(image); + } + + Image image() { + return image; + } + + boolean isComplete() { + return done.getCount() == 0; + } + + void complete() { + done.countDown(); + } + + void awaitCompletion() { + try { + done.await(); + } catch (InterruptedException e) { + throw new RuntimeException(e); + } + } + + } + +} diff --git a/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/container/ContainerOperationsTest.java b/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/container/ContainerOperationsTest.java new file mode 100644 index 00000000000..666c5fb31f9 --- /dev/null +++ b/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/container/ContainerOperationsTest.java @@ -0,0 +1,64 @@ +// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.vespa.hosted.node.admin.container; + +import com.yahoo.config.provision.DockerImage; +import com.yahoo.vespa.hosted.node.admin.component.TestTaskContext; +import com.yahoo.vespa.test.file.TestFileSystem; +import org.junit.Test; + +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.stream.Collectors; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertTrue; + +/** + * @author mpolden + */ +public class ContainerOperationsTest { + + private final TestTaskContext context = new TestTaskContext(); + private final ContainerEngineMock containerEngine = new ContainerEngineMock(); + private final ContainerOperations containerOperations = new ContainerOperations(containerEngine, TestFileSystem.create()); + + @Test + public void no_managed_containers_running() { + Container c1 = createContainer("c1", true); + Container c2 = createContainer("c2", false); + + containerEngine.addContainer(c1); + assertFalse(containerOperations.noManagedContainersRunning(context)); + + containerEngine.removeContainer(context, c1); + assertTrue(containerOperations.noManagedContainersRunning(context)); + + containerEngine.addContainer(c2); + assertTrue(containerOperations.noManagedContainersRunning(context)); + } + + @Test + public void retain_managed_containers() { + Container c1 = createContainer("c1", true); + Container c2 = createContainer("c2", true); + Container c3 = createContainer("c3", false); + containerEngine.addContainers(List.of(c1, c2, c3)); + + assertEquals(3, containerEngine.listContainers(context).size()); + containerOperations.retainManagedContainers(context, Set.of(c1.name())); + + assertEquals(List.of(c1.name(), c3.name()), containerEngine.listContainers(context).stream() + .map(PartialContainer::name) + .sorted() + .collect(Collectors.toList())); + } + + private Container createContainer(String name, boolean managed) { + return new Container(new ContainerId("id-of-" + name), new ContainerName(name), PartialContainer.State.running, + "image-id", DockerImage.EMPTY, Map.of(), 42, 43, name, + ContainerResources.UNLIMITED, List.of(), managed); + } + +} diff --git a/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/container/ContainerStatsCollectorTest.java b/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/container/ContainerStatsCollectorTest.java new file mode 100644 index 00000000000..0bb7aee8e0a --- /dev/null +++ b/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/container/ContainerStatsCollectorTest.java @@ -0,0 +1,151 @@ +// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.vespa.hosted.node.admin.container; + +import com.yahoo.vespa.test.file.TestFileSystem; +import org.junit.Test; + +import java.io.IOException; +import java.nio.file.FileSystem; +import java.nio.file.Files; +import java.nio.file.Path; +import java.util.Map; +import java.util.Optional; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertTrue; + +/** + * @author mpolden + */ +public class ContainerStatsCollectorTest { + + private final FileSystem fileSystem = TestFileSystem.create(); + + @Test + public void collect() throws Exception { + ContainerStatsCollector collector = new ContainerStatsCollector(fileSystem); + ContainerId containerId = new ContainerId("id1"); + int containerPid = 42; + assertTrue("No stats found", collector.collect(containerId, containerPid, "eth0").isEmpty()); + + mockMemoryStats(containerId); + mockCpuStats(containerId); + mockNetworkStats(containerPid); + + Optional<ContainerStats> stats = collector.collect(containerId, containerPid, "eth0"); + assertTrue(stats.isPresent()); + assertEquals(new ContainerStats.CpuStats(24, 6049374780000000L, 691675615472L, + 262190000000L, 3L, 1L, 2L), + stats.get().getCpuStats()); + assertEquals(new ContainerStats.MemoryStats(470790144L, 1228017664L, 2147483648L), + stats.get().getMemoryStats()); + assertEquals(Map.of("eth0", new ContainerStats.NetworkStats(22280813L, 4L, 3L, + 19859383L, 6L, 5L)), + stats.get().getNetworks()); + } + + private void mockNetworkStats(int pid) throws IOException { + Path dev = fileSystem.getPath("/proc/" + pid + "/net/dev"); + Files.createDirectories(dev.getParent()); + Files.writeString(dev, "Inter-| Receive | Transmit\n" + + " face |bytes packets errs drop fifo frame compressed multicast|bytes packets errs drop fifo colls carrier compressed\n" + + " lo: 36289258 149700 0 0 0 0 0 0 36289258 149700 0 0 0 0 0 0\n" + + " eth0: 22280813 118083 3 4 0 0 0 0 19859383 115415 5 6 0 0 0 0\n"); + } + + private void mockMemoryStats(ContainerId containerId) throws IOException { + Path root = fileSystem.getPath("/sys/fs/cgroup/memory/machine.slice/libpod-" + containerId + ".scope"); + Files.createDirectories(root); + + Files.writeString(root.resolve("memory.limit_in_bytes"), "2147483648\n"); + Files.writeString(root.resolve("memory.usage_in_bytes"), "1228017664\n"); + Files.writeString(root.resolve("memory.stat"), "cache 470790144\n" + + "rss 698699776\n" + + "rss_huge 526385152\n" + + "shmem 0\n" + + "mapped_file 811008\n" + + "dirty 405504\n" + + "writeback 0\n" + + "swap 0\n" + + "pgpgin 6938085\n" + + "pgpgout 6780573\n" + + "pgfault 14343186\n" + + "pgmajfault 0\n" + + "inactive_anon 0\n" + + "active_anon 699289600\n" + + "inactive_file 455516160\n" + + "active_file 13787136\n" + + "unevictable 0\n" + + "hierarchical_memory_limit 2147483648\n" + + "hierarchical_memsw_limit 4294967296\n" + + "total_cache 470790144\n" + + "total_rss 698699776\n" + + "total_rss_huge 526385152\n" + + "total_shmem 0\n" + + "total_mapped_file 811008\n" + + "total_dirty 405504\n" + + "total_writeback 0\n" + + "total_swap 0\n" + + "total_pgpgin 6938085\n" + + "total_pgpgout 6780573\n" + + "total_pgfault 14343186\n" + + "total_pgmajfault 0\n" + + "total_inactive_anon 0\n" + + "total_active_anon 699289600\n" + + "total_inactive_file 455516160\n" + + "total_active_file 13787136\n" + + "total_unevictable 0\n"); + } + + private void mockCpuStats(ContainerId containerId) throws IOException { + Path root = fileSystem.getPath("/sys/fs/cgroup/cpuacct/machine.slice/libpod-" + containerId + ".scope"); + Path proc = fileSystem.getPath("/proc"); + Files.createDirectories(root); + Files.createDirectories(proc); + Files.writeString(root.resolve("cpu.stat"), "nr_periods 1\n" + + "nr_throttled 2\n" + + "throttled_time 3\n"); + Files.writeString(root.resolve("cpuacct.usage_percpu"), "25801608855 22529436415 25293652376 26212081533 " + + "27545883290 25357818592 33464821448 32568003867 " + + "28916742231 31771772292 34418037242 38417072233 " + + "26069101127 24568838237 23683334366 26824607997 " + + "24289870206 22249389818 32683986446 32444831154 " + + "30488394217 26840956322 31633747261 30838696584\n"); + Files.writeString(root.resolve("cpuacct.usage"), "691675615472\n"); + Files.writeString(root.resolve("cpuacct.stat"), "user 40900\n" + + "system 26219\n"); + Files.writeString(proc.resolve("stat"), "cpu 7991366 978222 2346238 565556517 1935450 25514479 615206 0 0 0\n" + + "cpu0 387906 61529 99088 23516506 42258 1063359 29882 0 0 0\n" + + "cpu1 271253 49383 86149 23655234 41703 1061416 31885 0 0 0\n" + + "cpu2 349420 50987 93560 23571695 59437 1051977 24461 0 0 0\n" + + "cpu3 328107 50628 93406 23605135 44378 1048549 30199 0 0 0\n" + + "cpu4 267474 50404 99253 23606041 113094 1038572 26494 0 0 0\n" + + "cpu5 309584 50677 94284 23550372 132616 1033661 29436 0 0 0\n" + + "cpu6 477926 56888 121251 23367023 83121 1074930 28818 0 0 0\n" + + "cpu7 335335 29350 106130 23551107 95606 1066394 26156 0 0 0\n" + + "cpu8 323678 28629 99171 23586501 82183 1064708 25403 0 0 0\n" + + "cpu9 329805 27516 98538 23579458 89235 1061561 25140 0 0 0\n" + + "cpu10 291536 26455 93934 23642345 81282 1049736 25228 0 0 0\n" + + "cpu11 271103 25302 90630 23663641 85711 1048781 24291 0 0 0\n" + + "cpu12 323634 63392 100406 23465340 132684 1089157 28319 0 0 0\n" + + "cpu13 348085 49568 100772 23490388 114190 1079474 20948 0 0 0\n" + + "cpu14 310712 51208 90461 23547980 101601 1071940 26712 0 0 0\n" + + "cpu15 360405 52754 94620 23524878 79851 1062050 26836 0 0 0\n" + + "cpu16 367893 52141 98074 23541314 57500 1058968 25242 0 0 0\n" + + "cpu17 412756 51486 101592 23515056 47653 1044874 27467 0 0 0\n" + + "cpu18 287307 25478 106011 23599505 79848 1089812 23160 0 0 0\n" + + "cpu19 275001 24421 98338 23628694 79675 1084074 22083 0 0 0\n" + + "cpu20 288038 24805 94432 23629908 74735 1078501 21915 0 0 0\n" + + "cpu21 295373 25017 91344 23628585 75282 1071019 22026 0 0 0\n" + + "cpu22 326739 25588 90385 23608217 69186 1068494 21108 0 0 0\n" + + "cpu23 452284 24602 104397 23481583 72612 1052462 21985 0 0 0\n" + + "intr 6645352968 64 0 0 0 1481 0 0 0 1 0 0 0 0 0 0 0 39 0 0 0 0 0 0 37 0 0 0 0 0 0 0 0 4334106 1 6949071 5814662 5415344 6939471 6961483 6358810 5271953 6718644 0 126114 126114 126114 126114 126114 126114 126114 126114 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n" + + "ctxt 2495530303\n" + + "btime 1611928223\n" + + "processes 4839481\n" + + "procs_running 4\n" + + "procs_blocked 0\n" + + "softirq 2202631388 4 20504999 46734 54405637 4330276 0 6951 1664780312 10130 458546345\n"); + } + +} diff --git a/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/container/ProcessResultTest.java b/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/container/ProcessResultTest.java deleted file mode 100644 index f7b832bd566..00000000000 --- a/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/container/ProcessResultTest.java +++ /dev/null @@ -1,24 +0,0 @@ -// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -package com.yahoo.vespa.hosted.node.admin.container; - -import org.junit.Test; - -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertFalse; -import static org.junit.Assert.assertTrue; - -public class ProcessResultTest { - @Test - public void testBasicProperties() { - ProcessResult processResult = new ProcessResult(0, "foo", "bar"); - assertEquals(0, processResult.getExitStatus()); - assertEquals("foo", processResult.getOutput()); - assertTrue(processResult.isSuccess()); - } - - @Test - public void testSuccessFails() { - ProcessResult processResult = new ProcessResult(1, "foo", "bar"); - assertFalse(processResult.isSuccess()); - } -} diff --git a/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/container/image/ContainerImageDownloaderTest.java b/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/container/image/ContainerImageDownloaderTest.java new file mode 100644 index 00000000000..f50f2b8b053 --- /dev/null +++ b/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/container/image/ContainerImageDownloaderTest.java @@ -0,0 +1,34 @@ +// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.vespa.hosted.node.admin.container.image; + +import com.yahoo.config.provision.DockerImage; +import com.yahoo.vespa.hosted.node.admin.component.TaskContext; +import com.yahoo.vespa.hosted.node.admin.component.TestTaskContext; +import com.yahoo.vespa.hosted.node.admin.container.ContainerEngineMock; +import com.yahoo.vespa.hosted.node.admin.container.RegistryCredentials; +import org.junit.Test; + +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertTrue; + +/** + * @author mpolden + */ +public class ContainerImageDownloaderTest { + + @Test(timeout = 5_000) + public void test_download() { + ContainerEngineMock podman = new ContainerEngineMock().asyncImageDownload(true); + ContainerImageDownloader downloader = new ContainerImageDownloader(podman); + TaskContext context = new TestTaskContext(); + DockerImage image = DockerImage.fromString("registry.example.com/vespa:7.42"); + + assertFalse("Download started", downloader.get(context, image, RegistryCredentials.none)); + assertFalse("Download pending", downloader.get(context, image, RegistryCredentials.none)); + podman.completeDownloadOf(image); + boolean downloadCompleted; + while (!(downloadCompleted = downloader.get(context, image, RegistryCredentials.none))); + assertTrue("Download completed", downloadCompleted); + } + +} diff --git a/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/container/image/ContainerImagePrunerTest.java b/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/container/image/ContainerImagePrunerTest.java new file mode 100644 index 00000000000..f6d941c4299 --- /dev/null +++ b/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/container/image/ContainerImagePrunerTest.java @@ -0,0 +1,238 @@ +// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.vespa.hosted.node.admin.container.image; + +import com.yahoo.config.provision.DockerImage; +import com.yahoo.test.ManualClock; +import com.yahoo.vespa.hosted.node.admin.component.TaskContext; +import com.yahoo.vespa.hosted.node.admin.component.TestTaskContext; +import com.yahoo.vespa.hosted.node.admin.container.Container; +import com.yahoo.vespa.hosted.node.admin.container.ContainerEngineMock; +import com.yahoo.vespa.hosted.node.admin.container.ContainerId; +import com.yahoo.vespa.hosted.node.admin.container.ContainerName; +import com.yahoo.vespa.hosted.node.admin.container.ContainerResources; +import com.yahoo.vespa.hosted.node.admin.container.image.ContainerImagePruner; +import com.yahoo.vespa.hosted.node.admin.container.image.Image; +import org.junit.Test; + +import java.time.Duration; +import java.util.Arrays; +import java.util.Collections; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Optional; + +import static org.junit.Assert.assertTrue; + +/** + * @author freva + * @author mpolden + */ +public class ContainerImagePrunerTest { + + private final Tester tester = new Tester(); + + @Test + public void noImagesMeansNoUnusedImages() { + tester.withExistingImages() + .expectDeletedImages(); + } + + @Test + public void singleImageWithoutContainersIsUnused() { + tester.withExistingImages(image("image-1")) + // Even though nothing is using the image, we will keep it for at least 1h + .expectDeletedImagesAfterMinutes(0) + .expectDeletedImagesAfterMinutes(30) + .expectDeletedImagesAfterMinutes(30, "image-1"); + } + + @Test + public void singleImageWithContainerIsUsed() { + tester.withExistingImages(image("image-1")) + .withExistingContainers(container("container-1", "image-1")) + .expectDeletedImages(); + } + + + @Test + public void multipleUnusedImagesAreIdentified() { + tester.withExistingImages(image("image-1"), image("image-2")) + .expectDeletedImages("image-1", "image-2"); + } + + + @Test + public void multipleUnusedLeavesAreIdentified() { + tester.withExistingImages(image("parent-image"), + image("image-1", "parent-image"), + image("image-2", "parent-image")) + .expectDeletedImages("image-1", "image-2", "parent-image"); + } + + + @Test + public void unusedLeafWithUsedSiblingIsIdentified() { + tester.withExistingImages(image("parent-image"), + image("image-1", "parent-image", "latest"), + image("image-2", "parent-image", "1.24")) + .withExistingContainers(container("vespa-node-1", "image-1")) + .expectDeletedImages("1.24"); // Deleting the only tag will delete the image + } + + + @Test + public void unusedImagesWithMultipleTags() { + tester.withExistingImages(image("parent-image"), + image("image-1", "parent-image", "vespa-6", "vespa-6.28", "vespa:latest")) + .expectDeletedImages("vespa-6", "vespa-6.28", "vespa:latest", "parent-image"); + } + + + @Test + public void unusedImagesWithMultipleUntagged() { + tester.withExistingImages(image("image1", null, "<none>:<none>"), + image("image2", null, "<none>:<none>")) + .expectDeletedImages("image1", "image2"); + } + + + @Test + public void taggedImageWithNoContainersIsUnused() { + tester.withExistingImages(image("image-1", null, "vespa-6")) + .expectDeletedImages("vespa-6"); + } + + + @Test + public void unusedImagesWithSimpleImageGc() { + tester.withExistingImages(image("parent-image")) + .expectDeletedImagesAfterMinutes(30) + .withExistingImages(image("parent-image"), + image("image-1", "parent-image")) + .expectDeletedImagesAfterMinutes(0) + .expectDeletedImagesAfterMinutes(30) + // At this point, parent-image has been unused for 1h, but image-1 depends on parent-image and it has + // only been unused for 30m, so we cannot delete parent-image yet. 30 mins later both can be removed + .expectDeletedImagesAfterMinutes(30, "image-1", "parent-image"); + } + + + @Test + public void reDownloadingImageIsNotImmediatelyDeleted() { + tester.withExistingImages(image("image")) + .expectDeletedImages("image") // After 1h we delete image + .expectDeletedImagesAfterMinutes(0) // image is immediately re-downloaded, but is not deleted + .expectDeletedImagesAfterMinutes(10) + .expectDeletedImages("image"); // 1h after re-download it is deleted again + } + + + @Test + public void reDownloadingImageIsNotImmediatelyDeletedWhenDeletingByTag() { + tester.withExistingImages(image("image", null, "image-1", "my-tag")) + .expectDeletedImages("image-1", "my-tag") // After 1h we delete image + .expectDeletedImagesAfterMinutes(0) // image is immediately re-downloaded, but is not deleted + .expectDeletedImagesAfterMinutes(10) + .expectDeletedImages("image-1", "my-tag"); // 1h after re-download it is deleted again + } + + /** Same scenario as in {@link #multipleUnusedImagesAreIdentified()} */ + @Test + public void doesNotDeleteExcludedByIdImages() { + tester.withExistingImages(image("parent-image"), + image("image-1", "parent-image"), + image("image-2", "parent-image")) + // Normally, image-1 and parent-image should also be deleted, but because we exclude image-1 + // we cannot delete parent-image, so only image-2 is deleted + .expectDeletedImages(List.of("image-1"), "image-2"); + } + + /** Same as in {@link #doesNotDeleteExcludedByIdImages()} but with tags */ + @Test + public void doesNotDeleteExcludedByTagImages() { + tester.withExistingImages(image("parent-image", "rhel-6"), + image("image-1", "parent-image", "vespa:6.288.16"), + image("image-2", "parent-image", "vespa:6.289.94")) + .expectDeletedImages(List.of("vespa:6.288.16"), "vespa:6.289.94"); + } + + @Test + public void exludingNotDownloadedImageIsNoop() { + tester.withExistingImages(image("parent-image", "rhel-6"), + image("image-1", "parent-image", "vespa:6.288.16"), + image("image-2", "parent-image", "vespa:6.289.94")) + .expectDeletedImages(List.of("vespa:6.300.1"), "vespa:6.288.16", "vespa:6.289.94", "rhel-6"); + } + + private static Image image(String id) { + return image(id, null); + } + + private static Image image(String id, String parentId, String... tags) { + return new Image(id, Optional.ofNullable(parentId), List.of(tags)); + } + + private static Container container(String name, String imageId) { + return new Container(new ContainerId("id-of-" + name), new ContainerName(name), + Container.State.running, imageId, DockerImage.EMPTY, Map.of(), + 42, 43, name + ".example.com", ContainerResources.UNLIMITED, + List.of(), true); + } + + private static class Tester { + + private final ContainerEngineMock containerEngine = new ContainerEngineMock(); + private final TaskContext context = new TestTaskContext(); + private final ManualClock clock = new ManualClock(); + private final ContainerImagePruner pruner = new ContainerImagePruner(containerEngine, clock); + private final Map<String, Integer> removalCountByImageId = new HashMap<>(); + + private boolean initialized = false; + + private Tester withExistingImages(Image... images) { + containerEngine.setImages(List.of(images)); + return this; + } + + private Tester withExistingContainers(Container... containers) { + containerEngine.addContainers(List.of(containers)); + return this; + } + + private Tester expectDeletedImages(String... imageIds) { + return expectDeletedImagesAfterMinutes(60, imageIds); + } + + private Tester expectDeletedImages(List<String> excludedRefs, String... imageIds) { + return expectDeletedImagesAfterMinutes(60, excludedRefs, imageIds); + } + + private Tester expectDeletedImagesAfterMinutes(int minutesAfter, String... imageIds) { + return expectDeletedImagesAfterMinutes(minutesAfter, Collections.emptyList(), imageIds); + } + + private Tester expectDeletedImagesAfterMinutes(int minutesAfter, List<String> excludedRefs, String... imageIds) { + if (!initialized) { + // Run once with a very long expiry to initialize internal state of existing images + pruner.removeUnusedImages(context, List.of(), Duration.ofDays(999)); + initialized = true; + } + + clock.advance(Duration.ofMinutes(minutesAfter)); + + pruner.removeUnusedImages(context, excludedRefs, Duration.ofHours(1).minusSeconds(1)); + + Arrays.stream(imageIds) + .forEach(imageId -> { + int newValue = removalCountByImageId.getOrDefault(imageId, 0) + 1; + removalCountByImageId.put(imageId, newValue); + + assertTrue("Image " + imageId + " removed", + containerEngine.listImages(context).stream().noneMatch(image -> image.id().equals(imageId))); + }); + return this; + } + } + +} diff --git a/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/integration/ContainerOperationsMock.java b/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/integration/ContainerOperationsMock.java deleted file mode 100644 index a19e031e41f..00000000000 --- a/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/integration/ContainerOperationsMock.java +++ /dev/null @@ -1,155 +0,0 @@ -// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -package com.yahoo.vespa.hosted.node.admin.integration; - -import com.yahoo.config.provision.DockerImage; -import com.yahoo.vespa.hosted.node.admin.container.Container; -import com.yahoo.vespa.hosted.node.admin.container.ContainerId; -import com.yahoo.vespa.hosted.node.admin.container.ContainerName; -import com.yahoo.vespa.hosted.node.admin.container.ContainerResources; -import com.yahoo.vespa.hosted.node.admin.container.ContainerStats; -import com.yahoo.vespa.hosted.node.admin.container.ProcessResult; -import com.yahoo.vespa.hosted.node.admin.container.RegistryCredentials; -import com.yahoo.vespa.hosted.node.admin.component.TaskContext; -import com.yahoo.vespa.hosted.node.admin.container.ContainerOperations; -import com.yahoo.vespa.hosted.node.admin.nodeagent.ContainerData; -import com.yahoo.vespa.hosted.node.admin.nodeagent.NodeAgentContext; -import com.yahoo.vespa.hosted.node.admin.task.util.process.CommandResult; - -import java.time.Duration; -import java.util.HashMap; -import java.util.List; -import java.util.Map; -import java.util.Optional; -import java.util.Set; - -/** - * Mock with some simple logic - * - * @author freva - */ -public class ContainerOperationsMock implements ContainerOperations { - - public static final ContainerId CONTAINER_ID = new ContainerId("af345"); - - private final Map<ContainerName, Container> containersByContainerName = new HashMap<>(); - private final Object monitor = new Object(); - - @Override - public void createContainer(NodeAgentContext context, ContainerData containerData, ContainerResources containerResources) { - synchronized (monitor) { - containersByContainerName.put(context.containerName(), - new Container(CONTAINER_ID, - context.hostname().value(), - context.node().wantedDockerImage().get(), - containerResources, - context.containerName(), - Container.State.CREATED, - 2)); - } - } - - @Override - public void startContainer(NodeAgentContext context) { - synchronized (monitor) { - Optional<Container> container = getContainer(context); - if (container.isEmpty()) throw new IllegalArgumentException("Cannot start non-existent container " + context.containerName()); - containersByContainerName.put(context.containerName(), new Container(CONTAINER_ID, - container.get().hostname, - container.get().image, - container.get().resources, - container.get().name, - Container.State.RUNNING, - container.get().pid)); - } - } - - @Override - public void removeContainer(NodeAgentContext context, Container container) { - synchronized (monitor) { - containersByContainerName.remove(container.name); - } - } - - @Override - public void updateContainer(NodeAgentContext context, ContainerId containerId, ContainerResources containerResources) { - synchronized (monitor) { - Container container = containersByContainerName.get(context.containerName()); - containersByContainerName.put(context.containerName(), - new Container(container.id(), container.hostname, container.image, - containerResources, container.name, container.state, container.pid)); - } - } - - @Override - public Optional<Container> getContainer(NodeAgentContext context) { - synchronized (monitor) { - return Optional.ofNullable(containersByContainerName.get(context.containerName())); - } - } - - @Override - public boolean pullImageAsyncIfNeeded(TaskContext context, DockerImage dockerImage, RegistryCredentials registryCredentials) { - return false; - } - - @Override - public ProcessResult executeCommandInContainerAsRoot(NodeAgentContext context, String... command) { - return null; - } - - @Override - public ProcessResult executeCommandInContainerAsRoot(NodeAgentContext context, Long timeoutSeconds, String... command) { - return null; - } - - @Override - public CommandResult executeCommandInNetworkNamespace(NodeAgentContext context, String... command) { - return null; - } - - @Override - public String resumeNode(NodeAgentContext context) { - return ""; - } - - @Override - public String suspendNode(NodeAgentContext context) { - return ""; - } - - @Override - public String restartVespa(NodeAgentContext context) { - return ""; - } - - @Override - public String startServices(NodeAgentContext context) { - return ""; - } - - @Override - public String stopServices(NodeAgentContext context) { - return ""; - } - - @Override - public Optional<ContainerStats> getContainerStats(NodeAgentContext context) { - return Optional.empty(); - } - - @Override - public boolean noManagedContainersRunning(TaskContext context) { - return false; - } - - @Override - public boolean retainManagedContainers(TaskContext context, Set<ContainerName> containerNames) { - return false; - } - - @Override - public boolean deleteUnusedContainerImages(TaskContext context, List<DockerImage> excludes, Duration minImageAgeToDelete) { - return false; - } - -} diff --git a/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/integration/ContainerTester.java b/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/integration/ContainerTester.java index 9153afd8e54..4179f53370b 100644 --- a/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/integration/ContainerTester.java +++ b/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/integration/ContainerTester.java @@ -4,11 +4,13 @@ package com.yahoo.vespa.hosted.node.admin.integration; import com.yahoo.config.provision.HostName; import com.yahoo.config.provision.NodeType; import com.yahoo.vespa.flags.InMemoryFlagSource; +import com.yahoo.vespa.hosted.node.admin.configserver.noderepository.NodeSpec; +import com.yahoo.vespa.hosted.node.admin.configserver.orchestrator.Orchestrator; +import com.yahoo.vespa.hosted.node.admin.container.ContainerEngineMock; import com.yahoo.vespa.hosted.node.admin.container.ContainerName; +import com.yahoo.vespa.hosted.node.admin.container.ContainerOperations; import com.yahoo.vespa.hosted.node.admin.container.RegistryCredentials; import com.yahoo.vespa.hosted.node.admin.container.metrics.Metrics; -import com.yahoo.vespa.hosted.node.admin.configserver.noderepository.NodeSpec; -import com.yahoo.vespa.hosted.node.admin.configserver.orchestrator.Orchestrator; import com.yahoo.vespa.hosted.node.admin.maintenance.StorageMaintainer; import com.yahoo.vespa.hosted.node.admin.nodeadmin.NodeAdminImpl; import com.yahoo.vespa.hosted.node.admin.nodeadmin.NodeAdminStateUpdater; @@ -23,8 +25,6 @@ import org.mockito.InOrder; import org.mockito.Mockito; import java.nio.file.FileSystem; -import java.nio.file.Path; -import java.nio.file.Paths; import java.time.Clock; import java.time.Duration; import java.util.Collections; @@ -46,12 +46,11 @@ public class ContainerTester implements AutoCloseable { private static final Logger log = Logger.getLogger(ContainerTester.class.getName()); private static final Duration INTERVAL = Duration.ofMillis(10); - private static final Path PATH_TO_VESPA_HOME = Paths.get("/opt/vespa"); static final HostName HOST_HOSTNAME = HostName.from("host.test.yahoo.com"); private final Thread loopThread; - final ContainerOperationsMock containerOperations = spy(new ContainerOperationsMock()); + final ContainerOperations containerOperations = spy(new ContainerOperations(new ContainerEngineMock(), TestFileSystem.create())); final NodeRepoMock nodeRepository = spy(new NodeRepoMock()); final Orchestrator orchestrator = mock(Orchestrator.class); final StorageMaintainer storageMaintainer = mock(StorageMaintainer.class); diff --git a/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/maintenance/coredump/CoreCollectorTest.java b/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/maintenance/coredump/CoreCollectorTest.java index 3c22305d006..cfb9aaa4001 100644 --- a/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/maintenance/coredump/CoreCollectorTest.java +++ b/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/maintenance/coredump/CoreCollectorTest.java @@ -1,10 +1,10 @@ // Copyright 2018 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. package com.yahoo.vespa.hosted.node.admin.maintenance.coredump; -import com.yahoo.vespa.hosted.node.admin.container.ProcessResult; import com.yahoo.vespa.hosted.node.admin.container.ContainerOperations; import com.yahoo.vespa.hosted.node.admin.nodeagent.NodeAgentContext; import com.yahoo.vespa.hosted.node.admin.nodeagent.NodeAgentContextImpl; +import com.yahoo.vespa.hosted.node.admin.task.util.process.CommandResult; import org.junit.Test; import java.nio.file.Path; @@ -12,8 +12,8 @@ import java.nio.file.Paths; import java.util.List; import java.util.Map; -import static com.yahoo.vespa.hosted.node.admin.maintenance.coredump.CoreCollector.GDB_PATH_RHEL7_DT9; import static com.yahoo.vespa.hosted.node.admin.maintenance.coredump.CoreCollector.GDB_PATH_RHEL7_DT10; +import static com.yahoo.vespa.hosted.node.admin.maintenance.coredump.CoreCollector.GDB_PATH_RHEL7_DT9; import static com.yahoo.vespa.hosted.node.admin.maintenance.coredump.CoreCollector.GDB_PATH_RHEL8; import static com.yahoo.vespa.hosted.node.admin.maintenance.coredump.CoreCollector.JAVA_HEAP_DUMP_METADATA; import static org.junit.Assert.assertEquals; @@ -91,7 +91,7 @@ public class CoreCollectorTest { coreCollector.readBinPathFallback(context, TEST_CORE_PATH); fail("Expected not to be able to get bin path"); } catch (RuntimeException e) { - assertEquals("Failed to extract binary path from GDB, result: ProcessResult { exitStatus=1 output= errors=Error 123 }, command: " + + assertEquals("Failed to extract binary path from GDB, result: exit status 1, output 'Error 123', command: " + "[/bin/sh, -c, /opt/rh/devtoolset-10/root/bin/gdb -n -batch -core /tmp/core.1234 | grep '^Core was generated by']", e.getMessage()); } } @@ -110,7 +110,7 @@ public class CoreCollectorTest { coreCollector.readBacktrace(context, TEST_CORE_PATH, TEST_BIN_PATH, false); fail("Expected not to be able to read backtrace"); } catch (RuntimeException e) { - assertEquals("Failed to read backtrace ProcessResult { exitStatus=1 output= errors=Failure }, Command: " + + assertEquals("Failed to read backtrace exit status 1, output 'Failure', Command: " + "[" + GDB_PATH_RHEL7_DT9 + ", -n, -ex, bt, -batch, /usr/bin/program, /tmp/core.1234]", e.getMessage()); } } @@ -194,6 +194,6 @@ public class CoreCollectorTest { private void mockExec(NodeAgentContext context, String[] cmd, String output, String error) { when(docker.executeCommandInContainerAsRoot(context, cmd)) - .thenReturn(new ProcessResult(error.isEmpty() ? 0 : 1, output, error)); + .thenReturn(new CommandResult(null, error.isEmpty() ? 0 : 1, error.isEmpty() ? output : error)); } } diff --git a/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImplTest.java b/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImplTest.java index 454374fe3bd..f2b761b614d 100644 --- a/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImplTest.java +++ b/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImplTest.java @@ -33,6 +33,7 @@ import org.mockito.InOrder; import java.time.Duration; import java.time.Instant; import java.util.List; +import java.util.Map; import java.util.Optional; import static org.junit.Assert.assertEquals; @@ -762,12 +763,17 @@ public class NodeAgentImplTest { return dockerImage != null ? Optional.of(new Container( containerId, - hostName, + ContainerName.fromHostname(hostName), + isRunning ? Container.State.running : Container.State.exited, + "image-id-1", dockerImage, + Map.of(), + 42, + 43, + hostName, containerResources, - ContainerName.fromHostname(hostName), - isRunning ? Container.State.RUNNING : Container.State.EXITED, - isRunning ? 1 : 0)) : + List.of(), + true)) : Optional.empty(); }).when(containerOperations).getContainer(any()); } |