summaryrefslogtreecommitdiffstats
path: root/node-admin
diff options
context:
space:
mode:
authorMartin Polden <mpolden@mpolden.no>2021-06-30 11:17:20 +0200
committerMartin Polden <mpolden@mpolden.no>2021-06-30 11:37:10 +0200
commit513f5bd3b709a6c524efee7ed86d17e1d0899efe (patch)
treefe19f85dd842c450de7dcf913e465416bc55ad33 /node-admin
parent732a55c8508ce437e7d7fe5c50285b6a082bff44 (diff)
Container cleanup
This PR puts all container-related code, which is independent of the container engine implementation, in `com.yahoo.vespa.hosted.node.admin.container`. * All operations on containers still pass through `ContainerOperations` * `ContainerOperations` is no longer an interface * `ContainerEngine` is the interface for a container engine/runtime (e.g. Podman) * Some code was migrated from internal repo. This is the reason for the large-ish diff * Fixes problematic circular dependencies
Diffstat (limited to 'node-admin')
-rw-r--r--node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/container/Cgroup.java111
-rw-r--r--node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/container/Container.java132
-rw-r--r--node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/container/ContainerEngine.java61
-rw-r--r--node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/container/ContainerName.java9
-rw-r--r--node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/container/ContainerOperations.java150
-rw-r--r--node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/container/ContainerStatsCollector.java170
-rw-r--r--node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/container/PartialContainer.java130
-rw-r--r--node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/container/ProcessResult.java48
-rw-r--r--node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/container/image/ContainerImageDownloader.java53
-rw-r--r--node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/container/image/ContainerImagePruner.java201
-rw-r--r--node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/container/image/Image.java58
-rw-r--r--node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/container/image/package-info.java8
-rw-r--r--node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/StorageMaintainer.java2
-rw-r--r--node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/coredump/CoreCollector.java33
-rw-r--r--node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImpl.java24
-rw-r--r--node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/container/ContainerEngineMock.java206
-rw-r--r--node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/container/ContainerOperationsTest.java64
-rw-r--r--node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/container/ContainerStatsCollectorTest.java151
-rw-r--r--node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/container/ProcessResultTest.java24
-rw-r--r--node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/container/image/ContainerImageDownloaderTest.java34
-rw-r--r--node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/container/image/ContainerImagePrunerTest.java238
-rw-r--r--node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/integration/ContainerOperationsMock.java155
-rw-r--r--node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/integration/ContainerTester.java11
-rw-r--r--node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/maintenance/coredump/CoreCollectorTest.java10
-rw-r--r--node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImplTest.java14
25 files changed, 1735 insertions, 362 deletions
diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/container/Cgroup.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/container/Cgroup.java
new file mode 100644
index 00000000000..3efadda183b
--- /dev/null
+++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/container/Cgroup.java
@@ -0,0 +1,111 @@
+// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+//
+package com.yahoo.vespa.hosted.node.admin.container;
+
+import com.yahoo.vespa.hosted.node.admin.nodeagent.NodeAgentContext;
+import com.yahoo.vespa.hosted.node.admin.task.util.file.UnixPath;
+
+import java.io.IOException;
+import java.io.UncheckedIOException;
+import java.nio.charset.StandardCharsets;
+import java.nio.file.FileSystem;
+import java.nio.file.Files;
+import java.nio.file.NoSuchFileException;
+import java.nio.file.Path;
+import java.util.OptionalInt;
+import java.util.logging.Logger;
+
+/**
+ * Read and write interface to the v1 cgroup of a podman container.
+ * See <a href="https://man7.org/linux/man-pages/man7/cgroups.7.html">cgroups(7)</a> for background.
+ *
+ * @author hakon
+ */
+public class Cgroup {
+
+ private static final Logger logger = Logger.getLogger(Cgroup.class.getName());
+
+ private final FileSystem fileSystem;
+ private final ContainerId containerId;
+
+ public Cgroup(FileSystem fileSystem, ContainerId containerId) {
+ this.fileSystem = fileSystem;
+ this.containerId = containerId;
+ }
+
+ public OptionalInt readCpuQuota() {
+ return readCgroupsCpuInt(cfsQuotaPath());
+ }
+
+ public OptionalInt readCpuPeriod() {
+ return readCgroupsCpuInt(cfsPeriodPath());
+ }
+
+ public OptionalInt readCpuShares() {
+ return readCgroupsCpuInt(sharesPath());
+ }
+
+ public boolean updateCpuQuota(NodeAgentContext context, int cpuQuotaUs) {
+ return writeCgroupsCpuInt(context, cfsQuotaPath(), cpuQuotaUs);
+ }
+
+ public boolean updateCpuPeriod(NodeAgentContext context, int periodUs) {
+ return writeCgroupsCpuInt(context, cfsPeriodPath(), periodUs);
+ }
+
+ public boolean updateCpuShares(NodeAgentContext context, int shares) {
+ return writeCgroupsCpuInt(context, sharesPath(), shares);
+ }
+
+ /** Returns the path to the podman container's scope directory for the cpuacct controller. */
+ public Path cpuacctPath() {
+ return fileSystem.getPath("/sys/fs/cgroup/cpuacct/machine.slice/libpod-" + containerId + ".scope");
+ }
+
+ /** Returns the path to the podman container's scope directory for the cpu controller. */
+ public Path cpuPath() {
+ return fileSystem.getPath("/sys/fs/cgroup/cpu/machine.slice/libpod-" + containerId + ".scope");
+ }
+
+ /** Returns the path to the podman container's scope directory for the memory controller. */
+ public Path memoryPath() {
+ return fileSystem.getPath("/sys/fs/cgroup/memory/machine.slice/libpod-" + containerId + ".scope");
+ }
+
+ private UnixPath cfsQuotaPath() {
+ return new UnixPath(cpuPath().resolve("cpu.cfs_quota_us"));
+ }
+
+ private UnixPath cfsPeriodPath() {
+ return new UnixPath(cpuPath().resolve("cpu.cfs_period_us"));
+ }
+
+ private UnixPath sharesPath() {
+ return new UnixPath(cpuPath().resolve("cpu.shares"));
+ }
+
+ private OptionalInt readCgroupsCpuInt(UnixPath unixPath) {
+ final byte[] currentContentBytes;
+ try {
+ currentContentBytes = Files.readAllBytes(unixPath.toPath());
+ } catch (NoSuchFileException e) {
+ return OptionalInt.empty();
+ } catch (IOException e) {
+ throw new UncheckedIOException(e);
+ }
+
+ String currentContent = new String(currentContentBytes, StandardCharsets.UTF_8).strip();
+ return OptionalInt.of(Integer.parseInt(currentContent));
+ }
+
+ private boolean writeCgroupsCpuInt(NodeAgentContext context, UnixPath unixPath, int value) {
+ int currentValue = readCgroupsCpuInt(unixPath).orElseThrow();
+ if (currentValue == value) {
+ return false;
+ }
+
+ context.recordSystemModification(logger, "Updating " + unixPath + " from " + currentValue + " to " + value);
+ unixPath.writeUtf8File(Integer.toString(value));
+ return true;
+ }
+}
diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/container/Container.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/container/Container.java
index 7aeb43e44f4..24b77e52a02 100644
--- a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/container/Container.java
+++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/container/Container.java
@@ -1,80 +1,100 @@
-// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.vespa.hosted.node.admin.container;
import com.yahoo.config.provision.DockerImage;
+import java.util.List;
+import java.util.Map;
import java.util.Objects;
/**
- * @author stiankri
+ * A Podman container.
+ *
+ * @author mpolden
*/
-public class Container {
- private final ContainerId id;
- public final String hostname;
- public final DockerImage image;
- public final ContainerResources resources;
- public final ContainerName name;
- public final State state;
- public final int pid;
-
- public Container(
- final ContainerId id,
- final String hostname,
- final DockerImage image,
- final ContainerResources resources,
- final ContainerName containerName,
- final State state,
- final int pid) {
- this.id = id;
- this.hostname = hostname;
- this.image = image;
- this.resources = resources;
- this.name = containerName;
- this.state = state;
- this.pid = pid;
+public class Container extends PartialContainer {
+
+ private final String hostname;
+ private final ContainerResources resources;
+ private final int conmonPid;
+ private final List<Network> networks;
+
+ public Container(ContainerId id, ContainerName name, State state, String imageId, DockerImage image,
+ Map<String, String> labels, int pid, int conmonPid, String hostname,
+ ContainerResources resources, List<Network> networks, boolean managed) {
+ super(id, name, state, imageId, image, labels, pid, managed);
+ this.hostname = Objects.requireNonNull(hostname);
+ this.resources = Objects.requireNonNull(resources);
+ this.conmonPid = conmonPid;
+ this.networks = List.copyOf(Objects.requireNonNull(networks));
}
- public ContainerId id() {
- return id;
+ /** The hostname of this, if any */
+ public String hostname() {
+ return hostname;
}
- @Override
- public boolean equals(final Object obj) {
- if (!(obj instanceof Container)) {
- return false;
- }
- final Container other = (Container) obj;
- return Objects.equals(id, other.id)
- && Objects.equals(hostname, other.hostname)
- && Objects.equals(image, other.image)
- && Objects.equals(resources, other.resources)
- && Objects.equals(name, other.name)
- && Objects.equals(pid, other.pid);
+ /** Resource limits for this*/
+ public ContainerResources resources() {
+ return resources;
+ }
+
+ /** Pid of the conmon process for this container */
+ public int conmonPid() {
+ return conmonPid;
+ }
+
+ /** The networks used by this */
+ public List<Network> networks() {
+ return networks;
}
@Override
- public int hashCode() {
- return Objects.hash(hostname, image, resources, name, pid);
+ public boolean equals(Object o) {
+ if (this == o) return true;
+ if (o == null || getClass() != o.getClass()) return false;
+ if (!super.equals(o)) return false;
+ Container that = (Container) o;
+ return conmonPid == that.conmonPid && hostname.equals(that.hostname) && resources.equals(that.resources) && networks.equals(that.networks);
}
@Override
- public String toString() {
- return "Container {"
- + " id=" + id
- + " hostname=" + hostname
- + " image=" + image
- + " resources=" + resources
- + " name=" + name
- + " state=" + state
- + " pid=" + pid
- + "}";
+ public int hashCode() {
+ return Objects.hash(super.hashCode(), hostname, resources, conmonPid, networks);
}
- public enum State {
- CREATED, RESTARTING, RUNNING, REMOVING, PAUSED, EXITED, DEAD, UNKNOWN, CONFIGURED, STOPPED;
+ /** The network of a container */
+ public static class Network {
+
+ private final String name;
+ private final String ipv4Address;
- public boolean isRunning() {
- return this == RUNNING;
+ public Network(String name, String ipv4Address) {
+ this.name = Objects.requireNonNull(name);
+ this.ipv4Address = Objects.requireNonNull(ipv4Address);
}
+
+ public String name() {
+ return name;
+ }
+
+ public String ipv4Address() {
+ return ipv4Address;
+ }
+
+ @Override
+ public boolean equals(Object o) {
+ if (this == o) return true;
+ if (o == null || getClass() != o.getClass()) return false;
+ Network network = (Network) o;
+ return name.equals(network.name) && ipv4Address.equals(network.ipv4Address);
+ }
+
+ @Override
+ public int hashCode() {
+ return Objects.hash(name, ipv4Address);
+ }
+
}
+
}
diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/container/ContainerEngine.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/container/ContainerEngine.java
new file mode 100644
index 00000000000..58ff1af4681
--- /dev/null
+++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/container/ContainerEngine.java
@@ -0,0 +1,61 @@
+// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.hosted.node.admin.container;
+
+import com.yahoo.config.provision.DockerImage;
+import com.yahoo.vespa.hosted.node.admin.component.TaskContext;
+import com.yahoo.vespa.hosted.node.admin.container.image.Image;
+import com.yahoo.vespa.hosted.node.admin.nodeagent.ContainerData;
+import com.yahoo.vespa.hosted.node.admin.nodeagent.NodeAgentContext;
+import com.yahoo.vespa.hosted.node.admin.task.util.process.CommandResult;
+
+import java.time.Duration;
+import java.util.List;
+import java.util.Optional;
+
+/**
+ * Interface for a container engine, such as Docker or Podman.
+ *
+ * @author mpolden
+ */
+public interface ContainerEngine {
+
+ /** Create a new container */
+ void createContainer(NodeAgentContext context, ContainerData containerData, ContainerResources containerResources);
+
+ /** Start a created container */
+ void startContainer(NodeAgentContext context);
+
+ /** Update an existing container */
+ void updateContainer(NodeAgentContext context, ContainerId containerId, ContainerResources containerResources);
+
+ /** Remove given container. The container will be stopped if necessary */
+ void removeContainer(TaskContext context, PartialContainer container);
+
+ /** Get container for given context */
+ Optional<Container> getContainer(NodeAgentContext context);
+
+ /** List containers managed by this */
+ List<PartialContainer> listContainers(TaskContext context);
+
+ /** Returns the network interface used by container in given context */
+ String networkInterface(NodeAgentContext context);
+
+ /** Executes a command in inside container as root user, throws on non-zero exit code */
+ CommandResult executeAsRoot(NodeAgentContext context, Duration timeout, String... command);
+
+ /** Executes a command in inside containers network namespace, throws on non-zero exit code */
+ CommandResult executeInNetworkNamespace(NodeAgentContext context, String... command);
+
+ /** Download giving image */
+ void pullImage(TaskContext context, DockerImage image, RegistryCredentials registryCredentials);
+
+ /** Returns whether given image is already downloaded */
+ boolean hasImage(TaskContext context, DockerImage image);
+
+ /** Remove given image */
+ void removeImage(TaskContext context, String id);
+
+ /** Returns images available in this */
+ List<Image> listImages(TaskContext context);
+
+}
diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/container/ContainerName.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/container/ContainerName.java
index 49dacc44335..a81404902bc 100644
--- a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/container/ContainerName.java
+++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/container/ContainerName.java
@@ -9,7 +9,8 @@ import java.util.regex.Pattern;
*
* @author bakksjo
*/
-public class ContainerName {
+public class ContainerName implements Comparable<ContainerName> {
+
private static final Pattern LEGAL_CONTAINER_NAME_PATTERN = Pattern.compile("^[a-zA-Z0-9-]+$");
private final String name;
@@ -51,4 +52,10 @@ public class ContainerName {
+ " name=" + name
+ " }";
}
+
+ @Override
+ public int compareTo(ContainerName o) {
+ return name.compareTo(o.name);
+ }
+
}
diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/container/ContainerOperations.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/container/ContainerOperations.java
index e48927279a4..444e92e793f 100644
--- a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/container/ContainerOperations.java
+++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/container/ContainerOperations.java
@@ -3,69 +3,147 @@ package com.yahoo.vespa.hosted.node.admin.container;
import com.yahoo.config.provision.DockerImage;
import com.yahoo.vespa.hosted.node.admin.component.TaskContext;
+import com.yahoo.vespa.hosted.node.admin.container.image.ContainerImageDownloader;
+import com.yahoo.vespa.hosted.node.admin.container.image.ContainerImagePruner;
import com.yahoo.vespa.hosted.node.admin.nodeagent.ContainerData;
import com.yahoo.vespa.hosted.node.admin.nodeagent.NodeAgentContext;
+import com.yahoo.vespa.hosted.node.admin.task.util.process.CommandLine;
import com.yahoo.vespa.hosted.node.admin.task.util.process.CommandResult;
+import java.nio.file.FileSystem;
+import java.time.Clock;
import java.time.Duration;
import java.util.List;
+import java.util.Objects;
import java.util.Optional;
import java.util.Set;
+import java.util.stream.Collectors;
/**
+ * High-level interface for container operations. Code managing containers should use this and not
+ * {@link ContainerEngine} directly.
+ *
* @author hakonhall
+ * @author mpolden
*/
-// TODO(mpolden): Clean up this interface when ContainerOperationsImpl, DockerEngine and friends can be removed
-public interface ContainerOperations {
-
- void createContainer(NodeAgentContext context, ContainerData containerData, ContainerResources containerResources);
-
- void startContainer(NodeAgentContext context);
-
- void removeContainer(NodeAgentContext context, Container container);
-
- void updateContainer(NodeAgentContext context, ContainerId containerId, ContainerResources containerResources);
-
- Optional<Container> getContainer(NodeAgentContext context);
-
- boolean pullImageAsyncIfNeeded(TaskContext context, DockerImage dockerImage, RegistryCredentials registryCredentials);
-
- ProcessResult executeCommandInContainerAsRoot(NodeAgentContext context, String... command);
-
- ProcessResult executeCommandInContainerAsRoot(NodeAgentContext context, Long timeoutSeconds, String... command);
+public class ContainerOperations {
+
+ private final ContainerEngine containerEngine;
+ private final ContainerImageDownloader imageDownloader;
+ private final ContainerImagePruner imagePruner;
+ private final ContainerStatsCollector containerStatsCollector;
+
+ public ContainerOperations(ContainerEngine containerEngine, FileSystem fileSystem) {
+ this.containerEngine = Objects.requireNonNull(containerEngine);
+ this.imageDownloader = new ContainerImageDownloader(containerEngine);
+ this.imagePruner = new ContainerImagePruner(containerEngine, Clock.systemUTC());
+ this.containerStatsCollector = new ContainerStatsCollector(Objects.requireNonNull(fileSystem));
+ }
+
+ public void createContainer(NodeAgentContext context, ContainerData containerData, ContainerResources containerResources) {
+ containerEngine.createContainer(context, containerData, containerResources);
+ }
+
+ public void startContainer(NodeAgentContext context) {
+ containerEngine.startContainer(context);
+ }
+
+ public void removeContainer(NodeAgentContext context, Container container) {
+ containerEngine.removeContainer(context, container);
+ }
+
+ public void updateContainer(NodeAgentContext context, ContainerId containerId, ContainerResources containerResources) {
+ containerEngine.updateContainer(context, containerId, containerResources);
+ }
+
+ public Optional<Container> getContainer(NodeAgentContext context) {
+ return containerEngine.getContainer(context);
+ }
+
+ /** Pull image asynchronously. Returns true if image is still downloading and false if download is complete */
+ public boolean pullImageAsyncIfNeeded(TaskContext context, DockerImage dockerImage, RegistryCredentials registryCredentials) {
+ return !imageDownloader.get(context, dockerImage, registryCredentials);
+ }
+
+ /** Executes a command inside container identified by given context. Does NOT throw on non-zero exit code */
+ public CommandResult executeCommandInContainerAsRoot(NodeAgentContext context, String... command) {
+ return executeCommandInContainerAsRoot(context, CommandLine.DEFAULT_TIMEOUT.toSeconds(), command);
+ }
+
+ /** Executes a command inside container identified by given context. Does NOT throw on non-zero exit code */
+ public CommandResult executeCommandInContainerAsRoot(NodeAgentContext context, Long timeoutSeconds, String... command) {
+ return containerEngine.executeAsRoot(context, Duration.ofSeconds(timeoutSeconds), command);
+ }
/** Executes a command in inside containers network namespace, throws on non-zero exit code */
- CommandResult executeCommandInNetworkNamespace(NodeAgentContext context, String... command);
+ public CommandResult executeCommandInNetworkNamespace(NodeAgentContext context, String... command) {
+ return containerEngine.executeInNetworkNamespace(context, command);
+ }
- /** Resume node. Resuming a node means that it is ready to take on traffic.
- * @return*/
- String resumeNode(NodeAgentContext context);
+ /** Resume node. Resuming a node means that it is ready to receive traffic */
+ public String resumeNode(NodeAgentContext context) {
+ return executeNodeCtlInContainer(context, "resume");
+ }
/**
* Suspend node and return output. Suspending a node means the node should be taken temporarly offline,
* such that maintenance of the node can be done (upgrading, rebooting, etc).
*/
- String suspendNode(NodeAgentContext context);
-
- String restartVespa(NodeAgentContext context);
-
- String startServices(NodeAgentContext context);
-
- String stopServices(NodeAgentContext context);
-
- Optional<ContainerStats> getContainerStats(NodeAgentContext context);
-
- boolean noManagedContainersRunning(TaskContext context);
+ public String suspendNode(NodeAgentContext context) {
+ return executeNodeCtlInContainer(context, "suspend");
+ }
+
+ /** Restart Vespa inside container. Same as running suspend, stop, start and resume */
+ public String restartVespa(NodeAgentContext context) {
+ return executeNodeCtlInContainer(context, "restart-vespa");
+ }
+
+ /** Start Vespa inside container */
+ public String startServices(NodeAgentContext context) {
+ return executeNodeCtlInContainer(context, "start");
+ }
+
+ /** Stop Vespa inside container */
+ public String stopServices(NodeAgentContext context) {
+ return executeNodeCtlInContainer(context, "stop");
+ }
+
+ /** Get container statistics */
+ public Optional<ContainerStats> getContainerStats(NodeAgentContext context) {
+ String iface = containerEngine.networkInterface(context);
+ return getContainer(context).flatMap(container -> containerStatsCollector.collect(container.id(), container.pid(), iface));
+ }
+
+ /** Returns true if no containers managed by node-admin are running */
+ public boolean noManagedContainersRunning(TaskContext context) {
+ return containerEngine.listContainers(context).stream()
+ .filter(c -> c.managed())
+ .noneMatch(container -> container.state() == Container.State.running);
+ }
/**
- * Stops and removes all managed containers except the ones given in {@code containerNames}.
+ * Stop and remove all managed containers except the given ones
*
* @return true if any containers were removed
*/
- boolean retainManagedContainers(TaskContext context, Set<ContainerName> containerNames);
+ public boolean retainManagedContainers(TaskContext context, Set<ContainerName> containerNames) {
+ return containerEngine.listContainers(context).stream()
+ .filter(c -> c.managed())
+ .filter(container -> !containerNames.contains(container.name()))
+ .peek(container -> containerEngine.removeContainer(context, container))
+ .count() > 0;
+ }
/** Deletes the local images that are currently not in use by any container and not recently used. */
- boolean deleteUnusedContainerImages(TaskContext context, List<DockerImage> excludes, Duration minImageAgeToDelete);
+ public boolean deleteUnusedContainerImages(TaskContext context, List<DockerImage> excludes, Duration minImageAgeToDelete) {
+ List<String> excludedRefs = excludes.stream().map(DockerImage::asString).collect(Collectors.toList());
+ return imagePruner.removeUnusedImages(context, excludedRefs, minImageAgeToDelete);
+ }
+
+ private String executeNodeCtlInContainer(NodeAgentContext context, String program) {
+ String[] command = new String[] {context.pathInNodeUnderVespaHome("bin/vespa-nodectl").toString(), program};
+ return executeCommandInContainerAsRoot(context, command).getOutput();
+ }
}
diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/container/ContainerStatsCollector.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/container/ContainerStatsCollector.java
new file mode 100644
index 00000000000..91466efd43e
--- /dev/null
+++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/container/ContainerStatsCollector.java
@@ -0,0 +1,170 @@
+// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.hosted.node.admin.container;
+
+import java.io.IOException;
+import java.io.UncheckedIOException;
+import java.nio.file.FileSystem;
+import java.nio.file.Files;
+import java.nio.file.NoSuchFileException;
+import java.nio.file.Path;
+import java.time.Duration;
+import java.util.List;
+import java.util.Map;
+import java.util.Objects;
+import java.util.Optional;
+
+/**
+ * Collects CPU, memory and network statistics for a container.
+ *
+ * Uses same approach as runc: https://github.com/opencontainers/runc/tree/master/libcontainer/cgroups/fs
+ *
+ * @author mpolden
+ */
+class ContainerStatsCollector {
+
+ private final FileSystem fileSystem;
+
+ public ContainerStatsCollector(FileSystem fileSystem) {
+ this.fileSystem = Objects.requireNonNull(fileSystem);
+ }
+
+ /** Collect statistics for given container ID and PID */
+ public Optional<ContainerStats> collect(ContainerId containerId, int pid, String iface) {
+ Cgroup cgroup = new Cgroup(fileSystem, containerId);
+ try {
+ ContainerStats.CpuStats cpuStats = collectCpuStats(cgroup);
+ ContainerStats.MemoryStats memoryStats = collectMemoryStats(cgroup);
+ Map<String, ContainerStats.NetworkStats> networkStats = Map.of(iface, collectNetworkStats(iface, pid));
+ return Optional.of(new ContainerStats(networkStats, memoryStats, cpuStats));
+ } catch (NoSuchFileException ignored) {
+ return Optional.empty(); // Container disappeared while we collected stats
+ } catch (IOException e) {
+ throw new UncheckedIOException(e);
+ }
+ }
+
+ private ContainerStats.CpuStats collectCpuStats(Cgroup cgroup) throws IOException {
+ List<String> cpuStatLines = Files.readAllLines(cpuStatPath(cgroup));
+ long throttledActivePeriods = parseLong(cpuStatLines, "nr_periods");
+ long throttledPeriods = parseLong(cpuStatLines, "nr_throttled");
+ long throttledTime = parseLong(cpuStatLines, "throttled_time");
+ return new ContainerStats.CpuStats(cpuCount(cgroup),
+ systemCpuUsage().toNanos(),
+ containerCpuUsage(cgroup).toNanos(),
+ containerCpuUsageSystem(cgroup).toNanos(),
+ throttledTime,
+ throttledActivePeriods,
+ throttledPeriods);
+ }
+
+ private ContainerStats.MemoryStats collectMemoryStats(Cgroup cgroup) throws IOException {
+ long memoryLimitInBytes = parseLong(memoryLimitPath(cgroup));
+ long memoryUsageInBytes = parseLong(memoryUsagePath(cgroup));
+ long cachedInBytes = parseLong(memoryStatPath(cgroup), "cache");
+ return new ContainerStats.MemoryStats(cachedInBytes, memoryUsageInBytes, memoryLimitInBytes);
+ }
+
+ private ContainerStats.NetworkStats collectNetworkStats(String iface, int containerPid) throws IOException {
+ for (var line : Files.readAllLines(netDevPath(containerPid))) {
+ String[] fields = fields(line.trim());
+ if (fields.length < 17 || !fields[0].equals(iface + ":")) continue;
+
+ long rxBytes = Long.parseLong(fields[1]);
+ long rxErrors = Long.parseLong(fields[3]);
+ long rxDropped = Long.parseLong(fields[4]);
+
+ long txBytes = Long.parseLong(fields[9]);
+ long txErrors = Long.parseLong(fields[11]);
+ long txDropped = Long.parseLong(fields[12]);
+
+ return new ContainerStats.NetworkStats(rxBytes, rxDropped, rxErrors, txBytes, txDropped, txErrors);
+ }
+ throw new IllegalArgumentException("No statistics found for interface " + iface);
+ }
+
+ /** Number of CPUs seen by given container */
+ private int cpuCount(Cgroup cgroup) throws IOException {
+ return fields(Files.readString(perCpuUsagePath(cgroup))).length;
+ }
+
+ /** Returns total CPU time spent executing all the processes on this host */
+ private Duration systemCpuUsage() throws IOException {
+ long ticks = parseLong(fileSystem.getPath("/proc/stat"), "cpu");
+ return ticksToDuration(ticks);
+ }
+
+ /** Returns total CPU time spent running all processes inside given container */
+ private Duration containerCpuUsage(Cgroup cgroup) throws IOException {
+ return Duration.ofNanos(parseLong(cpuUsagePath(cgroup)));
+ }
+
+ /** Returns total CPU time spent in kernel/system mode while executing processes inside given container */
+ private Duration containerCpuUsageSystem(Cgroup cgroup) throws IOException {
+ long ticks = parseLong(cpuacctStatPath(cgroup), "system");
+ return ticksToDuration(ticks);
+ }
+
+ private long parseLong(Path path) throws IOException {
+ return Long.parseLong(Files.readString(path).trim());
+ }
+
+ private long parseLong(Path path, String fieldName) throws IOException {
+ return parseLong(Files.readAllLines(path), fieldName);
+ }
+
+ private long parseLong(List<String> lines, String fieldName) {
+ long value = 0;
+ for (var line : lines) {
+ String[] fields = fields(line);
+ if (fields.length < 2 || !fields[0].equals(fieldName)) continue;
+ for (int i = 1; i < fields.length; i++) {
+ value += Long.parseLong(fields[i]);
+ }
+ break;
+ }
+ return value;
+ }
+
+ private Path netDevPath(int containerPid) {
+ return fileSystem.getPath("/proc/" + containerPid + "/net/dev");
+ }
+
+ private Path cpuacctStatPath(Cgroup cgroup) {
+ return cgroup.cpuacctPath().resolve("cpuacct.stat");
+ }
+
+ private Path cpuUsagePath(Cgroup cgroup) {
+ return cgroup.cpuacctPath().resolve("cpuacct.usage");
+ }
+
+ private Path perCpuUsagePath(Cgroup cgroup) {
+ return cgroup.cpuacctPath().resolve("cpuacct.usage_percpu");
+ }
+
+ private Path cpuStatPath(Cgroup cgroup) {
+ return cgroup.cpuacctPath().resolve("cpu.stat");
+ }
+
+ private Path memoryStatPath(Cgroup cgroup) {
+ return cgroup.memoryPath().resolve("memory.stat");
+ }
+
+ private Path memoryUsagePath(Cgroup cgroup) {
+ return cgroup.memoryPath().resolve("memory.usage_in_bytes");
+ }
+
+ private Path memoryLimitPath(Cgroup cgroup) {
+ return cgroup.memoryPath().resolve("memory.limit_in_bytes");
+ }
+
+ private static Duration ticksToDuration(long ticks) {
+ // Ideally we would read this from _SC_CLK_TCK, but then we need JNI. However, in practice this is always 100 on x86 Linux
+ long ticksPerSecond = 100;
+ return Duration.ofNanos((ticks * Duration.ofSeconds(1).toNanos()) / ticksPerSecond);
+ }
+
+ private static String[] fields(String s) {
+ return s.split("\\s+");
+ }
+
+}
diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/container/PartialContainer.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/container/PartialContainer.java
new file mode 100644
index 00000000000..c3be670c009
--- /dev/null
+++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/container/PartialContainer.java
@@ -0,0 +1,130 @@
+// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.hosted.node.admin.container;
+
+import com.yahoo.config.provision.DockerImage;
+
+import java.util.Map;
+import java.util.Objects;
+
+/**
+ * A partial container, containing only fields returned by a container list command such as 'podman ps'.
+ *
+ * @author mpolden
+ */
+public class PartialContainer {
+
+ private final ContainerId id;
+ private final ContainerName name;
+ private final State state;
+ private final String imageId;
+ private final DockerImage image;
+ private final Map<String, String> labels;
+ private final int pid;
+ private final boolean managed;
+
+ public PartialContainer(ContainerId id, ContainerName name, State state, String imageId,
+ DockerImage image, Map<String, String> labels, int pid, boolean managed) {
+ this.id = Objects.requireNonNull(id);
+ this.name = Objects.requireNonNull(name);
+ this.state = Objects.requireNonNull(state);
+ this.imageId = Objects.requireNonNull(imageId);
+ this.image = Objects.requireNonNull(image);
+ this.labels = Map.copyOf(Objects.requireNonNull(labels));
+ this.pid = pid;
+ this.managed = managed;
+ }
+
+ /** A unique identifier for this. Typically generated by the container engine */
+ public ContainerId id() {
+ return id;
+ }
+
+ /** The given name of this */
+ public ContainerName name() {
+ return name;
+ }
+
+ /** Current state of this */
+ public State state() {
+ return state;
+ }
+
+ /** A unique identifier for the image in use by this */
+ public String imageId() {
+ return imageId;
+ }
+
+ /** The image in use by this */
+ public DockerImage image() {
+ return image;
+ }
+
+ /** The labels set on this */
+ public Map<String, String> labels() {
+ return labels;
+ }
+
+ /** The PID of this */
+ public int pid() {
+ return pid;
+ }
+
+ /** Returns whether this container is managed by node-admin */
+ public boolean managed() {
+ return managed;
+ }
+
+ /** Returns the value of given label key */
+ public String label(String key) {
+ String labelValue = labels.get(key);
+ if (labelValue == null) throw new IllegalArgumentException("No such label '" + key + "'");
+ return labelValue;
+ }
+
+ @Override
+ public boolean equals(Object o) {
+ if (this == o) return true;
+ if (o == null || getClass() != o.getClass()) return false;
+ PartialContainer that = (PartialContainer) o;
+ return pid == that.pid && managed == that.managed && id.equals(that.id) && name.equals(that.name) && state == that.state && imageId.equals(that.imageId) && image.equals(that.image) && labels.equals(that.labels);
+ }
+
+ @Override
+ public int hashCode() {
+ return Objects.hash(id, name, state, imageId, image, labels, pid, managed);
+ }
+
+ /** The state of a container */
+ public enum State {
+
+ unknown,
+ configured,
+ created,
+ running,
+ stopped,
+ paused,
+ exited,
+ removing;
+
+
+ public boolean isRunning() {
+ return this == running;
+ }
+
+ public static Container.State from(String state) {
+ switch (state) {
+ case "unknown": return unknown;
+ case "configured": return configured;
+ case "created": return created;
+ case "running": return running;
+ case "stopped": return stopped;
+ case "paused": return paused;
+ case "exited": return exited;
+ case "removing": return removing;
+ }
+ throw new IllegalArgumentException("Invalid state '" + state + "'");
+ }
+
+ }
+
+}
diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/container/ProcessResult.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/container/ProcessResult.java
deleted file mode 100644
index 066a65eb409..00000000000
--- a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/container/ProcessResult.java
+++ /dev/null
@@ -1,48 +0,0 @@
-// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
-package com.yahoo.vespa.hosted.node.admin.container;
-
-import java.util.Objects;
-
-// TODO: Replace usages of this with CommandResult
-public class ProcessResult {
- private final int exitStatus;
- private final String output;
- private final String errors;
-
- public ProcessResult(int exitStatus, String output, String errors) {
- this.exitStatus = exitStatus;
- this.output = output;
- this.errors = errors;
- }
-
- public boolean isSuccess() { return exitStatus == 0; }
- public int getExitStatus() { return exitStatus; }
-
- public String getOutput() { return output; }
-
- public String getErrors() { return errors; }
-
- @Override
- public boolean equals(Object o) {
- if (!(o instanceof ProcessResult)) return false;
- ProcessResult other = (ProcessResult) o;
- return Objects.equals(exitStatus, other.exitStatus)
- && Objects.equals(output, other.output)
- && Objects.equals(errors, other.errors);
- }
-
- @Override
- public int hashCode() {
- return Objects.hash(exitStatus, output, errors);
- }
-
- @Override
- public String toString() {
- return "ProcessResult {"
- + " exitStatus=" + exitStatus
- + " output=" + output
- + " errors=" + errors
- + " }";
- }
-
-}
diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/container/image/ContainerImageDownloader.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/container/image/ContainerImageDownloader.java
new file mode 100644
index 00000000000..c65209698f6
--- /dev/null
+++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/container/image/ContainerImageDownloader.java
@@ -0,0 +1,53 @@
+// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.hosted.node.admin.container.image;
+
+import com.yahoo.concurrent.DaemonThreadFactory;
+import com.yahoo.config.provision.DockerImage;
+import com.yahoo.vespa.hosted.node.admin.component.TaskContext;
+import com.yahoo.vespa.hosted.node.admin.container.ContainerEngine;
+import com.yahoo.vespa.hosted.node.admin.container.RegistryCredentials;
+
+import java.util.Collections;
+import java.util.HashSet;
+import java.util.Objects;
+import java.util.Set;
+import java.util.concurrent.ExecutorService;
+import java.util.concurrent.Executors;
+
+/**
+ * Download a container image asynchronously.
+ *
+ * @author mpolden
+ */
+public class ContainerImageDownloader {
+
+ private final ContainerEngine containerEngine;
+
+ private final ExecutorService executorService = Executors.newSingleThreadExecutor(
+ new DaemonThreadFactory("container-image-downloader")); // Download one image at a time
+ private final Set<DockerImage> pendingDownloads = Collections.synchronizedSet(new HashSet<>());
+
+ public ContainerImageDownloader(ContainerEngine containerEngine) {
+ this.containerEngine = Objects.requireNonNull(containerEngine);
+ }
+
+ /**
+ * Download given container image.
+ *
+ * @return true if the image download has completed.
+ */
+ public boolean get(TaskContext context, DockerImage image, RegistryCredentials registryCredentials) {
+ if (pendingDownloads.contains(image)) return false;
+ if (containerEngine.hasImage(context, image)) return true;
+ executorService.submit(() -> {
+ try {
+ containerEngine.pullImage(context, image, registryCredentials);
+ } finally {
+ pendingDownloads.remove(image);
+ }
+ });
+ pendingDownloads.add(image);
+ return false;
+ }
+
+}
diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/container/image/ContainerImagePruner.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/container/image/ContainerImagePruner.java
new file mode 100644
index 00000000000..bb33dfee663
--- /dev/null
+++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/container/image/ContainerImagePruner.java
@@ -0,0 +1,201 @@
+// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.hosted.node.admin.container.image;
+
+import com.google.common.base.Strings;
+import com.yahoo.collections.Pair;
+import com.yahoo.vespa.hosted.node.admin.component.TaskContext;
+import com.yahoo.vespa.hosted.node.admin.container.ContainerEngine;
+import com.yahoo.vespa.hosted.node.admin.container.PartialContainer;
+
+import java.time.Clock;
+import java.time.Duration;
+import java.time.Instant;
+import java.util.Collections;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.Objects;
+import java.util.Optional;
+import java.util.Set;
+import java.util.concurrent.ConcurrentHashMap;
+import java.util.function.Function;
+import java.util.logging.Logger;
+import java.util.stream.Collectors;
+import java.util.stream.Stream;
+
+/**
+ * This class removes container images that have not been recently used by any containers.
+ *
+ * <p>Definitions:
+ * <ul>
+ * <li>Every image has exactly 1 id</li>
+ * <li>Every image has between 0..n tags, see
+ * <a href="https://docs.docker.com/engine/reference/commandline/tag/">docker tag</a> for more</li>
+ * <li>Every image has 0..1 parent ids</li>
+ * </ul>
+ *
+ * <p>Limitations:
+ * <ol>
+ * <li>Image that has more than 1 tag cannot be deleted by ID</li>
+ * <li>Deleting a tag of an image with multiple tags will only remove the tag, the image with the
+ * remaining tags will remain</li>
+ * <li>Deleting the last tag of an image will delete the entire image.</li>
+ * <li>Image cannot be deleted if:</li>
+ * <ol>
+ * <li>It has 1 or more children</li>
+ * <li>A container uses it</li>
+ * </ol>
+ * </ol>
+ *
+ * @author freva
+ * @author mpolden
+ */
+public class ContainerImagePruner {
+
+ private static final Logger LOG = Logger.getLogger(ContainerImagePruner.class.getName());
+
+ private final Clock clock;
+ private final ContainerEngine containerEngine;
+
+ private final Map<String, Instant> lastTimeUsedByImageId = new ConcurrentHashMap<>();
+
+ public ContainerImagePruner(ContainerEngine containerEngine, Clock clock) {
+ this.containerEngine = Objects.requireNonNull(containerEngine);
+ this.clock = Objects.requireNonNull(clock);
+ }
+
+ /**
+ * Remove unused container images.
+ *
+ * Note: This method must be called frequently enough to see all containers to know which images are being used.
+ *
+ * @param excludedRefs List of image references (tag or id) to keep, regardless of their status
+ * @param minAge Minimum age of for image to be removed
+ * @return true if any image was remove
+ */
+ public boolean removeUnusedImages(TaskContext context, List<String> excludedRefs, Duration minAge) {
+ List<Image> images = containerEngine.listImages(context);
+ List<PartialContainer> containers = containerEngine.listContainers(context);
+
+ Map<String, Image> imageByImageId = images.stream().collect(Collectors.toMap(Image::id, Function.identity()));
+
+ // Find all the ancestors for every local image id, this includes the image id itself
+ Map<String, Set<String>> ancestorsByImageId = images.stream()
+ .map(Image::id)
+ .collect(Collectors.toMap(
+ Function.identity(),
+ imageId -> {
+ Set<String> ancestors = new HashSet<>();
+ while (!Strings.isNullOrEmpty(imageId)) {
+ ancestors.add(imageId);
+ imageId = Optional.of(imageId).map(imageByImageId::get).flatMap(Image::parentId).orElse(null);
+ }
+ return ancestors;
+ }
+ ));
+
+ // The set of images that we want to keep is:
+ // 1. The images that were recently used
+ // 2. The images that were explicitly excluded
+ // 3. All of the ancestors of from images in 1 & 2
+ Set<String> imagesToKeep = Stream
+ .concat(
+ updateRecentlyUsedImageIds(images, containers, minAge).stream(), // 1
+ referencesToImages(excludedRefs, images).stream()) // 2
+ .flatMap(imageId -> ancestorsByImageId.getOrDefault(imageId, Collections.emptySet()).stream()) // 3
+ .collect(Collectors.toSet());
+
+ // Now take all the images we have locally
+ return imageByImageId.keySet().stream()
+
+ // filter out images we want to keep
+ .filter(imageId -> !imagesToKeep.contains(imageId))
+
+ // Sort images in an order is safe to delete (children before parents)
+ .sorted((o1, o2) -> {
+ // If image2 is parent of image1, image1 comes before image2
+ if (imageIsDescendantOf(imageByImageId, o1, o2)) return -1;
+ // If image1 is parent of image2, image2 comes before image1
+ else if (imageIsDescendantOf(imageByImageId, o2, o1)) return 1;
+ // Otherwise, sort lexicographically by image name (For testing)
+ else return o1.compareTo(o2);
+ })
+
+ // Map back to image
+ .map(imageByImageId::get)
+
+ // Delete image, if successful also remove last usage time to prevent re-download being instantly deleted
+ .peek(image -> {
+ // Deleting an image by image ID with multiple tags will fail -> delete by tags instead
+ referencesOf(image).forEach(imageReference -> {
+ LOG.info("Deleting unused image " + imageReference);
+ containerEngine.removeImage(context, imageReference);
+ });
+ lastTimeUsedByImageId.remove(image.id());
+ })
+ .count() > 0;
+ }
+
+ private Set<String> updateRecentlyUsedImageIds(List<Image> images, List<PartialContainer> containers, Duration minImageAgeToDelete) {
+ final Instant now = clock.instant();
+
+ // Add any already downloaded image to the list once
+ images.forEach(image -> lastTimeUsedByImageId.putIfAbsent(image.id(), now));
+
+ // Update last used time for all current containers
+ containers.forEach(container -> lastTimeUsedByImageId.put(container.imageId(), now));
+
+ // Return list of images that have been used within minImageAgeToDelete
+ return lastTimeUsedByImageId.entrySet().stream()
+ .filter(entry -> Duration.between(entry.getValue(), now).minus(minImageAgeToDelete).isNegative())
+ .map(Map.Entry::getKey)
+ .collect(Collectors.toSet());
+ }
+
+ /**
+ * Map given references (image tags or ids) to images.
+ *
+ * This only works if the given tag is actually present locally. This is fine, because if it isn't - we can't delete
+ * it, so no harm done.
+ */
+ private Set<String> referencesToImages(List<String> references, List<Image> images) {
+ Map<String, String> imageIdByImageTag = images.stream()
+ .flatMap(image -> referencesOf(image).stream()
+ .map(repoTag -> new Pair<>(repoTag, image.id())))
+ .collect(Collectors.toMap(Pair::getFirst, Pair::getSecond));
+
+ return references.stream()
+ .map(ref -> imageIdByImageTag.getOrDefault(ref, ref))
+ .collect(Collectors.toUnmodifiableSet());
+ }
+
+ /**
+ * @return true if ancestor is a parent or grand-parent or grand-grand-parent, etc. of img
+ */
+ private boolean imageIsDescendantOf(Map<String, Image> imageIdToImage, String img, String ancestor) {
+ while (imageIdToImage.containsKey(img)) {
+ img = imageIdToImage.get(img).parentId().orElse(null);
+ if (img == null) return false;
+ if (ancestor.equals(img)) return true;
+ }
+ return false;
+ }
+
+ /**
+ * Returns list of references to given image, preferring image tag(s), if any exist.
+ *
+ * If image is untagged, its ID is returned instead.
+ */
+ private static List<String> referencesOf(Image image) {
+ if (image.names().isEmpty()) {
+ return List.of(image.id());
+ }
+ return image.names().stream()
+ .map(tag -> {
+ if ("<none>:<none>".equals(tag)) return image.id();
+ return tag;
+ })
+ .collect(Collectors.toUnmodifiableList());
+ }
+
+}
diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/container/image/Image.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/container/image/Image.java
new file mode 100644
index 00000000000..a1b0ae0d809
--- /dev/null
+++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/container/image/Image.java
@@ -0,0 +1,58 @@
+// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.hosted.node.admin.container.image;
+
+import java.util.List;
+import java.util.Objects;
+import java.util.Optional;
+
+/**
+ * This represents a container image that exists locally.
+ *
+ * @author mpolden
+ */
+public class Image {
+
+ private final String id;
+ private final Optional<String> parentId;
+ private final List<String> names;
+
+ public Image(String id, Optional<String> parentId, List<String> names) {
+ this.id = Objects.requireNonNull(id);
+ this.parentId = Objects.requireNonNull(parentId);
+ this.names = List.copyOf(Objects.requireNonNull(names));
+ }
+
+ /** The identifier of this image */
+ public String id() {
+ return id;
+ }
+
+ /** ID of the parent image of this, if any */
+ public Optional<String> parentId() {
+ return parentId;
+ }
+
+ /** Names for this image, such as tags or digests */
+ public List<String> names() {
+ return names;
+ }
+
+ @Override
+ public boolean equals(Object o) {
+ if (this == o) return true;
+ if (o == null || getClass() != o.getClass()) return false;
+ Image image = (Image) o;
+ return id.equals(image.id) && parentId.equals(image.parentId) && names.equals(image.names);
+ }
+
+ @Override
+ public int hashCode() {
+ return Objects.hash(id, parentId, names);
+ }
+
+ @Override
+ public String toString() {
+ return "image " + id;
+ }
+
+}
diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/container/image/package-info.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/container/image/package-info.java
new file mode 100644
index 00000000000..9ba1d7d6e7f
--- /dev/null
+++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/container/image/package-info.java
@@ -0,0 +1,8 @@
+// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+/**
+ * @author mpolden
+ */
+@ExportPackage
+package com.yahoo.vespa.hosted.node.admin.container.image;
+
+import com.yahoo.osgi.annotation.ExportPackage;
diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/StorageMaintainer.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/StorageMaintainer.java
index 15705557447..5fd7d2e4119 100644
--- a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/StorageMaintainer.java
+++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/StorageMaintainer.java
@@ -227,7 +227,7 @@ public class StorageMaintainer {
}
private String getDockerImage(NodeAgentContext context, Optional<Container> container) {
- return container.map(c -> c.image.asString())
+ return container.map(c -> c.image().asString())
.orElse(context.node().currentDockerImage()
.map(DockerImage::asString)
.orElse("<none>")
diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/coredump/CoreCollector.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/coredump/CoreCollector.java
index 16b7e462ad3..12dcc04b952 100644
--- a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/coredump/CoreCollector.java
+++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/coredump/CoreCollector.java
@@ -1,10 +1,10 @@
// Copyright 2018 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.vespa.hosted.node.admin.maintenance.coredump;
-import com.yahoo.vespa.hosted.node.admin.container.ProcessResult;
import com.yahoo.vespa.hosted.node.admin.container.ContainerOperations;
import com.yahoo.vespa.hosted.node.admin.nodeadmin.ConvergenceException;
import com.yahoo.vespa.hosted.node.admin.nodeagent.NodeAgentContext;
+import com.yahoo.vespa.hosted.node.admin.task.util.process.CommandResult;
import java.nio.file.Path;
import java.nio.file.Paths;
@@ -45,12 +45,12 @@ public class CoreCollector {
String getGdbPath(NodeAgentContext context) {
// TODO: Remove when we do not have any devtoolset-9 installs left
String[] command_rhel7_dt9 = {"stat", GDB_PATH_RHEL7_DT9};
- if (docker.executeCommandInContainerAsRoot(context, command_rhel7_dt9).getExitStatus() == 0) {
+ if (docker.executeCommandInContainerAsRoot(context, command_rhel7_dt9).getExitCode() == 0) {
return GDB_PATH_RHEL7_DT9;
}
String[] command_rhel7_dt10 = {"stat", GDB_PATH_RHEL7_DT10};
- if (docker.executeCommandInContainerAsRoot(context, command_rhel7_dt10).getExitStatus() == 0) {
+ if (docker.executeCommandInContainerAsRoot(context, command_rhel7_dt10).getExitCode() == 0) {
return GDB_PATH_RHEL7_DT10;
}
@@ -60,12 +60,12 @@ public class CoreCollector {
Path readBinPathFallback(NodeAgentContext context, Path coredumpPath) {
String command = getGdbPath(context) + " -n -batch -core " + coredumpPath + " | grep \'^Core was generated by\'";
String[] wrappedCommand = {"/bin/sh", "-c", command};
- ProcessResult result = docker.executeCommandInContainerAsRoot(context, wrappedCommand);
+ CommandResult result = docker.executeCommandInContainerAsRoot(context, wrappedCommand);
Matcher matcher = CORE_GENERATOR_PATH_PATTERN.matcher(result.getOutput());
if (! matcher.find()) {
throw new ConvergenceException(String.format("Failed to extract binary path from GDB, result: %s, command: %s",
- result, Arrays.toString(wrappedCommand)));
+ asString(result), Arrays.toString(wrappedCommand)));
}
return Paths.get(matcher.group("path").split(" ")[0]);
}
@@ -73,9 +73,9 @@ public class CoreCollector {
Path readBinPath(NodeAgentContext context, Path coredumpPath) {
String[] command = {"file", coredumpPath.toString()};
try {
- ProcessResult result = docker.executeCommandInContainerAsRoot(context, command);
- if (result.getExitStatus() != 0) {
- throw new ConvergenceException("file command failed with " + result);
+ CommandResult result = docker.executeCommandInContainerAsRoot(context, command);
+ if (result.getExitCode() != 0) {
+ throw new ConvergenceException("file command failed with " + asString(result));
}
Matcher execfnMatcher = EXECFN_PATH_PATTERN.matcher(result.getOutput());
@@ -99,9 +99,9 @@ public class CoreCollector {
String threads = allThreads ? "thread apply all bt" : "bt";
String[] command = {getGdbPath(context), "-n", "-ex", threads, "-batch", binPath.toString(), coredumpPath.toString()};
- ProcessResult result = docker.executeCommandInContainerAsRoot(context, command);
- if (result.getExitStatus() != 0)
- throw new ConvergenceException("Failed to read backtrace " + result + ", Command: " + Arrays.toString(command));
+ CommandResult result = docker.executeCommandInContainerAsRoot(context, command);
+ if (result.getExitCode() != 0)
+ throw new ConvergenceException("Failed to read backtrace " + asString(result) + ", Command: " + Arrays.toString(command));
return List.of(result.getOutput().split("\n"));
}
@@ -109,9 +109,9 @@ public class CoreCollector {
List<String> readJstack(NodeAgentContext context, Path coredumpPath, Path binPath) {
String[] command = {"jhsdb", "jstack", "--exe", binPath.toString(), "--core", coredumpPath.toString()};
- ProcessResult result = docker.executeCommandInContainerAsRoot(context, command);
- if (result.getExitStatus() != 0)
- throw new ConvergenceException("Failed to read jstack " + result + ", Command: " + Arrays.toString(command));
+ CommandResult result = docker.executeCommandInContainerAsRoot(context, command);
+ if (result.getExitCode() != 0)
+ throw new ConvergenceException("Failed to read jstack " + asString(result) + ", Command: " + Arrays.toString(command));
return List.of(result.getOutput().split("\n"));
}
@@ -144,4 +144,9 @@ public class CoreCollector {
}
return data;
}
+
+ private String asString(CommandResult result) {
+ return "exit status " + result.getExitCode() + ", output '" + result.getOutput() + "'";
+ }
+
}
diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImpl.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImpl.java
index a41bfac57c0..b9855bf3bfc 100644
--- a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImpl.java
+++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImpl.java
@@ -263,7 +263,7 @@ public class NodeAgentImpl implements NodeAgent {
private Optional<String> shouldRestartServices( NodeAgentContext context, Container existingContainer) {
NodeSpec node = context.node();
- if (!existingContainer.state.isRunning() || node.state() != NodeState.active) return Optional.empty();
+ if (!existingContainer.state().isRunning() || node.state() != NodeState.active) return Optional.empty();
// Restart generation is only optional because it does not exist for unallocated nodes
if (currentRestartGeneration.get() < node.wantedRestartGeneration().get()) {
@@ -315,12 +315,12 @@ public class NodeAgentImpl implements NodeAgent {
reasons.add("Node in state " + nodeState + ", container should no longer be running");
if (context.node().wantedDockerImage().isPresent() &&
- !context.node().wantedDockerImage().get().equals(existingContainer.image)) {
+ !context.node().wantedDockerImage().get().equals(existingContainer.image())) {
reasons.add("The node is supposed to run a new Docker image: "
- + existingContainer.image.asString() + " -> " + context.node().wantedDockerImage().get().asString());
+ + existingContainer.image().asString() + " -> " + context.node().wantedDockerImage().get().asString());
}
- if (!existingContainer.state.isRunning())
+ if (!existingContainer.state().isRunning())
reasons.add("Container no longer running");
if (currentRebootGeneration < context.node().wantedRebootGeneration()) {
@@ -329,9 +329,9 @@ public class NodeAgentImpl implements NodeAgent {
}
ContainerResources wantedContainerResources = getContainerResources(context);
- if (!wantedContainerResources.equalsMemory(existingContainer.resources)) {
+ if (!wantedContainerResources.equalsMemory(existingContainer.resources())) {
reasons.add("Container should be running with different memory allocation, wanted: " +
- wantedContainerResources.toStringMemory() + ", actual: " + existingContainer.resources.toStringMemory());
+ wantedContainerResources.toStringMemory() + ", actual: " + existingContainer.resources().toStringMemory());
}
if (containerState == STARTING)
@@ -343,7 +343,7 @@ public class NodeAgentImpl implements NodeAgent {
private void removeContainer(NodeAgentContext context, Container existingContainer, List<String> reasons, boolean alreadySuspended) {
context.log(logger, "Will remove container: " + String.join(", ", reasons));
- if (existingContainer.state.isRunning()) {
+ if (existingContainer.state().isRunning()) {
if (!alreadySuspended) {
orchestratorSuspendNode(context);
}
@@ -373,12 +373,12 @@ public class NodeAgentImpl implements NodeAgent {
.orElse(true))
return existingContainer;
- if (wantedContainerResources.equalsCpu(existingContainer.resources)) return existingContainer;
+ if (wantedContainerResources.equalsCpu(existingContainer.resources())) return existingContainer;
context.log(logger, "Container should be running with different CPU allocation, wanted: %s, current: %s",
- wantedContainerResources.toStringCpu(), existingContainer.resources.toStringCpu());
+ wantedContainerResources.toStringCpu(), existingContainer.resources().toStringCpu());
// Only update CPU resources
- containerOperations.updateContainer(context, existingContainer.id(), wantedContainerResources.withMemoryBytes(existingContainer.resources.memoryBytes()));
+ containerOperations.updateContainer(context, existingContainer.id(), wantedContainerResources.withMemoryBytes(existingContainer.resources().memoryBytes()));
return containerOperations.getContainer(context).orElseThrow(() ->
new ConvergenceException("Did not find container that was just updated"));
}
@@ -402,7 +402,7 @@ public class NodeAgentImpl implements NodeAgent {
private boolean downloadImageIfNeeded(NodeAgentContext context, Optional<Container> container) {
NodeSpec node = context.node();
- if (node.wantedDockerImage().equals(container.map(c -> c.image))) return false;
+ if (node.wantedDockerImage().equals(container.map(c -> c.image()))) return false;
RegistryCredentials credentials = registryCredentialsProvider.get();
return node.wantedDockerImage()
@@ -480,7 +480,7 @@ public class NodeAgentImpl implements NodeAgent {
firstSuccessfulHealthCheckInstant = Optional.of(clock.instant());
Duration timeLeft = Duration.between(clock.instant(), firstSuccessfulHealthCheckInstant.get().plus(warmUpDuration(context)));
- if (!container.get().resources.equalsCpu(getContainerResources(context)))
+ if (!container.get().resources().equalsCpu(getContainerResources(context)))
throw new ConvergenceException("Refusing to resume until warm up period ends (" +
(timeLeft.isNegative() ? "next tick" : "in " + timeLeft) + ")");
}
diff --git a/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/container/ContainerEngineMock.java b/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/container/ContainerEngineMock.java
new file mode 100644
index 00000000000..1d077449ed6
--- /dev/null
+++ b/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/container/ContainerEngineMock.java
@@ -0,0 +1,206 @@
+// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.hosted.node.admin.container;
+
+import com.yahoo.config.provision.DockerImage;
+import com.yahoo.vespa.hosted.node.admin.component.TaskContext;
+import com.yahoo.vespa.hosted.node.admin.container.image.Image;
+import com.yahoo.vespa.hosted.node.admin.nodeagent.ContainerData;
+import com.yahoo.vespa.hosted.node.admin.nodeagent.NodeAgentContext;
+import com.yahoo.vespa.hosted.node.admin.task.util.process.CommandResult;
+
+import java.time.Duration;
+import java.util.List;
+import java.util.Map;
+import java.util.Objects;
+import java.util.Optional;
+import java.util.concurrent.ConcurrentHashMap;
+import java.util.concurrent.CountDownLatch;
+import java.util.stream.Collectors;
+
+/**
+ * @author mpolden
+ */
+public class ContainerEngineMock implements ContainerEngine {
+
+ private final Map<ContainerName, Container> containers = new ConcurrentHashMap<>();
+ private final Map<String, ImageDownload> images = new ConcurrentHashMap<>();
+ private boolean asyncImageDownload = false;
+
+ public ContainerEngineMock asyncImageDownload(boolean enabled) {
+ this.asyncImageDownload = enabled;
+ return this;
+ }
+
+ public ContainerEngineMock completeDownloadOf(DockerImage image) {
+ String imageId = image.asString();
+ ImageDownload download;
+ while ((download = images.get(imageId)) == null);
+ download.complete();
+ return this;
+ }
+
+ public ContainerEngineMock setImages(List<Image> images) {
+ this.images.clear();
+ for (var image : images) {
+ ImageDownload imageDownload = new ImageDownload(image);
+ imageDownload.complete();
+ this.images.put(image.id(), imageDownload);
+ }
+ return this;
+ }
+
+ public ContainerEngineMock addContainers(List<Container> containers) {
+ for (var container : containers) {
+ if (this.containers.containsKey(container.name())) {
+ throw new IllegalArgumentException("Container " + container.name() + " already exists");
+ }
+ this.containers.put(container.name(), container);
+ }
+ return this;
+ }
+
+ public ContainerEngineMock addContainer(Container container) {
+ return addContainers(List.of(container));
+ }
+
+ @Override
+ public void createContainer(NodeAgentContext context, ContainerData containerData, ContainerResources containerResources) {
+ addContainer(createContainer(context, PartialContainer.State.created, containerResources));
+ }
+
+ @Override
+ public void startContainer(NodeAgentContext context) {
+ Container container = requireContainer(context.containerName(), PartialContainer.State.created);
+ Container newContainer = createContainer(context, PartialContainer.State.running, container.resources());
+ containers.put(newContainer.name(), newContainer);
+ }
+
+ @Override
+ public void removeContainer(TaskContext context, PartialContainer container) {
+ requireContainer(container.name());
+ containers.remove(container.name());
+ }
+
+ @Override
+ public void updateContainer(NodeAgentContext context, ContainerId containerId, ContainerResources containerResources) {
+ Container container = requireContainer(context.containerName());
+ containers.put(container.name(), new Container(containerId, container.name(), container.state(),
+ container.imageId(), container.image(),
+ container.labels(), container.pid(),
+ container.conmonPid(), container.hostname(),
+ containerResources, container.networks(),
+ container.managed()));
+ }
+
+ @Override
+ public Optional<Container> getContainer(NodeAgentContext context) {
+ return Optional.ofNullable(containers.get(context.containerName()));
+ }
+
+ @Override
+ public List<PartialContainer> listContainers(TaskContext context) {
+ return List.copyOf(containers.values());
+ }
+
+ @Override
+ public String networkInterface(NodeAgentContext context) {
+ return "eth0";
+ }
+
+ @Override
+ public CommandResult executeAsRoot(NodeAgentContext context, Duration timeout, String... command) {
+ return new CommandResult(null, 0, "");
+ }
+
+ @Override
+ public CommandResult executeInNetworkNamespace(NodeAgentContext context, String... command) {
+ return new CommandResult(null, 0, "");
+ }
+
+ @Override
+ public void pullImage(TaskContext context, DockerImage image, RegistryCredentials registryCredentials) {
+ String imageId = image.asString();
+ ImageDownload imageDownload = images.computeIfAbsent(imageId, (ignored) -> new ImageDownload(new Image(imageId, Optional.empty(), List.of(imageId))));
+ if (!asyncImageDownload) {
+ imageDownload.complete();
+ }
+ imageDownload.awaitCompletion();
+ }
+
+ @Override
+ public boolean hasImage(TaskContext context, DockerImage image) {
+ ImageDownload download = images.get(image.asString());
+ return download != null && download.isComplete();
+ }
+
+ @Override
+ public void removeImage(TaskContext context, String id) {
+ images.remove(id);
+ }
+
+ @Override
+ public List<Image> listImages(TaskContext context) {
+ return images.values().stream()
+ .filter(ImageDownload::isComplete)
+ .map(ImageDownload::image)
+ .collect(Collectors.toUnmodifiableList());
+ }
+
+ private Container requireContainer(ContainerName name) {
+ return requireContainer(name, null);
+ }
+
+ private Container requireContainer(ContainerName name, PartialContainer.State wantedState) {
+ Container container = containers.get(name);
+ if (container == null) throw new IllegalArgumentException("No such container: " + name);
+ if (wantedState != null && container.state() != wantedState) throw new IllegalArgumentException("Container is " + container.state() + ", wanted " + wantedState);
+ return container;
+ }
+
+ public Container createContainer(NodeAgentContext context, PartialContainer.State state, ContainerResources containerResources) {
+ return new Container(new ContainerId("id-of-" + context.containerName()),
+ context.containerName(),
+ state,
+ "image-id",
+ context.node().wantedDockerImage().get(),
+ Map.of(),
+ 41,
+ 42,
+ context.hostname().value(),
+ containerResources,
+ List.of(),
+ true);
+ }
+
+ private static class ImageDownload {
+
+ private final Image image;
+ private final CountDownLatch done = new CountDownLatch(1);
+
+ ImageDownload(Image image) {
+ this.image = Objects.requireNonNull(image);
+ }
+
+ Image image() {
+ return image;
+ }
+
+ boolean isComplete() {
+ return done.getCount() == 0;
+ }
+
+ void complete() {
+ done.countDown();
+ }
+
+ void awaitCompletion() {
+ try {
+ done.await();
+ } catch (InterruptedException e) {
+ throw new RuntimeException(e);
+ }
+ }
+
+ }
+
+}
diff --git a/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/container/ContainerOperationsTest.java b/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/container/ContainerOperationsTest.java
new file mode 100644
index 00000000000..666c5fb31f9
--- /dev/null
+++ b/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/container/ContainerOperationsTest.java
@@ -0,0 +1,64 @@
+// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.hosted.node.admin.container;
+
+import com.yahoo.config.provision.DockerImage;
+import com.yahoo.vespa.hosted.node.admin.component.TestTaskContext;
+import com.yahoo.vespa.test.file.TestFileSystem;
+import org.junit.Test;
+
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+import java.util.stream.Collectors;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertTrue;
+
+/**
+ * @author mpolden
+ */
+public class ContainerOperationsTest {
+
+ private final TestTaskContext context = new TestTaskContext();
+ private final ContainerEngineMock containerEngine = new ContainerEngineMock();
+ private final ContainerOperations containerOperations = new ContainerOperations(containerEngine, TestFileSystem.create());
+
+ @Test
+ public void no_managed_containers_running() {
+ Container c1 = createContainer("c1", true);
+ Container c2 = createContainer("c2", false);
+
+ containerEngine.addContainer(c1);
+ assertFalse(containerOperations.noManagedContainersRunning(context));
+
+ containerEngine.removeContainer(context, c1);
+ assertTrue(containerOperations.noManagedContainersRunning(context));
+
+ containerEngine.addContainer(c2);
+ assertTrue(containerOperations.noManagedContainersRunning(context));
+ }
+
+ @Test
+ public void retain_managed_containers() {
+ Container c1 = createContainer("c1", true);
+ Container c2 = createContainer("c2", true);
+ Container c3 = createContainer("c3", false);
+ containerEngine.addContainers(List.of(c1, c2, c3));
+
+ assertEquals(3, containerEngine.listContainers(context).size());
+ containerOperations.retainManagedContainers(context, Set.of(c1.name()));
+
+ assertEquals(List.of(c1.name(), c3.name()), containerEngine.listContainers(context).stream()
+ .map(PartialContainer::name)
+ .sorted()
+ .collect(Collectors.toList()));
+ }
+
+ private Container createContainer(String name, boolean managed) {
+ return new Container(new ContainerId("id-of-" + name), new ContainerName(name), PartialContainer.State.running,
+ "image-id", DockerImage.EMPTY, Map.of(), 42, 43, name,
+ ContainerResources.UNLIMITED, List.of(), managed);
+ }
+
+}
diff --git a/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/container/ContainerStatsCollectorTest.java b/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/container/ContainerStatsCollectorTest.java
new file mode 100644
index 00000000000..0bb7aee8e0a
--- /dev/null
+++ b/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/container/ContainerStatsCollectorTest.java
@@ -0,0 +1,151 @@
+// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.hosted.node.admin.container;
+
+import com.yahoo.vespa.test.file.TestFileSystem;
+import org.junit.Test;
+
+import java.io.IOException;
+import java.nio.file.FileSystem;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.util.Map;
+import java.util.Optional;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+
+/**
+ * @author mpolden
+ */
+public class ContainerStatsCollectorTest {
+
+ private final FileSystem fileSystem = TestFileSystem.create();
+
+ @Test
+ public void collect() throws Exception {
+ ContainerStatsCollector collector = new ContainerStatsCollector(fileSystem);
+ ContainerId containerId = new ContainerId("id1");
+ int containerPid = 42;
+ assertTrue("No stats found", collector.collect(containerId, containerPid, "eth0").isEmpty());
+
+ mockMemoryStats(containerId);
+ mockCpuStats(containerId);
+ mockNetworkStats(containerPid);
+
+ Optional<ContainerStats> stats = collector.collect(containerId, containerPid, "eth0");
+ assertTrue(stats.isPresent());
+ assertEquals(new ContainerStats.CpuStats(24, 6049374780000000L, 691675615472L,
+ 262190000000L, 3L, 1L, 2L),
+ stats.get().getCpuStats());
+ assertEquals(new ContainerStats.MemoryStats(470790144L, 1228017664L, 2147483648L),
+ stats.get().getMemoryStats());
+ assertEquals(Map.of("eth0", new ContainerStats.NetworkStats(22280813L, 4L, 3L,
+ 19859383L, 6L, 5L)),
+ stats.get().getNetworks());
+ }
+
+ private void mockNetworkStats(int pid) throws IOException {
+ Path dev = fileSystem.getPath("/proc/" + pid + "/net/dev");
+ Files.createDirectories(dev.getParent());
+ Files.writeString(dev, "Inter-| Receive | Transmit\n" +
+ " face |bytes packets errs drop fifo frame compressed multicast|bytes packets errs drop fifo colls carrier compressed\n" +
+ " lo: 36289258 149700 0 0 0 0 0 0 36289258 149700 0 0 0 0 0 0\n" +
+ " eth0: 22280813 118083 3 4 0 0 0 0 19859383 115415 5 6 0 0 0 0\n");
+ }
+
+ private void mockMemoryStats(ContainerId containerId) throws IOException {
+ Path root = fileSystem.getPath("/sys/fs/cgroup/memory/machine.slice/libpod-" + containerId + ".scope");
+ Files.createDirectories(root);
+
+ Files.writeString(root.resolve("memory.limit_in_bytes"), "2147483648\n");
+ Files.writeString(root.resolve("memory.usage_in_bytes"), "1228017664\n");
+ Files.writeString(root.resolve("memory.stat"), "cache 470790144\n" +
+ "rss 698699776\n" +
+ "rss_huge 526385152\n" +
+ "shmem 0\n" +
+ "mapped_file 811008\n" +
+ "dirty 405504\n" +
+ "writeback 0\n" +
+ "swap 0\n" +
+ "pgpgin 6938085\n" +
+ "pgpgout 6780573\n" +
+ "pgfault 14343186\n" +
+ "pgmajfault 0\n" +
+ "inactive_anon 0\n" +
+ "active_anon 699289600\n" +
+ "inactive_file 455516160\n" +
+ "active_file 13787136\n" +
+ "unevictable 0\n" +
+ "hierarchical_memory_limit 2147483648\n" +
+ "hierarchical_memsw_limit 4294967296\n" +
+ "total_cache 470790144\n" +
+ "total_rss 698699776\n" +
+ "total_rss_huge 526385152\n" +
+ "total_shmem 0\n" +
+ "total_mapped_file 811008\n" +
+ "total_dirty 405504\n" +
+ "total_writeback 0\n" +
+ "total_swap 0\n" +
+ "total_pgpgin 6938085\n" +
+ "total_pgpgout 6780573\n" +
+ "total_pgfault 14343186\n" +
+ "total_pgmajfault 0\n" +
+ "total_inactive_anon 0\n" +
+ "total_active_anon 699289600\n" +
+ "total_inactive_file 455516160\n" +
+ "total_active_file 13787136\n" +
+ "total_unevictable 0\n");
+ }
+
+ private void mockCpuStats(ContainerId containerId) throws IOException {
+ Path root = fileSystem.getPath("/sys/fs/cgroup/cpuacct/machine.slice/libpod-" + containerId + ".scope");
+ Path proc = fileSystem.getPath("/proc");
+ Files.createDirectories(root);
+ Files.createDirectories(proc);
+ Files.writeString(root.resolve("cpu.stat"), "nr_periods 1\n" +
+ "nr_throttled 2\n" +
+ "throttled_time 3\n");
+ Files.writeString(root.resolve("cpuacct.usage_percpu"), "25801608855 22529436415 25293652376 26212081533 " +
+ "27545883290 25357818592 33464821448 32568003867 " +
+ "28916742231 31771772292 34418037242 38417072233 " +
+ "26069101127 24568838237 23683334366 26824607997 " +
+ "24289870206 22249389818 32683986446 32444831154 " +
+ "30488394217 26840956322 31633747261 30838696584\n");
+ Files.writeString(root.resolve("cpuacct.usage"), "691675615472\n");
+ Files.writeString(root.resolve("cpuacct.stat"), "user 40900\n" +
+ "system 26219\n");
+ Files.writeString(proc.resolve("stat"), "cpu 7991366 978222 2346238 565556517 1935450 25514479 615206 0 0 0\n" +
+ "cpu0 387906 61529 99088 23516506 42258 1063359 29882 0 0 0\n" +
+ "cpu1 271253 49383 86149 23655234 41703 1061416 31885 0 0 0\n" +
+ "cpu2 349420 50987 93560 23571695 59437 1051977 24461 0 0 0\n" +
+ "cpu3 328107 50628 93406 23605135 44378 1048549 30199 0 0 0\n" +
+ "cpu4 267474 50404 99253 23606041 113094 1038572 26494 0 0 0\n" +
+ "cpu5 309584 50677 94284 23550372 132616 1033661 29436 0 0 0\n" +
+ "cpu6 477926 56888 121251 23367023 83121 1074930 28818 0 0 0\n" +
+ "cpu7 335335 29350 106130 23551107 95606 1066394 26156 0 0 0\n" +
+ "cpu8 323678 28629 99171 23586501 82183 1064708 25403 0 0 0\n" +
+ "cpu9 329805 27516 98538 23579458 89235 1061561 25140 0 0 0\n" +
+ "cpu10 291536 26455 93934 23642345 81282 1049736 25228 0 0 0\n" +
+ "cpu11 271103 25302 90630 23663641 85711 1048781 24291 0 0 0\n" +
+ "cpu12 323634 63392 100406 23465340 132684 1089157 28319 0 0 0\n" +
+ "cpu13 348085 49568 100772 23490388 114190 1079474 20948 0 0 0\n" +
+ "cpu14 310712 51208 90461 23547980 101601 1071940 26712 0 0 0\n" +
+ "cpu15 360405 52754 94620 23524878 79851 1062050 26836 0 0 0\n" +
+ "cpu16 367893 52141 98074 23541314 57500 1058968 25242 0 0 0\n" +
+ "cpu17 412756 51486 101592 23515056 47653 1044874 27467 0 0 0\n" +
+ "cpu18 287307 25478 106011 23599505 79848 1089812 23160 0 0 0\n" +
+ "cpu19 275001 24421 98338 23628694 79675 1084074 22083 0 0 0\n" +
+ "cpu20 288038 24805 94432 23629908 74735 1078501 21915 0 0 0\n" +
+ "cpu21 295373 25017 91344 23628585 75282 1071019 22026 0 0 0\n" +
+ "cpu22 326739 25588 90385 23608217 69186 1068494 21108 0 0 0\n" +
+ "cpu23 452284 24602 104397 23481583 72612 1052462 21985 0 0 0\n" +
+ "intr 6645352968 64 0 0 0 1481 0 0 0 1 0 0 0 0 0 0 0 39 0 0 0 0 0 0 37 0 0 0 0 0 0 0 0 4334106 1 6949071 5814662 5415344 6939471 6961483 6358810 5271953 6718644 0 126114 126114 126114 126114 126114 126114 126114 126114 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n" +
+ "ctxt 2495530303\n" +
+ "btime 1611928223\n" +
+ "processes 4839481\n" +
+ "procs_running 4\n" +
+ "procs_blocked 0\n" +
+ "softirq 2202631388 4 20504999 46734 54405637 4330276 0 6951 1664780312 10130 458546345\n");
+ }
+
+}
diff --git a/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/container/ProcessResultTest.java b/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/container/ProcessResultTest.java
deleted file mode 100644
index f7b832bd566..00000000000
--- a/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/container/ProcessResultTest.java
+++ /dev/null
@@ -1,24 +0,0 @@
-// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
-package com.yahoo.vespa.hosted.node.admin.container;
-
-import org.junit.Test;
-
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertFalse;
-import static org.junit.Assert.assertTrue;
-
-public class ProcessResultTest {
- @Test
- public void testBasicProperties() {
- ProcessResult processResult = new ProcessResult(0, "foo", "bar");
- assertEquals(0, processResult.getExitStatus());
- assertEquals("foo", processResult.getOutput());
- assertTrue(processResult.isSuccess());
- }
-
- @Test
- public void testSuccessFails() {
- ProcessResult processResult = new ProcessResult(1, "foo", "bar");
- assertFalse(processResult.isSuccess());
- }
-}
diff --git a/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/container/image/ContainerImageDownloaderTest.java b/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/container/image/ContainerImageDownloaderTest.java
new file mode 100644
index 00000000000..f50f2b8b053
--- /dev/null
+++ b/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/container/image/ContainerImageDownloaderTest.java
@@ -0,0 +1,34 @@
+// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.hosted.node.admin.container.image;
+
+import com.yahoo.config.provision.DockerImage;
+import com.yahoo.vespa.hosted.node.admin.component.TaskContext;
+import com.yahoo.vespa.hosted.node.admin.component.TestTaskContext;
+import com.yahoo.vespa.hosted.node.admin.container.ContainerEngineMock;
+import com.yahoo.vespa.hosted.node.admin.container.RegistryCredentials;
+import org.junit.Test;
+
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertTrue;
+
+/**
+ * @author mpolden
+ */
+public class ContainerImageDownloaderTest {
+
+ @Test(timeout = 5_000)
+ public void test_download() {
+ ContainerEngineMock podman = new ContainerEngineMock().asyncImageDownload(true);
+ ContainerImageDownloader downloader = new ContainerImageDownloader(podman);
+ TaskContext context = new TestTaskContext();
+ DockerImage image = DockerImage.fromString("registry.example.com/vespa:7.42");
+
+ assertFalse("Download started", downloader.get(context, image, RegistryCredentials.none));
+ assertFalse("Download pending", downloader.get(context, image, RegistryCredentials.none));
+ podman.completeDownloadOf(image);
+ boolean downloadCompleted;
+ while (!(downloadCompleted = downloader.get(context, image, RegistryCredentials.none)));
+ assertTrue("Download completed", downloadCompleted);
+ }
+
+}
diff --git a/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/container/image/ContainerImagePrunerTest.java b/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/container/image/ContainerImagePrunerTest.java
new file mode 100644
index 00000000000..f6d941c4299
--- /dev/null
+++ b/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/container/image/ContainerImagePrunerTest.java
@@ -0,0 +1,238 @@
+// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.hosted.node.admin.container.image;
+
+import com.yahoo.config.provision.DockerImage;
+import com.yahoo.test.ManualClock;
+import com.yahoo.vespa.hosted.node.admin.component.TaskContext;
+import com.yahoo.vespa.hosted.node.admin.component.TestTaskContext;
+import com.yahoo.vespa.hosted.node.admin.container.Container;
+import com.yahoo.vespa.hosted.node.admin.container.ContainerEngineMock;
+import com.yahoo.vespa.hosted.node.admin.container.ContainerId;
+import com.yahoo.vespa.hosted.node.admin.container.ContainerName;
+import com.yahoo.vespa.hosted.node.admin.container.ContainerResources;
+import com.yahoo.vespa.hosted.node.admin.container.image.ContainerImagePruner;
+import com.yahoo.vespa.hosted.node.admin.container.image.Image;
+import org.junit.Test;
+
+import java.time.Duration;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Optional;
+
+import static org.junit.Assert.assertTrue;
+
+/**
+ * @author freva
+ * @author mpolden
+ */
+public class ContainerImagePrunerTest {
+
+ private final Tester tester = new Tester();
+
+ @Test
+ public void noImagesMeansNoUnusedImages() {
+ tester.withExistingImages()
+ .expectDeletedImages();
+ }
+
+ @Test
+ public void singleImageWithoutContainersIsUnused() {
+ tester.withExistingImages(image("image-1"))
+ // Even though nothing is using the image, we will keep it for at least 1h
+ .expectDeletedImagesAfterMinutes(0)
+ .expectDeletedImagesAfterMinutes(30)
+ .expectDeletedImagesAfterMinutes(30, "image-1");
+ }
+
+ @Test
+ public void singleImageWithContainerIsUsed() {
+ tester.withExistingImages(image("image-1"))
+ .withExistingContainers(container("container-1", "image-1"))
+ .expectDeletedImages();
+ }
+
+
+ @Test
+ public void multipleUnusedImagesAreIdentified() {
+ tester.withExistingImages(image("image-1"), image("image-2"))
+ .expectDeletedImages("image-1", "image-2");
+ }
+
+
+ @Test
+ public void multipleUnusedLeavesAreIdentified() {
+ tester.withExistingImages(image("parent-image"),
+ image("image-1", "parent-image"),
+ image("image-2", "parent-image"))
+ .expectDeletedImages("image-1", "image-2", "parent-image");
+ }
+
+
+ @Test
+ public void unusedLeafWithUsedSiblingIsIdentified() {
+ tester.withExistingImages(image("parent-image"),
+ image("image-1", "parent-image", "latest"),
+ image("image-2", "parent-image", "1.24"))
+ .withExistingContainers(container("vespa-node-1", "image-1"))
+ .expectDeletedImages("1.24"); // Deleting the only tag will delete the image
+ }
+
+
+ @Test
+ public void unusedImagesWithMultipleTags() {
+ tester.withExistingImages(image("parent-image"),
+ image("image-1", "parent-image", "vespa-6", "vespa-6.28", "vespa:latest"))
+ .expectDeletedImages("vespa-6", "vespa-6.28", "vespa:latest", "parent-image");
+ }
+
+
+ @Test
+ public void unusedImagesWithMultipleUntagged() {
+ tester.withExistingImages(image("image1", null, "<none>:<none>"),
+ image("image2", null, "<none>:<none>"))
+ .expectDeletedImages("image1", "image2");
+ }
+
+
+ @Test
+ public void taggedImageWithNoContainersIsUnused() {
+ tester.withExistingImages(image("image-1", null, "vespa-6"))
+ .expectDeletedImages("vespa-6");
+ }
+
+
+ @Test
+ public void unusedImagesWithSimpleImageGc() {
+ tester.withExistingImages(image("parent-image"))
+ .expectDeletedImagesAfterMinutes(30)
+ .withExistingImages(image("parent-image"),
+ image("image-1", "parent-image"))
+ .expectDeletedImagesAfterMinutes(0)
+ .expectDeletedImagesAfterMinutes(30)
+ // At this point, parent-image has been unused for 1h, but image-1 depends on parent-image and it has
+ // only been unused for 30m, so we cannot delete parent-image yet. 30 mins later both can be removed
+ .expectDeletedImagesAfterMinutes(30, "image-1", "parent-image");
+ }
+
+
+ @Test
+ public void reDownloadingImageIsNotImmediatelyDeleted() {
+ tester.withExistingImages(image("image"))
+ .expectDeletedImages("image") // After 1h we delete image
+ .expectDeletedImagesAfterMinutes(0) // image is immediately re-downloaded, but is not deleted
+ .expectDeletedImagesAfterMinutes(10)
+ .expectDeletedImages("image"); // 1h after re-download it is deleted again
+ }
+
+
+ @Test
+ public void reDownloadingImageIsNotImmediatelyDeletedWhenDeletingByTag() {
+ tester.withExistingImages(image("image", null, "image-1", "my-tag"))
+ .expectDeletedImages("image-1", "my-tag") // After 1h we delete image
+ .expectDeletedImagesAfterMinutes(0) // image is immediately re-downloaded, but is not deleted
+ .expectDeletedImagesAfterMinutes(10)
+ .expectDeletedImages("image-1", "my-tag"); // 1h after re-download it is deleted again
+ }
+
+ /** Same scenario as in {@link #multipleUnusedImagesAreIdentified()} */
+ @Test
+ public void doesNotDeleteExcludedByIdImages() {
+ tester.withExistingImages(image("parent-image"),
+ image("image-1", "parent-image"),
+ image("image-2", "parent-image"))
+ // Normally, image-1 and parent-image should also be deleted, but because we exclude image-1
+ // we cannot delete parent-image, so only image-2 is deleted
+ .expectDeletedImages(List.of("image-1"), "image-2");
+ }
+
+ /** Same as in {@link #doesNotDeleteExcludedByIdImages()} but with tags */
+ @Test
+ public void doesNotDeleteExcludedByTagImages() {
+ tester.withExistingImages(image("parent-image", "rhel-6"),
+ image("image-1", "parent-image", "vespa:6.288.16"),
+ image("image-2", "parent-image", "vespa:6.289.94"))
+ .expectDeletedImages(List.of("vespa:6.288.16"), "vespa:6.289.94");
+ }
+
+ @Test
+ public void exludingNotDownloadedImageIsNoop() {
+ tester.withExistingImages(image("parent-image", "rhel-6"),
+ image("image-1", "parent-image", "vespa:6.288.16"),
+ image("image-2", "parent-image", "vespa:6.289.94"))
+ .expectDeletedImages(List.of("vespa:6.300.1"), "vespa:6.288.16", "vespa:6.289.94", "rhel-6");
+ }
+
+ private static Image image(String id) {
+ return image(id, null);
+ }
+
+ private static Image image(String id, String parentId, String... tags) {
+ return new Image(id, Optional.ofNullable(parentId), List.of(tags));
+ }
+
+ private static Container container(String name, String imageId) {
+ return new Container(new ContainerId("id-of-" + name), new ContainerName(name),
+ Container.State.running, imageId, DockerImage.EMPTY, Map.of(),
+ 42, 43, name + ".example.com", ContainerResources.UNLIMITED,
+ List.of(), true);
+ }
+
+ private static class Tester {
+
+ private final ContainerEngineMock containerEngine = new ContainerEngineMock();
+ private final TaskContext context = new TestTaskContext();
+ private final ManualClock clock = new ManualClock();
+ private final ContainerImagePruner pruner = new ContainerImagePruner(containerEngine, clock);
+ private final Map<String, Integer> removalCountByImageId = new HashMap<>();
+
+ private boolean initialized = false;
+
+ private Tester withExistingImages(Image... images) {
+ containerEngine.setImages(List.of(images));
+ return this;
+ }
+
+ private Tester withExistingContainers(Container... containers) {
+ containerEngine.addContainers(List.of(containers));
+ return this;
+ }
+
+ private Tester expectDeletedImages(String... imageIds) {
+ return expectDeletedImagesAfterMinutes(60, imageIds);
+ }
+
+ private Tester expectDeletedImages(List<String> excludedRefs, String... imageIds) {
+ return expectDeletedImagesAfterMinutes(60, excludedRefs, imageIds);
+ }
+
+ private Tester expectDeletedImagesAfterMinutes(int minutesAfter, String... imageIds) {
+ return expectDeletedImagesAfterMinutes(minutesAfter, Collections.emptyList(), imageIds);
+ }
+
+ private Tester expectDeletedImagesAfterMinutes(int minutesAfter, List<String> excludedRefs, String... imageIds) {
+ if (!initialized) {
+ // Run once with a very long expiry to initialize internal state of existing images
+ pruner.removeUnusedImages(context, List.of(), Duration.ofDays(999));
+ initialized = true;
+ }
+
+ clock.advance(Duration.ofMinutes(minutesAfter));
+
+ pruner.removeUnusedImages(context, excludedRefs, Duration.ofHours(1).minusSeconds(1));
+
+ Arrays.stream(imageIds)
+ .forEach(imageId -> {
+ int newValue = removalCountByImageId.getOrDefault(imageId, 0) + 1;
+ removalCountByImageId.put(imageId, newValue);
+
+ assertTrue("Image " + imageId + " removed",
+ containerEngine.listImages(context).stream().noneMatch(image -> image.id().equals(imageId)));
+ });
+ return this;
+ }
+ }
+
+}
diff --git a/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/integration/ContainerOperationsMock.java b/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/integration/ContainerOperationsMock.java
deleted file mode 100644
index a19e031e41f..00000000000
--- a/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/integration/ContainerOperationsMock.java
+++ /dev/null
@@ -1,155 +0,0 @@
-// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
-package com.yahoo.vespa.hosted.node.admin.integration;
-
-import com.yahoo.config.provision.DockerImage;
-import com.yahoo.vespa.hosted.node.admin.container.Container;
-import com.yahoo.vespa.hosted.node.admin.container.ContainerId;
-import com.yahoo.vespa.hosted.node.admin.container.ContainerName;
-import com.yahoo.vespa.hosted.node.admin.container.ContainerResources;
-import com.yahoo.vespa.hosted.node.admin.container.ContainerStats;
-import com.yahoo.vespa.hosted.node.admin.container.ProcessResult;
-import com.yahoo.vespa.hosted.node.admin.container.RegistryCredentials;
-import com.yahoo.vespa.hosted.node.admin.component.TaskContext;
-import com.yahoo.vespa.hosted.node.admin.container.ContainerOperations;
-import com.yahoo.vespa.hosted.node.admin.nodeagent.ContainerData;
-import com.yahoo.vespa.hosted.node.admin.nodeagent.NodeAgentContext;
-import com.yahoo.vespa.hosted.node.admin.task.util.process.CommandResult;
-
-import java.time.Duration;
-import java.util.HashMap;
-import java.util.List;
-import java.util.Map;
-import java.util.Optional;
-import java.util.Set;
-
-/**
- * Mock with some simple logic
- *
- * @author freva
- */
-public class ContainerOperationsMock implements ContainerOperations {
-
- public static final ContainerId CONTAINER_ID = new ContainerId("af345");
-
- private final Map<ContainerName, Container> containersByContainerName = new HashMap<>();
- private final Object monitor = new Object();
-
- @Override
- public void createContainer(NodeAgentContext context, ContainerData containerData, ContainerResources containerResources) {
- synchronized (monitor) {
- containersByContainerName.put(context.containerName(),
- new Container(CONTAINER_ID,
- context.hostname().value(),
- context.node().wantedDockerImage().get(),
- containerResources,
- context.containerName(),
- Container.State.CREATED,
- 2));
- }
- }
-
- @Override
- public void startContainer(NodeAgentContext context) {
- synchronized (monitor) {
- Optional<Container> container = getContainer(context);
- if (container.isEmpty()) throw new IllegalArgumentException("Cannot start non-existent container " + context.containerName());
- containersByContainerName.put(context.containerName(), new Container(CONTAINER_ID,
- container.get().hostname,
- container.get().image,
- container.get().resources,
- container.get().name,
- Container.State.RUNNING,
- container.get().pid));
- }
- }
-
- @Override
- public void removeContainer(NodeAgentContext context, Container container) {
- synchronized (monitor) {
- containersByContainerName.remove(container.name);
- }
- }
-
- @Override
- public void updateContainer(NodeAgentContext context, ContainerId containerId, ContainerResources containerResources) {
- synchronized (monitor) {
- Container container = containersByContainerName.get(context.containerName());
- containersByContainerName.put(context.containerName(),
- new Container(container.id(), container.hostname, container.image,
- containerResources, container.name, container.state, container.pid));
- }
- }
-
- @Override
- public Optional<Container> getContainer(NodeAgentContext context) {
- synchronized (monitor) {
- return Optional.ofNullable(containersByContainerName.get(context.containerName()));
- }
- }
-
- @Override
- public boolean pullImageAsyncIfNeeded(TaskContext context, DockerImage dockerImage, RegistryCredentials registryCredentials) {
- return false;
- }
-
- @Override
- public ProcessResult executeCommandInContainerAsRoot(NodeAgentContext context, String... command) {
- return null;
- }
-
- @Override
- public ProcessResult executeCommandInContainerAsRoot(NodeAgentContext context, Long timeoutSeconds, String... command) {
- return null;
- }
-
- @Override
- public CommandResult executeCommandInNetworkNamespace(NodeAgentContext context, String... command) {
- return null;
- }
-
- @Override
- public String resumeNode(NodeAgentContext context) {
- return "";
- }
-
- @Override
- public String suspendNode(NodeAgentContext context) {
- return "";
- }
-
- @Override
- public String restartVespa(NodeAgentContext context) {
- return "";
- }
-
- @Override
- public String startServices(NodeAgentContext context) {
- return "";
- }
-
- @Override
- public String stopServices(NodeAgentContext context) {
- return "";
- }
-
- @Override
- public Optional<ContainerStats> getContainerStats(NodeAgentContext context) {
- return Optional.empty();
- }
-
- @Override
- public boolean noManagedContainersRunning(TaskContext context) {
- return false;
- }
-
- @Override
- public boolean retainManagedContainers(TaskContext context, Set<ContainerName> containerNames) {
- return false;
- }
-
- @Override
- public boolean deleteUnusedContainerImages(TaskContext context, List<DockerImage> excludes, Duration minImageAgeToDelete) {
- return false;
- }
-
-}
diff --git a/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/integration/ContainerTester.java b/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/integration/ContainerTester.java
index 9153afd8e54..4179f53370b 100644
--- a/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/integration/ContainerTester.java
+++ b/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/integration/ContainerTester.java
@@ -4,11 +4,13 @@ package com.yahoo.vespa.hosted.node.admin.integration;
import com.yahoo.config.provision.HostName;
import com.yahoo.config.provision.NodeType;
import com.yahoo.vespa.flags.InMemoryFlagSource;
+import com.yahoo.vespa.hosted.node.admin.configserver.noderepository.NodeSpec;
+import com.yahoo.vespa.hosted.node.admin.configserver.orchestrator.Orchestrator;
+import com.yahoo.vespa.hosted.node.admin.container.ContainerEngineMock;
import com.yahoo.vespa.hosted.node.admin.container.ContainerName;
+import com.yahoo.vespa.hosted.node.admin.container.ContainerOperations;
import com.yahoo.vespa.hosted.node.admin.container.RegistryCredentials;
import com.yahoo.vespa.hosted.node.admin.container.metrics.Metrics;
-import com.yahoo.vespa.hosted.node.admin.configserver.noderepository.NodeSpec;
-import com.yahoo.vespa.hosted.node.admin.configserver.orchestrator.Orchestrator;
import com.yahoo.vespa.hosted.node.admin.maintenance.StorageMaintainer;
import com.yahoo.vespa.hosted.node.admin.nodeadmin.NodeAdminImpl;
import com.yahoo.vespa.hosted.node.admin.nodeadmin.NodeAdminStateUpdater;
@@ -23,8 +25,6 @@ import org.mockito.InOrder;
import org.mockito.Mockito;
import java.nio.file.FileSystem;
-import java.nio.file.Path;
-import java.nio.file.Paths;
import java.time.Clock;
import java.time.Duration;
import java.util.Collections;
@@ -46,12 +46,11 @@ public class ContainerTester implements AutoCloseable {
private static final Logger log = Logger.getLogger(ContainerTester.class.getName());
private static final Duration INTERVAL = Duration.ofMillis(10);
- private static final Path PATH_TO_VESPA_HOME = Paths.get("/opt/vespa");
static final HostName HOST_HOSTNAME = HostName.from("host.test.yahoo.com");
private final Thread loopThread;
- final ContainerOperationsMock containerOperations = spy(new ContainerOperationsMock());
+ final ContainerOperations containerOperations = spy(new ContainerOperations(new ContainerEngineMock(), TestFileSystem.create()));
final NodeRepoMock nodeRepository = spy(new NodeRepoMock());
final Orchestrator orchestrator = mock(Orchestrator.class);
final StorageMaintainer storageMaintainer = mock(StorageMaintainer.class);
diff --git a/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/maintenance/coredump/CoreCollectorTest.java b/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/maintenance/coredump/CoreCollectorTest.java
index 3c22305d006..cfb9aaa4001 100644
--- a/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/maintenance/coredump/CoreCollectorTest.java
+++ b/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/maintenance/coredump/CoreCollectorTest.java
@@ -1,10 +1,10 @@
// Copyright 2018 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.vespa.hosted.node.admin.maintenance.coredump;
-import com.yahoo.vespa.hosted.node.admin.container.ProcessResult;
import com.yahoo.vespa.hosted.node.admin.container.ContainerOperations;
import com.yahoo.vespa.hosted.node.admin.nodeagent.NodeAgentContext;
import com.yahoo.vespa.hosted.node.admin.nodeagent.NodeAgentContextImpl;
+import com.yahoo.vespa.hosted.node.admin.task.util.process.CommandResult;
import org.junit.Test;
import java.nio.file.Path;
@@ -12,8 +12,8 @@ import java.nio.file.Paths;
import java.util.List;
import java.util.Map;
-import static com.yahoo.vespa.hosted.node.admin.maintenance.coredump.CoreCollector.GDB_PATH_RHEL7_DT9;
import static com.yahoo.vespa.hosted.node.admin.maintenance.coredump.CoreCollector.GDB_PATH_RHEL7_DT10;
+import static com.yahoo.vespa.hosted.node.admin.maintenance.coredump.CoreCollector.GDB_PATH_RHEL7_DT9;
import static com.yahoo.vespa.hosted.node.admin.maintenance.coredump.CoreCollector.GDB_PATH_RHEL8;
import static com.yahoo.vespa.hosted.node.admin.maintenance.coredump.CoreCollector.JAVA_HEAP_DUMP_METADATA;
import static org.junit.Assert.assertEquals;
@@ -91,7 +91,7 @@ public class CoreCollectorTest {
coreCollector.readBinPathFallback(context, TEST_CORE_PATH);
fail("Expected not to be able to get bin path");
} catch (RuntimeException e) {
- assertEquals("Failed to extract binary path from GDB, result: ProcessResult { exitStatus=1 output= errors=Error 123 }, command: " +
+ assertEquals("Failed to extract binary path from GDB, result: exit status 1, output 'Error 123', command: " +
"[/bin/sh, -c, /opt/rh/devtoolset-10/root/bin/gdb -n -batch -core /tmp/core.1234 | grep '^Core was generated by']", e.getMessage());
}
}
@@ -110,7 +110,7 @@ public class CoreCollectorTest {
coreCollector.readBacktrace(context, TEST_CORE_PATH, TEST_BIN_PATH, false);
fail("Expected not to be able to read backtrace");
} catch (RuntimeException e) {
- assertEquals("Failed to read backtrace ProcessResult { exitStatus=1 output= errors=Failure }, Command: " +
+ assertEquals("Failed to read backtrace exit status 1, output 'Failure', Command: " +
"[" + GDB_PATH_RHEL7_DT9 + ", -n, -ex, bt, -batch, /usr/bin/program, /tmp/core.1234]", e.getMessage());
}
}
@@ -194,6 +194,6 @@ public class CoreCollectorTest {
private void mockExec(NodeAgentContext context, String[] cmd, String output, String error) {
when(docker.executeCommandInContainerAsRoot(context, cmd))
- .thenReturn(new ProcessResult(error.isEmpty() ? 0 : 1, output, error));
+ .thenReturn(new CommandResult(null, error.isEmpty() ? 0 : 1, error.isEmpty() ? output : error));
}
}
diff --git a/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImplTest.java b/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImplTest.java
index 454374fe3bd..f2b761b614d 100644
--- a/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImplTest.java
+++ b/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImplTest.java
@@ -33,6 +33,7 @@ import org.mockito.InOrder;
import java.time.Duration;
import java.time.Instant;
import java.util.List;
+import java.util.Map;
import java.util.Optional;
import static org.junit.Assert.assertEquals;
@@ -762,12 +763,17 @@ public class NodeAgentImplTest {
return dockerImage != null ?
Optional.of(new Container(
containerId,
- hostName,
+ ContainerName.fromHostname(hostName),
+ isRunning ? Container.State.running : Container.State.exited,
+ "image-id-1",
dockerImage,
+ Map.of(),
+ 42,
+ 43,
+ hostName,
containerResources,
- ContainerName.fromHostname(hostName),
- isRunning ? Container.State.RUNNING : Container.State.EXITED,
- isRunning ? 1 : 0)) :
+ List.of(),
+ true)) :
Optional.empty();
}).when(containerOperations).getContainer(any());
}