From 37a5f65f3ee16fab0e3b743eef385a4e2ddb34e1 Mon Sep 17 00:00:00 2001 From: Valerij Fredriksen Date: Thu, 20 Sep 2018 13:59:05 +0200 Subject: Update DockerImageGC to spare given list of images --- .../dockerapi/DockerImageGarbageCollector.java | 186 ++++++++++++++------- .../DockerImageGarbageCollectionTest.java | 184 +++++++++++++------- 2 files changed, 245 insertions(+), 125 deletions(-) (limited to 'docker-api/src') diff --git a/docker-api/src/main/java/com/yahoo/vespa/hosted/dockerapi/DockerImageGarbageCollector.java b/docker-api/src/main/java/com/yahoo/vespa/hosted/dockerapi/DockerImageGarbageCollector.java index 6e728972da9..e4edcf91811 100644 --- a/docker-api/src/main/java/com/yahoo/vespa/hosted/dockerapi/DockerImageGarbageCollector.java +++ b/docker-api/src/main/java/com/yahoo/vespa/hosted/dockerapi/DockerImageGarbageCollector.java @@ -3,107 +3,171 @@ package com.yahoo.vespa.hosted.dockerapi; import com.github.dockerjava.api.model.Image; import com.github.dockerjava.api.model.Container; +import com.google.common.base.Strings; +import com.yahoo.collections.Pair; +import java.time.Clock; import java.time.Duration; import java.time.Instant; -import java.util.HashMap; +import java.util.Collections; +import java.util.HashSet; import java.util.List; import java.util.Map; +import java.util.Optional; +import java.util.Set; import java.util.concurrent.ConcurrentHashMap; +import java.util.function.Function; +import java.util.logging.Logger; import java.util.stream.Collectors; import java.util.stream.Stream; /** + * This class keeps track of downloaded docker images and helps delete images that have not been recently used + * + *

Definitions: + *

+ * + *

Limitations: + *

    + *
  1. Image that has more than 1 tag cannot be deleted by ID
  2. + *
  3. Deleting a tag of an image with multiple tags will only remove the tag, the image with the + * remaining tags will remain
  4. + *
  5. Deleting the last tag of an image will delete the entire image.
  6. + *
  7. Image cannot be deleted if:
  8. + *
      + *
    1. It has 1 or more children
    2. + *
    3. A container uses it
    4. + *
    + *
+ * * @author freva */ -public class DockerImageGarbageCollector { - private final Duration minAgeImageGc; - private final Map lastTimeUsedByImageId = new ConcurrentHashMap<>(); +class DockerImageGarbageCollector { + private static final Logger logger = Logger.getLogger(DockerImageGarbageCollector.class.getName()); - public DockerImageGarbageCollector(Duration minAgeImageToDelete) { - minAgeImageGc = minAgeImageToDelete; - } + private final Map lastTimeUsedByImageId = new ConcurrentHashMap<>(); + private final DockerImpl docker; + private final Clock clock; - public void updateLastUsedTimeFor(String imageId) { - updateLastUsedTimeFor(imageId, Instant.now()); + DockerImageGarbageCollector(DockerImpl docker) { + this(docker, Clock.systemUTC()); } - void updateLastUsedTimeFor(String imageId, Instant at) { - lastTimeUsedByImageId.put(imageId, at); + DockerImageGarbageCollector(DockerImpl docker, Clock clock) { + this.docker = docker; + this.clock = clock; } /** - * Generates lists of images that are safe to delete, in the order that is safe to delete them (children before - * parents). The function starts with the set of all local images and then filters out images that are used now - * and images that have been used recently (because they might be re-used again in near future). + * This method must be called frequently enough to see all containers to know which images are being used * - * @param images List of all the local images - * @param containers List of all the containers, including the ones that are stopped - * @return List of image tags of unused images, if unused image has no tag, will return image ID instead. + * @param excludes List of images (by tag or id) that should not be deleted regardless of their used status + * @param minImageAgeToDelete Minimum duration after which an image can be removed if it has not been used + * @return true iff at least 1 image was deleted */ - public List getUnusedDockerImages(List images, List containers) { - Map dockerImageByImageId = images.stream().collect(Collectors.toMap(Image::getId, img -> img)); - Map unusedImagesByContainers = filterOutImagesUsedByContainers(dockerImageByImageId, containers); - Map unusedImagesByRecent = filterOutRecentImages(unusedImagesByContainers); + boolean deleteUnusedDockerImages(List excludes, Duration minImageAgeToDelete) { + List images = docker.listAllImages(); + List containers = docker.listAllContainers(); + + Map imageByImageId = images.stream().collect(Collectors.toMap(Image::getId, Function.identity())); + Map> ancestorsByImageId = images.stream() + .map(Image::getId) + .collect(Collectors.toMap( + Function.identity(), + imageId -> { + Set ancestors = new HashSet<>(); + while (!Strings.isNullOrEmpty(imageId)) { + ancestors.add(imageId); + imageId = Optional.of(imageId).map(imageByImageId::get).map(Image::getParentId).orElse(null); + } + return ancestors; + } + )); + + // The set of images that we want to keep is: + // 1. The images that were recently used + // 2. The images that were explicitly excluded + // 3. All of the ancestors of from images in 1 & 2 + Set imagesToKeep = Stream + .concat( + getRecentlyUsedImageIds(images, containers, minImageAgeToDelete).stream(), // 1 + dockerImageToImageIds(excludes, images).stream()) // 2 + .flatMap(imageId -> ancestorsByImageId.getOrDefault(imageId, Collections.emptySet()).stream()) // 3 + .collect(Collectors.toSet()); - return unusedImagesByRecent.keySet().stream() + // Now take all the images we have locally + return imageByImageId.keySet().stream() + + // filter out images we want to keep + .filter(imageId -> !imagesToKeep.contains(imageId)) + + // Sort images in an order is safe to delete (children before parents) .sorted((o1, o2) -> { // If image2 is parent of image1, image1 comes before image2 - if (imageIsDescendantOf(unusedImagesByRecent, o1, o2)) return -1; + if (imageIsDescendantOf(imageByImageId, o1, o2)) return -1; // If image1 is parent of image2, image2 comes before image1 - else if (imageIsDescendantOf(unusedImagesByRecent, o2, o1)) return 1; + else if (imageIsDescendantOf(imageByImageId, o2, o1)) return 1; // Otherwise, sort lexicographically by image name (For testing) else return o1.compareTo(o2); }) + + // Map image IDs to tags if there are any .flatMap(imageId -> { // Deleting an image by image ID with multiple tags will fail -> map IDs to all the tags referring to the ID - String[] repoTags = unusedImagesByRecent.get(imageId).getRepoTags(); - return (repoTags == null) ? Stream.of(imageId) : Stream.of(repoTags); + String[] repoTags = imageByImageId.get(imageId).getRepoTags(); + return repoTags == null ? Stream.of(imageId) : Stream.of(repoTags); }) - .map(DockerImage::new) - .collect(Collectors.toList()); - } - private Map filterOutImagesUsedByContainers( - Map dockerImagesByImageId, List containerList) { - Map filteredDockerImagesByImageId = new HashMap<>(dockerImagesByImageId); - - for (com.github.dockerjava.api.model.Container container : containerList) { - String imageToSpare = container.getImageId(); - do { - // May be null if two images have have the same parent, the first image will remove the parent, the - // second will get null. - Image sparedImage = filteredDockerImagesByImageId.remove(imageToSpare); - imageToSpare = sparedImage == null ? "" : sparedImage.getParentId(); - } while (!imageToSpare.isEmpty()); - } - - return filteredDockerImagesByImageId; + // Delete image, if successful also remove last usage time to prevent re-download being instantly deleted + .peek(image -> { + logger.info("Deleting unused docker image " + image); + docker.deleteImage(new DockerImage(image)); + lastTimeUsedByImageId.remove(image); + }) + .count() > 0; } - private Map filterOutRecentImages(Map dockerImageByImageId) { - Map filteredDockerImagesByImageId = new HashMap<>(dockerImageByImageId); + private Set getRecentlyUsedImageIds(List images, List containers, Duration minImageAgeToDelete) { + final Instant now = clock.instant(); - final Instant now = Instant.now(); - filteredDockerImagesByImageId.keySet().forEach(imageId -> { - if (! lastTimeUsedByImageId.containsKey(imageId)) lastTimeUsedByImageId.put(imageId, now); - }); + // Add any already downloaded image to the list once + images.forEach(image -> lastTimeUsedByImageId.putIfAbsent(image.getId(), now)); - lastTimeUsedByImageId.entrySet().stream() - .filter(entry -> Duration.between(entry.getValue(), now).minus(minAgeImageGc).isNegative()) + // Update last used time for all current containers + containers.forEach(container -> lastTimeUsedByImageId.put(container.getImageId(), now)); + + // Return list of images that have been used within minImageAgeToDelete + return lastTimeUsedByImageId.entrySet().stream() + .filter(entry -> Duration.between(entry.getValue(), now).minus(minImageAgeToDelete).isNegative()) .map(Map.Entry::getKey) - .forEach(image -> { - String imageToSpare = image; - do { - Image sparedImage = filteredDockerImagesByImageId.remove(imageToSpare); - imageToSpare = sparedImage == null ? "" : sparedImage.getParentId(); - } while (!imageToSpare.isEmpty()); - }); - return filteredDockerImagesByImageId; + .collect(Collectors.toSet()); + } + + /** + * Attemps to make dockerImages which may be image tags or image ids to image ids. This only works + * if the given tag is actually present locally. This is fine, because if it isn't - we can't delete + * it, so no harm done. + */ + private Set dockerImageToImageIds(List dockerImages, List images) { + Map imageIdByImageTag = images.stream() + .flatMap(image -> Optional.ofNullable(image.getRepoTags()) + .map(Stream::of) + .orElseGet(Stream::empty) + .map(repoTag -> new Pair<>(repoTag, image.getId()))) + .collect(Collectors.toMap(Pair::getFirst, Pair::getSecond)); + + return dockerImages.stream() + .map(DockerImage::asString) + .map(tag -> imageIdByImageTag.getOrDefault(tag, tag)) + .collect(Collectors.toSet()); } /** - * Returns true if ancestor is a parent or grand-parent or grand-grand-parent, etc. of img + * @return true if ancestor is a parent or grand-parent or grand-grand-parent, etc. of img */ private boolean imageIsDescendantOf(Map imageIdToImage, String img, String ancestor) { while (imageIdToImage.containsKey(img)) { diff --git a/docker-api/src/test/java/com/yahoo/vespa/hosted/dockerapi/DockerImageGarbageCollectionTest.java b/docker-api/src/test/java/com/yahoo/vespa/hosted/dockerapi/DockerImageGarbageCollectionTest.java index a78c9280e4e..5287d2cb45d 100644 --- a/docker-api/src/test/java/com/yahoo/vespa/hosted/dockerapi/DockerImageGarbageCollectionTest.java +++ b/docker-api/src/test/java/com/yahoo/vespa/hosted/dockerapi/DockerImageGarbageCollectionTest.java @@ -5,145 +5,206 @@ import com.fasterxml.jackson.annotation.JsonProperty; import com.fasterxml.jackson.core.JsonProcessingException; import com.fasterxml.jackson.databind.ObjectMapper; import com.github.dockerjava.api.model.Image; +import com.yahoo.test.ManualClock; import org.junit.Test; import java.io.IOException; import java.time.Duration; -import java.time.Instant; import java.util.Arrays; import java.util.Collections; +import java.util.HashMap; import java.util.List; +import java.util.Map; import java.util.stream.Collectors; -import static org.hamcrest.CoreMatchers.is; -import static org.junit.Assert.assertThat; +import static org.mockito.Matchers.any; +import static org.mockito.Matchers.eq; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.times; +import static org.mockito.Mockito.verify; +import static org.mockito.Mockito.when; /** * @author freva */ public class DockerImageGarbageCollectionTest { + + private final ImageGcTester gcTester = new ImageGcTester(); + @Test public void noImagesMeansNoUnusedImages() { - new ImageGcTester(0) - .withExistingImages() - .expectUnusedImages(); + gcTester.withExistingImages() + .expectDeletedImages(); } @Test public void singleImageWithoutContainersIsUnused() { - new ImageGcTester(0) - .withExistingImages(new ImageBuilder("image-1")) - .expectUnusedImages("image-1"); + gcTester.withExistingImages(new ImageBuilder("image-1")) + // Even though nothing is using the image, we will keep it for at least 1h + .expectDeletedImagesAfterMinutes(0) + .expectDeletedImagesAfterMinutes(30) + .expectDeletedImagesAfterMinutes(30, "image-1"); } @Test public void singleImageWithContainerIsUsed() { - new ImageGcTester(0) - .withExistingImages(ImageBuilder.forId("image-1")) + gcTester.withExistingImages(ImageBuilder.forId("image-1")) .andExistingContainers(ContainerBuilder.forId("container-1").withImageId("image-1")) - .expectUnusedImages(); + .expectDeletedImages(); } @Test public void multipleUnusedImagesAreIdentified() { - new ImageGcTester(0) - .withExistingImages( + gcTester.withExistingImages( ImageBuilder.forId("image-1"), ImageBuilder.forId("image-2")) - .expectUnusedImages("image-1", "image-2"); + .expectDeletedImages("image-1", "image-2"); } @Test public void multipleUnusedLeavesAreIdentified() { - new ImageGcTester(0) - .withExistingImages( + gcTester.withExistingImages( ImageBuilder.forId("parent-image"), ImageBuilder.forId("image-1").withParentId("parent-image"), ImageBuilder.forId("image-2").withParentId("parent-image")) - .expectUnusedImages("image-1", "image-2", "parent-image"); + .expectDeletedImages("image-1", "image-2", "parent-image"); } @Test public void unusedLeafWithUsedSiblingIsIdentified() { - new ImageGcTester(0) - .withExistingImages( + gcTester.withExistingImages( ImageBuilder.forId("parent-image"), ImageBuilder.forId("image-1").withParentId("parent-image").withTags("latest"), ImageBuilder.forId("image-2").withParentId("parent-image").withTags("1.24")) .andExistingContainers(ContainerBuilder.forId("vespa-node-1").withImageId("image-1")) - .expectUnusedImages("1.24"); + .expectDeletedImages("1.24"); // Deleting the only tag will delete the image } @Test public void unusedImagesWithMultipleTags() { - new ImageGcTester(0) - .withExistingImages( + gcTester.withExistingImages( ImageBuilder.forId("parent-image"), ImageBuilder.forId("image-1").withParentId("parent-image") .withTags("vespa-6", "vespa-6.28", "vespa:latest")) - .expectUnusedImages("vespa-6", "vespa-6.28", "vespa:latest", "parent-image"); + .expectDeletedImages("vespa-6", "vespa-6.28", "vespa:latest", "parent-image"); } @Test public void taggedImageWithNoContainersIsUnused() { - new ImageGcTester(0) - .withExistingImages(ImageBuilder.forId("image-1").withTags("vespa-6")) - .expectUnusedImages("vespa-6"); + gcTester.withExistingImages(ImageBuilder.forId("image-1").withTags("vespa-6")) + .expectDeletedImages("vespa-6"); } @Test public void unusedImagesWithSimpleImageGc() { - new ImageGcTester(20) + gcTester.withExistingImages(ImageBuilder.forId("parent-image")) + .expectDeletedImagesAfterMinutes(30) .withExistingImages( - ImageBuilder.forId("parent-image").withLastUsedMinutesAgo(25), - ImageBuilder.forId("image-1").withParentId("parent-image").withLastUsedMinutesAgo(5)) - .expectUnusedImages(); + ImageBuilder.forId("parent-image"), + ImageBuilder.forId("image-1").withParentId("parent-image")) + .expectDeletedImagesAfterMinutes(0) + .expectDeletedImagesAfterMinutes(30) + // At this point, parent-image has been unused for 1h, but image-1 depends on parent-image and it has + // only been unused for 30m, so we cannot delete parent-image yet. 30 mins later both can be removed + .expectDeletedImagesAfterMinutes(30, "image-1", "parent-image"); } @Test - public void unusedImagesWithImageGc() { - new ImageGcTester(20) - .withExistingImages( - ImageBuilder.forId("parent-1").withLastUsedMinutesAgo(40), - ImageBuilder.forId("parent-2").withTags("p-tag:1").withLastUsedMinutesAgo(10), - ImageBuilder.forId("image-1-1").withParentId("parent-1").withTags("i-tag:1", "i-tag:2", "i-tag-3").withLastUsedMinutesAgo(5), - ImageBuilder.forId("image-1-2").withParentId("parent-1").withLastUsedMinutesAgo(25), - ImageBuilder.forId("image-2-1").withParentId("parent-2").withTags("i-tag:4").withLastUsedMinutesAgo(30)) - .andExistingContainers( - ContainerBuilder.forId("cont-1").withImageId("image-1-1")) - .expectUnusedImages("image-1-2", "i-tag:4"); + public void reDownloadingImageIsNotImmediatelyDeleted() { + gcTester.withExistingImages(ImageBuilder.forId("image")) + .expectDeletedImages("image") // After 1h we delete image + .expectDeletedImagesAfterMinutes(0) // image is immediately re-downloaded, but is not deleted + .expectDeletedImagesAfterMinutes(10) + .expectDeletedImages("image"); // 1h after re-download it is deleted again } - private static class ImageGcTester { - private static DockerImageGarbageCollector imageGC; - private List existingImages = Collections.emptyList(); - private List existingContainers = Collections.emptyList(); + /** Same scenario as in {@link #multipleUnusedImagesAreIdentified()} */ + @Test + public void doesNotDeleteExcludedByIdImages() { + gcTester.withExistingImages( + ImageBuilder.forId("parent-image"), + ImageBuilder.forId("image-1").withParentId("parent-image"), + ImageBuilder.forId("image-2").withParentId("parent-image")) + // Normally, image-1 and parent-image should also be deleted, but because we exclude image-1 + // we cannot delete parent-image, so only image-2 is deleted + .expectDeletedImages(Collections.singletonList("image-1"), "image-2"); + } - private ImageGcTester(int imageGcMinTimeInMinutes) { - imageGC = new DockerImageGarbageCollector(Duration.ofMinutes(imageGcMinTimeInMinutes)); - } + /** Same as in {@link #doesNotDeleteExcludedByIdImages()} but with tags */ + @Test + public void doesNotDeleteExcludedByTagImages() { + gcTester.withExistingImages( + ImageBuilder.forId("parent-image").withTags("rhel-6"), + ImageBuilder.forId("image-1").withParentId("parent-image").withTags("vespa:6.288.16"), + ImageBuilder.forId("image-2").withParentId("parent-image").withTags("vespa:6.289.94")) + .expectDeletedImages(Collections.singletonList("vespa:6.288.16"), "vespa:6.289.94"); + } - private ImageGcTester withExistingImages(final ImageBuilder... images) { - this.existingImages = Arrays.stream(images) + @Test + public void exludingNotDownloadedImageIsNoop() { + gcTester.withExistingImages( + ImageBuilder.forId("parent-image").withTags("rhel-6"), + ImageBuilder.forId("image-1").withParentId("parent-image").withTags("vespa:6.288.16"), + ImageBuilder.forId("image-2").withParentId("parent-image").withTags("vespa:6.289.94")) + .expectDeletedImages(Collections.singletonList("vespa:6.300.1"), "vespa:6.288.16", "vespa:6.289.94", "rhel-6"); + } + + private class ImageGcTester { + private final DockerImpl docker = mock(DockerImpl.class); + private final ManualClock clock = new ManualClock(); + private final DockerImageGarbageCollector imageGC = new DockerImageGarbageCollector(docker, clock); + private final Map numDeletes = new HashMap<>(); + private boolean initialized = false; + + private ImageGcTester withExistingImages(ImageBuilder... images) { + when(docker.listAllImages()).thenReturn(Arrays.stream(images) .map(ImageBuilder::toImage) - .collect(Collectors.toList()); + .collect(Collectors.toList())); return this; } - private ImageGcTester andExistingContainers(final ContainerBuilder... containers) { - this.existingContainers = Arrays.stream(containers) + private ImageGcTester andExistingContainers(ContainerBuilder... containers) { + when(docker.listAllContainers()).thenReturn(Arrays.stream(containers) .map(ContainerBuilder::toContainer) - .collect(Collectors.toList()); + .collect(Collectors.toList())); return this; } - private void expectUnusedImages(final String... imageIds) { - final List expectedUnusedImages = Arrays.stream(imageIds) + private ImageGcTester expectDeletedImages(String... imageIds) { + return expectDeletedImagesAfterMinutes(60, imageIds); + } + + private ImageGcTester expectDeletedImages(List except, String... imageIds) { + return expectDeletedImagesAfterMinutes(60, except, imageIds); + } + private ImageGcTester expectDeletedImagesAfterMinutes(int minutesAfter, String... imageIds) { + return expectDeletedImagesAfterMinutes(minutesAfter, Collections.emptyList(), imageIds); + } + + private ImageGcTester expectDeletedImagesAfterMinutes(int minutesAfter, List except, String... imageIds) { + if (!initialized) { + // Run once with a very long expiry to initialize internal state of existing images + imageGC.deleteUnusedDockerImages(Collections.emptyList(), Duration.ofDays(999)); + initialized = true; + } + + clock.advance(Duration.ofMinutes(minutesAfter)); + + imageGC.deleteUnusedDockerImages( + except.stream().map(DockerImage::new).collect(Collectors.toList()), + Duration.ofHours(1).minusSeconds(1)); + + Arrays.stream(imageIds) .map(DockerImage::new) - .collect(Collectors.toList()); + .forEach(image -> { + int newValue = numDeletes.getOrDefault(image, 0) + 1; + numDeletes.put(image, newValue); + verify(docker, times(newValue)).deleteImage(eq(image)); + }); - assertThat(imageGC.getUnusedDockerImages(existingImages, existingContainers), is(expectedUnusedImages)); + verify(docker, times(numDeletes.values().stream().mapToInt(i -> i).sum())).deleteImage(any()); + return this; } } @@ -185,11 +246,6 @@ public class DockerImageGarbageCollectionTest { private static ImageBuilder forId(String id) { return new ImageBuilder(id); } private ImageBuilder withParentId(String parentId) { this.parentId = parentId; return this; } private ImageBuilder withTags(String... tags) { this.repoTags = tags; return this; } - private ImageBuilder withLastUsedMinutesAgo(int minutesAgo) { - ImageGcTester.imageGC.updateLastUsedTimeFor(id, Instant.now().minus(Duration.ofMinutes(minutesAgo))); - return this; - } - private Image toImage() { return createFrom(Image.class, this); } } -- cgit v1.2.3