diff options
10 files changed, 111 insertions, 113 deletions
diff --git a/configserver/src/main/java/com/yahoo/vespa/config/server/filedistribution/FileServer.java b/configserver/src/main/java/com/yahoo/vespa/config/server/filedistribution/FileServer.java index 57d57d16d2f..dcd2720ae3e 100644 --- a/configserver/src/main/java/com/yahoo/vespa/config/server/filedistribution/FileServer.java +++ b/configserver/src/main/java/com/yahoo/vespa/config/server/filedistribution/FileServer.java @@ -12,7 +12,6 @@ import com.yahoo.jrt.StringValue; import com.yahoo.jrt.Supervisor; import com.yahoo.jrt.Transport; import com.yahoo.vespa.config.ConnectionPool; -import com.yahoo.vespa.filedistribution.EmptyFileReferenceData; import com.yahoo.vespa.filedistribution.FileDistributionConnectionPool; import com.yahoo.vespa.filedistribution.FileDownloader; import com.yahoo.vespa.filedistribution.FileReferenceCompressor; @@ -20,9 +19,9 @@ import com.yahoo.vespa.filedistribution.FileReferenceData; import com.yahoo.vespa.filedistribution.FileReferenceDownload; import com.yahoo.vespa.filedistribution.LazyFileReferenceData; import com.yahoo.vespa.filedistribution.LazyTemporaryStorageFileReferenceData; -import com.yahoo.yolean.Exceptions; import java.io.File; import java.io.IOException; +import java.io.UncheckedIOException; import java.nio.file.Files; import java.nio.file.Path; import java.time.Duration; @@ -35,10 +34,15 @@ import java.util.logging.Level; import java.util.logging.Logger; import static com.yahoo.vespa.config.server.filedistribution.FileDistributionUtil.getOtherConfigServersInCluster; +import static com.yahoo.vespa.config.server.filedistribution.FileServer.FileApiErrorCodes.NOT_FOUND; +import static com.yahoo.vespa.config.server.filedistribution.FileServer.FileApiErrorCodes.OK; +import static com.yahoo.vespa.config.server.filedistribution.FileServer.FileApiErrorCodes.TIMEOUT; +import static com.yahoo.vespa.config.server.filedistribution.FileServer.FileApiErrorCodes.TRANSFER_FAILED; import static com.yahoo.vespa.filedistribution.FileReferenceData.CompressionType; import static com.yahoo.vespa.filedistribution.FileReferenceData.CompressionType.gzip; import static com.yahoo.vespa.filedistribution.FileReferenceData.Type; import static com.yahoo.vespa.filedistribution.FileReferenceData.Type.compressed; +import static com.yahoo.yolean.Exceptions.uncheck; public class FileServer { @@ -54,10 +58,11 @@ public class FileServer { private final List<CompressionType> compressionTypes; // compression types to use, in preferred order // TODO: Move to filedistribution module, so that it can be used by both clients and servers - private enum FileApiErrorCodes { + enum FileApiErrorCodes { OK(0, "OK"), NOT_FOUND(1, "File reference not found"), - TIMEOUT(2, "Timeout"); + TIMEOUT(2, "Timeout"), + TRANSFER_FAILED(3, "Failed transferring file"); private final int code; private final String description; FileApiErrorCodes(int code, String description) { @@ -114,29 +119,24 @@ public class FileServer { FileDirectory getRootDir() { return fileDirectory; } void startFileServing(FileReference reference, Receiver target, Set<CompressionType> acceptedCompressionTypes) { - if ( ! fileDirectory.getFile(reference).exists()) return; + File file = fileDirectory.getFile(reference); + if ( ! file.exists()) return; - File file = this.fileDirectory.getFile(reference); - log.log(Level.FINE, () -> "Start serving " + reference + " with file '" + file.getAbsolutePath() + "'"); - FileReferenceData fileData = EmptyFileReferenceData.empty(reference, file.getName()); - try { - fileData = readFileReferenceData(reference, acceptedCompressionTypes); + try (FileReferenceData fileData = fileReferenceData(reference, acceptedCompressionTypes, file)) { + log.log(Level.FINE, () -> "Start serving " + reference.value() + " with file '" + file.getAbsolutePath() + "'"); target.receive(fileData, new ReplayStatus(0, "OK")); log.log(Level.FINE, () -> "Done serving " + reference.value() + " with file '" + file.getAbsolutePath() + "'"); - } catch (IOException e) { - String errorDescription = "For" + reference.value() + ": failed reading file '" + file.getAbsolutePath() + "'"; - log.warning(errorDescription + " for sending to '" + target.toString() + "'. " + e.getMessage()); - target.receive(fileData, new ReplayStatus(1, errorDescription)); + } catch (IOException ioe) { + throw new UncheckedIOException("For " + reference.value() + ": failed reading file '" + file.getAbsolutePath() + "'" + + " for sending to '" + target.toString() + "'. ", ioe); } catch (Exception e) { - log.log(Level.WARNING, "Failed serving " + reference + ": " + Exceptions.toMessageString(e)); - } finally { - fileData.close(); + throw new RuntimeException("Failed serving " + reference.value() + " to '" + target + "': ", e); } } - private FileReferenceData readFileReferenceData(FileReference reference, Set<CompressionType> acceptedCompressionTypes) throws IOException { - File file = this.fileDirectory.getFile(reference); - + private FileReferenceData fileReferenceData(FileReference reference, + Set<CompressionType> acceptedCompressionTypes, + File file) throws IOException { if (file.isDirectory()) { Path tempFile = Files.createTempFile("filereferencedata", reference.value()); CompressionType compressionType = chooseCompressionType(acceptedCompressionTypes); @@ -172,20 +172,21 @@ public class FileServer { Set<CompressionType> acceptedCompressionTypes) { if (Instant.now().isAfter(deadline)) { log.log(Level.INFO, () -> "Deadline exceeded for request for file reference '" + fileReference + "' from " + client); - return FileApiErrorCodes.TIMEOUT; + return TIMEOUT; } - boolean fileExists; try { var fileReferenceDownload = new FileReferenceDownload(fileReference, client, downloadFromOtherSourceIfNotFound); - fileExists = hasFileDownloadIfNeeded(fileReferenceDownload); - if (fileExists) startFileServing(fileReference, receiver, acceptedCompressionTypes); - } catch (IllegalArgumentException e) { - fileExists = false; + boolean fileExists = hasFileDownloadIfNeeded(fileReferenceDownload); + if ( ! fileExists) return NOT_FOUND; + + startFileServing(fileReference, receiver, acceptedCompressionTypes); + } catch (Exception e) { log.warning("Failed serving file reference '" + fileReference + "', request from " + client + " failed with: " + e.getMessage()); + return TRANSFER_FAILED; } - return (fileExists ? FileApiErrorCodes.OK : FileApiErrorCodes.NOT_FOUND); + return OK; } /* Choose the first compression type (list is in preferred order) that matches an accepted compression type, or fail */ diff --git a/configserver/src/main/java/com/yahoo/vespa/config/server/rpc/RpcServer.java b/configserver/src/main/java/com/yahoo/vespa/config/server/rpc/RpcServer.java index eee7d6ec63d..d26a22284c0 100644 --- a/configserver/src/main/java/com/yahoo/vespa/config/server/rpc/RpcServer.java +++ b/configserver/src/main/java/com/yahoo/vespa/config/server/rpc/RpcServer.java @@ -518,7 +518,7 @@ public class RpcServer implements Runnable, ConfigActivationListener, TenantList request.parameters().add(new StringValue(fileData.filename())); request.parameters().add(new StringValue(fileData.type().name())); request.parameters().add(new Int64Value(fileData.size())); - // Only add paramter if not gzip, this is default and old clients will not handle the extra parameter + // Only add parameter if not gzip, this is default and old clients will not handle the extra parameter if (fileData.compressionType() != CompressionType.gzip) request.parameters().add(new StringValue(fileData.compressionType().name())); return request; @@ -532,7 +532,7 @@ public class RpcServer implements Runnable, ConfigActivationListener, TenantList request.parameters().add(new DataValue(buf)); invokeRpcIfValidConnection(request); if (request.isError()) { - throw new IllegalArgumentException("Failed delivering reference '" + ref.value() + "' to " + + throw new IllegalArgumentException("Failed delivering part of reference '" + ref.value() + "' to " + target.toString() + " with error: '" + request.errorMessage() + "'."); } else { if (request.returnValues().get(0).asInt32() != 0) { @@ -550,7 +550,8 @@ public class RpcServer implements Runnable, ConfigActivationListener, TenantList request.parameters().add(new StringValue(status.getDescription())); invokeRpcIfValidConnection(request); if (request.isError()) { - throw new IllegalArgumentException("Failed delivering reference '" + fileData.fileReference().value() + "' with file '" + fileData.filename() + "' to " + + throw new IllegalArgumentException("Failed delivering eof for reference '" + fileData.fileReference().value() + + "' with file '" + fileData.filename() + "' to " + target.toString() + " with error: '" + request.errorMessage() + "'."); } else { if (request.returnValues().get(0).asInt32() != 0) { diff --git a/configserver/src/test/java/com/yahoo/vespa/config/server/filedistribution/FileServerTest.java b/configserver/src/test/java/com/yahoo/vespa/config/server/filedistribution/FileServerTest.java index 49458acd60b..c17b68c6d12 100644 --- a/configserver/src/test/java/com/yahoo/vespa/config/server/filedistribution/FileServerTest.java +++ b/configserver/src/test/java/com/yahoo/vespa/config/server/filedistribution/FileServerTest.java @@ -29,6 +29,7 @@ import static com.yahoo.vespa.filedistribution.FileReferenceData.CompressionType import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertFalse; import static org.junit.Assert.assertTrue; +import static org.junit.jupiter.api.Assertions.fail; public class FileServerTest { @@ -130,6 +131,25 @@ public class FileServerTest { assertEquals(1, fileServer.downloader().connectionPool().getSize()); } + @Test + public void requireThatErrorsAreHandled() throws IOException, ExecutionException, InterruptedException { + File dir = getFileServerRootDir(); + IOUtils.writeFile(dir + "/12y/f1", "dummy-data", true); + CompletableFuture<byte []> content = new CompletableFuture<>(); + FailingFileReceiver fileReceiver = new FailingFileReceiver(content); + + // Should fail the first time, see FailingFileReceiver + try { + fileServer.startFileServing(new FileReference("12y"), fileReceiver, Set.of(gzip)); + fail("Should have failed"); + } catch (RuntimeException e) { + // expected + } + + fileServer.startFileServing(new FileReference("12y"), fileReceiver, Set.of(gzip)); + assertEquals(new String(content.get()), "dummy-data"); + } + private void writeFile(String dir) throws IOException { File rootDir = getFileServerRootDir(); IOUtils.createDirectory(rootDir + "/" + dir); @@ -153,6 +173,23 @@ public class FileServerTest { } } + private static class FailingFileReceiver implements FileServer.Receiver { + final CompletableFuture<byte []> content; + int counter = 0; + FailingFileReceiver(CompletableFuture<byte []> content) { + this.content = content; + } + @Override + public void receive(FileReferenceData fileData, FileServer.ReplayStatus status) { + counter++; + if (counter <= 1) + throw new RuntimeException("Failed to receive file"); + else { + this.content.complete(fileData.content().array()); + } + } + } + private File getFileServerRootDir() { return fileServer.getRootDir().getRoot(); } diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/OsController.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/OsController.java index c426c27418d..58c3b4da5e4 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/OsController.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/OsController.java @@ -11,6 +11,7 @@ import com.yahoo.vespa.hosted.controller.versions.OsVersionStatus; import com.yahoo.vespa.hosted.controller.versions.OsVersionTarget; import java.time.Instant; +import java.util.Comparator; import java.util.HashSet; import java.util.Map; import java.util.Objects; @@ -161,9 +162,12 @@ public record OsController(Controller controller) { /** Remove certifications for non-existent OS versions */ public void removeStaleCertifications(OsVersionStatus currentStatus) { try (Mutex lock = curator().lockCertifiedOsVersions()) { - Set<OsVersion> knownVersions = currentStatus.versions().keySet(); + Optional<OsVersion> minKnownVersion = currentStatus.versions().keySet().stream() + .filter(v -> !v.version().isEmpty()) + .min(Comparator.naturalOrder()); + if (minKnownVersion.isEmpty()) return; Set<CertifiedOsVersion> certifiedVersions = new HashSet<>(readCertified()); - if (certifiedVersions.removeIf(cv -> !knownVersions.contains(cv.osVersion()))) { + if (certifiedVersions.removeIf(cv -> cv.osVersion().version().isBefore(minKnownVersion.get().version()))) { curator().writeCertifiedOsVersions(certifiedVersions); } } diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/OsVersionStatusUpdaterTest.java b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/OsVersionStatusUpdaterTest.java index af535abce26..6f4052bf0ef 100644 --- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/OsVersionStatusUpdaterTest.java +++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/OsVersionStatusUpdaterTest.java @@ -68,12 +68,14 @@ public class OsVersionStatusUpdaterTest { .filter(osVersion -> !osVersion.version().isEmpty()) .collect(Collectors.toSet()); List<OsVersion> versionsToCertify = new ArrayList<>(knownVersions); - versionsToCertify.addAll(List.of(new OsVersion(Version.fromString("95.0.1"), cloud), - new OsVersion(Version.fromString("98.0.2"), cloud))); + OsVersion futureVersion = new OsVersion(Version.fromString("98.0.2"), cloud); // Keep future version + versionsToCertify.addAll(List.of(new OsVersion(Version.fromString("3.11"), cloud), + futureVersion)); for (OsVersion version : versionsToCertify) { tester.controller().os().certify(version.version(), version.cloud(), Version.fromString("1.2.3")); } - assertEquals(knownVersions.size() + 2, certifiedOsVersions(tester).size()); + knownVersions.add(futureVersion); + assertEquals(knownVersions.size() + 1, certifiedOsVersions(tester).size()); statusUpdater.maintain(); assertEquals(knownVersions, certifiedOsVersions(tester)); } diff --git a/filedistribution/src/main/java/com/yahoo/vespa/filedistribution/EmptyFileReferenceData.java b/filedistribution/src/main/java/com/yahoo/vespa/filedistribution/EmptyFileReferenceData.java deleted file mode 100644 index ea8461b42f3..00000000000 --- a/filedistribution/src/main/java/com/yahoo/vespa/filedistribution/EmptyFileReferenceData.java +++ /dev/null @@ -1,55 +0,0 @@ -// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -package com.yahoo.vespa.filedistribution; - -import com.yahoo.config.FileReference; - -import java.nio.ByteBuffer; - -public class EmptyFileReferenceData extends FileReferenceData { - - private final byte[] content; - private final long xxhash; - private int contentRead = 0; - - private EmptyFileReferenceData(FileReference fileReference, String filename, Type type, byte[] content, long xxhash) { - super(fileReference, filename, type, CompressionType.gzip); - this.content = content; - this.xxhash = xxhash; - } - - public static FileReferenceData empty(FileReference fileReference, String filename) { - return new EmptyFileReferenceData(fileReference, filename, FileReferenceData.Type.file, new byte[0], 0); - } - - public ByteBuffer content() { - return ByteBuffer.wrap(content); - } - - @Override - public int nextContent(ByteBuffer bb) { - if (contentRead >= content.length) { - return -1; - } else { - int left = content.length - contentRead; - int size = Math.min(bb.remaining(), left); - bb.put(content, contentRead, size); - contentRead += size; - return size; - } - } - - @Override - public long xxhash() { - return xxhash; - } - - @Override - public long size() { - return content.length; - } - - @Override - public void close() { - // no-op - } -} diff --git a/filedistribution/src/main/java/com/yahoo/vespa/filedistribution/FileReceiver.java b/filedistribution/src/main/java/com/yahoo/vespa/filedistribution/FileReceiver.java index b37fe02226b..a567a3bc4b3 100644 --- a/filedistribution/src/main/java/com/yahoo/vespa/filedistribution/FileReceiver.java +++ b/filedistribution/src/main/java/com/yahoo/vespa/filedistribution/FileReceiver.java @@ -243,7 +243,7 @@ public class FileReceiver { synchronized (sessions) { if (sessions.containsKey(sessionId)) { retval = 1; - log.severe("Session id " + sessionId + " already exist, impossible. Request from(" + req.target() + ")"); + log.severe("Session id " + sessionId + " already exist, impossible. Request from " + req.target()); } else { try { sessions.put(sessionId, new Session(downloadDirectory, sessionId, reference, diff --git a/filedistribution/src/main/java/com/yahoo/vespa/filedistribution/FileReferenceData.java b/filedistribution/src/main/java/com/yahoo/vespa/filedistribution/FileReferenceData.java index 3f83cbea506..87f45db5221 100644 --- a/filedistribution/src/main/java/com/yahoo/vespa/filedistribution/FileReferenceData.java +++ b/filedistribution/src/main/java/com/yahoo/vespa/filedistribution/FileReferenceData.java @@ -10,7 +10,7 @@ import java.nio.ByteBuffer; * * @author hmusum */ -public abstract class FileReferenceData { +public abstract class FileReferenceData implements AutoCloseable { public enum Type { file, compressed } public enum CompressionType { gzip, lz4, zstd } diff --git a/flags/src/main/java/com/yahoo/vespa/flags/PermanentFlags.java b/flags/src/main/java/com/yahoo/vespa/flags/PermanentFlags.java index 619dd39ca47..5a791522977 100644 --- a/flags/src/main/java/com/yahoo/vespa/flags/PermanentFlags.java +++ b/flags/src/main/java/com/yahoo/vespa/flags/PermanentFlags.java @@ -21,6 +21,7 @@ import static com.yahoo.vespa.flags.FetchVector.Dimension.HOSTNAME; import static com.yahoo.vespa.flags.FetchVector.Dimension.NODE_TYPE; import static com.yahoo.vespa.flags.FetchVector.Dimension.TENANT_ID; import static com.yahoo.vespa.flags.FetchVector.Dimension.VESPA_VERSION; +import static com.yahoo.vespa.flags.FetchVector.Dimension.ZONE_ID; /** * Definition for permanent feature flags @@ -351,6 +352,7 @@ public class PermanentFlags { "Takes effect immediately", TENANT_ID); + // TODO: Remove when not in use anymore, replaced by KEEP_FILE_REFERENCES_DAYS public static final UnboundIntFlag KEEP_FILE_REFERENCES_ON_TENANT_NODES = defineIntFlag( "keep-file-references-on-tenant-nodes", 30, "How many days to keep file references on tenant nodes (based on last modification time)", @@ -358,6 +360,20 @@ public class PermanentFlags { APPLICATION_ID ); + public static final UnboundIntFlag KEEP_FILE_REFERENCES_DAYS = defineIntFlag( + "keep-file-references-days", 30, + "How many days to keep file references on tenant nodes (based on last modification time)", + "Takes effect on restart of Docker container", + APPLICATION_ID + ); + + public static final UnboundIntFlag KEEP_FILE_REFERENCES_COUNT = defineIntFlag( + "keep-file-references-count", 20, + "How many file references to keep on tenant nodes (no matter what last modification time is)", + "Takes effect on restart of Docker container", + ZONE_ID, APPLICATION_ID + ); + public static final UnboundIntFlag ENDPOINT_CONNECTION_TTL = defineIntFlag( "endpoint-connection-ttl", 45, "Time to live for connections to endpoints in seconds", diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImpl.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImpl.java index 7fc248024c3..284306e1e8c 100644 --- a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImpl.java +++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImpl.java @@ -17,7 +17,6 @@ import com.yahoo.vespa.hosted.node.admin.configserver.noderepository.NodeSpec; import com.yahoo.vespa.hosted.node.admin.configserver.noderepository.NodeState; import com.yahoo.vespa.hosted.node.admin.configserver.noderepository.reports.DropDocumentsReport; import com.yahoo.vespa.hosted.node.admin.configserver.orchestrator.Orchestrator; -import com.yahoo.vespa.hosted.node.admin.configserver.orchestrator.OrchestratorException; import com.yahoo.vespa.hosted.node.admin.container.Container; import com.yahoo.vespa.hosted.node.admin.container.ContainerOperations; import com.yahoo.vespa.hosted.node.admin.container.ContainerResources; @@ -484,6 +483,11 @@ public class NodeAgentImpl implements NodeAgent { lastNode = node; } + // Run this here and now, even though we may immediately remove the container below. + // This ensures these maintainers are run even if something fails or returns early. + // These maintainers should also run immediately after starting the container (see below). + container.ifPresent(c -> runImportantContainerMaintainers(context, c)); + switch (node.state()) { case ready, reserved, failed, inactive, parked -> { storageMaintainer.syncLogs(context, true); @@ -508,13 +512,11 @@ public class NodeAgentImpl implements NodeAgent { containerState = STARTING; container = Optional.of(startContainer(context)); containerState = UNKNOWN; + runImportantContainerMaintainers(context, container.get()); } else { container = Optional.of(updateContainerIfNeeded(context, container.get())); } - aclMaintainer.ifPresent(maintainer -> maintainer.converge(context)); - final Optional<Container> finalContainer = container; - wireguardTasks.forEach(task -> task.converge(context, finalContainer.get().id())); startServicesIfNeeded(context); resumeNodeIfNeeded(context); if (healthChecker.isPresent()) { @@ -559,6 +561,11 @@ public class NodeAgentImpl implements NodeAgent { } } + private void runImportantContainerMaintainers(NodeAgentContext context, Container container) { + aclMaintainer.ifPresent(maintainer -> maintainer.converge(context)); + wireguardTasks.forEach(task -> task.converge(context, container.id())); + } + private static void logChangesToNodeSpec(NodeAgentContext context, NodeSpec lastNode, NodeSpec node) { StringBuilder builder = new StringBuilder(); appendIfDifferent(builder, "state", lastNode, node, NodeSpec::state); @@ -600,23 +607,8 @@ public class NodeAgentImpl implements NodeAgent { if (context.node().state() != NodeState.active) return; context.log(logger, "Ask Orchestrator for permission to suspend node"); - try { - orchestrator.suspend(context.hostname().value()); - suspendedInOrchestrator = true; - } catch (OrchestratorException e) { - // Ensure the ACLs are up to date: The reason we're unable to suspend may be because some other - // node is unable to resume because the ACL rules of SOME Docker container is wrong... - // Same can happen with stale WireGuard config, so update that too - try { - aclMaintainer.ifPresent(maintainer -> maintainer.converge(context)); - wireguardTasks.forEach(task -> getContainer(context).ifPresent(c -> task.converge(context, c.id()))); - } catch (RuntimeException suppressed) { - logger.log(Level.WARNING, "Suppressing ACL update failure: " + suppressed); - e.addSuppressed(suppressed); - } - - throw e; - } + orchestrator.suspend(context.hostname().value()); + suspendedInOrchestrator = true; } protected void writeContainerData(NodeAgentContext context, ContainerData containerData) { } |