From db0ebe4ac1f2bde69f892ab11d3a9ee9fc24bbba Mon Sep 17 00:00:00 2001 From: Valerij Fredriksen Date: Wed, 22 Jul 2020 15:49:21 +0200 Subject: Disable node-agent tasks --- .../src/main/java/com/yahoo/vespa/flags/Flags.java | 3 ++- .../node/admin/maintenance/StorageMaintainer.java | 9 ++++--- .../node/admin/maintenance/acl/AclMaintainer.java | 5 +++- .../identity/AthenzCredentialsMaintainer.java | 5 +++- .../node/admin/nodeagent/NodeAgentContext.java | 6 ++++- .../node/admin/nodeagent/NodeAgentContextImpl.java | 24 +++++++++++++++-- .../hosted/node/admin/nodeagent/NodeAgentTask.java | 30 ++++++++++++++++++++++ .../admin/nodeagent/NodeAgentContextImplTest.java | 22 ++++++++++++++++ 8 files changed, 95 insertions(+), 9 deletions(-) create mode 100644 node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentTask.java diff --git a/flags/src/main/java/com/yahoo/vespa/flags/Flags.java b/flags/src/main/java/com/yahoo/vespa/flags/Flags.java index 818fd4db5cd..cc3cb3adc85 100644 --- a/flags/src/main/java/com/yahoo/vespa/flags/Flags.java +++ b/flags/src/main/java/com/yahoo/vespa/flags/Flags.java @@ -71,7 +71,8 @@ public class Flags { public static final UnboundListFlag<String> DISABLED_HOST_ADMIN_TASKS = defineListFlag( "disabled-host-admin-tasks", List.of(), String.class, - "List of host-admin task names (as they appear in the log, e.g. root>main>UpgradeTask) that should be skipped", + "List of host-admin task names (as they appear in the log, e.g. 
root>main>UpgradeTask), or some node-agent " + + "functionality (see NodeAgentTask), that should be skipped", "Takes effect on next host admin tick", HOSTNAME, NODE_TYPE); diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/StorageMaintainer.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/StorageMaintainer.java index 30ca2e0d218..a5efec1dcb7 100644 --- a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/StorageMaintainer.java +++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/StorageMaintainer.java @@ -5,7 +5,6 @@ import com.google.common.cache.Cache; import com.google.common.cache.CacheBuilder; import com.yahoo.config.provision.DockerImage; import com.yahoo.config.provision.NodeType; -import java.util.logging.Level; import com.yahoo.vespa.hosted.dockerapi.Container; import com.yahoo.vespa.hosted.dockerapi.ContainerName; import com.yahoo.vespa.hosted.node.admin.component.TaskContext; @@ -16,8 +15,9 @@ import com.yahoo.vespa.hosted.node.admin.maintenance.disk.DiskCleanupRule; import com.yahoo.vespa.hosted.node.admin.maintenance.disk.LinearCleanupRule; import com.yahoo.vespa.hosted.node.admin.nodeadmin.ConvergenceException; import com.yahoo.vespa.hosted.node.admin.nodeagent.NodeAgentContext; -import com.yahoo.vespa.hosted.node.admin.task.util.file.FileFinder; +import com.yahoo.vespa.hosted.node.admin.nodeagent.NodeAgentTask; import com.yahoo.vespa.hosted.node.admin.task.util.file.DiskSize; +import com.yahoo.vespa.hosted.node.admin.task.util.file.FileFinder; import com.yahoo.vespa.hosted.node.admin.task.util.file.UnixPath; import com.yahoo.vespa.hosted.node.admin.task.util.process.Terminal; @@ -37,8 +37,8 @@ import java.util.Map; import java.util.Optional; import java.util.concurrent.TimeUnit; import java.util.function.Function; +import java.util.logging.Level; import java.util.logging.Logger; -import java.util.stream.Stream; import static 
com.yahoo.vespa.hosted.node.admin.maintenance.disk.DiskCleanupRule.Priority; import static com.yahoo.yolean.Exceptions.uncheck; @@ -107,6 +107,8 @@ public class StorageMaintainer { } public boolean cleanDiskIfFull(NodeAgentContext context) { + if (context.isDisabled(NodeAgentTask.DiskCleanup)) return false; + double totalBytes = context.node().diskSize().bytes(); // Delete enough bytes to get below 70% disk usage, but only if we are already using more than 80% disk long bytesToRemove = diskUsageFor(context) @@ -148,6 +150,7 @@ public class StorageMaintainer { /** Checks if container has any new coredumps, reports and archives them if so */ public void handleCoreDumpsForContainer(NodeAgentContext context, Optional<Container> container) { + if (context.isDisabled(NodeAgentTask.CoreDumps)) return; coredumpHandler.converge(context, () -> getCoredumpNodeAttributes(context, container)); } diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/acl/AclMaintainer.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/acl/AclMaintainer.java index 360cea8a60d..fe6b29402b5 100644 --- a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/acl/AclMaintainer.java +++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/acl/AclMaintainer.java @@ -2,9 +2,9 @@ package com.yahoo.vespa.hosted.node.admin.maintenance.acl; import com.google.common.net.InetAddresses; -import java.util.logging.Level; import com.yahoo.vespa.hosted.node.admin.docker.DockerOperations; import com.yahoo.vespa.hosted.node.admin.nodeagent.NodeAgentContext; +import com.yahoo.vespa.hosted.node.admin.nodeagent.NodeAgentTask; import com.yahoo.vespa.hosted.node.admin.task.util.file.Editor; import com.yahoo.vespa.hosted.node.admin.task.util.file.LineEditor; import com.yahoo.vespa.hosted.node.admin.task.util.network.IPAddresses; @@ -18,6 +18,7 @@ import java.nio.file.Path; import java.util.List; import java.util.function.Consumer; import 
java.util.function.Supplier; +import java.util.logging.Level; import java.util.logging.Logger; import static com.yahoo.yolean.Exceptions.uncheck; @@ -51,6 +52,8 @@ public class AclMaintainer { // ip(6)tables operate while having the xtables lock, run with synchronized to prevent multiple NodeAgents // invoking ip(6)tables concurrently. public synchronized void converge(NodeAgentContext context) { + if (context.isDisabled(NodeAgentTask.AclMaintainer)) return; + // Apply acl to the filter table editFlushOnError(context, IPVersion.IPv4, "filter", FilterTableLineEditor.from(context.acl(), IPVersion.IPv4)); editFlushOnError(context, IPVersion.IPv6, "filter", FilterTableLineEditor.from(context.acl(), IPVersion.IPv6)); diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/identity/AthenzCredentialsMaintainer.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/identity/AthenzCredentialsMaintainer.java index 3320851a36c..d6c08a820cd 100644 --- a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/identity/AthenzCredentialsMaintainer.java +++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/identity/AthenzCredentialsMaintainer.java @@ -1,7 +1,6 @@ // Copyright 2018 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
package com.yahoo.vespa.hosted.node.admin.maintenance.identity; -import java.util.logging.Level; import com.yahoo.security.KeyAlgorithm; import com.yahoo.security.KeyStoreType; import com.yahoo.security.KeyUtils; @@ -24,6 +23,7 @@ import com.yahoo.vespa.athenz.utils.SiaUtils; import com.yahoo.vespa.hosted.dockerapi.ContainerName; import com.yahoo.vespa.hosted.node.admin.component.ConfigServerInfo; import com.yahoo.vespa.hosted.node.admin.nodeagent.NodeAgentContext; +import com.yahoo.vespa.hosted.node.admin.nodeagent.NodeAgentTask; import com.yahoo.vespa.hosted.node.admin.task.util.file.FileFinder; import com.yahoo.vespa.hosted.node.admin.task.util.file.UnixPath; @@ -46,6 +46,7 @@ import java.time.Instant; import java.util.Map; import java.util.Set; import java.util.concurrent.ConcurrentHashMap; +import java.util.logging.Level; import java.util.logging.Logger; /** @@ -108,6 +109,8 @@ public class AthenzCredentialsMaintainer implements CredentialsMaintainer { } public boolean converge(NodeAgentContext context) { + if (context.isDisabled(NodeAgentTask.CredentialsMaintainer)) return false; + try { context.log(logger, Level.FINE, "Checking certificate"); Path containerSiaDirectory = context.pathOnHostFromPathInNode(CONTAINER_SIA_DIRECTORY); diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentContext.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentContext.java index d589000c07e..872b8a8096b 100644 --- a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentContext.java +++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentContext.java @@ -44,6 +44,10 @@ public interface NodeAgentContext extends TaskContext { String vespaUserOnHost(); + default boolean isDisabled(NodeAgentTask task) { + return false; + }; + /** * The vcpu value in NodeSpec is multiplied by the speedup factor per cpu core compared to a historical baseline * for a particular cpu 
generation of the host (see flavors.def cpuSpeedup). @@ -52,7 +56,7 @@ public interface NodeAgentContext extends TaskContext { */ double unscaledVcpu(); - /** The file system used by the NodeAgentContext. All paths must have the same provider. */ + /** The file system used by the NodeAgentContext. All paths must have the same provider. */ FileSystem fileSystem(); /** diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentContextImpl.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentContextImpl.java index 9f0c8d47d64..c7c0675c30e 100644 --- a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentContextImpl.java +++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentContextImpl.java @@ -7,6 +7,10 @@ import com.yahoo.config.provision.zone.ZoneApi; import com.yahoo.config.provision.zone.ZoneId; import com.yahoo.vespa.athenz.api.AthenzIdentity; import com.yahoo.vespa.athenz.api.AthenzService; +import com.yahoo.vespa.flags.FetchVector; +import com.yahoo.vespa.flags.FlagSource; +import com.yahoo.vespa.flags.Flags; +import com.yahoo.vespa.flags.InMemoryFlagSource; import com.yahoo.vespa.hosted.dockerapi.ContainerName; import com.yahoo.vespa.hosted.node.admin.configserver.noderepository.Acl; import com.yahoo.vespa.hosted.node.admin.configserver.noderepository.NodeSpec; @@ -18,6 +22,7 @@ import java.nio.file.Path; import java.nio.file.ProviderMismatchException; import java.util.Objects; import java.util.Optional; +import java.util.Set; import java.util.function.Function; import java.util.logging.Level; import java.util.logging.Logger; @@ -40,10 +45,11 @@ public class NodeAgentContextImpl implements NodeAgentContext { private final String vespaUser; private final String vespaUserOnHost; private final double cpuSpeedup; + private final Set<NodeAgentTask> disabledNodeAgentTasks; public NodeAgentContextImpl(NodeSpec node, Acl acl, AthenzIdentity identity, 
DockerNetworking dockerNetworking, ZoneApi zone, - FileSystem fileSystem, + FileSystem fileSystem, FlagSource flagSource, Path pathToContainerStorage, Path pathToVespaHome, String vespaUser, String vespaUserOnHost, double cpuSpeedup) { if (cpuSpeedup <= 0) @@ -55,13 +61,15 @@ public class NodeAgentContextImpl implements NodeAgentContext { this.identity = Objects.requireNonNull(identity); this.dockerNetworking = Objects.requireNonNull(dockerNetworking); this.zone = Objects.requireNonNull(zone); - this.fileSystem = fileSystem; + this.fileSystem = Objects.requireNonNull(fileSystem); this.pathToNodeRootOnHost = requireValidPath(pathToContainerStorage).resolve(containerName.asString()); this.pathToVespaHome = requireValidPath(pathToVespaHome); this.logPrefix = containerName.asString() + ": "; this.vespaUser = vespaUser; this.vespaUserOnHost = vespaUserOnHost; this.cpuSpeedup = cpuSpeedup; + this.disabledNodeAgentTasks = NodeAgentTask.fromString( + Flags.DISABLED_HOST_ADMIN_TASKS.bindTo(flagSource).with(FetchVector.Dimension.HOSTNAME, node.hostname()).value()); } @Override @@ -104,6 +112,11 @@ public class NodeAgentContextImpl implements NodeAgentContext { return vespaUserOnHost; } + @Override + public boolean isDisabled(NodeAgentTask task) { + return disabledNodeAgentTasks.contains(task); + } + @Override public double unscaledVcpu() { return node.vcpu() / cpuSpeedup; @@ -212,6 +225,7 @@ public class NodeAgentContextImpl implements NodeAgentContext { private String vespaUser; private String vespaUserOnHost; private FileSystem fileSystem = FileSystems.getDefault(); + private FlagSource flagSource; private double cpuSpeedUp = 1; public Builder(NodeSpec node) { @@ -268,6 +282,11 @@ public class NodeAgentContextImpl implements NodeAgentContext { return this; } + public Builder flagSource(FlagSource flagSource) { + this.flagSource = flagSource; + return this; + } + public Builder cpuSpeedUp(double cpuSpeedUp) { this.cpuSpeedUp = cpuSpeedUp; return this; @@ -301,6 +320,7 @@ 
public class NodeAgentContextImpl implements NodeAgentContext { } }), fileSystem, + Optional.ofNullable(flagSource).orElseGet(InMemoryFlagSource::new), fileSystem.getPath("/home/docker/container-storage"), fileSystem.getPath("/opt/vespa"), Optional.ofNullable(vespaUser).orElse("vespa"), diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentTask.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentTask.java new file mode 100644 index 00000000000..d57c680e190 --- /dev/null +++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentTask.java @@ -0,0 +1,30 @@ +package com.yahoo.vespa.hosted.node.admin.nodeagent; + +import java.util.Arrays; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.stream.Collectors; + +public enum NodeAgentTask { + + // The full task name is prefixed with 'node>', e.g. 'node>DiskCleanup' + DiskCleanup, + CoreDumps, + CredentialsMaintainer, + AclMaintainer; + + private static final Map<String, NodeAgentTask> tasksByName = Arrays.stream(NodeAgentTask.values()) + .collect(Collectors.toUnmodifiableMap(NodeAgentTask::taskName, n -> n)); + + private final String taskName; + NodeAgentTask() { + this.taskName = "node>" + name(); + } + + public String taskName() { return taskName; } + + public static Set<NodeAgentTask> fromString(List<String> tasks) { + return tasks.stream().filter(tasksByName::containsKey).map(tasksByName::get).collect(Collectors.toUnmodifiableSet()); + } +} diff --git a/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentContextImplTest.java b/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentContextImplTest.java index 9bcbce849af..b7e0a2a1d97 100644 --- a/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentContextImplTest.java +++ b/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentContextImplTest.java @@ -2,14 +2,19 @@ package 
com.yahoo.vespa.hosted.node.admin.nodeagent; import com.yahoo.config.provision.DockerImage; +import com.yahoo.vespa.flags.Flags; +import com.yahoo.vespa.flags.InMemoryFlagSource; import com.yahoo.vespa.test.file.TestFileSystem; import org.junit.Test; import java.nio.file.FileSystem; import java.nio.file.Path; import java.nio.file.Paths; +import java.util.List; import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertTrue; /** * @author freva @@ -84,6 +89,23 @@ public class NodeAgentContextImplTest { assertRewrite("docker.tld/vespa/hosted:1.2.3", "/opt/vespa/log", "/opt/vespa/log"); } + @Test + public void disabledTasksTest() { + NodeAgentContext context1 = createContextWithDisabledTasks(); + assertFalse(context1.isDisabled(NodeAgentTask.DiskCleanup)); + assertFalse(context1.isDisabled(NodeAgentTask.CoreDumps)); + + NodeAgentContext context2 = createContextWithDisabledTasks("root>UpgradeTask", "DiskCleanup", "node>CoreDumps"); + assertFalse(context2.isDisabled(NodeAgentTask.DiskCleanup)); + assertTrue(context2.isDisabled(NodeAgentTask.CoreDumps)); + } + + private static NodeAgentContext createContextWithDisabledTasks(String... tasks) { + InMemoryFlagSource flagSource = new InMemoryFlagSource(); + flagSource.withListFlag(Flags.DISABLED_HOST_ADMIN_TASKS.id(), List.of(tasks), String.class); + return new NodeAgentContextImpl.Builder("node123").flagSource(flagSource).build(); + } + private static void assertRewrite(String dockerImage, String path, String expected) { NodeAgentContext context = new NodeAgentContextImpl.Builder("node123") .nodeSpecBuilder(ns -> ns.wantedDockerImage(DockerImage.fromString(dockerImage))) -- cgit v1.2.3