about summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Martin Polden <mpolden@mpolden.no> 2020-07-23 09:35:44 +0200
committer GitHub <noreply@github.com> 2020-07-23 09:35:44 +0200
commit 48a01c4ca475a6601f66cc1d0e06a8f3d5f819b5 (patch)
tree a85c95a24cc19dc6a22f922ad7b30b3d311b617e
parent 7973c1ec0805711dd568e46c59e7b99bb36696dc (diff)
parent db0ebe4ac1f2bde69f892ab11d3a9ee9fc24bbba (diff)
Merge pull request #13937 from vespa-engine/freva/disable-node-agent-tasks
[VESPA-18202] Disable node-agent tasks
-rw-r--r-- flags/src/main/java/com/yahoo/vespa/flags/Flags.java 3
-rw-r--r-- node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/StorageMaintainer.java 9
-rw-r--r-- node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/acl/AclMaintainer.java 5
-rw-r--r-- node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/identity/AthenzCredentialsMaintainer.java 5
-rw-r--r-- node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentContext.java 6
-rw-r--r-- node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentContextImpl.java 24
-rw-r--r-- node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentTask.java 30
-rw-r--r-- node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentContextImplTest.java 22
8 files changed, 95 insertions, 9 deletions
diff --git a/flags/src/main/java/com/yahoo/vespa/flags/Flags.java b/flags/src/main/java/com/yahoo/vespa/flags/Flags.java
index 818fd4db5cd..cc3cb3adc85 100644
--- a/flags/src/main/java/com/yahoo/vespa/flags/Flags.java
+++ b/flags/src/main/java/com/yahoo/vespa/flags/Flags.java
@@ -71,7 +71,8 @@ public class Flags {
public static final UnboundListFlag<String> DISABLED_HOST_ADMIN_TASKS = defineListFlag(
"disabled-host-admin-tasks", List.of(), String.class,
- "List of host-admin task names (as they appear in the log, e.g. root>main>UpgradeTask) that should be skipped",
+ "List of host-admin task names (as they appear in the log, e.g. root>main>UpgradeTask), or some node-agent " +
+ "functionality (see NodeAgentTask), that should be skipped",
"Takes effect on next host admin tick",
HOSTNAME, NODE_TYPE);
diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/StorageMaintainer.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/StorageMaintainer.java
index 30ca2e0d218..a5efec1dcb7 100644
--- a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/StorageMaintainer.java
+++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/StorageMaintainer.java
@@ -5,7 +5,6 @@ import com.google.common.cache.Cache;
import com.google.common.cache.CacheBuilder;
import com.yahoo.config.provision.DockerImage;
import com.yahoo.config.provision.NodeType;
-import java.util.logging.Level;
import com.yahoo.vespa.hosted.dockerapi.Container;
import com.yahoo.vespa.hosted.dockerapi.ContainerName;
import com.yahoo.vespa.hosted.node.admin.component.TaskContext;
@@ -16,8 +15,9 @@ import com.yahoo.vespa.hosted.node.admin.maintenance.disk.DiskCleanupRule;
import com.yahoo.vespa.hosted.node.admin.maintenance.disk.LinearCleanupRule;
import com.yahoo.vespa.hosted.node.admin.nodeadmin.ConvergenceException;
import com.yahoo.vespa.hosted.node.admin.nodeagent.NodeAgentContext;
-import com.yahoo.vespa.hosted.node.admin.task.util.file.FileFinder;
+import com.yahoo.vespa.hosted.node.admin.nodeagent.NodeAgentTask;
import com.yahoo.vespa.hosted.node.admin.task.util.file.DiskSize;
+import com.yahoo.vespa.hosted.node.admin.task.util.file.FileFinder;
import com.yahoo.vespa.hosted.node.admin.task.util.file.UnixPath;
import com.yahoo.vespa.hosted.node.admin.task.util.process.Terminal;
@@ -37,8 +37,8 @@ import java.util.Map;
import java.util.Optional;
import java.util.concurrent.TimeUnit;
import java.util.function.Function;
+import java.util.logging.Level;
import java.util.logging.Logger;
-import java.util.stream.Stream;
import static com.yahoo.vespa.hosted.node.admin.maintenance.disk.DiskCleanupRule.Priority;
import static com.yahoo.yolean.Exceptions.uncheck;
@@ -107,6 +107,8 @@ public class StorageMaintainer {
}
public boolean cleanDiskIfFull(NodeAgentContext context) {
+ if (context.isDisabled(NodeAgentTask.DiskCleanup)) return false;
+
double totalBytes = context.node().diskSize().bytes();
// Delete enough bytes to get below 70% disk usage, but only if we are already using more than 80% disk
long bytesToRemove = diskUsageFor(context)
@@ -148,6 +150,7 @@ public class StorageMaintainer {
/** Checks if container has any new coredumps, reports and archives them if so */
public void handleCoreDumpsForContainer(NodeAgentContext context, Optional<Container> container) {
+ if (context.isDisabled(NodeAgentTask.CoreDumps)) return;
coredumpHandler.converge(context, () -> getCoredumpNodeAttributes(context, container));
}
diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/acl/AclMaintainer.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/acl/AclMaintainer.java
index 360cea8a60d..fe6b29402b5 100644
--- a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/acl/AclMaintainer.java
+++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/acl/AclMaintainer.java
@@ -2,9 +2,9 @@
package com.yahoo.vespa.hosted.node.admin.maintenance.acl;
import com.google.common.net.InetAddresses;
-import java.util.logging.Level;
import com.yahoo.vespa.hosted.node.admin.docker.DockerOperations;
import com.yahoo.vespa.hosted.node.admin.nodeagent.NodeAgentContext;
+import com.yahoo.vespa.hosted.node.admin.nodeagent.NodeAgentTask;
import com.yahoo.vespa.hosted.node.admin.task.util.file.Editor;
import com.yahoo.vespa.hosted.node.admin.task.util.file.LineEditor;
import com.yahoo.vespa.hosted.node.admin.task.util.network.IPAddresses;
@@ -18,6 +18,7 @@ import java.nio.file.Path;
import java.util.List;
import java.util.function.Consumer;
import java.util.function.Supplier;
+import java.util.logging.Level;
import java.util.logging.Logger;
import static com.yahoo.yolean.Exceptions.uncheck;
@@ -51,6 +52,8 @@ public class AclMaintainer {
// ip(6)tables operate while having the xtables lock, run with synchronized to prevent multiple NodeAgents
// invoking ip(6)tables concurrently.
public synchronized void converge(NodeAgentContext context) {
+ if (context.isDisabled(NodeAgentTask.AclMaintainer)) return;
+
// Apply acl to the filter table
editFlushOnError(context, IPVersion.IPv4, "filter", FilterTableLineEditor.from(context.acl(), IPVersion.IPv4));
editFlushOnError(context, IPVersion.IPv6, "filter", FilterTableLineEditor.from(context.acl(), IPVersion.IPv6));
diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/identity/AthenzCredentialsMaintainer.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/identity/AthenzCredentialsMaintainer.java
index 3320851a36c..d6c08a820cd 100644
--- a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/identity/AthenzCredentialsMaintainer.java
+++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/identity/AthenzCredentialsMaintainer.java
@@ -1,7 +1,6 @@
// Copyright 2018 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.vespa.hosted.node.admin.maintenance.identity;
-import java.util.logging.Level;
import com.yahoo.security.KeyAlgorithm;
import com.yahoo.security.KeyStoreType;
import com.yahoo.security.KeyUtils;
@@ -24,6 +23,7 @@ import com.yahoo.vespa.athenz.utils.SiaUtils;
import com.yahoo.vespa.hosted.dockerapi.ContainerName;
import com.yahoo.vespa.hosted.node.admin.component.ConfigServerInfo;
import com.yahoo.vespa.hosted.node.admin.nodeagent.NodeAgentContext;
+import com.yahoo.vespa.hosted.node.admin.nodeagent.NodeAgentTask;
import com.yahoo.vespa.hosted.node.admin.task.util.file.FileFinder;
import com.yahoo.vespa.hosted.node.admin.task.util.file.UnixPath;
@@ -46,6 +46,7 @@ import java.time.Instant;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.ConcurrentHashMap;
+import java.util.logging.Level;
import java.util.logging.Logger;
/**
@@ -108,6 +109,8 @@ public class AthenzCredentialsMaintainer implements CredentialsMaintainer {
}
public boolean converge(NodeAgentContext context) {
+ if (context.isDisabled(NodeAgentTask.CredentialsMaintainer)) return false;
+
try {
context.log(logger, Level.FINE, "Checking certificate");
Path containerSiaDirectory = context.pathOnHostFromPathInNode(CONTAINER_SIA_DIRECTORY);
diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentContext.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentContext.java
index d589000c07e..872b8a8096b 100644
--- a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentContext.java
+++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentContext.java
@@ -44,6 +44,10 @@ public interface NodeAgentContext extends TaskContext {
String vespaUserOnHost();
+ default boolean isDisabled(NodeAgentTask task) {
+ return false; // default: no node-agent task is reported as disabled
+ }
+
/**
* The vcpu value in NodeSpec is multiplied by the speedup factor per cpu core compared to a historical baseline
* for a particular cpu generation of the host (see flavors.def cpuSpeedup).
@@ -52,7 +56,7 @@ public interface NodeAgentContext extends TaskContext {
*/
double unscaledVcpu();
- /** The file system used by the NodeAgentContext. All paths must have the same provider. */
+ /** The file system used by the NodeAgentContext. All paths must have the same provider. */
FileSystem fileSystem();
/**
diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentContextImpl.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentContextImpl.java
index 9f0c8d47d64..c7c0675c30e 100644
--- a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentContextImpl.java
+++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentContextImpl.java
@@ -7,6 +7,10 @@ import com.yahoo.config.provision.zone.ZoneApi;
import com.yahoo.config.provision.zone.ZoneId;
import com.yahoo.vespa.athenz.api.AthenzIdentity;
import com.yahoo.vespa.athenz.api.AthenzService;
+import com.yahoo.vespa.flags.FetchVector;
+import com.yahoo.vespa.flags.FlagSource;
+import com.yahoo.vespa.flags.Flags;
+import com.yahoo.vespa.flags.InMemoryFlagSource;
import com.yahoo.vespa.hosted.dockerapi.ContainerName;
import com.yahoo.vespa.hosted.node.admin.configserver.noderepository.Acl;
import com.yahoo.vespa.hosted.node.admin.configserver.noderepository.NodeSpec;
@@ -18,6 +22,7 @@ import java.nio.file.Path;
import java.nio.file.ProviderMismatchException;
import java.util.Objects;
import java.util.Optional;
+import java.util.Set;
import java.util.function.Function;
import java.util.logging.Level;
import java.util.logging.Logger;
@@ -40,10 +45,11 @@ public class NodeAgentContextImpl implements NodeAgentContext {
private final String vespaUser;
private final String vespaUserOnHost;
private final double cpuSpeedup;
+ private final Set<NodeAgentTask> disabledNodeAgentTasks;
public NodeAgentContextImpl(NodeSpec node, Acl acl, AthenzIdentity identity,
DockerNetworking dockerNetworking, ZoneApi zone,
- FileSystem fileSystem,
+ FileSystem fileSystem, FlagSource flagSource,
Path pathToContainerStorage, Path pathToVespaHome,
String vespaUser, String vespaUserOnHost, double cpuSpeedup) {
if (cpuSpeedup <= 0)
@@ -55,13 +61,15 @@ public class NodeAgentContextImpl implements NodeAgentContext {
this.identity = Objects.requireNonNull(identity);
this.dockerNetworking = Objects.requireNonNull(dockerNetworking);
this.zone = Objects.requireNonNull(zone);
- this.fileSystem = fileSystem;
+ this.fileSystem = Objects.requireNonNull(fileSystem);
this.pathToNodeRootOnHost = requireValidPath(pathToContainerStorage).resolve(containerName.asString());
this.pathToVespaHome = requireValidPath(pathToVespaHome);
this.logPrefix = containerName.asString() + ": ";
this.vespaUser = vespaUser;
this.vespaUserOnHost = vespaUserOnHost;
this.cpuSpeedup = cpuSpeedup;
+ this.disabledNodeAgentTasks = NodeAgentTask.fromString(
+ Flags.DISABLED_HOST_ADMIN_TASKS.bindTo(flagSource).with(FetchVector.Dimension.HOSTNAME, node.hostname()).value());
}
@Override
@@ -105,6 +113,11 @@ public class NodeAgentContextImpl implements NodeAgentContext {
}
@Override
+ public boolean isDisabled(NodeAgentTask task) {
+ return disabledNodeAgentTasks.contains(task);
+ }
+
+ @Override
public double unscaledVcpu() {
return node.vcpu() / cpuSpeedup;
}
@@ -212,6 +225,7 @@ public class NodeAgentContextImpl implements NodeAgentContext {
private String vespaUser;
private String vespaUserOnHost;
private FileSystem fileSystem = FileSystems.getDefault();
+ private FlagSource flagSource;
private double cpuSpeedUp = 1;
public Builder(NodeSpec node) {
@@ -268,6 +282,11 @@ public class NodeAgentContextImpl implements NodeAgentContext {
return this;
}
+ public Builder flagSource(FlagSource flagSource) {
+ this.flagSource = flagSource;
+ return this;
+ }
+
public Builder cpuSpeedUp(double cpuSpeedUp) {
this.cpuSpeedUp = cpuSpeedUp;
return this;
@@ -301,6 +320,7 @@ public class NodeAgentContextImpl implements NodeAgentContext {
}
}),
fileSystem,
+ Optional.ofNullable(flagSource).orElseGet(InMemoryFlagSource::new),
fileSystem.getPath("/home/docker/container-storage"),
fileSystem.getPath("/opt/vespa"),
Optional.ofNullable(vespaUser).orElse("vespa"),
diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentTask.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentTask.java
new file mode 100644
index 00000000000..d57c680e190
--- /dev/null
+++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentTask.java
@@ -0,0 +1,30 @@
+package com.yahoo.vespa.hosted.node.admin.nodeagent;
+
+import java.util.Arrays;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+import java.util.stream.Collectors;
+
+public enum NodeAgentTask {
+
+ // The full task name is prefixed with 'node>', e.g. 'node>DiskCleanup'
+ DiskCleanup,
+ CoreDumps,
+ CredentialsMaintainer,
+ AclMaintainer;
+
+ private static final Map<String, NodeAgentTask> tasksByName = Arrays.stream(NodeAgentTask.values())
+ .collect(Collectors.toUnmodifiableMap(NodeAgentTask::taskName, n -> n));
+
+ private final String taskName;
+ NodeAgentTask() {
+ this.taskName = "node>" + name();
+ }
+
+ public String taskName() { return taskName; }
+
+ public static Set<NodeAgentTask> fromString(List<String> tasks) {
+ return tasks.stream().filter(tasksByName::containsKey).map(tasksByName::get).collect(Collectors.toUnmodifiableSet());
+ }
+}
diff --git a/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentContextImplTest.java b/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentContextImplTest.java
index 9bcbce849af..b7e0a2a1d97 100644
--- a/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentContextImplTest.java
+++ b/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentContextImplTest.java
@@ -2,14 +2,19 @@
package com.yahoo.vespa.hosted.node.admin.nodeagent;
import com.yahoo.config.provision.DockerImage;
+import com.yahoo.vespa.flags.Flags;
+import com.yahoo.vespa.flags.InMemoryFlagSource;
import com.yahoo.vespa.test.file.TestFileSystem;
import org.junit.Test;
import java.nio.file.FileSystem;
import java.nio.file.Path;
import java.nio.file.Paths;
+import java.util.List;
import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertTrue;
/**
* @author freva
@@ -84,6 +89,23 @@ public class NodeAgentContextImplTest {
assertRewrite("docker.tld/vespa/hosted:1.2.3", "/opt/vespa/log", "/opt/vespa/log");
}
+ @Test
+ public void disabledTasksTest() {
+ NodeAgentContext context1 = createContextWithDisabledTasks();
+ assertFalse(context1.isDisabled(NodeAgentTask.DiskCleanup));
+ assertFalse(context1.isDisabled(NodeAgentTask.CoreDumps));
+
+ NodeAgentContext context2 = createContextWithDisabledTasks("root>UpgradeTask", "DiskCleanup", "node>CoreDumps");
+ assertFalse(context2.isDisabled(NodeAgentTask.DiskCleanup));
+ assertTrue(context2.isDisabled(NodeAgentTask.CoreDumps));
+ }
+
+ private static NodeAgentContext createContextWithDisabledTasks(String... tasks) {
+ InMemoryFlagSource flagSource = new InMemoryFlagSource();
+ flagSource.withListFlag(Flags.DISABLED_HOST_ADMIN_TASKS.id(), List.of(tasks), String.class);
+ return new NodeAgentContextImpl.Builder("node123").flagSource(flagSource).build();
+ }
+
private static void assertRewrite(String dockerImage, String path, String expected) {
NodeAgentContext context = new NodeAgentContextImpl.Builder("node123")
.nodeSpecBuilder(ns -> ns.wantedDockerImage(DockerImage.fromString(dockerImage)))