summaryrefslogtreecommitdiffstats
path: root/node-admin
diff options
context:
space:
mode:
Diffstat (limited to 'node-admin')
-rw-r--r--node-admin/pom.xml6
-rw-r--r--node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/ConfigServerClients.java6
-rw-r--r--node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/RealConfigServerClients.java9
-rw-r--r--node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/flags/FlagRepository.java15
-rw-r--r--node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/flags/RealFlagRepository.java28
-rw-r--r--node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/flags/package-info.java5
-rw-r--r--node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/docker/DockerOperations.java3
-rw-r--r--node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/docker/DockerOperationsImpl.java19
-rw-r--r--node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/StorageMaintainer.java39
-rw-r--r--node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeadmin/NodeAdminImpl.java108
-rw-r--r--node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeadmin/NodeAdminStateUpdater.java33
-rw-r--r--node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgent.java6
-rw-r--r--node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentContext.java11
-rw-r--r--node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentContextFactory.java12
-rw-r--r--node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentContextImpl.java65
-rw-r--r--node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentContextManager.java102
-rw-r--r--node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentContextSupplier.java21
-rw-r--r--node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentFactory.java10
-rw-r--r--node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImpl.java259
-rw-r--r--node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentScheduler.java21
-rw-r--r--node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/configserver/flags/RealFlagRepositoryTest.java41
-rw-r--r--node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/integrationTests/DockerTester.java24
-rw-r--r--node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/maintenance/StorageMaintainerTest.java11
-rw-r--r--node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/nodeadmin/NodeAdminImplTest.java106
-rw-r--r--node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/nodeadmin/NodeAdminStateUpdaterTest.java51
-rw-r--r--node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentContextManagerTest.java142
-rw-r--r--node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImplTest.java220
27 files changed, 883 insertions, 490 deletions
diff --git a/node-admin/pom.xml b/node-admin/pom.xml
index 476902e400c..e1231f2585d 100644
--- a/node-admin/pom.xml
+++ b/node-admin/pom.xml
@@ -49,6 +49,12 @@
<version>${project.version}</version>
<scope>provided</scope>
</dependency>
+ <dependency>
+ <groupId>com.yahoo.vespa</groupId>
+ <artifactId>flags</artifactId>
+ <version>${project.version}</version>
+ <scope>provided</scope>
+ </dependency>
<!-- Compile -->
<dependency>
diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/ConfigServerClients.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/ConfigServerClients.java
index 4a2496f4d3e..ab899f9f919 100644
--- a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/ConfigServerClients.java
+++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/ConfigServerClients.java
@@ -1,6 +1,7 @@
// Copyright 2018 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.vespa.hosted.node.admin.configserver;
+import com.yahoo.vespa.hosted.node.admin.configserver.flags.FlagRepository;
import com.yahoo.vespa.hosted.node.admin.configserver.noderepository.NodeRepository;
import com.yahoo.vespa.hosted.node.admin.configserver.orchestrator.Orchestrator;
import com.yahoo.vespa.hosted.node.admin.configserver.state.State;
@@ -18,7 +19,10 @@ public interface ConfigServerClients {
Orchestrator orchestrator();
/** Get handle to the /state/v1 REST API */
- default State state() { throw new UnsupportedOperationException(); }
+ State state();
+
+ /** Get handle to the /flags/v1 REST API */
+ FlagRepository flagRepository();
void stop();
}
diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/RealConfigServerClients.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/RealConfigServerClients.java
index 6c982bfa71c..af11c300c2b 100644
--- a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/RealConfigServerClients.java
+++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/RealConfigServerClients.java
@@ -1,6 +1,8 @@
// Copyright 2018 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.vespa.hosted.node.admin.configserver;
+import com.yahoo.vespa.hosted.node.admin.configserver.flags.FlagRepository;
+import com.yahoo.vespa.hosted.node.admin.configserver.flags.RealFlagRepository;
import com.yahoo.vespa.hosted.node.admin.configserver.noderepository.NodeRepository;
import com.yahoo.vespa.hosted.node.admin.configserver.noderepository.RealNodeRepository;
import com.yahoo.vespa.hosted.node.admin.configserver.orchestrator.Orchestrator;
@@ -19,6 +21,7 @@ public class RealConfigServerClients implements ConfigServerClients {
private final NodeRepository nodeRepository;
private final Orchestrator orchestrator;
private final State state;
+ private final RealFlagRepository flagRepository;
/**
* @param configServerApi the backend API to use - will be closed at {@link #stop()}.
@@ -28,6 +31,7 @@ public class RealConfigServerClients implements ConfigServerClients {
nodeRepository = new RealNodeRepository(configServerApi);
orchestrator = new OrchestratorImpl(configServerApi);
state = new StateImpl(configServerApi);
+ flagRepository = new RealFlagRepository(configServerApi);
}
@Override
@@ -46,6 +50,11 @@ public class RealConfigServerClients implements ConfigServerClients {
}
@Override
+ public FlagRepository flagRepository() {
+ return flagRepository;
+ }
+
+ @Override
public void stop() {
configServerApi.close();
}
diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/flags/FlagRepository.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/flags/FlagRepository.java
new file mode 100644
index 00000000000..8407d42131b
--- /dev/null
+++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/flags/FlagRepository.java
@@ -0,0 +1,15 @@
+// Copyright 2019 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.hosted.node.admin.configserver.flags;
+
+import com.yahoo.vespa.flags.FlagId;
+import com.yahoo.vespa.flags.json.FlagData;
+
+import java.util.List;
+import java.util.Map;
+
+/**
+ * @author hakonhall
+ */
+public interface FlagRepository {
+ Map<FlagId, FlagData> getAllFlagData();
+}
diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/flags/RealFlagRepository.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/flags/RealFlagRepository.java
new file mode 100644
index 00000000000..a017569294e
--- /dev/null
+++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/flags/RealFlagRepository.java
@@ -0,0 +1,28 @@
+// Copyright 2019 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.hosted.node.admin.configserver.flags;
+
+import com.yahoo.vespa.flags.FlagId;
+import com.yahoo.vespa.flags.json.FlagData;
+import com.yahoo.vespa.flags.json.wire.WireFlagDataList;
+import com.yahoo.vespa.hosted.node.admin.configserver.ConfigServerApi;
+
+import java.util.Map;
+import java.util.function.Function;
+import java.util.stream.Collectors;
+
+/**
+ * @author hakonhall
+ */
+public class RealFlagRepository implements FlagRepository {
+ private final ConfigServerApi configServerApi;
+
+ public RealFlagRepository(ConfigServerApi configServerApi) {
+ this.configServerApi = configServerApi;
+ }
+
+ @Override
+ public Map<FlagId, FlagData> getAllFlagData() {
+ WireFlagDataList list = configServerApi.get("/flags/v1/data?recursive=true", WireFlagDataList.class);
+ return FlagData.listFromWire(list).stream().collect(Collectors.toMap(FlagData::id, Function.identity()));
+ }
+}
diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/flags/package-info.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/flags/package-info.java
new file mode 100644
index 00000000000..b991adfc639
--- /dev/null
+++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/flags/package-info.java
@@ -0,0 +1,5 @@
+// Copyright 2019 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+@ExportPackage
+package com.yahoo.vespa.hosted.node.admin.configserver.flags;
+
+import com.yahoo.osgi.annotation.ExportPackage;
diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/docker/DockerOperations.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/docker/DockerOperations.java
index c65d59a79dc..af8dfb1fd27 100644
--- a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/docker/DockerOperations.java
+++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/docker/DockerOperations.java
@@ -7,7 +7,6 @@ import com.yahoo.vespa.hosted.dockerapi.ContainerStats;
import com.yahoo.vespa.hosted.dockerapi.DockerImage;
import com.yahoo.vespa.hosted.dockerapi.ProcessResult;
import com.yahoo.vespa.hosted.node.admin.nodeagent.NodeAgentContext;
-import com.yahoo.vespa.hosted.node.admin.configserver.noderepository.NodeSpec;
import com.yahoo.vespa.hosted.node.admin.nodeagent.ContainerData;
import java.util.List;
@@ -15,7 +14,7 @@ import java.util.Optional;
public interface DockerOperations {
- void createContainer(NodeAgentContext context, NodeSpec node, ContainerData containerData);
+ void createContainer(NodeAgentContext context, ContainerData containerData);
void startContainer(NodeAgentContext context);
diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/docker/DockerOperationsImpl.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/docker/DockerOperationsImpl.java
index 89ab2e60b63..e1b77b6a41b 100644
--- a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/docker/DockerOperationsImpl.java
+++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/docker/DockerOperationsImpl.java
@@ -13,7 +13,6 @@ import com.yahoo.vespa.hosted.dockerapi.Docker;
import com.yahoo.vespa.hosted.dockerapi.DockerImage;
import com.yahoo.vespa.hosted.dockerapi.ProcessResult;
import com.yahoo.vespa.hosted.node.admin.nodeagent.NodeAgentContext;
-import com.yahoo.vespa.hosted.node.admin.configserver.noderepository.NodeSpec;
import com.yahoo.vespa.hosted.node.admin.nodeagent.ContainerData;
import com.yahoo.vespa.hosted.node.admin.task.util.network.IPAddresses;
@@ -55,19 +54,19 @@ public class DockerOperationsImpl implements DockerOperations {
}
@Override
- public void createContainer(NodeAgentContext context, NodeSpec node, ContainerData containerData) {
+ public void createContainer(NodeAgentContext context, ContainerData containerData) {
context.log(logger, "Creating container");
// IPv6 - Assume always valid
- Inet6Address ipV6Address = ipAddresses.getIPv6Address(node.getHostname()).orElseThrow(
- () -> new RuntimeException("Unable to find a valid IPv6 address for " + node.getHostname() +
+ Inet6Address ipV6Address = ipAddresses.getIPv6Address(context.node().getHostname()).orElseThrow(
+ () -> new RuntimeException("Unable to find a valid IPv6 address for " + context.node().getHostname() +
". Missing an AAAA DNS entry?"));
Docker.CreateContainerCommand command = docker.createContainerCommand(
- node.getWantedDockerImage().get(),
- ContainerResources.from(node.getMinCpuCores(), node.getMinMainMemoryAvailableGb()),
+ context.node().getWantedDockerImage().get(),
+ ContainerResources.from(context.node().getMinCpuCores(), context.node().getMinMainMemoryAvailableGb()),
context.containerName(),
- node.getHostname())
+ context.node().getHostname())
.withManagedBy(MANAGER_NAME)
.withUlimit("nofile", 262_144, 262_144)
// The nproc aka RLIMIT_NPROC resource limit works as follows:
@@ -100,20 +99,20 @@ public class DockerOperationsImpl implements DockerOperations {
command.withIpAddress(ipV6Local);
// IPv4 - Only present for some containers
- Optional<InetAddress> ipV4Local = ipAddresses.getIPv4Address(node.getHostname())
+ Optional<InetAddress> ipV4Local = ipAddresses.getIPv4Address(context.node().getHostname())
.map(ipV4Address -> {
InetAddress ipV4Prefix = InetAddresses.forString(IPV4_NPT_PREFIX);
return IPAddresses.prefixTranslate(ipV4Address, ipV4Prefix, 2);
});
ipV4Local.ifPresent(command::withIpAddress);
- addEtcHosts(containerData, node.getHostname(), ipV4Local, ipV6Local);
+ addEtcHosts(containerData, context.node().getHostname(), ipV4Local, ipV6Local);
}
addMounts(context, command);
// TODO: Enforce disk constraints
- long minMainMemoryAvailableMb = (long) (node.getMinMainMemoryAvailableGb() * 1024);
+ long minMainMemoryAvailableMb = (long) (context.node().getMinMainMemoryAvailableGb() * 1024);
if (minMainMemoryAvailableMb > 0) {
// VESPA_TOTAL_MEMORY_MB is used to make any jdisc container think the machine
// only has this much physical memory (overrides total memory reported by `free -m`).
diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/StorageMaintainer.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/StorageMaintainer.java
index 64dca388f19..47255c54455 100644
--- a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/StorageMaintainer.java
+++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/StorageMaintainer.java
@@ -7,7 +7,6 @@ import com.yahoo.config.provision.NodeType;
import com.yahoo.log.LogLevel;
import com.yahoo.vespa.hosted.dockerapi.Container;
import com.yahoo.vespa.hosted.node.admin.component.TaskContext;
-import com.yahoo.vespa.hosted.node.admin.configserver.noderepository.NodeSpec;
import com.yahoo.vespa.hosted.node.admin.docker.DockerOperations;
import com.yahoo.vespa.hosted.node.admin.maintenance.coredump.CoredumpHandler;
import com.yahoo.vespa.hosted.node.admin.nodeagent.NodeAgentContext;
@@ -65,9 +64,9 @@ public class StorageMaintainer {
this.archiveContainerStoragePath = archiveContainerStoragePath;
}
- public void writeMetricsConfig(NodeAgentContext context, NodeSpec node) {
+ public void writeMetricsConfig(NodeAgentContext context) {
List<SecretAgentCheckConfig> configs = new ArrayList<>();
- Map<String, Object> tags = generateTags(context, node);
+ Map<String, Object> tags = generateTags(context);
// host-life
Path hostLifeCheckPath = context.pathInNodeUnderVespaHome("libexec/yms/yms_check_host_life");
@@ -154,26 +153,26 @@ public class StorageMaintainer {
dockerOperations.executeCommandInContainerAsRoot(context, "service", "yamas-agent", "restart");
}
- private Map<String, Object> generateTags(NodeAgentContext context, NodeSpec node) {
+ private Map<String, Object> generateTags(NodeAgentContext context) {
Map<String, String> tags = new LinkedHashMap<>();
tags.put("namespace", "Vespa");
- tags.put("role", nodeTypeToRole(node.getNodeType()));
+ tags.put("role", nodeTypeToRole(context.node().getNodeType()));
tags.put("zone", String.format("%s.%s", context.zoneId().environment().value(), context.zoneId().regionName().value()));
- node.getVespaVersion().ifPresent(version -> tags.put("vespaVersion", version));
+ context.node().getVespaVersion().ifPresent(version -> tags.put("vespaVersion", version));
if (! isConfigserverLike(context.nodeType())) {
- tags.put("flavor", node.getFlavor());
- tags.put("canonicalFlavor", node.getCanonicalFlavor());
- tags.put("state", node.getState().toString());
- node.getParentHostname().ifPresent(parent -> tags.put("parentHostname", parent));
- node.getOwner().ifPresent(owner -> {
+ tags.put("flavor", context.node().getFlavor());
+ tags.put("canonicalFlavor", context.node().getCanonicalFlavor());
+ tags.put("state", context.node().getState().toString());
+ context.node().getParentHostname().ifPresent(parent -> tags.put("parentHostname", parent));
+ context.node().getOwner().ifPresent(owner -> {
tags.put("tenantName", owner.getTenant());
tags.put("app", owner.getApplication() + "." + owner.getInstance());
tags.put("applicationName", owner.getApplication());
tags.put("instanceName", owner.getInstance());
tags.put("applicationId", owner.getTenant() + "." + owner.getApplication() + "." + owner.getInstance());
});
- node.getMembership().ifPresent(membership -> {
+ context.node().getMembership().ifPresent(membership -> {
tags.put("clustertype", membership.getClusterType());
tags.put("clusterid", membership.getClusterId());
});
@@ -257,23 +256,23 @@ public class StorageMaintainer {
}
/** Checks if container has any new coredumps, reports and archives them if so */
- public void handleCoreDumpsForContainer(NodeAgentContext context, NodeSpec node, Optional<Container> container) {
- final Map<String, Object> nodeAttributes = getCoredumpNodeAttributes(context, node, container);
+ public void handleCoreDumpsForContainer(NodeAgentContext context, Optional<Container> container) {
+ final Map<String, Object> nodeAttributes = getCoredumpNodeAttributes(context, container);
coredumpHandler.converge(context, nodeAttributes);
}
- private Map<String, Object> getCoredumpNodeAttributes(NodeAgentContext context, NodeSpec node, Optional<Container> container) {
+ private Map<String, Object> getCoredumpNodeAttributes(NodeAgentContext context, Optional<Container> container) {
Map<String, String> attributes = new HashMap<>();
- attributes.put("hostname", node.getHostname());
+ attributes.put("hostname", context.node().getHostname());
attributes.put("region", context.zoneId().regionName().value());
attributes.put("environment", context.zoneId().environment().value());
- attributes.put("flavor", node.getFlavor());
+ attributes.put("flavor", context.node().getFlavor());
attributes.put("kernel_version", System.getProperty("os.version"));
container.map(c -> c.image).ifPresent(image -> attributes.put("docker_image", image.asString()));
- node.getParentHostname().ifPresent(parent -> attributes.put("parent_hostname", parent));
- node.getVespaVersion().ifPresent(version -> attributes.put("vespa_version", version));
- node.getOwner().ifPresent(owner -> {
+ context.node().getParentHostname().ifPresent(parent -> attributes.put("parent_hostname", parent));
+ context.node().getVespaVersion().ifPresent(version -> attributes.put("vespa_version", version));
+ context.node().getOwner().ifPresent(owner -> {
attributes.put("tenant", owner.getTenant());
attributes.put("application", owner.getApplication());
attributes.put("instance", owner.getInstance());
diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeadmin/NodeAdminImpl.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeadmin/NodeAdminImpl.java
index b191401b8e0..2303f78217c 100644
--- a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeadmin/NodeAdminImpl.java
+++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeadmin/NodeAdminImpl.java
@@ -9,6 +9,11 @@ import com.yahoo.vespa.hosted.dockerapi.metrics.MetricReceiverWrapper;
import com.yahoo.vespa.hosted.node.admin.configserver.noderepository.NodeSpec;
import com.yahoo.vespa.hosted.node.admin.maintenance.acl.AclMaintainer;
import com.yahoo.vespa.hosted.node.admin.nodeagent.NodeAgent;
+import com.yahoo.vespa.hosted.node.admin.nodeagent.NodeAgentContext;
+import com.yahoo.vespa.hosted.node.admin.nodeagent.NodeAgentContextFactory;
+import com.yahoo.vespa.hosted.node.admin.nodeagent.NodeAgentContextManager;
+import com.yahoo.vespa.hosted.node.admin.nodeagent.NodeAgentFactory;
+import com.yahoo.vespa.hosted.node.admin.nodeagent.NodeAgentScheduler;
import com.yahoo.vespa.hosted.node.admin.util.PrefixLogger;
import java.time.Clock;
@@ -23,7 +28,6 @@ import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.Executors;
import java.util.concurrent.ScheduledExecutorService;
import java.util.concurrent.TimeUnit;
-import java.util.function.Function;
import java.util.stream.Collectors;
/**
@@ -33,12 +37,15 @@ import java.util.stream.Collectors;
*/
public class NodeAdminImpl implements NodeAdmin {
private static final PrefixLogger logger = PrefixLogger.getNodeAdminLogger(NodeAdmin.class);
+ private static final Duration NODE_AGENT_FREEZE_TIMEOUT = Duration.ofSeconds(5);
+
private final ScheduledExecutorService aclScheduler =
Executors.newScheduledThreadPool(1, ThreadFactoryFactory.getDaemonThreadFactory("aclscheduler"));
private final ScheduledExecutorService metricsScheduler =
Executors.newScheduledThreadPool(1, ThreadFactoryFactory.getDaemonThreadFactory("metricsscheduler"));
- private final Function<String, NodeAgent> nodeAgentFactory;
+ private final NodeAgentWithSchedulerFactory nodeAgentWithSchedulerFactory;
+ private final NodeAgentContextFactory nodeAgentContextFactory;
private final Optional<AclMaintainer> aclMaintainer;
private final Clock clock;
@@ -46,16 +53,27 @@ public class NodeAdminImpl implements NodeAdmin {
private boolean isFrozen;
private Instant startOfFreezeConvergence;
- private final Map<String, NodeAgent> nodeAgentsByHostname = new ConcurrentHashMap<>();
+ private final Map<String, NodeAgentWithScheduler> nodeAgentWithSchedulerByHostname = new ConcurrentHashMap<>();
private final GaugeWrapper numberOfContainersInLoadImageState;
private final CounterWrapper numberOfUnhandledExceptionsInNodeAgent;
- public NodeAdminImpl(Function<String, NodeAgent> nodeAgentFactory,
+ public NodeAdminImpl(NodeAgentFactory nodeAgentFactory,
+ NodeAgentContextFactory nodeAgentContextFactory,
Optional<AclMaintainer> aclMaintainer,
MetricReceiverWrapper metricReceiver,
Clock clock) {
- this.nodeAgentFactory = nodeAgentFactory;
+ this((NodeAgentWithSchedulerFactory) nodeAgentContext -> create(clock, nodeAgentFactory, nodeAgentContext),
+ nodeAgentContextFactory, aclMaintainer, metricReceiver, clock);
+ }
+
+ NodeAdminImpl(NodeAgentWithSchedulerFactory nodeAgentWithSchedulerFactory,
+ NodeAgentContextFactory nodeAgentContextFactory,
+ Optional<AclMaintainer> aclMaintainer,
+ MetricReceiverWrapper metricReceiver,
+ Clock clock) {
+ this.nodeAgentWithSchedulerFactory = nodeAgentWithSchedulerFactory;
+ this.nodeAgentContextFactory = nodeAgentContextFactory;
this.aclMaintainer = aclMaintainer;
this.clock = clock;
@@ -70,22 +88,33 @@ public class NodeAdminImpl implements NodeAdmin {
@Override
public void refreshContainersToRun(List<NodeSpec> containersToRun) {
- final Set<String> hostnamesOfContainersToRun = containersToRun.stream()
- .map(NodeSpec::getHostname)
- .collect(Collectors.toSet());
+ final Map<String, NodeAgentContext> nodeAgentContextsByHostname = containersToRun.stream()
+ .collect(Collectors.toMap(NodeSpec::getHostname, nodeAgentContextFactory::create));
- synchronizeNodesToNodeAgents(hostnamesOfContainersToRun);
+ // Stop and remove NodeAgents that should no longer be running
+ diff(nodeAgentWithSchedulerByHostname.keySet(), nodeAgentContextsByHostname.keySet())
+ .forEach(hostname -> nodeAgentWithSchedulerByHostname.remove(hostname).stop());
+
+ // Start NodeAgent for hostnames that should be running, but aren't yet
+ diff(nodeAgentContextsByHostname.keySet(), nodeAgentWithSchedulerByHostname.keySet()).forEach(hostname -> {
+ NodeAgentWithScheduler naws = nodeAgentWithSchedulerFactory.create(nodeAgentContextsByHostname.get(hostname));
+ naws.start();
+ nodeAgentWithSchedulerByHostname.put(hostname, naws);
+ });
- updateNodeAgentMetrics();
+ // At this point, nodeAgentContextsByHostname and nodeAgentWithSchedulerByHostname should have the same keys
+ nodeAgentContextsByHostname.forEach((hostname, context) ->
+ nodeAgentWithSchedulerByHostname.get(hostname).scheduleTickWith(context)
+ );
}
private void updateNodeAgentMetrics() {
int numberContainersWaitingImage = 0;
int numberOfNewUnhandledExceptions = 0;
- for (NodeAgent nodeAgent : nodeAgentsByHostname.values()) {
- if (nodeAgent.isDownloadingImage()) numberContainersWaitingImage++;
- numberOfNewUnhandledExceptions += nodeAgent.getAndResetNumberOfUnhandledExceptions();
+ for (NodeAgentWithScheduler nodeAgentWithScheduler : nodeAgentWithSchedulerByHostname.values()) {
+ if (nodeAgentWithScheduler.isDownloadingImage()) numberContainersWaitingImage++;
+ numberOfNewUnhandledExceptions += nodeAgentWithScheduler.getAndResetNumberOfUnhandledExceptions();
}
numberOfContainersInLoadImageState.sample(numberContainersWaitingImage);
@@ -105,8 +134,8 @@ public class NodeAdminImpl implements NodeAdmin {
}
// Use filter with count instead of allMatch() because allMatch() will short circuit on first non-match
- boolean allNodeAgentsConverged = nodeAgentsByHostname.values().stream()
- .filter(nodeAgent -> !nodeAgent.setFrozen(wantFrozen))
+ boolean allNodeAgentsConverged = nodeAgentWithSchedulerByHostname.values().parallelStream()
+ .filter(nodeAgentScheduler -> !nodeAgentScheduler.setFrozen(wantFrozen, NODE_AGENT_FREEZE_TIMEOUT))
.count() == 0;
if (wantFrozen) {
@@ -134,8 +163,8 @@ public class NodeAdminImpl implements NodeAdmin {
public void stopNodeAgentServices(List<String> hostnames) {
// Each container may spend 1-1:30 minutes stopping
hostnames.parallelStream()
- .filter(nodeAgentsByHostname::containsKey)
- .map(nodeAgentsByHostname::get)
+ .filter(nodeAgentWithSchedulerByHostname::containsKey)
+ .map(nodeAgentWithSchedulerByHostname::get)
.forEach(nodeAgent -> {
nodeAgent.suspend();
nodeAgent.stopServices();
@@ -146,7 +175,8 @@ public class NodeAdminImpl implements NodeAdmin {
public void start() {
metricsScheduler.scheduleAtFixedRate(() -> {
try {
- nodeAgentsByHostname.values().forEach(NodeAgent::updateContainerNodeMetrics);
+ updateNodeAgentMetrics();
+ nodeAgentWithSchedulerByHostname.values().forEach(NodeAgent::updateContainerNodeMetrics);
} catch (Throwable e) {
logger.warning("Metric fetcher scheduler failed", e);
}
@@ -166,7 +196,7 @@ public class NodeAdminImpl implements NodeAdmin {
aclScheduler.shutdown();
// Stop all node-agents in parallel, will block until the last NodeAgent is stopped
- nodeAgentsByHostname.values().parallelStream().forEach(NodeAgent::stop);
+ nodeAgentWithSchedulerByHostname.values().parallelStream().forEach(NodeAgent::stop);
do {
try {
@@ -185,23 +215,35 @@ public class NodeAdminImpl implements NodeAdmin {
return result;
}
- void synchronizeNodesToNodeAgents(Set<String> hostnamesToRun) {
- // Stop and remove NodeAgents that should no longer be running
- diff(nodeAgentsByHostname.keySet(), hostnamesToRun)
- .forEach(hostname -> nodeAgentsByHostname.remove(hostname).stop());
+ static class NodeAgentWithScheduler implements NodeAgent, NodeAgentScheduler {
+ private final NodeAgent nodeAgent;
+ private final NodeAgentScheduler nodeAgentScheduler;
- // Start NodeAgent for hostnames that should be running, but aren't yet
- diff(hostnamesToRun, nodeAgentsByHostname.keySet())
- .forEach(this::startNodeAgent);
+ private NodeAgentWithScheduler(NodeAgent nodeAgent, NodeAgentScheduler nodeAgentScheduler) {
+ this.nodeAgent = nodeAgent;
+ this.nodeAgentScheduler = nodeAgentScheduler;
+ }
+
+ @Override public void stopServices() { nodeAgent.stopServices(); }
+ @Override public void suspend() { nodeAgent.suspend(); }
+ @Override public void start() { nodeAgent.start(); }
+ @Override public void stop() { nodeAgent.stop(); }
+ @Override public void updateContainerNodeMetrics() { nodeAgent.updateContainerNodeMetrics(); }
+ @Override public boolean isDownloadingImage() { return nodeAgent.isDownloadingImage(); }
+ @Override public int getAndResetNumberOfUnhandledExceptions() { return nodeAgent.getAndResetNumberOfUnhandledExceptions(); }
+
+ @Override public void scheduleTickWith(NodeAgentContext context) { nodeAgentScheduler.scheduleTickWith(context); }
+ @Override public boolean setFrozen(boolean frozen, Duration timeout) { return nodeAgentScheduler.setFrozen(frozen, timeout); }
}
- private void startNodeAgent(String hostname) {
- if (nodeAgentsByHostname.containsKey(hostname))
- throw new IllegalArgumentException("Attempted to start NodeAgent for hostname " + hostname +
- ", but one is already running!");
+ @FunctionalInterface
+ interface NodeAgentWithSchedulerFactory {
+ NodeAgentWithScheduler create(NodeAgentContext context);
+ }
- NodeAgent agent = nodeAgentFactory.apply(hostname);
- agent.start();
- nodeAgentsByHostname.put(hostname, agent);
+ private static NodeAgentWithScheduler create(Clock clock, NodeAgentFactory nodeAgentFactory, NodeAgentContext context) {
+ NodeAgentContextManager contextManager = new NodeAgentContextManager(clock, context);
+ NodeAgent nodeAgent = nodeAgentFactory.create(contextManager);
+ return new NodeAgentWithScheduler(nodeAgent, contextManager);
}
}
diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeadmin/NodeAdminStateUpdater.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeadmin/NodeAdminStateUpdater.java
index a12104c6e98..13d3f3307d2 100644
--- a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeadmin/NodeAdminStateUpdater.java
+++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeadmin/NodeAdminStateUpdater.java
@@ -51,30 +51,21 @@ public class NodeAdminStateUpdater {
nodeAdmin.start();
}
- public void converge(State wantedState) {
- try {
- convergeState(wantedState);
- } finally {
- if (wantedState != RESUMED && currentState == TRANSITIONING) {
- Duration subsystemFreezeDuration = nodeAdmin.subsystemFreezeDuration();
- if (subsystemFreezeDuration.compareTo(FREEZE_CONVERGENCE_TIMEOUT) > 0) {
- // We have spent too much time trying to freeze and node admin is still not frozen.
- // To avoid node agents stalling for too long, we'll force unfrozen ticks now.
- log.info("Timed out trying to freeze, will force unfreezed ticks");
- fetchContainersToRunFromNodeRepository();
- nodeAdmin.setFrozen(false);
- }
- } else if (currentState == RESUMED) {
- fetchContainersToRunFromNodeRepository();
- }
- }
- }
-
/**
* This method attempts to converge node-admin w/agents to a {@link State}
* with respect to: freeze, Orchestrator, and services running.
*/
- private void convergeState(State wantedState) {
+ public void converge(State wantedState) {
+ if (wantedState == RESUMED) {
+ adjustNodeAgentsToRunFromNodeRepository();
+ } else if (currentState == TRANSITIONING && nodeAdmin.subsystemFreezeDuration().compareTo(FREEZE_CONVERGENCE_TIMEOUT) > 0) {
+ // We have spent too much time trying to freeze and node admin is still not frozen.
+ // To avoid node agents stalling for too long, we'll force unfrozen ticks now.
+ adjustNodeAgentsToRunFromNodeRepository();
+ nodeAdmin.setFrozen(false);
+ throw new ConvergenceException("Timed out trying to freeze all nodes: will force an unfrozen tick");
+ }
+
if (currentState == wantedState) return;
currentState = TRANSITIONING;
@@ -119,7 +110,7 @@ public class NodeAdminStateUpdater {
currentState = wantedState;
}
- private void fetchContainersToRunFromNodeRepository() {
+ private void adjustNodeAgentsToRunFromNodeRepository() {
try {
final List<NodeSpec> containersToRun = nodeRepository.getNodes(hostHostname);
nodeAdmin.refreshContainersToRun(containersToRun);
diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgent.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgent.java
index 947e7c85d66..10076c4f48a 100644
--- a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgent.java
+++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgent.java
@@ -9,12 +9,6 @@ package com.yahoo.vespa.hosted.node.admin.nodeagent;
* @author bakksjo
*/
public interface NodeAgent {
- /**
- * Will eventually freeze/unfreeze the node agent
- * @param frozen whether node agent should be frozen
- * @return True if node agent has converged to the desired state
- */
- boolean setFrozen(boolean frozen);
/**
* Stop services running on node. Depending on the state of the node, {@link #suspend()} might need to be
diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentContext.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentContext.java
index 4874eccb913..2e9f58a2c31 100644
--- a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentContext.java
+++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentContext.java
@@ -6,6 +6,7 @@ import com.yahoo.vespa.athenz.api.AthenzService;
import com.yahoo.vespa.hosted.dockerapi.ContainerName;
import com.yahoo.vespa.hosted.node.admin.component.TaskContext;
import com.yahoo.vespa.hosted.node.admin.component.ZoneId;
+import com.yahoo.vespa.hosted.node.admin.configserver.noderepository.NodeSpec;
import com.yahoo.vespa.hosted.node.admin.docker.DockerNetworking;
import java.nio.file.Path;
@@ -13,11 +14,17 @@ import java.nio.file.Paths;
public interface NodeAgentContext extends TaskContext {
+ NodeSpec node();
+
ContainerName containerName();
- HostName hostname();
+ default HostName hostname() {
+ return HostName.from(node().getHostname());
+ }
- NodeType nodeType();
+ default NodeType nodeType() {
+ return node().getNodeType();
+ }
AthenzService identity();
diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentContextFactory.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentContextFactory.java
new file mode 100644
index 00000000000..0cfafe34717
--- /dev/null
+++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentContextFactory.java
@@ -0,0 +1,12 @@
+// Copyright 2018 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.hosted.node.admin.nodeagent;
+
+import com.yahoo.vespa.hosted.node.admin.configserver.noderepository.NodeSpec;
+
+/**
+ * @author freva
+ */
+@FunctionalInterface
+public interface NodeAgentContextFactory {
+ NodeAgentContext create(NodeSpec nodeSpec);
+}
diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentContextImpl.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentContextImpl.java
index 3c34e35ab46..58414ab55f4 100644
--- a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentContextImpl.java
+++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentContextImpl.java
@@ -1,14 +1,15 @@
package com.yahoo.vespa.hosted.node.admin.nodeagent;
import com.yahoo.config.provision.Environment;
-import com.yahoo.config.provision.HostName;
import com.yahoo.config.provision.NodeType;
import com.yahoo.config.provision.RegionName;
import com.yahoo.config.provision.SystemName;
import com.yahoo.vespa.athenz.api.AthenzService;
import com.yahoo.vespa.hosted.dockerapi.ContainerName;
import com.yahoo.vespa.hosted.node.admin.component.ZoneId;
+import com.yahoo.vespa.hosted.node.admin.configserver.noderepository.NodeSpec;
import com.yahoo.vespa.hosted.node.admin.docker.DockerNetworking;
+import com.yahoo.vespa.hosted.provision.Node;
import java.nio.file.FileSystem;
import java.nio.file.Path;
@@ -25,9 +26,8 @@ public class NodeAgentContextImpl implements NodeAgentContext {
private static final Path ROOT = Paths.get("/");
private final String logPrefix;
+ private final NodeSpec node;
private final ContainerName containerName;
- private final HostName hostName;
- private final NodeType nodeType;
private final AthenzService identity;
private final DockerNetworking dockerNetworking;
private final ZoneId zoneId;
@@ -36,13 +36,12 @@ public class NodeAgentContextImpl implements NodeAgentContext {
private final String vespaUser;
private final String vespaUserOnHost;
- public NodeAgentContextImpl(String hostname, NodeType nodeType, AthenzService identity,
+ public NodeAgentContextImpl(NodeSpec node, AthenzService identity,
DockerNetworking dockerNetworking, ZoneId zoneId,
Path pathToContainerStorage, Path pathToVespaHome,
String vespaUser, String vespaUserOnHost) {
- this.hostName = HostName.from(Objects.requireNonNull(hostname));
- this.containerName = ContainerName.fromHostname(hostname);
- this.nodeType = Objects.requireNonNull(nodeType);
+ this.node = Objects.requireNonNull(node);
+ this.containerName = ContainerName.fromHostname(node.getHostname());
this.identity = Objects.requireNonNull(identity);
this.dockerNetworking = Objects.requireNonNull(dockerNetworking);
this.zoneId = Objects.requireNonNull(zoneId);
@@ -54,18 +53,13 @@ public class NodeAgentContextImpl implements NodeAgentContext {
}
@Override
- public ContainerName containerName() {
- return containerName;
- }
-
- @Override
- public HostName hostname() {
- return hostName;
+ public NodeSpec node() {
+ return node;
}
@Override
- public NodeType nodeType() {
- return nodeType;
+ public ContainerName containerName() {
+ return containerName;
}
@Override
@@ -134,12 +128,25 @@ public class NodeAgentContextImpl implements NodeAgentContext {
public void log(Logger logger, Level level, String message, Throwable throwable) {
logger.log(level, logPrefix + message, throwable);
}
-
+
+ @Override
+ public String toString() {
+ return "NodeAgentContextImpl{" +
+ "node=" + node +
+ ", containerName=" + containerName +
+ ", identity=" + identity +
+ ", dockerNetworking=" + dockerNetworking +
+ ", zoneId=" + zoneId +
+ ", pathToNodeRootOnHost=" + pathToNodeRootOnHost +
+ ", pathToVespaHome=" + pathToVespaHome +
+ ", vespaUser='" + vespaUser + '\'' +
+ ", vespaUserOnHost='" + vespaUserOnHost + '\'' +
+ '}';
+ }
/** For testing only! */
public static class Builder {
- private final String hostname;
- private NodeType nodeType;
+ private NodeSpec.Builder nodeSpecBuilder = new NodeSpec.Builder();
private AthenzService identity;
private DockerNetworking dockerNetworking;
private ZoneId zoneId;
@@ -148,12 +155,25 @@ public class NodeAgentContextImpl implements NodeAgentContext {
private String vespaUser;
private String vespaUserOnHost;
+ public Builder(NodeSpec node) {
+ this.nodeSpecBuilder = new NodeSpec.Builder(node);
+ }
+
+ /**
+ * Creates a NodeAgentContext.Builder with a NodeSpec that has the given hostname and some
+ * reasonable values for the remaining required NodeSpec fields. Use {@link #Builder(NodeSpec)}
+ * if you want to control the entire NodeSpec.
+ */
public Builder(String hostname) {
- this.hostname = hostname;
+ this.nodeSpecBuilder
+ .hostname(hostname)
+ .state(Node.State.active)
+ .nodeType(NodeType.tenant)
+ .flavor("d-2-8-50");
}
public Builder nodeType(NodeType nodeType) {
- this.nodeType = nodeType;
+ this.nodeSpecBuilder.nodeType(nodeType);
return this;
}
@@ -198,8 +218,7 @@ public class NodeAgentContextImpl implements NodeAgentContext {
public NodeAgentContextImpl build() {
return new NodeAgentContextImpl(
- hostname,
- Optional.ofNullable(nodeType).orElse(NodeType.tenant),
+ nodeSpecBuilder.build(),
Optional.ofNullable(identity).orElseGet(() -> new AthenzService("domain", "service")),
Optional.ofNullable(dockerNetworking).orElse(DockerNetworking.HOST_NETWORK),
Optional.ofNullable(zoneId).orElseGet(() -> new ZoneId(SystemName.dev, Environment.dev, RegionName.defaultName())),
diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentContextManager.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentContextManager.java
new file mode 100644
index 00000000000..54f357d5f29
--- /dev/null
+++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentContextManager.java
@@ -0,0 +1,102 @@
+// Copyright 2019 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.hosted.node.admin.nodeagent;
+
+import java.time.Clock;
+import java.time.Duration;
+import java.util.Objects;
+
+/**
+ * This class should be used by exactly 2 threads, 1 for each interface it implements.
+ *
+ * @author freva
+ */
+public class NodeAgentContextManager implements NodeAgentContextSupplier, NodeAgentScheduler {
+
+ private final Object monitor = new Object();
+ private final Clock clock;
+
+ private NodeAgentContext currentContext;
+ private NodeAgentContext nextContext;
+ private boolean wantFrozen = false;
+ private boolean isFrozen = true;
+ private boolean pendingInterrupt = false;
+
+ public NodeAgentContextManager(Clock clock, NodeAgentContext context) {
+ this.clock = clock;
+ this.currentContext = context;
+ }
+
+ @Override
+ public void scheduleTickWith(NodeAgentContext context) {
+ synchronized (monitor) {
+ nextContext = Objects.requireNonNull(context);
+ monitor.notifyAll(); // Notify of new context
+ }
+ }
+
+ @Override
+ public boolean setFrozen(boolean frozen, Duration timeout) {
+ synchronized (monitor) {
+ if (wantFrozen != frozen) {
+ wantFrozen = frozen;
+ monitor.notifyAll(); // Notify the supplier of the wantFrozen change
+ }
+
+ boolean successful;
+ long remainder;
+ long end = clock.instant().plus(timeout).toEpochMilli();
+ while (!(successful = isFrozen == frozen) && (remainder = end - clock.millis()) > 0) {
+ try {
+                    monitor.wait(remainder); // Wait with timeout until the supplier has reached wanted frozen state
+ } catch (InterruptedException ignored) { }
+ }
+
+ return successful;
+ }
+ }
+
+ @Override
+ public NodeAgentContext nextContext() throws InterruptedException {
+ synchronized (monitor) {
+ while (setAndGetIsFrozen(wantFrozen) || nextContext == null) {
+ if (pendingInterrupt) {
+ pendingInterrupt = false;
+ throw new InterruptedException("interrupt() was called before next context was scheduled");
+ }
+
+ try {
+ monitor.wait(); // Wait until scheduler provides a new context
+ } catch (InterruptedException ignored) { }
+ }
+
+ currentContext = nextContext;
+ nextContext = null;
+ return currentContext;
+ }
+ }
+
+ @Override
+ public NodeAgentContext currentContext() {
+ synchronized (monitor) {
+ return currentContext;
+ }
+ }
+
+ @Override
+ public void interrupt() {
+ synchronized (monitor) {
+ pendingInterrupt = true;
+ monitor.notifyAll();
+ }
+ }
+
+ private boolean setAndGetIsFrozen(boolean isFrozen) {
+ synchronized (monitor) {
+ if (this.isFrozen != isFrozen) {
+ this.isFrozen = isFrozen;
+ monitor.notifyAll(); // Notify the scheduler of the isFrozen change
+ }
+ return this.isFrozen;
+ }
+ }
+} \ No newline at end of file
diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentContextSupplier.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentContextSupplier.java
new file mode 100644
index 00000000000..1fc730a3cb0
--- /dev/null
+++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentContextSupplier.java
@@ -0,0 +1,21 @@
+// Copyright 2019 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.hosted.node.admin.nodeagent;
+
+/**
+ * @author freva
+ */
+public interface NodeAgentContextSupplier {
+
+ /**
+ * Blocks until the next context is ready
+ * @return context
+ * @throws InterruptedException if {@link #interrupt()} was called before this method returned
+ */
+ NodeAgentContext nextContext() throws InterruptedException;
+
+ /** @return the last context returned by {@link #nextContext()} or a default value */
+ NodeAgentContext currentContext();
+
+ /** Interrupts the thread(s) currently waiting in {@link #nextContext()} */
+ void interrupt();
+}
diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentFactory.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentFactory.java
new file mode 100644
index 00000000000..bd13b7eb094
--- /dev/null
+++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentFactory.java
@@ -0,0 +1,10 @@
+// Copyright 2019 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.hosted.node.admin.nodeagent;
+
+/**
+ * @author freva
+ */
+@FunctionalInterface
+public interface NodeAgentFactory {
+ NodeAgent create(NodeAgentContextSupplier contextSupplier);
+}
diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImpl.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImpl.java
index 98975dddb56..0bfff82a055 100644
--- a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImpl.java
+++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImpl.java
@@ -17,20 +17,17 @@ import com.yahoo.vespa.hosted.dockerapi.metrics.Dimensions;
import com.yahoo.vespa.hosted.dockerapi.metrics.MetricReceiverWrapper;
import com.yahoo.vespa.hosted.node.admin.configserver.noderepository.NodeSpec;
import com.yahoo.vespa.hosted.node.admin.configserver.noderepository.NodeAttributes;
+import com.yahoo.vespa.hosted.node.admin.configserver.orchestrator.OrchestratorException;
import com.yahoo.vespa.hosted.node.admin.docker.DockerOperations;
import com.yahoo.vespa.hosted.node.admin.maintenance.StorageMaintainer;
import com.yahoo.vespa.hosted.node.admin.configserver.noderepository.NodeRepository;
import com.yahoo.vespa.hosted.node.admin.configserver.orchestrator.Orchestrator;
-import com.yahoo.vespa.hosted.node.admin.configserver.orchestrator.OrchestratorException;
import com.yahoo.vespa.hosted.node.admin.maintenance.acl.AclMaintainer;
import com.yahoo.vespa.hosted.node.admin.maintenance.identity.AthenzCredentialsMaintainer;
import com.yahoo.vespa.hosted.node.admin.nodeadmin.ConvergenceException;
import com.yahoo.vespa.hosted.node.admin.util.SecretAgentCheckConfig;
import com.yahoo.vespa.hosted.provision.Node;
-import java.time.Clock;
-import java.time.Duration;
-import java.time.Instant;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
@@ -59,30 +56,21 @@ public class NodeAgentImpl implements NodeAgent {
private static final Logger logger = Logger.getLogger(NodeAgentImpl.class.getName());
- private final Object monitor = new Object();
private final AtomicBoolean terminated = new AtomicBoolean(false);
-
- private boolean isFrozen = true;
- private boolean wantFrozen = false;
- private boolean workToDoNow = true;
- private boolean expectNodeNotInNodeRepo = false;
private boolean hasResumedNode = false;
private boolean hasStartedServices = true;
- private final NodeAgentContext context;
+ private final NodeAgentContextSupplier contextSupplier;
private final NodeRepository nodeRepository;
private final Orchestrator orchestrator;
private final DockerOperations dockerOperations;
private final StorageMaintainer storageMaintainer;
- private final Clock clock;
- private final Duration timeBetweenEachConverge;
private final Optional<AthenzCredentialsMaintainer> athenzCredentialsMaintainer;
private final Optional<AclMaintainer> aclMaintainer;
private final Optional<HealthChecker> healthChecker;
private int numberOfUnhandledException = 0;
private DockerImage imageBeingDownloaded = null;
- private Instant lastConverge;
private long currentRebootGeneration = 0;
private Optional<Long> currentRestartGeneration = Optional.empty();
@@ -115,24 +103,19 @@ public class NodeAgentImpl implements NodeAgent {
// Created in NodeAdminImpl
public NodeAgentImpl(
- final NodeAgentContext context,
+ final NodeAgentContextSupplier contextSupplier,
final NodeRepository nodeRepository,
final Orchestrator orchestrator,
final DockerOperations dockerOperations,
final StorageMaintainer storageMaintainer,
- final Clock clock,
- final Duration timeBetweenEachConverge,
final Optional<AthenzCredentialsMaintainer> athenzCredentialsMaintainer,
final Optional<AclMaintainer> aclMaintainer,
final Optional<HealthChecker> healthChecker) {
- this.context = context;
+ this.contextSupplier = contextSupplier;
this.nodeRepository = nodeRepository;
this.orchestrator = orchestrator;
this.dockerOperations = dockerOperations;
this.storageMaintainer = storageMaintainer;
- this.clock = clock;
- this.timeBetweenEachConverge = timeBetweenEachConverge;
- this.lastConverge = clock.instant();
this.athenzCredentialsMaintainer = athenzCredentialsMaintainer;
this.aclMaintainer = aclMaintainer;
this.healthChecker = healthChecker;
@@ -140,16 +123,15 @@ public class NodeAgentImpl implements NodeAgent {
this.loopThread = new Thread(() -> {
while (!terminated.get()) {
try {
- tick();
- } catch (Throwable t) {
- numberOfUnhandledException++;
- context.log(logger, LogLevel.ERROR, "Unhandled throwable, ignoring", t);
- }
+ NodeAgentContext context = contextSupplier.nextContext();
+ converge(context);
+ } catch (InterruptedException ignored) { }
}
});
- this.loopThread.setName("tick-" + context.hostname());
+ this.loopThread.setName("tick-" + contextSupplier.currentContext().hostname());
this.serviceRestarter = service -> {
+ NodeAgentContext context = contextSupplier.currentContext();
try {
ProcessResult processResult = dockerOperations.executeCommandInContainerAsRoot(
context, "service", service, "restart");
@@ -164,46 +146,29 @@ public class NodeAgentImpl implements NodeAgent {
}
@Override
- public boolean setFrozen(boolean frozen) {
- synchronized (monitor) {
- if (wantFrozen != frozen) {
- wantFrozen = frozen;
- context.log(logger, LogLevel.DEBUG, wantFrozen ? "Freezing" : "Unfreezing");
- signalWorkToBeDone();
- }
-
- return isFrozen == frozen;
- }
- }
-
- @Override
public void start() {
- context.log(logger, "Starting with interval " + timeBetweenEachConverge.toMillis() + " ms");
loopThread.start();
}
@Override
public void stop() {
- filebeatRestarter.shutdown();
if (!terminated.compareAndSet(false, true)) {
throw new RuntimeException("Can not re-stop a node agent.");
}
- signalWorkToBeDone();
+ filebeatRestarter.shutdown();
+ contextSupplier.interrupt();
do {
try {
loopThread.join();
filebeatRestarter.awaitTermination(Long.MAX_VALUE, TimeUnit.NANOSECONDS);
- } catch (InterruptedException e) {
- context.log(logger, LogLevel.ERROR,
- "Interrupted while waiting for converge thread and filebeatRestarter scheduler to shutdown");
- }
+ } catch (InterruptedException ignored) { }
} while (loopThread.isAlive() || !filebeatRestarter.isTerminated());
- context.log(logger, "Stopped");
+ contextSupplier.currentContext().log(logger, "Stopped");
}
- void startServicesIfNeeded() {
+ void startServicesIfNeeded(NodeAgentContext context) {
if (!hasStartedServices) {
context.log(logger, "Starting services");
dockerOperations.startServices(context);
@@ -211,10 +176,10 @@ public class NodeAgentImpl implements NodeAgent {
}
}
- void resumeNodeIfNeeded(NodeSpec node) {
+ void resumeNodeIfNeeded(NodeAgentContext context) {
if (!hasResumedNode) {
if (!currentFilebeatRestarter.isPresent()) {
- storageMaintainer.writeMetricsConfig(context, node);
+ storageMaintainer.writeMetricsConfig(context);
currentFilebeatRestarter = Optional.of(filebeatRestarter.scheduleWithFixedDelay(
() -> serviceRestarter.accept("filebeat"), 1, 1, TimeUnit.DAYS));
}
@@ -225,31 +190,31 @@ public class NodeAgentImpl implements NodeAgent {
}
}
- private void updateNodeRepoWithCurrentAttributes(final NodeSpec node) {
+ private void updateNodeRepoWithCurrentAttributes(NodeAgentContext context) {
final NodeAttributes currentNodeAttributes = new NodeAttributes();
final NodeAttributes newNodeAttributes = new NodeAttributes();
- if (node.getWantedRestartGeneration().isPresent() &&
- !Objects.equals(node.getCurrentRestartGeneration(), currentRestartGeneration)) {
- currentNodeAttributes.withRestartGeneration(node.getCurrentRestartGeneration());
+ if (context.node().getWantedRestartGeneration().isPresent() &&
+ !Objects.equals(context.node().getCurrentRestartGeneration(), currentRestartGeneration)) {
+ currentNodeAttributes.withRestartGeneration(context.node().getCurrentRestartGeneration());
newNodeAttributes.withRestartGeneration(currentRestartGeneration);
}
- if (!Objects.equals(node.getCurrentRebootGeneration(), currentRebootGeneration)) {
- currentNodeAttributes.withRebootGeneration(node.getCurrentRebootGeneration());
+ if (!Objects.equals(context.node().getCurrentRebootGeneration(), currentRebootGeneration)) {
+ currentNodeAttributes.withRebootGeneration(context.node().getCurrentRebootGeneration());
newNodeAttributes.withRebootGeneration(currentRebootGeneration);
}
- Optional<DockerImage> actualDockerImage = node.getWantedDockerImage().filter(n -> containerState == UNKNOWN);
- if (!Objects.equals(node.getCurrentDockerImage(), actualDockerImage)) {
- currentNodeAttributes.withDockerImage(node.getCurrentDockerImage().orElse(new DockerImage("")));
+ Optional<DockerImage> actualDockerImage = context.node().getWantedDockerImage().filter(n -> containerState == UNKNOWN);
+ if (!Objects.equals(context.node().getCurrentDockerImage(), actualDockerImage)) {
+ currentNodeAttributes.withDockerImage(context.node().getCurrentDockerImage().orElse(new DockerImage("")));
newNodeAttributes.withDockerImage(actualDockerImage.orElse(new DockerImage("")));
}
- publishStateToNodeRepoIfChanged(currentNodeAttributes, newNodeAttributes);
+ publishStateToNodeRepoIfChanged(context, currentNodeAttributes, newNodeAttributes);
}
- private void publishStateToNodeRepoIfChanged(NodeAttributes currentAttributes, NodeAttributes newAttributes) {
+ private void publishStateToNodeRepoIfChanged(NodeAgentContext context, NodeAttributes currentAttributes, NodeAttributes newAttributes) {
if (!currentAttributes.equals(newAttributes)) {
context.log(logger, "Publishing new set of attributes to node repo: %s -> %s",
currentAttributes, newAttributes);
@@ -257,9 +222,9 @@ public class NodeAgentImpl implements NodeAgent {
}
}
- private void startContainer(NodeSpec node) {
- ContainerData containerData = createContainerData(context, node);
- dockerOperations.createContainer(context, node, containerData);
+ private void startContainer(NodeAgentContext context) {
+ ContainerData containerData = createContainerData(context);
+ dockerOperations.createContainer(context, containerData);
dockerOperations.startContainer(context);
lastCpuMetric = new CpuUsageReporter();
@@ -268,14 +233,15 @@ public class NodeAgentImpl implements NodeAgent {
context.log(logger, "Container successfully started, new containerState is " + containerState);
}
- private Optional<Container> removeContainerIfNeededUpdateContainerState(NodeSpec node, Optional<Container> existingContainer) {
+ private Optional<Container> removeContainerIfNeededUpdateContainerState(
+ NodeAgentContext context, Optional<Container> existingContainer) {
return existingContainer
- .flatMap(container -> removeContainerIfNeeded(node, container))
+ .flatMap(container -> removeContainerIfNeeded(context, container))
.map(container -> {
- shouldRestartServices(node).ifPresent(restartReason -> {
+ shouldRestartServices(context.node()).ifPresent(restartReason -> {
context.log(logger, "Will restart services: " + restartReason);
- restartServices(node, container);
- currentRestartGeneration = node.getWantedRestartGeneration();
+ restartServices(context, container);
+ currentRestartGeneration = context.node().getWantedRestartGeneration();
});
return container;
});
@@ -292,17 +258,18 @@ public class NodeAgentImpl implements NodeAgent {
return Optional.empty();
}
- private void restartServices(NodeSpec node, Container existingContainer) {
- if (existingContainer.state.isRunning() && node.getState() == Node.State.active) {
+ private void restartServices(NodeAgentContext context, Container existingContainer) {
+ if (existingContainer.state.isRunning() && context.node().getState() == Node.State.active) {
context.log(logger, "Restarting services");
// Since we are restarting the services we need to suspend the node.
- orchestratorSuspendNode();
+ orchestratorSuspendNode(context);
dockerOperations.restartVespa(context);
}
}
@Override
public void stopServices() {
+ NodeAgentContext context = contextSupplier.currentContext();
context.log(logger, "Stopping services");
if (containerState == ABSENT) return;
try {
@@ -315,6 +282,7 @@ public class NodeAgentImpl implements NodeAgent {
@Override
public void suspend() {
+ NodeAgentContext context = contextSupplier.currentContext();
context.log(logger, "Suspending services on node");
if (containerState == ABSENT) return;
try {
@@ -358,18 +326,18 @@ public class NodeAgentImpl implements NodeAgent {
return Optional.empty();
}
- private Optional<Container> removeContainerIfNeeded(NodeSpec node, Container existingContainer) {
- Optional<String> removeReason = shouldRemoveContainer(node, existingContainer);
+ private Optional<Container> removeContainerIfNeeded(NodeAgentContext context, Container existingContainer) {
+ Optional<String> removeReason = shouldRemoveContainer(context.node(), existingContainer);
if (removeReason.isPresent()) {
context.log(logger, "Will remove container: " + removeReason.get());
if (existingContainer.state.isRunning()) {
- if (node.getState() == Node.State.active) {
- orchestratorSuspendNode();
+ if (context.node().getState() == Node.State.active) {
+ orchestratorSuspendNode(context);
}
try {
- if (node.getState() != Node.State.dirty) {
+ if (context.node().getState() != Node.State.dirty) {
suspend();
}
stopServices();
@@ -378,9 +346,9 @@ public class NodeAgentImpl implements NodeAgent {
}
}
stopFilebeatSchedulerIfNeeded();
- storageMaintainer.handleCoreDumpsForContainer(context, node, Optional.of(existingContainer));
+ storageMaintainer.handleCoreDumpsForContainer(context, Optional.of(existingContainer));
dockerOperations.removeContainer(context, existingContainer);
- currentRebootGeneration = node.getWantedRebootGeneration();
+ currentRebootGeneration = context.node().getWantedRebootGeneration();
containerState = ABSENT;
context.log(logger, "Container successfully removed, new containerState is " + containerState);
return Optional.empty();
@@ -399,78 +367,29 @@ public class NodeAgentImpl implements NodeAgent {
}
}
- private void signalWorkToBeDone() {
- synchronized (monitor) {
- if (!workToDoNow) {
- workToDoNow = true;
- context.log(logger, LogLevel.DEBUG, "Signaling work to be done");
- monitor.notifyAll();
- }
- }
- }
-
- void tick() {
- boolean isFrozenCopy;
- synchronized (monitor) {
- while (!workToDoNow) {
- long remainder = timeBetweenEachConverge
- .minus(Duration.between(lastConverge, clock.instant()))
- .toMillis();
- if (remainder > 0) {
- try {
- monitor.wait(remainder);
- } catch (InterruptedException e) {
- context.log(logger, LogLevel.ERROR, "Interrupted while sleeping before tick, ignoring");
- }
- } else break;
- }
- lastConverge = clock.instant();
- workToDoNow = false;
-
- if (isFrozen != wantFrozen) {
- isFrozen = wantFrozen;
- context.log(logger, "Updated NodeAgent's frozen state, new value: isFrozen: " + isFrozen);
- }
- isFrozenCopy = isFrozen;
- }
-
- if (isFrozenCopy) {
- context.log(logger, LogLevel.DEBUG, "tick: isFrozen");
- } else {
- try {
- converge();
- } catch (OrchestratorException | ConvergenceException e) {
- context.log(logger, e.getMessage());
- } catch (ContainerNotFoundException e) {
- containerState = ABSENT;
- context.log(logger, LogLevel.WARNING, "Container unexpectedly gone, resetting containerState to " + containerState);
- } catch (DockerException e) {
- numberOfUnhandledException++;
- context.log(logger, LogLevel.ERROR, "Caught a DockerException", e);
- } catch (Exception e) {
- numberOfUnhandledException++;
- context.log(logger, LogLevel.ERROR, "Unhandled exception, ignoring.", e);
- }
+ public void converge(NodeAgentContext context) {
+ try {
+ doConverge(context);
+ } catch (OrchestratorException | ConvergenceException e) {
+ context.log(logger, e.getMessage());
+ } catch (ContainerNotFoundException e) {
+ containerState = ABSENT;
+ context.log(logger, LogLevel.WARNING, "Container unexpectedly gone, resetting containerState to " + containerState);
+ } catch (DockerException e) {
+ numberOfUnhandledException++;
+ context.log(logger, LogLevel.ERROR, "Caught a DockerException", e);
+ } catch (Throwable e) {
+ numberOfUnhandledException++;
+ context.log(logger, LogLevel.ERROR, "Unhandled exception, ignoring", e);
}
}
// Public for testing
- void converge() {
- final Optional<NodeSpec> optionalNode = nodeRepository.getOptionalNode(context.hostname().value());
-
- // We just removed the node from node repo, so this is expected until NodeAdmin stop this NodeAgent
- if (!optionalNode.isPresent() && expectNodeNotInNodeRepo) {
- context.log(logger, LogLevel.INFO, "Node removed from node repo (as expected)");
- return;
- }
-
- final NodeSpec node = optionalNode.orElseThrow(() ->
- new IllegalStateException(String.format("Node '%s' missing from node repository", context.hostname())));
- expectNodeNotInNodeRepo = false;
-
- Optional<Container> container = getContainer();
+ void doConverge(NodeAgentContext context) {
+ NodeSpec node = context.node();
+ Optional<Container> container = getContainer(context);
if (!node.equals(lastNode)) {
- logChangesToNodeSpec(lastNode, node);
+ logChangesToNodeSpec(context, lastNode, node);
// Current reboot generation uninitialized or incremented from outside to cancel reboot
if (currentRebootGeneration < node.getCurrentRebootGeneration())
@@ -485,7 +404,7 @@ public class NodeAgentImpl implements NodeAgent {
// Every time the node spec changes, we should clear the metrics for this container as the dimensions
// will change and we will be reporting duplicate metrics.
if (container.map(c -> c.state.isRunning()).orElse(false)) {
- storageMaintainer.writeMetricsConfig(context, node);
+ storageMaintainer.writeMetricsConfig(context);
}
lastNode = node;
@@ -496,11 +415,11 @@ public class NodeAgentImpl implements NodeAgent {
case reserved:
case parked:
case failed:
- removeContainerIfNeededUpdateContainerState(node, container);
- updateNodeRepoWithCurrentAttributes(node);
+ removeContainerIfNeededUpdateContainerState(context, container);
+ updateNodeRepoWithCurrentAttributes(context);
break;
case active:
- storageMaintainer.handleCoreDumpsForContainer(context, node, container);
+ storageMaintainer.handleCoreDumpsForContainer(context, container);
storageMaintainer.getDiskUsageFor(context)
.map(diskUsage -> (double) diskUsage / BYTES_IN_GB / node.getMinDiskAvailableGb())
@@ -512,17 +431,17 @@ public class NodeAgentImpl implements NodeAgent {
context.log(logger, LogLevel.DEBUG, "Waiting for image to download " + imageBeingDownloaded.asString());
return;
}
- container = removeContainerIfNeededUpdateContainerState(node, container);
+ container = removeContainerIfNeededUpdateContainerState(context, container);
athenzCredentialsMaintainer.ifPresent(maintainer -> maintainer.converge(context));
if (! container.isPresent()) {
containerState = STARTING;
- startContainer(node);
+ startContainer(context);
containerState = UNKNOWN;
aclMaintainer.ifPresent(AclMaintainer::converge);
}
- startServicesIfNeeded();
- resumeNodeIfNeeded(node);
+ startServicesIfNeeded(context);
+ resumeNodeIfNeeded(context);
healthChecker.ifPresent(checker -> checker.verifyHealth(context));
// Because it's more important to stop a bad release from rolling out in prod,
@@ -535,32 +454,31 @@ public class NodeAgentImpl implements NodeAgent {
// has been successfully rolled out.
// - Slobrok and internal orchestrator state is used to determine whether
// to allow upgrade (suspend).
- updateNodeRepoWithCurrentAttributes(node);
+ updateNodeRepoWithCurrentAttributes(context);
context.log(logger, "Call resume against Orchestrator");
orchestrator.resume(context.hostname().value());
break;
case inactive:
- removeContainerIfNeededUpdateContainerState(node, container);
- updateNodeRepoWithCurrentAttributes(node);
+ removeContainerIfNeededUpdateContainerState(context, container);
+ updateNodeRepoWithCurrentAttributes(context);
break;
case provisioned:
nodeRepository.setNodeState(context.hostname().value(), Node.State.dirty);
break;
case dirty:
- removeContainerIfNeededUpdateContainerState(node, container);
+ removeContainerIfNeededUpdateContainerState(context, container);
context.log(logger, "State is " + node.getState() + ", will delete application storage and mark node as ready");
athenzCredentialsMaintainer.ifPresent(maintainer -> maintainer.clearCredentials(context));
storageMaintainer.archiveNodeStorage(context);
- updateNodeRepoWithCurrentAttributes(node);
+ updateNodeRepoWithCurrentAttributes(context);
nodeRepository.setNodeState(context.hostname().value(), Node.State.ready);
- expectNodeNotInNodeRepo = true;
break;
default:
throw new RuntimeException("UNKNOWN STATE " + node.getState().name());
}
}
- private void logChangesToNodeSpec(NodeSpec lastNode, NodeSpec node) {
+ private static void logChangesToNodeSpec(NodeAgentContext context, NodeSpec lastNode, NodeSpec node) {
StringBuilder builder = new StringBuilder();
appendIfDifferent(builder, "state", lastNode, node, NodeSpec::getState);
if (builder.length() > 0) {
@@ -572,7 +490,7 @@ public class NodeAgentImpl implements NodeAgent {
return value == null ? "[absent]" : value.toString();
}
- private <T> void appendIfDifferent(StringBuilder builder, String name, NodeSpec oldNode, NodeSpec newNode, Function<NodeSpec, T> getter) {
+ private static <T> void appendIfDifferent(StringBuilder builder, String name, NodeSpec oldNode, NodeSpec newNode, Function<NodeSpec, T> getter) {
T oldValue = oldNode == null ? null : getter.apply(oldNode);
T newValue = getter.apply(newNode);
if (!Objects.equals(oldValue, newValue)) {
@@ -592,8 +510,9 @@ public class NodeAgentImpl implements NodeAgent {
@SuppressWarnings("unchecked")
public void updateContainerNodeMetrics() {
- final NodeSpec node = lastNode;
- if (node == null || containerState != UNKNOWN) return;
+ if (containerState != UNKNOWN) return;
+ final NodeAgentContext context = contextSupplier.currentContext();
+ final NodeSpec node = context.node();
Optional<ContainerStats> containerStats = dockerOperations.getContainerStats(context);
if (!containerStats.isPresent()) return;
@@ -660,10 +579,10 @@ public class NodeAgentImpl implements NodeAgent {
metrics.add(networkMetrics);
});
- pushMetricsToContainer(metrics);
+ pushMetricsToContainer(context, metrics);
}
- private void pushMetricsToContainer(List<DimensionMetrics> metrics) {
+ private void pushMetricsToContainer(NodeAgentContext context, List<DimensionMetrics> metrics) {
StringBuilder params = new StringBuilder();
try {
for (DimensionMetrics dimensionMetrics : metrics) {
@@ -679,7 +598,7 @@ public class NodeAgentImpl implements NodeAgent {
}
}
- private Optional<Container> getContainer() {
+ private Optional<Container> getContainer(NodeAgentContext context) {
if (containerState == ABSENT) return Optional.empty();
Optional<Container> container = dockerOperations.getContainer(context);
if (! container.isPresent()) containerState = ABSENT;
@@ -743,12 +662,12 @@ public class NodeAgentImpl implements NodeAgent {
// More generally, the node repo response should contain sufficient info on what the docker image is,
// to allow the node admin to make decisions that depend on the docker image. Or, each docker image
// needs to contain routines for drain and suspend. For many images, these can just be dummy routines.
- private void orchestratorSuspendNode() {
+ private void orchestratorSuspendNode(NodeAgentContext context) {
context.log(logger, "Ask Orchestrator for permission to suspend node");
orchestrator.suspend(context.hostname().value());
}
- protected ContainerData createContainerData(NodeAgentContext context, NodeSpec node) {
+ protected ContainerData createContainerData(NodeAgentContext context) {
return (pathInContainer, data) -> {
throw new UnsupportedOperationException("addFile not implemented");
};
diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentScheduler.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentScheduler.java
new file mode 100644
index 00000000000..540601ffa4f
--- /dev/null
+++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentScheduler.java
@@ -0,0 +1,21 @@
+// Copyright 2019 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.hosted.node.admin.nodeagent;
+
+import java.time.Duration;
+
+/**
+ * @author freva
+ */
+public interface NodeAgentScheduler {
+
+ /** Schedule a tick for NodeAgent to run with the given NodeAgentContext */
+ void scheduleTickWith(NodeAgentContext context);
+
+ /**
+ * Will eventually freeze/unfreeze the node agent
+ * @param frozen whether node agent should be frozen
+ * @param timeout maximum duration this method should block while waiting for NodeAgent to reach target state
 + * @return true if the node agent has converged to the desired state
+ */
+ boolean setFrozen(boolean frozen, Duration timeout);
+}
diff --git a/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/configserver/flags/RealFlagRepositoryTest.java b/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/configserver/flags/RealFlagRepositoryTest.java
new file mode 100644
index 00000000000..c9e4e33f8bb
--- /dev/null
+++ b/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/configserver/flags/RealFlagRepositoryTest.java
@@ -0,0 +1,41 @@
+// Copyright 2019 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.hosted.node.admin.configserver.flags;
+
+import com.yahoo.vespa.flags.FlagId;
+import com.yahoo.vespa.flags.json.FlagData;
+import com.yahoo.vespa.flags.json.wire.WireFlagData;
+import com.yahoo.vespa.flags.json.wire.WireFlagDataList;
+import com.yahoo.vespa.hosted.node.admin.configserver.ConfigServerApi;
+import org.hamcrest.collection.IsMapContaining;
+import org.hamcrest.collection.IsMapWithSize;
+import org.junit.Test;
+
+import java.util.ArrayList;
+import java.util.Map;
+
+import static org.hamcrest.MatcherAssert.assertThat;
+import static org.mockito.ArgumentMatchers.any;
+import static org.mockito.ArgumentMatchers.eq;
+import static org.mockito.Mockito.mock;
+import static org.mockito.Mockito.when;
+
+/**
+ * @author hakonhall
+ */
+public class RealFlagRepositoryTest {
+ private final ConfigServerApi configServerApi = mock(ConfigServerApi.class);
+ private final RealFlagRepository repository = new RealFlagRepository(configServerApi);
+
+ @Test
+ public void test() {
+ WireFlagDataList list = new WireFlagDataList();
+ list.flags = new ArrayList<>();
+ list.flags.add(new WireFlagData());
+ list.flags.get(0).id = "id1";
+
+ when(configServerApi.get(any(), eq(WireFlagDataList.class))).thenReturn(list);
+ Map<FlagId, FlagData> allFlagData = repository.getAllFlagData();
+ assertThat(allFlagData, IsMapWithSize.aMapWithSize(1));
+ assertThat(allFlagData, IsMapContaining.hasKey(new FlagId("id1")));
+ }
+} \ No newline at end of file
diff --git a/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/integrationTests/DockerTester.java b/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/integrationTests/DockerTester.java
index cb3a1fb5e2c..109bce4c13f 100644
--- a/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/integrationTests/DockerTester.java
+++ b/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/integrationTests/DockerTester.java
@@ -15,8 +15,9 @@ import com.yahoo.vespa.hosted.node.admin.docker.DockerOperationsImpl;
import com.yahoo.vespa.hosted.node.admin.maintenance.StorageMaintainer;
import com.yahoo.vespa.hosted.node.admin.nodeadmin.NodeAdminImpl;
import com.yahoo.vespa.hosted.node.admin.nodeadmin.NodeAdminStateUpdater;
-import com.yahoo.vespa.hosted.node.admin.nodeagent.NodeAgent;
+import com.yahoo.vespa.hosted.node.admin.nodeagent.NodeAgentContextFactory;
import com.yahoo.vespa.hosted.node.admin.nodeagent.NodeAgentContextImpl;
+import com.yahoo.vespa.hosted.node.admin.nodeagent.NodeAgentFactory;
import com.yahoo.vespa.hosted.node.admin.nodeagent.NodeAgentImpl;
import com.yahoo.vespa.hosted.node.admin.task.util.network.IPAddressesMock;
import com.yahoo.vespa.hosted.provision.Node;
@@ -30,7 +31,6 @@ import java.nio.file.Paths;
import java.time.Clock;
import java.time.Duration;
import java.util.Optional;
-import java.util.function.Function;
import java.util.logging.Logger;
import static com.yahoo.yolean.Exceptions.uncheck;
@@ -87,19 +87,20 @@ public class DockerTester implements AutoCloseable {
.build();
nodeRepository.updateNodeRepositoryNode(hostSpec);
- Clock clock = Clock.systemUTC();
FileSystem fileSystem = TestFileSystem.create();
DockerOperations dockerOperations = new DockerOperationsImpl(docker, processExecuter, ipAddresses);
MetricReceiverWrapper mr = new MetricReceiverWrapper(MetricReceiver.nullImplementation);
- Function<String, NodeAgent> nodeAgentFactory = (hostName) -> new NodeAgentImpl(
- new NodeAgentContextImpl.Builder(hostName).fileSystem(fileSystem).build(), nodeRepository,
- orchestrator, dockerOperations, storageMaintainer, clock, INTERVAL, Optional.empty(), Optional.empty(), Optional.empty());
- nodeAdmin = new NodeAdminImpl(nodeAgentFactory, Optional.empty(), mr, Clock.systemUTC());
+ NodeAgentFactory nodeAgentFactory = contextSupplier -> new NodeAgentImpl(
+ contextSupplier, nodeRepository,
+ orchestrator, dockerOperations, storageMaintainer, Optional.empty(), Optional.empty(), Optional.empty());
+ NodeAgentContextFactory nodeAgentContextFactory = nodeSpec ->
+ new NodeAgentContextImpl.Builder(nodeSpec).fileSystem(fileSystem).build();
+ nodeAdmin = new NodeAdminImpl(nodeAgentFactory, nodeAgentContextFactory, Optional.empty(), mr, Clock.systemUTC());
nodeAdminStateUpdater = new NodeAdminStateUpdater(nodeRepository, orchestrator,
nodeAdmin, HOST_HOSTNAME);
- this.loopThread = new Thread(() -> {
+ loopThread = new Thread(() -> {
nodeAdminStateUpdater.start();
while (! terminated) {
@@ -135,8 +136,10 @@ public class DockerTester implements AutoCloseable {
@Override
public void close() {
- terminated = true;
+ // First, stop NodeAdmin and all the NodeAgents
+ nodeAdmin.stop();
+ terminated = true;
do {
try {
loopThread.join();
@@ -144,8 +147,5 @@ public class DockerTester implements AutoCloseable {
e.printStackTrace();
}
} while (loopThread.isAlive());
-
- // Finally, stop NodeAdmin and all the NodeAgents
- nodeAdmin.stop();
}
}
diff --git a/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/maintenance/StorageMaintainerTest.java b/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/maintenance/StorageMaintainerTest.java
index 9ea5c87511b..05b9c413594 100644
--- a/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/maintenance/StorageMaintainerTest.java
+++ b/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/maintenance/StorageMaintainerTest.java
@@ -152,12 +152,8 @@ public class StorageMaintainerTest {
}
private Path executeAs(NodeType nodeType) {
- NodeAgentContext context = new NodeAgentContextImpl.Builder("host123-5.test.domain.tld")
- .nodeType(nodeType)
- .fileSystem(TestFileSystem.create())
- .zoneId(new ZoneId(SystemName.dev, Environment.prod, RegionName.from("us-north-1"))).build();
NodeSpec nodeSpec = new NodeSpec.Builder()
- .hostname(context.hostname().value())
+ .hostname("host123-5.test.domain.tld")
.nodeType(nodeType)
.state(Node.State.active)
.parentHostname("host123.test.domain.tld")
@@ -167,9 +163,12 @@ public class StorageMaintainerTest {
.flavor("d-2-8-50")
.canonicalFlavor("d-2-8-50")
.build();
+ NodeAgentContext context = new NodeAgentContextImpl.Builder(nodeSpec)
+ .fileSystem(TestFileSystem.create())
+ .zoneId(new ZoneId(SystemName.dev, Environment.prod, RegionName.from("us-north-1"))).build();
Path path = context.pathOnHostFromPathInNode("/etc/yamas-agent");
uncheck(() -> Files.createDirectories(path));
- storageMaintainer.writeMetricsConfig(context, nodeSpec);
+ storageMaintainer.writeMetricsConfig(context);
return path;
}
diff --git a/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/nodeadmin/NodeAdminImplTest.java b/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/nodeadmin/NodeAdminImplTest.java
index 3860e2e9780..47e220a968b 100644
--- a/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/nodeadmin/NodeAdminImplTest.java
+++ b/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/nodeadmin/NodeAdminImplTest.java
@@ -1,22 +1,23 @@
// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.vespa.hosted.node.admin.nodeadmin;
+import com.yahoo.config.provision.NodeType;
import com.yahoo.metrics.simple.MetricReceiver;
import com.yahoo.test.ManualClock;
import com.yahoo.vespa.hosted.dockerapi.metrics.MetricReceiverWrapper;
-import com.yahoo.vespa.hosted.node.admin.nodeagent.NodeAgent;
-import com.yahoo.vespa.hosted.node.admin.nodeagent.NodeAgentImpl;
+import com.yahoo.vespa.hosted.node.admin.configserver.noderepository.NodeSpec;
+import com.yahoo.vespa.hosted.node.admin.nodeagent.NodeAgentContext;
+import com.yahoo.vespa.hosted.node.admin.nodeagent.NodeAgentContextFactory;
+import com.yahoo.vespa.hosted.node.admin.nodeagent.NodeAgentContextImpl;
+import com.yahoo.vespa.hosted.provision.Node;
import org.junit.Test;
import org.mockito.InOrder;
import java.time.Duration;
import java.util.ArrayList;
import java.util.Collections;
-import java.util.HashSet;
import java.util.List;
import java.util.Optional;
-import java.util.Set;
-import java.util.function.Function;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
@@ -31,75 +32,72 @@ import static org.mockito.Mockito.verify;
import static org.mockito.Mockito.verifyNoMoreInteractions;
import static org.mockito.Mockito.when;
+import static com.yahoo.vespa.hosted.node.admin.nodeadmin.NodeAdminImpl.NodeAgentWithScheduler;
+import static com.yahoo.vespa.hosted.node.admin.nodeadmin.NodeAdminImpl.NodeAgentWithSchedulerFactory;
+
/**
* @author bakksjo
*/
public class NodeAdminImplTest {
- // Trick to allow mocking of typed interface without casts/warnings.
- private interface NodeAgentFactory extends Function<String, NodeAgent> {}
- private final Function<String, NodeAgent> nodeAgentFactory = mock(NodeAgentFactory.class);
+
+ private final NodeAgentWithSchedulerFactory nodeAgentWithSchedulerFactory = mock(NodeAgentWithSchedulerFactory.class);
+ private final NodeAgentContextFactory nodeAgentContextFactory = mock(NodeAgentContextFactory.class);
private final ManualClock clock = new ManualClock();
- private final NodeAdminImpl nodeAdmin = new NodeAdminImpl(nodeAgentFactory, Optional.empty(),
- new MetricReceiverWrapper(MetricReceiver.nullImplementation), clock);
+ private final NodeAdminImpl nodeAdmin = new NodeAdminImpl(nodeAgentWithSchedulerFactory, nodeAgentContextFactory,
+ Optional.empty(), new MetricReceiverWrapper(MetricReceiver.nullImplementation), clock);
@Test
public void nodeAgentsAreProperlyLifeCycleManaged() {
- final String hostName1 = "host1.test.yahoo.com";
- final String hostName2 = "host2.test.yahoo.com";
- final NodeAgent nodeAgent1 = mock(NodeAgentImpl.class);
- final NodeAgent nodeAgent2 = mock(NodeAgentImpl.class);
- when(nodeAgentFactory.apply(eq(hostName1))).thenReturn(nodeAgent1);
- when(nodeAgentFactory.apply(eq(hostName2))).thenReturn(nodeAgent2);
+ final NodeSpec nodeSpec1 = createNodeSpec("host1.test.yahoo.com");
+ final NodeSpec nodeSpec2 = createNodeSpec("host2.test.yahoo.com");
+ final NodeAgentWithScheduler nodeAgent1 = mockNodeAgentWithSchedulerFactory(nodeSpec1);
+ final NodeAgentWithScheduler nodeAgent2 = mockNodeAgentWithSchedulerFactory(nodeSpec2);
+ final InOrder inOrder = inOrder(nodeAgentWithSchedulerFactory, nodeAgent1, nodeAgent2);
+ nodeAdmin.refreshContainersToRun(Collections.emptyList());
+ verifyNoMoreInteractions(nodeAgentWithSchedulerFactory);
- final InOrder inOrder = inOrder(nodeAgentFactory, nodeAgent1, nodeAgent2);
- nodeAdmin.synchronizeNodesToNodeAgents(Collections.emptySet());
- verifyNoMoreInteractions(nodeAgentFactory);
-
- nodeAdmin.synchronizeNodesToNodeAgents(Collections.singleton(hostName1));
- inOrder.verify(nodeAgentFactory).apply(hostName1);
+ nodeAdmin.refreshContainersToRun(Collections.singletonList(nodeSpec1));
inOrder.verify(nodeAgent1).start();
+ inOrder.verify(nodeAgent2, never()).start();
inOrder.verify(nodeAgent1, never()).stop();
- nodeAdmin.synchronizeNodesToNodeAgents(Collections.singleton(hostName1));
- inOrder.verify(nodeAgentFactory, never()).apply(any(String.class));
+ nodeAdmin.refreshContainersToRun(Collections.singletonList(nodeSpec1));
+ inOrder.verify(nodeAgentWithSchedulerFactory, never()).create(any());
inOrder.verify(nodeAgent1, never()).start();
inOrder.verify(nodeAgent1, never()).stop();
- nodeAdmin.synchronizeNodesToNodeAgents(Collections.emptySet());
- inOrder.verify(nodeAgentFactory, never()).apply(any(String.class));
+ nodeAdmin.refreshContainersToRun(Collections.emptyList());
+ inOrder.verify(nodeAgentWithSchedulerFactory, never()).create(any());
verify(nodeAgent1).stop();
- nodeAdmin.synchronizeNodesToNodeAgents(Collections.singleton(hostName2));
- inOrder.verify(nodeAgentFactory).apply(hostName2);
+ nodeAdmin.refreshContainersToRun(Collections.singletonList(nodeSpec2));
inOrder.verify(nodeAgent2).start();
inOrder.verify(nodeAgent2, never()).stop();
- verify(nodeAgent1).stop();
+ inOrder.verify(nodeAgent1, never()).stop();
- nodeAdmin.synchronizeNodesToNodeAgents(Collections.emptySet());
- inOrder.verify(nodeAgentFactory, never()).apply(any(String.class));
+ nodeAdmin.refreshContainersToRun(Collections.emptyList());
+ inOrder.verify(nodeAgentWithSchedulerFactory, never()).create(any());
inOrder.verify(nodeAgent2, never()).start();
inOrder.verify(nodeAgent2).stop();
-
- verifyNoMoreInteractions(nodeAgent1);
- verifyNoMoreInteractions(nodeAgent2);
+ inOrder.verify(nodeAgent1, never()).start();
+ inOrder.verify(nodeAgent1, never()).stop();
}
@Test
public void testSetFrozen() {
- List<NodeAgent> nodeAgents = new ArrayList<>();
- Set<String> existingContainerHostnames = new HashSet<>();
+ List<NodeSpec> nodeSpecs = new ArrayList<>();
+ List<NodeAgentWithScheduler> nodeAgents = new ArrayList<>();
for (int i = 0; i < 3; i++) {
- final String hostName = "host" + i + ".test.yahoo.com";
- NodeAgent nodeAgent = mock(NodeAgent.class);
- nodeAgents.add(nodeAgent);
- when(nodeAgentFactory.apply(eq(hostName))).thenReturn(nodeAgent);
+ NodeSpec nodeSpec = createNodeSpec("host" + i + ".test.yahoo.com");
+ NodeAgentWithScheduler nodeAgent = mockNodeAgentWithSchedulerFactory(nodeSpec);
- existingContainerHostnames.add(hostName);
+ nodeSpecs.add(nodeSpec);
+ nodeAgents.add(nodeAgent);
}
- nodeAdmin.synchronizeNodesToNodeAgents(existingContainerHostnames);
+ nodeAdmin.refreshContainersToRun(nodeSpecs);
assertTrue(nodeAdmin.isFrozen()); // Initially everything is frozen to force convergence
mockNodeAgentSetFrozenResponse(nodeAgents, true, true, true);
@@ -155,10 +153,28 @@ public class NodeAdminImplTest {
assertEquals(Duration.ofSeconds(1), nodeAdmin.subsystemFreezeDuration());
}
- private void mockNodeAgentSetFrozenResponse(List<NodeAgent> nodeAgents, boolean... responses) {
+ private void mockNodeAgentSetFrozenResponse(List<NodeAgentWithScheduler> nodeAgents, boolean... responses) {
for (int i = 0; i < nodeAgents.size(); i++) {
- NodeAgent nodeAgent = nodeAgents.get(i);
- when(nodeAgent.setFrozen(anyBoolean())).thenReturn(responses[i]);
+ NodeAgentWithScheduler nodeAgent = nodeAgents.get(i);
+ when(nodeAgent.setFrozen(anyBoolean(), any())).thenReturn(responses[i]);
}
}
+
+ private NodeSpec createNodeSpec(String hostname) {
+ return new NodeSpec.Builder()
+ .hostname(hostname)
+ .state(Node.State.active)
+ .nodeType(NodeType.tenant)
+ .flavor("default")
+ .build();
+ }
+
+ private NodeAgentWithScheduler mockNodeAgentWithSchedulerFactory(NodeSpec nodeSpec) {
+ NodeAgentContext context = new NodeAgentContextImpl.Builder(nodeSpec).build();
+ when(nodeAgentContextFactory.create(eq(nodeSpec))).thenReturn(context);
+
+ NodeAgentWithScheduler nodeAgentWithScheduler = mock(NodeAgentWithScheduler.class);
+ when(nodeAgentWithSchedulerFactory.create(eq(context))).thenReturn(nodeAgentWithScheduler);
+ return nodeAgentWithScheduler;
+ }
}
diff --git a/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/nodeadmin/NodeAdminStateUpdaterTest.java b/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/nodeadmin/NodeAdminStateUpdaterTest.java
index 437195ca6d5..74ba5561c8e 100644
--- a/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/nodeadmin/NodeAdminStateUpdaterTest.java
+++ b/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/nodeadmin/NodeAdminStateUpdaterTest.java
@@ -42,7 +42,7 @@ public class NodeAdminStateUpdaterTest {
private final NodeAdmin nodeAdmin = mock(NodeAdmin.class);
private final HostName hostHostname = HostName.from("basehost1.test.yahoo.com");
- private final NodeAdminStateUpdater refresher = spy(new NodeAdminStateUpdater(
+ private final NodeAdminStateUpdater updater = spy(new NodeAdminStateUpdater(
nodeRepository, orchestrator, nodeAdmin, hostHostname));
@@ -58,19 +58,19 @@ public class NodeAdminStateUpdaterTest {
{
// Initially everything is frozen to force convergence
- assertResumeStateError(RESUMED, "NodeAdmin is not yet unfrozen");
+ assertConvergeError(RESUMED, "NodeAdmin is not yet unfrozen");
when(nodeAdmin.setFrozen(eq(false))).thenReturn(true);
- refresher.converge(RESUMED);
+ updater.converge(RESUMED);
verify(orchestrator, times(1)).resume(hostHostname.value());
// We are already resumed, so this should return without resuming again
- refresher.converge(RESUMED);
+ updater.converge(RESUMED);
verify(orchestrator, times(1)).resume(hostHostname.value());
verify(nodeAdmin, times(2)).setFrozen(eq(false));
// Lets try to suspend node admin only
when(nodeAdmin.setFrozen(eq(true))).thenReturn(false);
- assertResumeStateError(SUSPENDED_NODE_ADMIN, "NodeAdmin is not yet frozen");
+ assertConvergeError(SUSPENDED_NODE_ADMIN, "NodeAdmin is not yet frozen");
verify(nodeAdmin, times(2)).setFrozen(eq(false));
}
@@ -81,10 +81,10 @@ public class NodeAdminStateUpdaterTest {
when(nodeAdmin.setFrozen(eq(true))).thenReturn(true);
doThrow(new RuntimeException(exceptionMessage)).doNothing()
.when(orchestrator).suspend(eq(hostHostname.value()));
- assertResumeStateError(SUSPENDED_NODE_ADMIN, exceptionMessage);
+ assertConvergeError(SUSPENDED_NODE_ADMIN, exceptionMessage);
verify(nodeAdmin, times(2)).setFrozen(eq(false));
- refresher.converge(SUSPENDED_NODE_ADMIN);
+ updater.converge(SUSPENDED_NODE_ADMIN);
verify(nodeAdmin, times(2)).setFrozen(eq(false));
}
@@ -93,13 +93,13 @@ public class NodeAdminStateUpdaterTest {
final String exceptionMessage = "Failed to stop services";
verify(orchestrator, times(0)).suspend(eq(hostHostname.value()), eq(suspendHostnames));
doThrow(new RuntimeException(exceptionMessage)).doNothing().when(nodeAdmin).stopNodeAgentServices(eq(activeHostnames));
- assertResumeStateError(SUSPENDED, exceptionMessage);
+ assertConvergeError(SUSPENDED, exceptionMessage);
verify(orchestrator, times(1)).suspend(eq(hostHostname.value()), eq(suspendHostnames));
// Make sure we dont roll back if we fail to stop services - we will try to stop again next tick
verify(nodeAdmin, times(2)).setFrozen(eq(false));
// Finally we are successful in transitioning to frozen
- refresher.converge(SUSPENDED);
+ updater.converge(SUSPENDED);
}
}
@@ -110,31 +110,38 @@ public class NodeAdminStateUpdaterTest {
// Initially everything is frozen to force convergence
when(nodeAdmin.setFrozen(eq(false))).thenReturn(true);
- refresher.converge(RESUMED);
+ updater.converge(RESUMED);
verify(nodeAdmin, times(1)).setFrozen(eq(false));
+ verify(nodeAdmin, times(1)).refreshContainersToRun(any());
// Let's start suspending, we are able to freeze the nodes, but orchestrator denies suspension
when(nodeAdmin.subsystemFreezeDuration()).thenReturn(Duration.ofSeconds(1));
when(nodeAdmin.setFrozen(eq(true))).thenReturn(true);
doThrow(new RuntimeException(exceptionMsg)).when(orchestrator).suspend(eq(hostHostname.value()));
- assertResumeStateError(SUSPENDED_NODE_ADMIN, exceptionMsg);
+ assertConvergeError(SUSPENDED_NODE_ADMIN, exceptionMsg);
verify(nodeAdmin, times(1)).setFrozen(eq(true));
- assertResumeStateError(SUSPENDED_NODE_ADMIN, exceptionMsg);
+ verify(orchestrator, times(1)).suspend(eq(hostHostname.value()));
+ assertConvergeError(SUSPENDED_NODE_ADMIN, exceptionMsg);
verify(nodeAdmin, times(2)).setFrozen(eq(true));
- assertResumeStateError(SUSPENDED_NODE_ADMIN, exceptionMsg);
+ verify(orchestrator, times(2)).suspend(eq(hostHostname.value()));
+ assertConvergeError(SUSPENDED_NODE_ADMIN, exceptionMsg);
verify(nodeAdmin, times(3)).setFrozen(eq(true));
- verify(nodeAdmin, times(1)).setFrozen(eq(false)); // No new unfreezes during last 2 ticks
+ verify(orchestrator, times(3)).suspend(eq(hostHostname.value()));
+
 + // No new unfreezes or refreshes while trying to freeze
+ verify(nodeAdmin, times(1)).setFrozen(eq(false));
verify(nodeAdmin, times(1)).refreshContainersToRun(any());
// Only resume and fetch containers when subsystem freeze duration expires
when(nodeAdmin.subsystemFreezeDuration()).thenReturn(Duration.ofHours(1));
- assertResumeStateError(SUSPENDED_NODE_ADMIN, exceptionMsg);
+ assertConvergeError(SUSPENDED_NODE_ADMIN, "Timed out trying to freeze all nodes: will force an unfrozen tick");
verify(nodeAdmin, times(2)).setFrozen(eq(false));
+ verify(orchestrator, times(3)).suspend(eq(hostHostname.value())); // no new suspend calls
verify(nodeAdmin, times(2)).refreshContainersToRun(any());
// We change our mind, want to remain resumed
- refresher.converge(RESUMED);
+ updater.converge(RESUMED);
verify(nodeAdmin, times(3)).setFrozen(eq(false)); // Make sure that we unfreeze!
}
@@ -146,24 +153,24 @@ public class NodeAdminStateUpdaterTest {
// Resume and suspend only require that node-agents are frozen and permission from
// orchestrator to resume/suspend host. Therefore, if host is not active, we only need to freeze.
- refresher.converge(RESUMED);
+ updater.converge(RESUMED);
verify(orchestrator, never()).resume(eq(hostHostname.value()));
- refresher.converge(SUSPENDED_NODE_ADMIN);
+ updater.converge(SUSPENDED_NODE_ADMIN);
verify(orchestrator, never()).suspend(eq(hostHostname.value()));
// When doing batch suspend, only suspend the containers if the host is not active
List<String> activeHostnames = nodeRepository.getNodes(hostHostname.value()).stream()
.map(NodeSpec::getHostname)
.collect(Collectors.toList());
- refresher.converge(SUSPENDED);
+ updater.converge(SUSPENDED);
verify(orchestrator, times(1)).suspend(eq(hostHostname.value()), eq(activeHostnames));
}
- private void assertResumeStateError(NodeAdminStateUpdater.State targetState, String reason) {
+ private void assertConvergeError(NodeAdminStateUpdater.State targetState, String reason) {
try {
- refresher.converge(targetState);
- fail("Expected set resume state to fail with \"" + reason + "\", but it succeeded without error");
+ updater.converge(targetState);
+ fail("Expected converging to " + targetState + " to fail with \"" + reason + "\", but it succeeded without error");
} catch (RuntimeException e) {
assertEquals(reason, e.getMessage());
}
diff --git a/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentContextManagerTest.java b/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentContextManagerTest.java
new file mode 100644
index 00000000000..f32e3d91e34
--- /dev/null
+++ b/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentContextManagerTest.java
@@ -0,0 +1,142 @@
+// Copyright 2019 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.hosted.node.admin.nodeagent;
+
+import org.junit.Test;
+
+import java.time.Clock;
+import java.time.Duration;
+import java.util.Optional;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertSame;
+import static org.junit.Assert.assertTrue;
+
+/**
+ * @author freva
+ */
+public class NodeAgentContextManagerTest {
+
+ private static final int TIMEOUT = 10_000;
+
+ private final Clock clock = Clock.systemUTC();
+ private final NodeAgentContext initialContext = generateContext();
+ private final NodeAgentContextManager manager = new NodeAgentContextManager(clock, initialContext);
+
+ @Test(timeout = TIMEOUT)
+ public void returns_immediately_if_next_context_is_ready() throws InterruptedException {
+ NodeAgentContext context1 = generateContext();
+ manager.scheduleTickWith(context1);
+
+ assertSame(initialContext, manager.currentContext());
+ assertSame(context1, manager.nextContext());
+ assertSame(context1, manager.currentContext());
+ }
+
+ @Test(timeout = TIMEOUT)
+ public void blocks_in_nextContext_until_one_is_scheduled() throws InterruptedException {
+ AsyncExecutor<NodeAgentContext> async = new AsyncExecutor<>(manager::nextContext);
+ assertFalse(async.response.isPresent());
+ Thread.sleep(10);
+ assertFalse(async.response.isPresent());
+
+ NodeAgentContext context1 = generateContext();
+ manager.scheduleTickWith(context1);
+
+ async.awaitResult();
+ assertEquals(Optional.of(context1), async.response);
+ assertFalse(async.exception.isPresent());
+ }
+
+ @Test(timeout = TIMEOUT)
+ public void blocks_in_nextContext_until_interrupt() throws InterruptedException {
+ AsyncExecutor<NodeAgentContext> async = new AsyncExecutor<>(manager::nextContext);
+ assertFalse(async.response.isPresent());
+ Thread.sleep(10);
+ assertFalse(async.response.isPresent());
+
+ manager.interrupt();
+
+ async.awaitResult();
+ assertEquals(Optional.of(InterruptedException.class), async.exception.map(Exception::getClass));
+ assertFalse(async.response.isPresent());
+ }
+
+ @Test(timeout = TIMEOUT)
+ public void setFrozen_does_not_block_with_no_timeout() throws InterruptedException {
+ assertFalse(manager.setFrozen(false, Duration.ZERO));
+
 + // Generate a new context and get it from the supplier; this completes the unfreeze
+ NodeAgentContext context1 = generateContext();
+ manager.scheduleTickWith(context1);
+ assertSame(context1, manager.nextContext());
+
+ assertTrue(manager.setFrozen(false, Duration.ZERO));
+ }
+
+ @Test(timeout = TIMEOUT)
+ public void setFrozen_blocks_at_least_for_duration_of_timeout() {
+ long wantedDurationMillis = 100;
+ long start = clock.millis();
+ assertFalse(manager.setFrozen(false, Duration.ofMillis(wantedDurationMillis)));
+ long actualDurationMillis = clock.millis() - start;
+
+ assertTrue(actualDurationMillis >= wantedDurationMillis);
+ }
+
+ @Test(timeout = TIMEOUT)
+ public void setFrozen_is_successful_if_converged_in_time() throws InterruptedException {
+ AsyncExecutor<Boolean> async = new AsyncExecutor<>(() -> manager.setFrozen(false, Duration.ofMillis(500)));
+
+ assertFalse(async.response.isPresent());
+
+ NodeAgentContext context1 = generateContext();
+ manager.scheduleTickWith(context1);
+ assertSame(context1, manager.nextContext());
+
+ async.awaitResult();
+ assertEquals(Optional.of(true), async.response);
+ assertFalse(async.exception.isPresent());
+ }
+
+ private static NodeAgentContext generateContext() {
+ return new NodeAgentContextImpl.Builder("container-123.domain.tld").build();
+ }
+
+ private class AsyncExecutor<T> {
+ private final Object monitor = new Object();
+ private final Thread thread;
+ private volatile Optional<T> response = Optional.empty();
+ private volatile Optional<Exception> exception = Optional.empty();
+ private boolean completed = false;
+
+ private AsyncExecutor(ThrowingSupplier<T> supplier) {
+ this.thread = new Thread(() -> {
+ try {
+ response = Optional.of(supplier.get());
+ } catch (Exception e) {
+ exception = Optional.of(e);
+ }
+ synchronized (monitor) {
+ completed = true;
+ monitor.notifyAll();
+ }
+ });
+ this.thread.start();
+ }
+
+ private void awaitResult() {
+ synchronized (monitor) {
+ while (!completed) {
+ try {
+ monitor.wait();
+ } catch (InterruptedException ignored) { }
+ }
+ }
+ }
+ }
+
+ private interface ThrowingSupplier<T> {
+ T get() throws Exception;
+ }
+} \ No newline at end of file
diff --git a/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImplTest.java b/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImplTest.java
index b6128fc8693..e392ac34414 100644
--- a/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImplTest.java
+++ b/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImplTest.java
@@ -4,8 +4,8 @@ package com.yahoo.vespa.hosted.node.admin.nodeagent;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.yahoo.config.provision.NodeType;
import com.yahoo.metrics.simple.MetricReceiver;
-import com.yahoo.test.ManualClock;
import com.yahoo.vespa.hosted.dockerapi.Container;
+import com.yahoo.vespa.hosted.dockerapi.ContainerName;
import com.yahoo.vespa.hosted.dockerapi.ContainerResources;
import com.yahoo.vespa.hosted.dockerapi.ContainerStats;
import com.yahoo.vespa.hosted.dockerapi.exception.DockerException;
@@ -13,6 +13,7 @@ import com.yahoo.vespa.hosted.dockerapi.DockerImage;
import com.yahoo.vespa.hosted.dockerapi.metrics.MetricReceiverWrapper;
import com.yahoo.vespa.hosted.node.admin.configserver.noderepository.NodeSpec;
import com.yahoo.vespa.hosted.node.admin.configserver.noderepository.NodeAttributes;
+import com.yahoo.vespa.hosted.node.admin.configserver.orchestrator.OrchestratorException;
import com.yahoo.vespa.hosted.node.admin.docker.DockerOperations;
import com.yahoo.vespa.hosted.node.admin.maintenance.StorageMaintainer;
import com.yahoo.vespa.hosted.node.admin.maintenance.acl.AclMaintainer;
@@ -28,7 +29,6 @@ import java.net.URL;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
-import java.time.Duration;
import java.util.Collections;
import java.util.Map;
import java.util.Optional;
@@ -36,8 +36,6 @@ import java.util.Set;
import static org.junit.Assert.assertArrayEquals;
import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertFalse;
-import static org.junit.Assert.assertTrue;
import static org.junit.Assert.fail;
import static org.mockito.ArgumentMatchers.any;
import static org.mockito.ArgumentMatchers.eq;
@@ -56,14 +54,21 @@ import static org.mockito.Mockito.when;
* @author Øyvind Bakksjø
*/
public class NodeAgentImplTest {
- private static final Duration NODE_AGENT_SCAN_INTERVAL = Duration.ofSeconds(30);
private static final double MIN_CPU_CORES = 2;
private static final double MIN_MAIN_MEMORY_AVAILABLE_GB = 16;
private static final double MIN_DISK_AVAILABLE_GB = 250;
private static final String vespaVersion = "1.2.3";
private final String hostName = "host1.test.yahoo.com";
- private final NodeAgentContext context = new NodeAgentContextImpl.Builder(hostName).build();
+ private final NodeSpec.Builder nodeBuilder = new NodeSpec.Builder()
+ .hostname(hostName)
+ .nodeType(NodeType.tenant)
+ .flavor("docker")
+ .minCpuCores(MIN_CPU_CORES)
+ .minMainMemoryAvailableGb(MIN_MAIN_MEMORY_AVAILABLE_GB)
+ .minDiskAvailableGb(MIN_DISK_AVAILABLE_GB);
+
+ private final NodeAgentContextSupplier contextSupplier = mock(NodeAgentContextSupplier.class);
private final DockerImage dockerImage = new DockerImage("dockerImage");
private final DockerOperations dockerOperations = mock(DockerOperations.class);
private final NodeRepository nodeRepository = mock(NodeRepository.class);
@@ -76,16 +81,6 @@ public class NodeAgentImplTest {
Collections.emptyMap(), Collections.emptyMap(), Collections.emptyMap());
private final AthenzCredentialsMaintainer athenzCredentialsMaintainer = mock(AthenzCredentialsMaintainer.class);
- private final ManualClock clock = new ManualClock();
-
- private final NodeSpec.Builder nodeBuilder = new NodeSpec.Builder()
- .hostname(context.hostname().value())
- .nodeType(NodeType.tenant)
- .flavor("docker")
- .minCpuCores(MIN_CPU_CORES)
- .minMainMemoryAvailableGb(MIN_MAIN_MEMORY_AVAILABLE_GB)
- .minDiskAvailableGb(MIN_DISK_AVAILABLE_GB);
-
@Test
public void upToDateContainerIsUntouched() {
@@ -97,11 +92,12 @@ public class NodeAgentImplTest {
.vespaVersion(vespaVersion)
.build();
+ NodeAgentContext context = createContext(node);
NodeAgentImpl nodeAgent = makeNodeAgent(dockerImage, true);
when(nodeRepository.getOptionalNode(hostName)).thenReturn(Optional.of(node));
when(storageMaintainer.getDiskUsageFor(eq(context))).thenReturn(Optional.of(187500000000L));
- nodeAgent.converge();
+ nodeAgent.doConverge(context);
verify(dockerOperations, never()).removeContainer(eq(context), any());
verify(orchestrator, never()).suspend(any(String.class));
@@ -125,11 +121,12 @@ public class NodeAgentImplTest {
.vespaVersion(vespaVersion)
.build();
+ NodeAgentContext context = createContext(node);
NodeAgentImpl nodeAgent = makeNodeAgent(dockerImage, true);
when(nodeRepository.getOptionalNode(hostName)).thenReturn(Optional.of(node));
when(storageMaintainer.getDiskUsageFor(eq(context))).thenReturn(Optional.of(217432719360L));
- nodeAgent.converge();
+ nodeAgent.doConverge(context);
verify(storageMaintainer, times(1)).removeOldFilesFromNode(eq(context));
}
@@ -145,27 +142,28 @@ public class NodeAgentImplTest {
.vespaVersion(vespaVersion)
.build();
+ NodeAgentContext context = createContext(node);
NodeAgentImpl nodeAgent = makeNodeAgent(dockerImage, true);
when(nodeRepository.getOptionalNode(hostName)).thenReturn(Optional.of(node));
when(storageMaintainer.getDiskUsageFor(eq(context))).thenReturn(Optional.of(187500000000L));
- nodeAgent.converge();
+ nodeAgent.doConverge(context);
inOrder.verify(dockerOperations, never()).startServices(eq(context));
inOrder.verify(dockerOperations, times(1)).resumeNode(eq(context));
nodeAgent.suspend();
- nodeAgent.converge();
+ nodeAgent.doConverge(context);
inOrder.verify(dockerOperations, never()).startServices(eq(context));
inOrder.verify(dockerOperations, times(1)).resumeNode(eq(context)); // Expect a resume, but no start services
// No new suspends/stops, so no need to resume/start
- nodeAgent.converge();
+ nodeAgent.doConverge(context);
inOrder.verify(dockerOperations, never()).startServices(eq(context));
inOrder.verify(dockerOperations, never()).resumeNode(eq(context));
nodeAgent.suspend();
nodeAgent.stopServices();
- nodeAgent.converge();
+ nodeAgent.doConverge(context);
inOrder.verify(dockerOperations, times(1)).startServices(eq(context));
inOrder.verify(dockerOperations, times(1)).resumeNode(eq(context));
}
@@ -181,13 +179,14 @@ public class NodeAgentImplTest {
.currentRestartGeneration(restartGeneration.get())
.build();
+ NodeAgentContext context = createContext(node);
NodeAgentImpl nodeAgent = makeNodeAgent(null, false);
when(nodeRepository.getOptionalNode(hostName)).thenReturn(Optional.of(node));
when(dockerOperations.pullImageAsyncIfNeeded(eq(dockerImage))).thenReturn(false);
when(storageMaintainer.getDiskUsageFor(eq(context))).thenReturn(Optional.of(201326592000L));
- nodeAgent.converge();
+ nodeAgent.doConverge(context);
verify(dockerOperations, never()).removeContainer(eq(context), any());
verify(dockerOperations, never()).startServices(any());
@@ -195,7 +194,7 @@ public class NodeAgentImplTest {
final InOrder inOrder = inOrder(dockerOperations, orchestrator, nodeRepository, aclMaintainer, healthChecker);
inOrder.verify(dockerOperations, times(1)).pullImageAsyncIfNeeded(eq(dockerImage));
- inOrder.verify(dockerOperations, times(1)).createContainer(eq(context), eq(node), any());
+ inOrder.verify(dockerOperations, times(1)).createContainer(eq(context), any());
inOrder.verify(dockerOperations, times(1)).startContainer(eq(context));
inOrder.verify(aclMaintainer, times(1)).converge();
inOrder.verify(dockerOperations, times(1)).resumeNode(eq(context));
@@ -216,13 +215,14 @@ public class NodeAgentImplTest {
.vespaVersion(vespaVersion)
.build();
+ NodeAgentContext context = createContext(node);
NodeAgentImpl nodeAgent = makeNodeAgent(dockerImage, true);
when(nodeRepository.getOptionalNode(hostName)).thenReturn(Optional.of(node));
when(dockerOperations.pullImageAsyncIfNeeded(any())).thenReturn(true);
when(storageMaintainer.getDiskUsageFor(eq(context))).thenReturn(Optional.of(201326592000L));
- nodeAgent.converge();
+ nodeAgent.doConverge(context);
verify(orchestrator, never()).suspend(any(String.class));
verify(orchestrator, never()).resume(any(String.class));
@@ -241,29 +241,25 @@ public class NodeAgentImplTest {
.wantedVespaVersion(vespaVersion)
.vespaVersion(vespaVersion);
+ NodeAgentContext firstContext = createContext(specBuilder.build());
NodeAgentImpl nodeAgent = makeNodeAgent(dockerImage, true);
- NodeSpec firstSpec = specBuilder.build();
- NodeSpec secondSpec = specBuilder.minDiskAvailableGb(200).build();
- NodeSpec thirdSpec = specBuilder.minCpuCores(4).build();
-
- when(nodeRepository.getOptionalNode(hostName))
- .thenReturn(Optional.of(firstSpec))
- .thenReturn(Optional.of(secondSpec))
- .thenReturn(Optional.of(thirdSpec));
+
when(dockerOperations.pullImageAsyncIfNeeded(any())).thenReturn(true);
- when(storageMaintainer.getDiskUsageFor(eq(context))).thenReturn(Optional.of(201326592000L));
+ when(storageMaintainer.getDiskUsageFor(any())).thenReturn(Optional.of(201326592000L));
- nodeAgent.converge();
- nodeAgent.converge();
- nodeAgent.converge();
+ nodeAgent.doConverge(firstContext);
+ NodeAgentContext secondContext = createContext(specBuilder.minDiskAvailableGb(200).build());
+ nodeAgent.doConverge(secondContext);
+ NodeAgentContext thirdContext = createContext(specBuilder.minCpuCores(4).build());
+ nodeAgent.doConverge(thirdContext);
InOrder inOrder = inOrder(orchestrator, dockerOperations);
inOrder.verify(orchestrator).resume(any(String.class));
inOrder.verify(orchestrator).resume(any(String.class));
inOrder.verify(orchestrator).suspend(any(String.class));
- inOrder.verify(dockerOperations).removeContainer(eq(context), any());
- inOrder.verify(dockerOperations, times(1)).createContainer(eq(context), eq(thirdSpec), any());
- inOrder.verify(dockerOperations).startContainer(eq(context));
+ inOrder.verify(dockerOperations).removeContainer(eq(thirdContext), any());
+ inOrder.verify(dockerOperations, times(1)).createContainer(eq(thirdContext), any());
+ inOrder.verify(dockerOperations).startContainer(eq(thirdContext));
inOrder.verify(orchestrator).resume(any(String.class));
}
@@ -281,14 +277,16 @@ public class NodeAgentImplTest {
.currentRestartGeneration(currentRestartGeneration)
.build();
+ NodeAgentContext context = createContext(node);
NodeAgentImpl nodeAgent = makeNodeAgent(dockerImage, true);
+ doThrow(new OrchestratorException("Denied")).when(orchestrator).suspend(eq(hostName));
try {
- nodeAgent.converge();
+ nodeAgent.doConverge(context);
fail("Expected to throw an exception");
- } catch (Exception ignored) { }
+ } catch (OrchestratorException ignored) { }
- verify(dockerOperations, never()).createContainer(eq(context), eq(node), any());
+ verify(dockerOperations, never()).createContainer(eq(context), any());
verify(dockerOperations, never()).startContainer(eq(context));
verify(orchestrator, never()).resume(any(String.class));
verify(nodeRepository, never()).updateNodeAttributes(any(String.class), any(NodeAttributes.class));
@@ -308,6 +306,7 @@ public class NodeAgentImplTest {
.currentRebootGeneration(currentRebootGeneration)
.build();
+ NodeAgentContext context = createContext(node);
NodeAgentImpl nodeAgent = makeNodeAgent(dockerImage, true);
when(nodeRepository.getOptionalNode(hostName)).thenReturn(Optional.of(node));
@@ -317,22 +316,22 @@ public class NodeAgentImplTest {
.when(healthChecker).verifyHealth(eq(context));
try {
- nodeAgent.converge();
+ nodeAgent.doConverge(context);
} catch (ConvergenceException ignored) {}
// First time we fail to resume because health verification fails
verify(orchestrator, times(1)).suspend(eq(hostName));
verify(dockerOperations, times(1)).removeContainer(eq(context), any());
- verify(dockerOperations, times(1)).createContainer(eq(context), eq(node), any());
+ verify(dockerOperations, times(1)).createContainer(eq(context), any());
verify(dockerOperations, times(1)).startContainer(eq(context));
verify(orchestrator, never()).resume(eq(hostName));
verify(nodeRepository, never()).updateNodeAttributes(any(), any());
- nodeAgent.converge();
+ nodeAgent.doConverge(context);
// Do not reboot the container again
verify(dockerOperations, times(1)).removeContainer(eq(context), any());
- verify(dockerOperations, times(1)).createContainer(eq(context), eq(node), any());
+ verify(dockerOperations, times(1)).createContainer(eq(context), any());
verify(orchestrator, times(1)).resume(eq(hostName));
verify(nodeRepository, times(1)).updateNodeAttributes(eq(hostName), eq(new NodeAttributes()
.withRebootGeneration(wantedRebootGeneration)));
@@ -348,11 +347,12 @@ public class NodeAgentImplTest {
.vespaVersion(vespaVersion)
.build();
+ NodeAgentContext context = createContext(node);
NodeAgentImpl nodeAgent = makeNodeAgent(dockerImage, true);
when(nodeRepository.getOptionalNode(hostName)).thenReturn(Optional.of(node));
- nodeAgent.converge();
+ nodeAgent.doConverge(context);
verify(dockerOperations, never()).removeContainer(eq(context), any());
verify(orchestrator, never()).resume(any(String.class));
@@ -365,18 +365,19 @@ public class NodeAgentImplTest {
.state(Node.State.ready)
.build();
+ NodeAgentContext context = createContext(node);
NodeAgentImpl nodeAgent = makeNodeAgent(null,false);
when(nodeRepository.getOptionalNode(hostName)).thenReturn(Optional.of(node));
- nodeAgent.converge();
- nodeAgent.converge();
- nodeAgent.converge();
+ nodeAgent.doConverge(context);
+ nodeAgent.doConverge(context);
+ nodeAgent.doConverge(context);
// Should only be called once, when we initialize
verify(dockerOperations, times(1)).getContainer(eq(context));
verify(dockerOperations, never()).removeContainer(eq(context), any());
- verify(dockerOperations, never()).createContainer(eq(context), eq(node), any());
+ verify(dockerOperations, never()).createContainer(eq(context), any());
verify(dockerOperations, never()).startContainer(eq(context));
verify(orchestrator, never()).resume(any(String.class));
verify(nodeRepository, never()).updateNodeAttributes(eq(hostName), any());
@@ -392,11 +393,12 @@ public class NodeAgentImplTest {
.vespaVersion(vespaVersion)
.build();
+ NodeAgentContext context = createContext(node);
NodeAgentImpl nodeAgent = makeNodeAgent(dockerImage, true);
when(nodeRepository.getOptionalNode(hostName)).thenReturn(Optional.of(node));
- nodeAgent.converge();
+ nodeAgent.doConverge(context);
final InOrder inOrder = inOrder(storageMaintainer, dockerOperations);
inOrder.verify(dockerOperations, never()).removeContainer(eq(context), any());
@@ -413,11 +415,12 @@ public class NodeAgentImplTest {
.wantedVespaVersion(vespaVersion)
.build();
+ NodeAgentContext context = createContext(node);
NodeAgentImpl nodeAgent = makeNodeAgent(null, false);
when(nodeRepository.getOptionalNode(hostName)).thenReturn(Optional.of(node));
- nodeAgent.converge();
+ nodeAgent.doConverge(context);
verify(nodeRepository, never()).updateNodeAttributes(eq(hostName), any());
}
@@ -433,20 +436,21 @@ public class NodeAgentImplTest {
.state(nodeState)
.build();
+ NodeAgentContext context = createContext(node);
NodeAgentImpl nodeAgent = makeNodeAgent(dockerImage, true);
when(nodeRepository.getOptionalNode(hostName)).thenReturn(Optional.of(node));
- nodeAgent.converge();
+ nodeAgent.doConverge(context);
final InOrder inOrder = inOrder(storageMaintainer, dockerOperations, nodeRepository);
inOrder.verify(dockerOperations, times(1)).stopServices(eq(context));
- inOrder.verify(storageMaintainer, times(1)).handleCoreDumpsForContainer(eq(context), eq(node), any());
+ inOrder.verify(storageMaintainer, times(1)).handleCoreDumpsForContainer(eq(context), any());
inOrder.verify(dockerOperations, times(1)).removeContainer(eq(context), any());
inOrder.verify(storageMaintainer, times(1)).archiveNodeStorage(eq(context));
inOrder.verify(nodeRepository, times(1)).setNodeState(eq(hostName), eq(Node.State.ready));
- verify(dockerOperations, never()).createContainer(eq(context), any(), any());
+ verify(dockerOperations, never()).createContainer(eq(context), any());
verify(dockerOperations, never()).startContainer(eq(context));
verify(dockerOperations, never()).suspendNode(eq(context));
verify(dockerOperations, times(1)).stopServices(eq(context));
@@ -474,10 +478,11 @@ public class NodeAgentImplTest {
.state(Node.State.provisioned)
.build();
+ NodeAgentContext context = createContext(node);
NodeAgentImpl nodeAgent = makeNodeAgent(null, false);
when(nodeRepository.getOptionalNode(hostName)).thenReturn(Optional.of(node));
- nodeAgent.converge();
+ nodeAgent.doConverge(context);
verify(nodeRepository, times(1)).setNodeState(eq(hostName), eq(Node.State.dirty));
}
@@ -490,15 +495,16 @@ public class NodeAgentImplTest {
.vespaVersion(vespaVersion)
.build();
+ NodeAgentContext context = createContext(node);
NodeAgentImpl nodeAgent = makeNodeAgent(dockerImage, false);
when(nodeRepository.getOptionalNode(eq(hostName))).thenReturn(Optional.of(node));
when(storageMaintainer.getDiskUsageFor(eq(context))).thenReturn(Optional.of(201326592000L));
- nodeAgent.tick();
+ nodeAgent.doConverge(context);
verify(dockerOperations, times(1)).removeContainer(eq(context), any());
- verify(dockerOperations, times(1)).createContainer(eq(context), eq(node), any());
+ verify(dockerOperations, times(1)).createContainer(eq(context), any());
verify(dockerOperations, times(1)).startContainer(eq(context));
}
@@ -511,6 +517,7 @@ public class NodeAgentImplTest {
.vespaVersion(vespaVersion)
.build();
+ NodeAgentContext context = createContext(node);
NodeAgentImpl nodeAgent = makeNodeAgent(dockerImage, true);
when(nodeRepository.getOptionalNode(eq(hostName))).thenReturn(Optional.of(node));
@@ -523,7 +530,7 @@ public class NodeAgentImplTest {
// 1st try
try {
- nodeAgent.converge();
+ nodeAgent.doConverge(context);
fail("Expected to throw an exception");
} catch (RuntimeException ignored) { }
@@ -531,7 +538,7 @@ public class NodeAgentImplTest {
inOrder.verifyNoMoreInteractions();
// 2nd try
- nodeAgent.converge();
+ nodeAgent.doConverge(context);
inOrder.verify(dockerOperations).resumeNode(any());
inOrder.verify(orchestrator).resume(hostName);
@@ -539,33 +546,6 @@ public class NodeAgentImplTest {
}
@Test
- public void testSetFrozen() {
- NodeAgentImpl nodeAgent = spy(makeNodeAgent(dockerImage, true));
- doNothing().when(nodeAgent).converge();
-
- nodeAgent.tick();
- verify(nodeAgent, times(1)).converge();
-
- assertFalse(nodeAgent.setFrozen(true)); // Returns true because we are not frozen until tick is called
- nodeAgent.tick();
- verify(nodeAgent, times(1)).converge(); // Frozen should be set, therefore converge is never called
-
- assertTrue(nodeAgent.setFrozen(true)); // Attempt to set frozen again, but it's already set
- clock.advance(Duration.ofSeconds(35)); // workToDoNow is no longer set, so we need to wait the regular time
- nodeAgent.tick();
- verify(nodeAgent, times(1)).converge();
-
- assertFalse(nodeAgent.setFrozen(false)); // Unfreeze, but still need to call tick for it to take effect
- nodeAgent.tick();
- verify(nodeAgent, times(2)).converge();
-
- assertTrue(nodeAgent.setFrozen(false));
- clock.advance(Duration.ofSeconds(35)); // workToDoNow is no longer set, so we need to wait the regular time
- nodeAgent.tick();
- verify(nodeAgent, times(3)).converge();
- }
-
- @Test
public void start_container_subtask_failure_leads_to_container_restart() {
final NodeSpec node = nodeBuilder
.wantedDockerImage(dockerImage)
@@ -573,30 +553,30 @@ public class NodeAgentImplTest {
.wantedVespaVersion(vespaVersion)
.build();
+ NodeAgentContext context = createContext(node);
NodeAgentImpl nodeAgent = spy(makeNodeAgent(null, false));
- when(nodeRepository.getOptionalNode(hostName)).thenReturn(Optional.of(node));
when(dockerOperations.pullImageAsyncIfNeeded(eq(dockerImage))).thenReturn(false);
when(storageMaintainer.getDiskUsageFor(eq(context))).thenReturn(Optional.of(201326592000L));
doThrow(new DockerException("Failed to set up network")).doNothing().when(dockerOperations).startContainer(eq(context));
try {
- nodeAgent.converge();
+ nodeAgent.doConverge(context);
fail("Expected to get DockerException");
} catch (DockerException ignored) { }
verify(dockerOperations, never()).removeContainer(eq(context), any());
- verify(dockerOperations, times(1)).createContainer(eq(context), eq(node), any());
+ verify(dockerOperations, times(1)).createContainer(eq(context), any());
verify(dockerOperations, times(1)).startContainer(eq(context));
verify(nodeAgent, never()).resumeNodeIfNeeded(any());
// The docker container was actually started and is running, but subsequent exec calls to set up
// networking failed
mockGetContainer(dockerImage, true);
- nodeAgent.converge();
+ nodeAgent.doConverge(context);
verify(dockerOperations, times(1)).removeContainer(eq(context), any());
- verify(dockerOperations, times(2)).createContainer(eq(context), eq(node), any());
+ verify(dockerOperations, times(2)).createContainer(eq(context), any());
verify(dockerOperations, times(2)).startContainer(eq(context));
verify(nodeAgent, times(1)).resumeNodeIfNeeded(any());
}
@@ -631,6 +611,7 @@ public class NodeAgentImplTest {
.parentHostname("parent.host.name.yahoo.com")
.build();
+ NodeAgentContext context = createContext(node);
NodeAgentImpl nodeAgent = makeNodeAgent(dockerImage, true);
when(nodeRepository.getOptionalNode(eq(hostName))).thenReturn(Optional.of(node));
@@ -638,12 +619,9 @@ public class NodeAgentImplTest {
when(dockerOperations.getContainerStats(eq(context)))
.thenReturn(Optional.of(stats1))
.thenReturn(Optional.of(stats2));
-
- nodeAgent.converge(); // Run the converge loop once to initialize lastNode
+
nodeAgent.updateContainerNodeMetrics(); // Update metrics once to init and lastCpuMetric
- clock.advance(Duration.ofSeconds(1234));
-
Path pathToExpectedMetrics = Paths.get(classLoader.getResource("expected.container.system.metrics.txt").getPath());
String expectedMetrics = new String(Files.readAllBytes(pathToExpectedMetrics))
.replaceAll("\\s", "")
@@ -674,13 +652,11 @@ public class NodeAgentImplTest {
.state(Node.State.ready)
.build();
+ NodeAgentContext context = createContext(node);
NodeAgentImpl nodeAgent = makeNodeAgent(null, false);
- when(nodeRepository.getOptionalNode(eq(hostName))).thenReturn(Optional.of(node));
when(dockerOperations.getContainerStats(eq(context))).thenReturn(Optional.empty());
- nodeAgent.converge(); // Run the converge loop once to initialize lastNode
-
nodeAgent.updateContainerNodeMetrics();
Set<Map<String, Object>> actualMetrics = metricReceiver.getAllMetricsRaw();
@@ -696,20 +672,21 @@ public class NodeAgentImplTest {
.wantedVespaVersion(vespaVersion)
.build();
+ NodeAgentContext context = createContext(node);
NodeAgentImpl nodeAgent = makeNodeAgent(null, false);
when(nodeRepository.getOptionalNode(hostName)).thenReturn(Optional.of(node));
when(dockerOperations.pullImageAsyncIfNeeded(eq(dockerImage))).thenReturn(false);
when(storageMaintainer.getDiskUsageFor(eq(context))).thenReturn(Optional.of(201326592000L));
- nodeAgent.converge();
+ nodeAgent.doConverge(context);
verify(dockerOperations, never()).removeContainer(eq(context), any());
verify(orchestrator, never()).suspend(any(String.class));
final InOrder inOrder = inOrder(dockerOperations, orchestrator, nodeRepository, aclMaintainer);
inOrder.verify(dockerOperations, times(1)).pullImageAsyncIfNeeded(eq(dockerImage));
- inOrder.verify(dockerOperations, times(1)).createContainer(eq(context), eq(node), any());
+ inOrder.verify(dockerOperations, times(1)).createContainer(eq(context), any());
inOrder.verify(dockerOperations, times(1)).startContainer(eq(context));
inOrder.verify(aclMaintainer, times(1)).converge();
inOrder.verify(dockerOperations, times(1)).resumeNode(eq(context));
@@ -722,24 +699,33 @@ public class NodeAgentImplTest {
mockGetContainer(dockerImage, isRunning);
when(dockerOperations.getContainerStats(any())).thenReturn(Optional.of(emptyContainerStats));
- doNothing().when(storageMaintainer).writeMetricsConfig(any(), any());
+ doNothing().when(storageMaintainer).writeMetricsConfig(any());
- return new NodeAgentImpl(context, nodeRepository, orchestrator, dockerOperations,
- storageMaintainer, clock, NODE_AGENT_SCAN_INTERVAL, Optional.of(athenzCredentialsMaintainer), Optional.of(aclMaintainer),
+ return new NodeAgentImpl(contextSupplier, nodeRepository, orchestrator, dockerOperations,
+ storageMaintainer, Optional.of(athenzCredentialsMaintainer), Optional.of(aclMaintainer),
Optional.of(healthChecker));
}
private void mockGetContainer(DockerImage dockerImage, boolean isRunning) {
- Optional<Container> container = dockerImage != null ?
- Optional.of(new Container(
- hostName,
- dockerImage,
- ContainerResources.from(MIN_CPU_CORES, MIN_MAIN_MEMORY_AVAILABLE_GB),
- context.containerName(),
- isRunning ? Container.State.RUNNING : Container.State.EXITED,
- isRunning ? 1 : 0)) :
- Optional.empty();
-
- when(dockerOperations.getContainer(eq(context))).thenReturn(container);
+ doAnswer(invoc -> {
+ NodeAgentContext context = invoc.getArgument(0);
+ if (!hostName.equals(context.hostname().value()))
+ throw new IllegalArgumentException();
+ return dockerImage != null ?
+ Optional.of(new Container(
+ hostName,
+ dockerImage,
+ ContainerResources.from(MIN_CPU_CORES, MIN_MAIN_MEMORY_AVAILABLE_GB),
+ ContainerName.fromHostname(hostName),
+ isRunning ? Container.State.RUNNING : Container.State.EXITED,
+ isRunning ? 1 : 0)) :
+ Optional.empty();
+ }).when(dockerOperations).getContainer(any());
+ }
+
+ private NodeAgentContext createContext(NodeSpec nodeSpec) {
+ NodeAgentContext context = new NodeAgentContextImpl.Builder(nodeSpec).build();
+ when(contextSupplier.currentContext()).thenReturn(context);
+ return context;
}
}