summary refs log tree commit diff stats
path: root/node-admin/src/main/java
diff options
context:
space:
mode:
Diffstat (limited to 'node-admin/src/main/java')
-rw-r--r-- node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/docker/DockerOperations.java 6
-rw-r--r-- node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/docker/DockerOperationsImpl.java 29
-rw-r--r-- node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/acl/AclMaintainer.java 6
-rw-r--r-- node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImpl.java 43
-rw-r--r-- node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/provider/ComponentsProviderImpl.java 2
5 files changed, 53 insertions, 33 deletions
diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/docker/DockerOperations.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/docker/DockerOperations.java
index 80c8f148cbf..aa7285ec17c 100644
--- a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/docker/DockerOperations.java
+++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/docker/DockerOperations.java
@@ -25,9 +25,11 @@ public interface DockerOperations {
void scheduleDownloadOfImage(ContainerName containerName, ContainerNodeSpec nodeSpec, Runnable callback);
- ProcessResult executeCommandInContainerAsRoot(ContainerName containerName, String[] command);
+ ProcessResult executeCommandInContainerAsRoot(ContainerName containerName, String... command);
- void executeCommandInNetworkNamespace(ContainerName containerName, String[] command);
+ ProcessResult executeCommandInContainerAsRoot(ContainerName containerName, Long timeoutSeconds, String... command);
+
+ void executeCommandInNetworkNamespace(ContainerName containerName, String... command);
void resumeNode(ContainerName containerName);
diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/docker/DockerOperationsImpl.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/docker/DockerOperationsImpl.java
index 0ae807f7f04..fafbf3e2563 100644
--- a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/docker/DockerOperationsImpl.java
+++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/docker/DockerOperationsImpl.java
@@ -38,16 +38,17 @@ import static com.yahoo.vespa.defaults.Defaults.getDefaults;
/**
* Class that wraps the Docker class and have some tools related to running programs in docker.
+ *
* @author dybis
*/
public class DockerOperationsImpl implements DockerOperations {
public static final String NODE_PROGRAM = Defaults.getDefaults().underVespaHome("bin/vespa-nodectl");
private static final String[] GET_VESPA_VERSION_COMMAND = new String[]{NODE_PROGRAM, "vespa-version"};
- private static final String[] RESUME_NODE_COMMAND = new String[] {NODE_PROGRAM, "resume"};
- private static final String[] SUSPEND_NODE_COMMAND = new String[] {NODE_PROGRAM, "suspend"};
- private static final String[] RESTART_VESPA_ON_NODE_COMMAND = new String[] {NODE_PROGRAM, "restart-vespa"};
- private static final String[] STOP_NODE_COMMAND = new String[] {NODE_PROGRAM, "stop"};
+ private static final String[] RESUME_NODE_COMMAND = new String[]{NODE_PROGRAM, "resume"};
+ private static final String[] SUSPEND_NODE_COMMAND = new String[]{NODE_PROGRAM, "suspend"};
+ private static final String[] RESTART_VESPA_ON_NODE_COMMAND = new String[]{NODE_PROGRAM, "restart-vespa"};
+ private static final String[] STOP_NODE_COMMAND = new String[]{NODE_PROGRAM, "stop"};
private static final Pattern VESPA_VERSION_PATTERN = Pattern.compile("^(\\S*)$", Pattern.MULTILINE);
@@ -55,6 +56,7 @@ public class DockerOperationsImpl implements DockerOperations {
// Map of directories to mount and whether they should be writable by everyone
private static final Map<String, Boolean> DIRECTORIES_TO_MOUNT = new HashMap<>();
+
static {
DIRECTORIES_TO_MOUNT.put("/etc/yamas-agent", true);
DIRECTORIES_TO_MOUNT.put("/etc/filebeat", true);
@@ -231,7 +233,7 @@ public class DockerOperationsImpl implements DockerOperations {
* Try to suspend node. Suspending a node means the node should be taken offline,
* such that maintenance can be done of the node (upgrading, rebooting, etc),
* and such that we will start serving again as soon as possible afterwards.
- *
+ * <p>
* Any failures are logged and ignored.
*/
@Override
@@ -244,7 +246,7 @@ public class DockerOperationsImpl implements DockerOperations {
// It's bad to continue as-if nothing happened, but on the other hand if we do not proceed to
// remove container, we will not be able to upgrade to fix any problems in the suspend logic!
logger.warning("Failed trying to suspend container " + containerName.asString() + " with "
- + Arrays.toString(SUSPEND_NODE_COMMAND), e);
+ + Arrays.toString(SUSPEND_NODE_COMMAND), e);
}
}
@@ -274,10 +276,10 @@ public class DockerOperationsImpl implements DockerOperations {
});
}
- ProcessResult executeCommandInContainer(ContainerName containerName, String[] command) {
+ ProcessResult executeCommandInContainer(ContainerName containerName, String... command) {
ProcessResult result = docker.executeInContainerAsRoot(containerName, command);
- if (! result.isSuccess()) {
+ if (!result.isSuccess()) {
throw new RuntimeException("Container " + containerName.asString() +
": command " + Arrays.toString(command) + " failed: " + result);
}
@@ -285,12 +287,17 @@ public class DockerOperationsImpl implements DockerOperations {
}
@Override
- public ProcessResult executeCommandInContainerAsRoot(ContainerName containerName, String[] command) {
+ public ProcessResult executeCommandInContainerAsRoot(ContainerName containerName, Long timeoutSeconds, String... command) {
+ return docker.executeInContainerAsRoot(containerName, timeoutSeconds, command);
+ }
+
+ @Override
+ public ProcessResult executeCommandInContainerAsRoot(ContainerName containerName, String... command) {
return docker.executeInContainerAsRoot(containerName, command);
}
@Override
- public void executeCommandInNetworkNamespace(ContainerName containerName, String[] command) {
+ public void executeCommandInNetworkNamespace(ContainerName containerName, String... command) {
final PrefixLogger logger = PrefixLogger.getNodeAgentLogger(DockerOperationsImpl.class, containerName);
final Integer containerPid = docker.getContainer(containerName)
.filter(container -> container.state.isRunning())
@@ -366,7 +373,7 @@ public class DockerOperationsImpl implements DockerOperations {
if (resultCode != 0) {
throw new RuntimeException("Command " + Joiner.on(' ').join(command) + " failed: " + output);
}
- } catch (IOException|InterruptedException e) {
+ } catch (IOException | InterruptedException e) {
throw new RuntimeException(e);
}
}
diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/acl/AclMaintainer.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/acl/AclMaintainer.java
index 0b44f526670..385e823dc3a 100644
--- a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/acl/AclMaintainer.java
+++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/acl/AclMaintainer.java
@@ -22,11 +22,11 @@ import java.util.stream.Collectors;
* The responsibility of this class is to configure ACLs for all running containers. The ACLs are fetched from the Node
* repository. Based on those ACLs, iptables commands are created and then executed in each of the containers network
* namespace.
- *
+ * <p>
* If an ACL cannot be configured (e.g. iptables process execution fails), a rollback is attempted by setting the
* default policy to ACCEPT which will allow any traffic. The configuration will be retried the next time the
* maintainer runs.
- *
+ * <p>
* The ACL maintainer does not handle IPv4 addresses and is thus only intended to configure ACLs for IPv6-only
* containers (e.g. any container, except node-admin).
*
@@ -43,7 +43,7 @@ public class AclMaintainer implements Runnable {
private final Map<ContainerName, Acl> containerAcls;
public AclMaintainer(DockerOperations dockerOperations, NodeRepository nodeRepository,
- String nodeAdminHostname) {
+ String nodeAdminHostname) {
this.dockerOperations = dockerOperations;
this.nodeRepository = nodeRepository;
this.nodeAdminHostname = nodeAdminHostname;
diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImpl.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImpl.java
index 2ff10560fc1..b4bfaf5c9b0 100644
--- a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImpl.java
+++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImpl.java
@@ -4,6 +4,7 @@ package com.yahoo.vespa.hosted.node.admin.nodeagent;
import com.yahoo.vespa.hosted.dockerapi.Container;
import com.yahoo.vespa.hosted.dockerapi.ContainerName;
import com.yahoo.vespa.hosted.dockerapi.Docker;
+import com.yahoo.vespa.hosted.dockerapi.DockerExecTimeoutException;
import com.yahoo.vespa.hosted.dockerapi.DockerImage;
import com.yahoo.vespa.hosted.dockerapi.metrics.Dimensions;
import com.yahoo.vespa.hosted.dockerapi.metrics.MetricReceiverWrapper;
@@ -80,6 +81,7 @@ public class NodeAgentImpl implements NodeAgent {
RUNNING_HOWEVER_RESUME_SCRIPT_NOT_RUN,
RUNNING
}
+
private ContainerState containerState = ABSENT;
// The attributes of the last successful node repo attribute update for this node. Used to avoid redundant calls.
@@ -172,7 +174,7 @@ public class NodeAgentImpl implements NodeAgent {
}
loopThread = new Thread(() -> {
- while (! terminated.get()) tick();
+ while (!terminated.get()) tick();
});
loopThread.setName("tick-" + hostname);
loopThread.start();
@@ -199,7 +201,7 @@ public class NodeAgentImpl implements NodeAgent {
try {
FilebeatConfigProvider filebeatConfigProvider = new FilebeatConfigProvider(environment);
Optional<String> config = filebeatConfigProvider.getConfig(nodeSpec);
- if (! config.isPresent()) {
+ if (!config.isPresent()) {
logger.error("Was not able to generate a config for filebeat, ignoring filebeat file creation." + nodeSpec.toString());
return;
}
@@ -250,16 +252,16 @@ public class NodeAgentImpl implements NodeAgent {
// TODO: We should only update if the new current values do not match the node repo's current values
if (!currentAttributes.equals(lastAttributesSet)) {
logger.info("Publishing new set of attributes to node repo: "
- + lastAttributesSet + " -> " + currentAttributes);
+ + lastAttributesSet + " -> " + currentAttributes);
addDebugMessage("Publishing new set of attributes to node repo: {" +
- lastAttributesSet + "} -> {" + currentAttributes + "}");
+ lastAttributesSet + "} -> {" + currentAttributes + "}");
nodeRepository.updateNodeAttributes(hostname, currentAttributes);
lastAttributesSet = currentAttributes;
}
}
private void startContainerIfNeeded(final ContainerNodeSpec nodeSpec) {
- if (! getContainer().isPresent()) {
+ if (!getContainer().isPresent()) {
aclMaintainer.ifPresent(AclMaintainer::run);
dockerOperations.startContainer(containerName, nodeSpec);
metricReceiver.unsetMetricsForContainer(hostname);
@@ -268,7 +270,7 @@ public class NodeAgentImpl implements NodeAgent {
configureContainerMetrics(nodeSpec);
addDebugMessage("startContainerIfNeeded: containerState " + containerState + " -> " +
- RUNNING_HOWEVER_RESUME_SCRIPT_NOT_RUN);
+ RUNNING_HOWEVER_RESUME_SCRIPT_NOT_RUN);
containerState = RUNNING_HOWEVER_RESUME_SCRIPT_NOT_RUN;
logger.info("Container successfully started, new containerState is " + containerState);
}
@@ -279,13 +281,13 @@ public class NodeAgentImpl implements NodeAgent {
shouldRestartServices(nodeSpec).ifPresent(restartReason -> {
logger.info("Will restart services for container " + existingContainer + ": " + restartReason);
restartServices(nodeSpec, existingContainer);
- }));
+ }));
}
private Optional<String> shouldRestartServices(ContainerNodeSpec nodeSpec) {
- if ( ! nodeSpec.wantedRestartGeneration.isPresent()) return Optional.empty();
+ if (!nodeSpec.wantedRestartGeneration.isPresent()) return Optional.empty();
- if (! nodeSpec.currentRestartGeneration.isPresent() ||
+ if (!nodeSpec.currentRestartGeneration.isPresent() ||
nodeSpec.currentRestartGeneration.get() < nodeSpec.wantedRestartGeneration.get()) {
return Optional.of("Restart requested - wanted restart generation has been bumped: "
+ nodeSpec.currentRestartGeneration.get() + " -> " + nodeSpec.wantedRestartGeneration.get());
@@ -317,7 +319,7 @@ public class NodeAgentImpl implements NodeAgent {
}
if (nodeSpec.wantedDockerImage.isPresent() && !nodeSpec.wantedDockerImage.get().equals(existingContainer.image)) {
return Optional.of("The node is supposed to run a new Docker image: "
- + existingContainer + " -> " + nodeSpec.wantedDockerImage.get());
+ + existingContainer + " -> " + nodeSpec.wantedDockerImage.get());
}
if (!existingContainer.state.isRunning()) {
return Optional.of("Container no longer running");
@@ -372,7 +374,7 @@ public class NodeAgentImpl implements NodeAgent {
private void signalWorkToBeDone() {
synchronized (monitor) {
- if (! workToDoNow) {
+ if (!workToDoNow) {
workToDoNow = true;
addDebugMessage("Signaling work to be done");
monitor.notifyAll();
@@ -383,7 +385,7 @@ public class NodeAgentImpl implements NodeAgent {
void tick() {
boolean isFrozenCopy;
synchronized (monitor) {
- while (! workToDoNow) {
+ while (!workToDoNow) {
long remainder = delaysBetweenEachConvergeMillis - Duration.between(lastConverge, clock.instant()).toMillis();
if (remainder > 0) {
try {
@@ -526,7 +528,7 @@ public class NodeAgentImpl implements NodeAgent {
// The remaining metrics require container to exists and be running
if (containerState == ABSENT) return;
Optional<Docker.ContainerStats> containerStats = dockerOperations.getContainerStats(containerName);
- if ( ! containerStats.isPresent()) return;
+ if (!containerStats.isPresent()) return;
Docker.ContainerStats stats = containerStats.get();
@@ -570,6 +572,15 @@ public class NodeAgentImpl implements NodeAgent {
metricReceiver.declareGauge(MetricReceiverWrapper.APPLICATION_HOST_LIFE, dimensions, "uptime").sample(lastCpuMetric.getUptime());
metricReceiver.declareGauge(MetricReceiverWrapper.APPLICATION_HOST_LIFE, dimensions, "alive").sample(1);
+
+ // Push metrics to the metrics proxy in each container - give it maximum 1 seconds to complete
+ try {
+ //TODO The command here is almost a dummy command until we have the proper RPC method in place
+ // Remember proper argument encoding
+ dockerOperations.executeCommandInContainerAsRoot(containerName, 1L, "sh", "-c", "'echo " + metricReceiver.toString() + "'");
+ } catch (DockerExecTimeoutException e) {
+ logger.warning("Unable to push metrics to container: " + containerName, e);
+ }
}
@SuppressWarnings("unchecked")
@@ -578,7 +589,7 @@ public class NodeAgentImpl implements NodeAgent {
if (metricsMap == null || !metricsMap.containsKey(metricName)) return;
try {
metricReceiver.declareGauge(MetricReceiverWrapper.APPLICATION_DOCKER, dimensions, yamasName)
- .sample(((Number) metricsMap.get(metricName)).doubleValue());
+ .sample(((Number) metricsMap.get(metricName)).doubleValue());
} catch (Throwable e) {
logger.warning("Failed to update " + yamasName + " metric with value " + metricsMap.get(metricName), e);
}
@@ -607,7 +618,7 @@ public class NodeAgentImpl implements NodeAgent {
}
private void configureContainerMetrics(ContainerNodeSpec nodeSpec) {
- if (! storageMaintainer.isPresent()) return;
+ if (!storageMaintainer.isPresent()) return;
final Path yamasAgentFolder = environment.pathInNodeAdminFromPathInNode(containerName, "/etc/yamas-agent/");
Path vespaCheckPath = Paths.get(getDefaults().underVespaHome("libexec/yms/yms_check_vespa"));
@@ -633,7 +644,7 @@ public class NodeAgentImpl implements NodeAgent {
try {
scheduleMaker.writeTo(yamasAgentFolder);
- final String[] restartYamasAgent = new String[] {"service" , "yamas-agent", "restart"};
+ final String[] restartYamasAgent = new String[]{"service", "yamas-agent", "restart"};
dockerOperations.executeCommandInContainerAsRoot(containerName, restartYamasAgent);
} catch (IOException e) {
throw new RuntimeException("Failed to write secret-agent schedules for " + containerName, e);
diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/provider/ComponentsProviderImpl.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/provider/ComponentsProviderImpl.java
index 39b479c8ce8..487d1845c62 100644
--- a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/provider/ComponentsProviderImpl.java
+++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/provider/ComponentsProviderImpl.java
@@ -83,7 +83,7 @@ public class ComponentsProviderImpl implements ComponentsProvider {
public ComponentsProviderImpl(final NodeAdminConfig config, final Docker docker, final MetricReceiverWrapper metricReceiver) {
this(docker, metricReceiver, new Environment(), config.isRunningLocally());
- if (! config.isRunningLocally()) {
+ if (!config.isRunningLocally()) {
setCorePattern(docker);
initializeNodeAgentSecretAgent(docker);
}