diff options
Diffstat (limited to 'node-admin/src/main/java')
5 files changed, 53 insertions, 33 deletions
diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/docker/DockerOperations.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/docker/DockerOperations.java index 80c8f148cbf..aa7285ec17c 100644 --- a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/docker/DockerOperations.java +++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/docker/DockerOperations.java @@ -25,9 +25,11 @@ public interface DockerOperations { void scheduleDownloadOfImage(ContainerName containerName, ContainerNodeSpec nodeSpec, Runnable callback); - ProcessResult executeCommandInContainerAsRoot(ContainerName containerName, String[] command); + ProcessResult executeCommandInContainerAsRoot(ContainerName containerName, String... command); - void executeCommandInNetworkNamespace(ContainerName containerName, String[] command); + ProcessResult executeCommandInContainerAsRoot(ContainerName containerName, Long timeoutSeconds, String... command); + + void executeCommandInNetworkNamespace(ContainerName containerName, String... command); void resumeNode(ContainerName containerName); diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/docker/DockerOperationsImpl.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/docker/DockerOperationsImpl.java index 0ae807f7f04..fafbf3e2563 100644 --- a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/docker/DockerOperationsImpl.java +++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/docker/DockerOperationsImpl.java @@ -38,16 +38,17 @@ import static com.yahoo.vespa.defaults.Defaults.getDefaults; /** * Class that wraps the Docker class and have some tools related to running programs in docker. + * * @author dybis */ public class DockerOperationsImpl implements DockerOperations { public static final String NODE_PROGRAM = Defaults.getDefaults().underVespaHome("bin/vespa-nodectl"); private static final String[] GET_VESPA_VERSION_COMMAND = new String[]{NODE_PROGRAM, "vespa-version"}; - private static final String[] RESUME_NODE_COMMAND = new String[] {NODE_PROGRAM, "resume"}; - private static final String[] SUSPEND_NODE_COMMAND = new String[] {NODE_PROGRAM, "suspend"}; - private static final String[] RESTART_VESPA_ON_NODE_COMMAND = new String[] {NODE_PROGRAM, "restart-vespa"}; - private static final String[] STOP_NODE_COMMAND = new String[] {NODE_PROGRAM, "stop"}; + private static final String[] RESUME_NODE_COMMAND = new String[]{NODE_PROGRAM, "resume"}; + private static final String[] SUSPEND_NODE_COMMAND = new String[]{NODE_PROGRAM, "suspend"}; + private static final String[] RESTART_VESPA_ON_NODE_COMMAND = new String[]{NODE_PROGRAM, "restart-vespa"}; + private static final String[] STOP_NODE_COMMAND = new String[]{NODE_PROGRAM, "stop"}; private static final Pattern VESPA_VERSION_PATTERN = Pattern.compile("^(\\S*)$", Pattern.MULTILINE); @@ -55,6 +56,7 @@ public class DockerOperationsImpl implements DockerOperations { // Map of directories to mount and whether they should be writable by everyone private static final Map<String, Boolean> DIRECTORIES_TO_MOUNT = new HashMap<>(); + static { DIRECTORIES_TO_MOUNT.put("/etc/yamas-agent", true); DIRECTORIES_TO_MOUNT.put("/etc/filebeat", true); @@ -231,7 +233,7 @@ public class DockerOperationsImpl implements DockerOperations { * Try to suspend node. Suspending a node means the node should be taken offline, * such that maintenance can be done of the node (upgrading, rebooting, etc), * and such that we will start serving again as soon as possible afterwards. - * + * <p> * Any failures are logged and ignored. */ @Override @@ -244,7 +246,7 @@ public class DockerOperationsImpl implements DockerOperations { // It's bad to continue as-if nothing happened, but on the other hand if we do not proceed to // remove container, we will not be able to upgrade to fix any problems in the suspend logic! logger.warning("Failed trying to suspend container " + containerName.asString() + " with " - + Arrays.toString(SUSPEND_NODE_COMMAND), e); + + Arrays.toString(SUSPEND_NODE_COMMAND), e); } } @@ -274,10 +276,10 @@ public class DockerOperationsImpl implements DockerOperations { }); } - ProcessResult executeCommandInContainer(ContainerName containerName, String[] command) { + ProcessResult executeCommandInContainer(ContainerName containerName, String... command) { ProcessResult result = docker.executeInContainerAsRoot(containerName, command); - if (! result.isSuccess()) { + if (!result.isSuccess()) { throw new RuntimeException("Container " + containerName.asString() + ": command " + Arrays.toString(command) + " failed: " + result); } @@ -285,12 +287,17 @@ public class DockerOperationsImpl implements DockerOperations { } @Override - public ProcessResult executeCommandInContainerAsRoot(ContainerName containerName, String[] command) { + public ProcessResult executeCommandInContainerAsRoot(ContainerName containerName, Long timeoutSeconds, String... command) { + return docker.executeInContainerAsRoot(containerName, timeoutSeconds, command); + } + + @Override + public ProcessResult executeCommandInContainerAsRoot(ContainerName containerName, String... command) { return docker.executeInContainerAsRoot(containerName, command); } @Override - public void executeCommandInNetworkNamespace(ContainerName containerName, String[] command) { + public void executeCommandInNetworkNamespace(ContainerName containerName, String... command) { final PrefixLogger logger = PrefixLogger.getNodeAgentLogger(DockerOperationsImpl.class, containerName); final Integer containerPid = docker.getContainer(containerName) .filter(container -> container.state.isRunning()) @@ -366,7 +373,7 @@ public class DockerOperationsImpl implements DockerOperations { if (resultCode != 0) { throw new RuntimeException("Command " + Joiner.on(' ').join(command) + " failed: " + output); } - } catch (IOException|InterruptedException e) { + } catch (IOException | InterruptedException e) { throw new RuntimeException(e); } } diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/acl/AclMaintainer.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/acl/AclMaintainer.java index 0b44f526670..385e823dc3a 100644 --- a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/acl/AclMaintainer.java +++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/acl/AclMaintainer.java @@ -22,11 +22,11 @@ import java.util.stream.Collectors; * The responsibility of this class is to configure ACLs for all running containers. The ACLs are fetched from the Node * repository. Based on those ACLs, iptables commands are created and then executed in each of the containers network * namespace. - * + * <p> * If an ACL cannot be configured (e.g. iptables process execution fails), a rollback is attempted by setting the * default policy to ACCEPT which will allow any traffic. The configuration will be retried the next time the * maintainer runs. - * + * <p> * The ACL maintainer does not handle IPv4 addresses and is thus only intended to configure ACLs for IPv6-only * containers (e.g. any container, except node-admin). * @@ -43,7 +43,7 @@ public class AclMaintainer implements Runnable { private final Map<ContainerName, Acl> containerAcls; public AclMaintainer(DockerOperations dockerOperations, NodeRepository nodeRepository, - String nodeAdminHostname) { + String nodeAdminHostname) { this.dockerOperations = dockerOperations; this.nodeRepository = nodeRepository; this.nodeAdminHostname = nodeAdminHostname; diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImpl.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImpl.java index 2ff10560fc1..b4bfaf5c9b0 100644 --- a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImpl.java +++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImpl.java @@ -4,6 +4,7 @@ package com.yahoo.vespa.hosted.node.admin.nodeagent; import com.yahoo.vespa.hosted.dockerapi.Container; import com.yahoo.vespa.hosted.dockerapi.ContainerName; import com.yahoo.vespa.hosted.dockerapi.Docker; +import com.yahoo.vespa.hosted.dockerapi.DockerExecTimeoutException; import com.yahoo.vespa.hosted.dockerapi.DockerImage; import com.yahoo.vespa.hosted.dockerapi.metrics.Dimensions; import com.yahoo.vespa.hosted.dockerapi.metrics.MetricReceiverWrapper; @@ -80,6 +81,7 @@ public class NodeAgentImpl implements NodeAgent { RUNNING_HOWEVER_RESUME_SCRIPT_NOT_RUN, RUNNING } + private ContainerState containerState = ABSENT; // The attributes of the last successful node repo attribute update for this node. Used to avoid redundant calls. @@ -172,7 +174,7 @@ public class NodeAgentImpl implements NodeAgent { } loopThread = new Thread(() -> { - while (! terminated.get()) tick(); + while (!terminated.get()) tick(); }); loopThread.setName("tick-" + hostname); loopThread.start(); @@ -199,7 +201,7 @@ public class NodeAgentImpl implements NodeAgent { try { FilebeatConfigProvider filebeatConfigProvider = new FilebeatConfigProvider(environment); Optional<String> config = filebeatConfigProvider.getConfig(nodeSpec); - if (! config.isPresent()) { + if (!config.isPresent()) { logger.error("Was not able to generate a config for filebeat, ignoring filebeat file creation." + nodeSpec.toString()); return; } @@ -250,16 +252,16 @@ public class NodeAgentImpl implements NodeAgent { // TODO: We should only update if the new current values do not match the node repo's current values if (!currentAttributes.equals(lastAttributesSet)) { logger.info("Publishing new set of attributes to node repo: " - + lastAttributesSet + " -> " + currentAttributes); + + lastAttributesSet + " -> " + currentAttributes); addDebugMessage("Publishing new set of attributes to node repo: {" + - lastAttributesSet + "} -> {" + currentAttributes + "}"); + lastAttributesSet + "} -> {" + currentAttributes + "}"); nodeRepository.updateNodeAttributes(hostname, currentAttributes); lastAttributesSet = currentAttributes; } } private void startContainerIfNeeded(final ContainerNodeSpec nodeSpec) { - if (! getContainer().isPresent()) { + if (!getContainer().isPresent()) { aclMaintainer.ifPresent(AclMaintainer::run); dockerOperations.startContainer(containerName, nodeSpec); metricReceiver.unsetMetricsForContainer(hostname); @@ -268,7 +270,7 @@ public class NodeAgentImpl implements NodeAgent { configureContainerMetrics(nodeSpec); addDebugMessage("startContainerIfNeeded: containerState " + containerState + " -> " + - RUNNING_HOWEVER_RESUME_SCRIPT_NOT_RUN); + RUNNING_HOWEVER_RESUME_SCRIPT_NOT_RUN); containerState = RUNNING_HOWEVER_RESUME_SCRIPT_NOT_RUN; logger.info("Container successfully started, new containerState is " + containerState); } @@ -279,13 +281,13 @@ public class NodeAgentImpl implements NodeAgent { shouldRestartServices(nodeSpec).ifPresent(restartReason -> { logger.info("Will restart services for container " + existingContainer + ": " + restartReason); restartServices(nodeSpec, existingContainer); - })); + })); } private Optional<String> shouldRestartServices(ContainerNodeSpec nodeSpec) { - if ( ! nodeSpec.wantedRestartGeneration.isPresent()) return Optional.empty(); + if (!nodeSpec.wantedRestartGeneration.isPresent()) return Optional.empty(); - if (! nodeSpec.currentRestartGeneration.isPresent() || + if (!nodeSpec.currentRestartGeneration.isPresent() || nodeSpec.currentRestartGeneration.get() < nodeSpec.wantedRestartGeneration.get()) { return Optional.of("Restart requested - wanted restart generation has been bumped: " + nodeSpec.currentRestartGeneration.get() + " -> " + nodeSpec.wantedRestartGeneration.get()); @@ -317,7 +319,7 @@ public class NodeAgentImpl implements NodeAgent { } if (nodeSpec.wantedDockerImage.isPresent() && !nodeSpec.wantedDockerImage.get().equals(existingContainer.image)) { return Optional.of("The node is supposed to run a new Docker image: " - + existingContainer + " -> " + nodeSpec.wantedDockerImage.get()); + + existingContainer + " -> " + nodeSpec.wantedDockerImage.get()); } if (!existingContainer.state.isRunning()) { return Optional.of("Container no longer running"); @@ -372,7 +374,7 @@ public class NodeAgentImpl implements NodeAgent { private void signalWorkToBeDone() { synchronized (monitor) { - if (! workToDoNow) { + if (!workToDoNow) { workToDoNow = true; addDebugMessage("Signaling work to be done"); monitor.notifyAll(); @@ -383,7 +385,7 @@ public class NodeAgentImpl implements NodeAgent { void tick() { boolean isFrozenCopy; synchronized (monitor) { - while (! workToDoNow) { + while (!workToDoNow) { long remainder = delaysBetweenEachConvergeMillis - Duration.between(lastConverge, clock.instant()).toMillis(); if (remainder > 0) { try { @@ -526,7 +528,7 @@ public class NodeAgentImpl implements NodeAgent { // The remaining metrics require container to exists and be running if (containerState == ABSENT) return; Optional<Docker.ContainerStats> containerStats = dockerOperations.getContainerStats(containerName); - if ( ! containerStats.isPresent()) return; + if (!containerStats.isPresent()) return; Docker.ContainerStats stats = containerStats.get(); @@ -570,6 +572,15 @@ public class NodeAgentImpl implements NodeAgent { metricReceiver.declareGauge(MetricReceiverWrapper.APPLICATION_HOST_LIFE, dimensions, "uptime").sample(lastCpuMetric.getUptime()); metricReceiver.declareGauge(MetricReceiverWrapper.APPLICATION_HOST_LIFE, dimensions, "alive").sample(1); + + // Push metrics to the metrics proxy in each container - give it maximum 1 seconds to complete + try { + //TODO The command here is almost a dummy command until we have the proper RPC method in place + // Remember proper argument encoding + dockerOperations.executeCommandInContainerAsRoot(containerName, 1L, "sh", "-c", "'echo " + metricReceiver.toString() + "'"); + } catch (DockerExecTimeoutException e) { + logger.warning("Unable to push metrics to container: " + containerName, e); + } } @SuppressWarnings("unchecked") @@ -578,7 +589,7 @@ public class NodeAgentImpl implements NodeAgent { if (metricsMap == null || !metricsMap.containsKey(metricName)) return; try { metricReceiver.declareGauge(MetricReceiverWrapper.APPLICATION_DOCKER, dimensions, yamasName) - .sample(((Number) metricsMap.get(metricName)).doubleValue()); + .sample(((Number) metricsMap.get(metricName)).doubleValue()); } catch (Throwable e) { logger.warning("Failed to update " + yamasName + " metric with value " + metricsMap.get(metricName), e); } @@ -607,7 +618,7 @@ public class NodeAgentImpl implements NodeAgent { } private void configureContainerMetrics(ContainerNodeSpec nodeSpec) { - if (! storageMaintainer.isPresent()) return; + if (!storageMaintainer.isPresent()) return; final Path yamasAgentFolder = environment.pathInNodeAdminFromPathInNode(containerName, "/etc/yamas-agent/"); Path vespaCheckPath = Paths.get(getDefaults().underVespaHome("libexec/yms/yms_check_vespa")); @@ -633,7 +644,7 @@ public class NodeAgentImpl implements NodeAgent { try { scheduleMaker.writeTo(yamasAgentFolder); - final String[] restartYamasAgent = new String[] {"service" , "yamas-agent", "restart"}; + final String[] restartYamasAgent = new String[]{"service", "yamas-agent", "restart"}; dockerOperations.executeCommandInContainerAsRoot(containerName, restartYamasAgent); } catch (IOException e) { throw new RuntimeException("Failed to write secret-agent schedules for " + containerName, e); diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/provider/ComponentsProviderImpl.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/provider/ComponentsProviderImpl.java index 39b479c8ce8..487d1845c62 100644 --- a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/provider/ComponentsProviderImpl.java +++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/provider/ComponentsProviderImpl.java @@ -83,7 +83,7 @@ public class ComponentsProviderImpl implements ComponentsProvider { public ComponentsProviderImpl(final NodeAdminConfig config, final Docker docker, final MetricReceiverWrapper metricReceiver) { this(docker, metricReceiver, new Environment(), config.isRunningLocally()); - if (! config.isRunningLocally()) { + if (!config.isRunningLocally()) { setCorePattern(docker); initializeNodeAgentSecretAgent(docker); } |