diff options
author | Torbjørn Smørgrav <smorgrav@users.noreply.github.com> | 2017-05-10 08:36:07 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2017-05-10 08:36:07 +0200 |
commit | 5dbb7109c8674d14be0cedad9236a69d1f938c39 (patch) | |
tree | e1c09850b762133ea874d904b897bced492a1b20 | |
parent | 2df4fa8147d54782532637f4cca050c5f06b4bfe (diff) | |
parent | f7308b8cada5f53e13069a3104691bd59552c007 (diff) |
Merge pull request #2208 from yahoo/smorgrav/dockermetrics
Push node metrics to containers - take 1
12 files changed, 230 insertions, 68 deletions
diff --git a/docker-api/src/main/java/com/yahoo/vespa/hosted/dockerapi/Docker.java b/docker-api/src/main/java/com/yahoo/vespa/hosted/dockerapi/Docker.java index b628bd77d91..d2cffda329f 100644 --- a/docker-api/src/main/java/com/yahoo/vespa/hosted/dockerapi/Docker.java +++ b/docker-api/src/main/java/com/yahoo/vespa/hosted/dockerapi/Docker.java @@ -72,13 +72,36 @@ public interface Docker { void deleteUnusedDockerImages(); /** - * Execute a command in docker container as "yahoo" user - * TODO: Make this function interruptible + * Execute a command in docker container as "yahoo". Will block until the command is finished. * - * @param args Program arguments. args[0] must be the program filename. - * @throws RuntimeException (or some subclass thereof) on failure, including docker failure, command failure + * @param containerName The name of the container + * @param command The command with arguments to run. + * + * @return exitcodes, stdout and stderr in the ProcessResult + */ + ProcessResult executeInContainer(ContainerName containerName, String... command); + + /** + * Execute a command in docker container as "root". Will block until the command is finished. + * + * @param containerName The name of the container + * @param command The command with arguments to run. + * + * @return exitcodes, stdout and stderr in the ProcessResult */ - ProcessResult executeInContainer(ContainerName containerName, String... args); + ProcessResult executeInContainerAsRoot(ContainerName containerName, String... command); + + /** + * Execute a command in docker container as "root" + * The timeout will not kill the process spawned. + * + * @param containerName The name of the container + * @param timeoutSeconds Timeout for the process to finish in seconds or without timeout if empty + * @param command The command with arguments to run. + * + * @return exitcodes, stdout and stderr in the ProcessResult + */ + ProcessResult executeInContainerAsRoot(ContainerName containerName, Long timeoutSeconds, String... command); + - ProcessResult executeInContainerAsRoot(ContainerName containerName, String... args); } diff --git a/docker-api/src/main/java/com/yahoo/vespa/hosted/dockerapi/DockerExecTimeoutException.java b/docker-api/src/main/java/com/yahoo/vespa/hosted/dockerapi/DockerExecTimeoutException.java new file mode 100644 index 00000000000..153fc0257a3 --- /dev/null +++ b/docker-api/src/main/java/com/yahoo/vespa/hosted/dockerapi/DockerExecTimeoutException.java @@ -0,0 +1,16 @@ +package com.yahoo.vespa.hosted.dockerapi; + +/** + * Runtime exception to be thrown when the exec commands did not finish in time. + * + * The underlying process has not been killed. If you need the process to be + * killed you need to wrap it into a commands that times out. + * + * @author smorgrav + */ +@SuppressWarnings("serial") +public class DockerExecTimeoutException extends RuntimeException { + public DockerExecTimeoutException(String msg) { + super(msg); + } +} diff --git a/docker-api/src/main/java/com/yahoo/vespa/hosted/dockerapi/DockerImpl.java b/docker-api/src/main/java/com/yahoo/vespa/hosted/dockerapi/DockerImpl.java index 6b1d63dd12f..e94a2d9d118 100644 --- a/docker-api/src/main/java/com/yahoo/vespa/hosted/dockerapi/DockerImpl.java +++ b/docker-api/src/main/java/com/yahoo/vespa/hosted/dockerapi/DockerImpl.java @@ -93,7 +93,7 @@ public class DockerImpl implements Docker { true, /* fallback to 1.23 on errors */ metricReceiver); - if (! config.isRunningLocally()) { + if (!config.isRunningLocally()) { Duration minAgeToDelete = Duration.ofMinutes(config.imageGCMinTimeToLiveMinutes()); dockerImageGC = Optional.of(new DockerImageGarbageCollector(minAgeToDelete)); @@ -121,7 +121,7 @@ public class DockerImpl implements Docker { } private void setupDockerNetworkIfNeeded() throws IOException { - if (! dockerClient.listNetworksCmd().withNameFilter(DOCKER_CUSTOM_MACVLAN_NETWORK_NAME).exec().isEmpty()) return; + if (!dockerClient.listNetworksCmd().withNameFilter(DOCKER_CUSTOM_MACVLAN_NETWORK_NAME).exec().isEmpty()) return; // Use IPv6 address if there is a mix of IP4 and IPv6 by taking the longest address. List<InetAddress> hostAddresses = Arrays.asList(InetAddress.getAllByName(com.yahoo.net.HostName.getLocalhost())); @@ -225,22 +225,26 @@ public class DockerImpl implements Docker { @Override public ProcessResult executeInContainer(ContainerName containerName, String... args) { - return executeInContainerAsUser(containerName, "yahoo", args); + return executeInContainerAsUser(containerName, "yahoo", Optional.empty(), args); } @Override public ProcessResult executeInContainerAsRoot(ContainerName containerName, String... args) { - return executeInContainerAsUser(containerName, "root", args); + return executeInContainerAsUser(containerName, "root", Optional.empty(), args); + } + + @Override + public ProcessResult executeInContainerAsRoot(ContainerName containerName, Long timeoutSeconds, String... args) { + return executeInContainerAsUser(containerName, "root", Optional.of(timeoutSeconds), args); } /** * Execute command in container as user, "user" can be "username", "username:group", "uid" or "uid:gid" */ - private ProcessResult executeInContainerAsUser(ContainerName containerName, String user, String... args) { - assert args.length >= 1; + private ProcessResult executeInContainerAsUser(ContainerName containerName, String user, Optional<Long> timeoutSeconds, String... command) { try { final ExecCreateCmdResponse response = dockerClient.execCreateCmd(containerName.asString()) - .withCmd(args) + .withCmd(command) .withAttachStdout(true) .withAttachStderr(true) .withUser(user) @@ -249,7 +253,16 @@ public class DockerImpl implements Docker { ByteArrayOutputStream output = new ByteArrayOutputStream(); ByteArrayOutputStream errors = new ByteArrayOutputStream(); ExecStartCmd execStartCmd = dockerClient.execStartCmd(response.getId()); - execStartCmd.exec(new ExecStartResultCallback(output, errors)).awaitCompletion(); + ExecStartResultCallback callback = execStartCmd.exec(new ExecStartResultCallback(output, errors)); + + if (timeoutSeconds.isPresent()) { + if (!callback.awaitCompletion(timeoutSeconds.get(), TimeUnit.SECONDS)) { + throw new DockerExecTimeoutException(String.format("Command '%s' did not finish within %s seconds.", command[0], timeoutSeconds)); + } + } else { + // Wait for completion no timeout + callback.awaitCompletion(); + } final InspectExecResponse state = dockerClient.inspectExecCmd(execStartCmd.getExecId()).exec(); assert !state.isRunning(); @@ -260,7 +273,7 @@ public class DockerImpl implements Docker { } catch (DockerException | InterruptedException e) { numberOfDockerDaemonFails.add(); throw new RuntimeException("Container '" + containerName.asString() - + "' failed to execute " + Arrays.toString(args), e); + + "' failed to execute " + Arrays.toString(command), e); } } @@ -413,7 +426,7 @@ public class DockerImpl implements Docker { @Override public void deleteUnusedDockerImages() { - if (! dockerImageGC.isPresent()) return; + if (!dockerImageGC.isPresent()) return; List<Image> images = listAllImages(); List<com.github.dockerjava.api.model.Container> containers = listAllContainers(); @@ -481,7 +494,7 @@ public class DockerImpl implements Docker { remoteApiVersion = RemoteApiVersion.parseConfig(DockerClientImpl.getInstance( buildDockerClientConfig(config).build()) .withDockerCmdExecFactory(dockerFactory).versionCmd().exec().getApiVersion()); - logger.info("Found version of remote docker API: "+ remoteApiVersion); + logger.info("Found version of remote docker API: " + remoteApiVersion); // From version 1.24 a field was removed which causes trouble with the current docker java code. // When this is fixed, we can remove this and do not specify version. if (remoteApiVersion.isGreaterOrEqual(RemoteApiVersion.VERSION_1_24)) { @@ -489,7 +502,7 @@ public class DockerImpl implements Docker { logger.info("Found version 1.24 or newer of remote API, using 1.23."); } } catch (Exception e) { - if (! fallbackTo123orErrors) { + if (!fallbackTo123orErrors) { throw e; } logger.log(LogLevel.ERROR, "Failed when trying to figure out remote API version of docker, using 1.23", e); diff --git a/docker-api/src/test/java/com/yahoo/vespa/hosted/dockerapi/DockerTest.java b/docker-api/src/test/java/com/yahoo/vespa/hosted/dockerapi/DockerTest.java index 2c28eac5e4f..f91fccb2cc7 100644 --- a/docker-api/src/test/java/com/yahoo/vespa/hosted/dockerapi/DockerTest.java +++ b/docker-api/src/test/java/com/yahoo/vespa/hosted/dockerapi/DockerTest.java @@ -115,6 +115,43 @@ public class DockerTest { assertThat(docker.getAllContainersManagedBy(MANAGER_NAME).isEmpty(), is(true)); } + /** + * Test the expected behavior for exec when it times out - it should throw an exception when it times out, + * and before the process completes. + * + * The test timeout value is set quite high to avoid noise if screwdriver is slow but lower than the process time. + */ + @Test(expected = DockerExecTimeoutException.class, timeout = 2000) + public void testContainerExecHounorsTimeout() throws IOException, InterruptedException, ExecutionException { + final ContainerName containerName = new ContainerName("docker-test-foo"); + final String containerHostname = "hostName1"; + + docker.createContainerCommand(dockerImage, containerName, containerHostname).withManagedBy(MANAGER_NAME).create(); + docker.startContainer(containerName); + docker.executeInContainerAsRoot(containerName, 1L, "sh", "-c", "sleep 5"); + } + + /** + * Test the expected behavior for exec that completes before specified timeout - it should return when the process finishes and not + * wait for the timeout. Some previous tests indicated that this was not behaving correctly. + * + * No timeout implies infinite timeout. + * + * The test timeout value is set quite high to avoid noise if screwdriver is slow + */ + @Test(timeout = 4000) + public void testContainerExecDoesNotBlockUntilTimeoutWhenCommandFinishesBeforeTimeout() throws IOException, InterruptedException, ExecutionException { + final ContainerName containerName = new ContainerName("docker-test-foo"); + final String containerHostname = "hostName1"; + + docker.createContainerCommand(dockerImage, containerName, containerHostname).withManagedBy(MANAGER_NAME).create(); + docker.startContainer(containerName); + docker.executeInContainerAsRoot(containerName, 2L, "sh", "-c", "echo hei"); + + // Also test that this is the behavoir when not specifying timeout + docker.executeInContainerAsRoot(containerName,"sh", "-c", "echo hei"); + } + @Test public void testDockerNetworking() throws InterruptedException, ExecutionException, IOException { String hostName1 = "docker10.test.yahoo.com"; diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/docker/DockerOperations.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/docker/DockerOperations.java index 80c8f148cbf..aa7285ec17c 100644 --- a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/docker/DockerOperations.java +++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/docker/DockerOperations.java @@ -25,9 +25,11 @@ public interface DockerOperations { void scheduleDownloadOfImage(ContainerName containerName, ContainerNodeSpec nodeSpec, Runnable callback); - ProcessResult executeCommandInContainerAsRoot(ContainerName containerName, String[] command); + ProcessResult executeCommandInContainerAsRoot(ContainerName containerName, String... command); - void executeCommandInNetworkNamespace(ContainerName containerName, String[] command); + ProcessResult executeCommandInContainerAsRoot(ContainerName containerName, Long timeoutSeconds, String... command); + + void executeCommandInNetworkNamespace(ContainerName containerName, String... command); void resumeNode(ContainerName containerName); diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/docker/DockerOperationsImpl.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/docker/DockerOperationsImpl.java index 0ae807f7f04..fafbf3e2563 100644 --- a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/docker/DockerOperationsImpl.java +++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/docker/DockerOperationsImpl.java @@ -38,16 +38,17 @@ import static com.yahoo.vespa.defaults.Defaults.getDefaults; /** * Class that wraps the Docker class and have some tools related to running programs in docker. + * * @author dybis */ public class DockerOperationsImpl implements DockerOperations { public static final String NODE_PROGRAM = Defaults.getDefaults().underVespaHome("bin/vespa-nodectl"); private static final String[] GET_VESPA_VERSION_COMMAND = new String[]{NODE_PROGRAM, "vespa-version"}; - private static final String[] RESUME_NODE_COMMAND = new String[] {NODE_PROGRAM, "resume"}; - private static final String[] SUSPEND_NODE_COMMAND = new String[] {NODE_PROGRAM, "suspend"}; - private static final String[] RESTART_VESPA_ON_NODE_COMMAND = new String[] {NODE_PROGRAM, "restart-vespa"}; - private static final String[] STOP_NODE_COMMAND = new String[] {NODE_PROGRAM, "stop"}; + private static final String[] RESUME_NODE_COMMAND = new String[]{NODE_PROGRAM, "resume"}; + private static final String[] SUSPEND_NODE_COMMAND = new String[]{NODE_PROGRAM, "suspend"}; + private static final String[] RESTART_VESPA_ON_NODE_COMMAND = new String[]{NODE_PROGRAM, "restart-vespa"}; + private static final String[] STOP_NODE_COMMAND = new String[]{NODE_PROGRAM, "stop"}; private static final Pattern VESPA_VERSION_PATTERN = Pattern.compile("^(\\S*)$", Pattern.MULTILINE); @@ -55,6 +56,7 @@ public class DockerOperationsImpl implements DockerOperations { // Map of directories to mount and whether they should be writable by everyone private static final Map<String, Boolean> DIRECTORIES_TO_MOUNT = new HashMap<>(); + static { DIRECTORIES_TO_MOUNT.put("/etc/yamas-agent", true); DIRECTORIES_TO_MOUNT.put("/etc/filebeat", true); @@ -231,7 +233,7 @@ public class DockerOperationsImpl implements DockerOperations { * Try to suspend node. Suspending a node means the node should be taken offline, * such that maintenance can be done of the node (upgrading, rebooting, etc), * and such that we will start serving again as soon as possible afterwards. - * + * <p> * Any failures are logged and ignored. */ @Override @@ -244,7 +246,7 @@ public class DockerOperationsImpl implements DockerOperations { // It's bad to continue as-if nothing happened, but on the other hand if we do not proceed to // remove container, we will not be able to upgrade to fix any problems in the suspend logic! logger.warning("Failed trying to suspend container " + containerName.asString() + " with " - + Arrays.toString(SUSPEND_NODE_COMMAND), e); + + Arrays.toString(SUSPEND_NODE_COMMAND), e); } } @@ -274,10 +276,10 @@ public class DockerOperationsImpl implements DockerOperations { }); } - ProcessResult executeCommandInContainer(ContainerName containerName, String[] command) { + ProcessResult executeCommandInContainer(ContainerName containerName, String... command) { ProcessResult result = docker.executeInContainerAsRoot(containerName, command); - if (! result.isSuccess()) { + if (!result.isSuccess()) { throw new RuntimeException("Container " + containerName.asString() + ": command " + Arrays.toString(command) + " failed: " + result); } @@ -285,12 +287,17 @@ public class DockerOperationsImpl implements DockerOperations { } @Override - public ProcessResult executeCommandInContainerAsRoot(ContainerName containerName, String[] command) { + public ProcessResult executeCommandInContainerAsRoot(ContainerName containerName, Long timeoutSeconds, String... command) { + return docker.executeInContainerAsRoot(containerName, timeoutSeconds, command); + } + + @Override + public ProcessResult executeCommandInContainerAsRoot(ContainerName containerName, String... command) { return docker.executeInContainerAsRoot(containerName, command); } @Override - public void executeCommandInNetworkNamespace(ContainerName containerName, String[] command) { + public void executeCommandInNetworkNamespace(ContainerName containerName, String... command) { final PrefixLogger logger = PrefixLogger.getNodeAgentLogger(DockerOperationsImpl.class, containerName); final Integer containerPid = docker.getContainer(containerName) .filter(container -> container.state.isRunning()) @@ -366,7 +373,7 @@ public class DockerOperationsImpl implements DockerOperations { if (resultCode != 0) { throw new RuntimeException("Command " + Joiner.on(' ').join(command) + " failed: " + output); } - } catch (IOException|InterruptedException e) { + } catch (IOException | InterruptedException e) { throw new RuntimeException(e); } } diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/acl/AclMaintainer.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/acl/AclMaintainer.java index 0b44f526670..385e823dc3a 100644 --- a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/acl/AclMaintainer.java +++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/acl/AclMaintainer.java @@ -22,11 +22,11 @@ import java.util.stream.Collectors; * The responsibility of this class is to configure ACLs for all running containers. The ACLs are fetched from the Node * repository. Based on those ACLs, iptables commands are created and then executed in each of the containers network * namespace. - * + * <p> * If an ACL cannot be configured (e.g. iptables process execution fails), a rollback is attempted by setting the * default policy to ACCEPT which will allow any traffic. The configuration will be retried the next time the * maintainer runs. - * + * <p> * The ACL maintainer does not handle IPv4 addresses and is thus only intended to configure ACLs for IPv6-only * containers (e.g. any container, except node-admin). * @@ -43,7 +43,7 @@ public class AclMaintainer implements Runnable { private final Map<ContainerName, Acl> containerAcls; public AclMaintainer(DockerOperations dockerOperations, NodeRepository nodeRepository, - String nodeAdminHostname) { + String nodeAdminHostname) { this.dockerOperations = dockerOperations; this.nodeRepository = nodeRepository; this.nodeAdminHostname = nodeAdminHostname; diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImpl.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImpl.java index 2ff10560fc1..b4bfaf5c9b0 100644 --- a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImpl.java +++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImpl.java @@ -4,6 +4,7 @@ package com.yahoo.vespa.hosted.node.admin.nodeagent; import com.yahoo.vespa.hosted.dockerapi.Container; import com.yahoo.vespa.hosted.dockerapi.ContainerName; import com.yahoo.vespa.hosted.dockerapi.Docker; +import com.yahoo.vespa.hosted.dockerapi.DockerExecTimeoutException; import com.yahoo.vespa.hosted.dockerapi.DockerImage; import com.yahoo.vespa.hosted.dockerapi.metrics.Dimensions; import com.yahoo.vespa.hosted.dockerapi.metrics.MetricReceiverWrapper; @@ -80,6 +81,7 @@ public class NodeAgentImpl implements NodeAgent { RUNNING_HOWEVER_RESUME_SCRIPT_NOT_RUN, RUNNING } + private ContainerState containerState = ABSENT; // The attributes of the last successful node repo attribute update for this node. Used to avoid redundant calls. @@ -172,7 +174,7 @@ public class NodeAgentImpl implements NodeAgent { } loopThread = new Thread(() -> { - while (! terminated.get()) tick(); + while (!terminated.get()) tick(); }); loopThread.setName("tick-" + hostname); loopThread.start(); @@ -199,7 +201,7 @@ public class NodeAgentImpl implements NodeAgent { try { FilebeatConfigProvider filebeatConfigProvider = new FilebeatConfigProvider(environment); Optional<String> config = filebeatConfigProvider.getConfig(nodeSpec); - if (! config.isPresent()) { + if (!config.isPresent()) { logger.error("Was not able to generate a config for filebeat, ignoring filebeat file creation." + nodeSpec.toString()); return; } @@ -250,16 +252,16 @@ public class NodeAgentImpl implements NodeAgent { // TODO: We should only update if the new current values do not match the node repo's current values if (!currentAttributes.equals(lastAttributesSet)) { logger.info("Publishing new set of attributes to node repo: " - + lastAttributesSet + " -> " + currentAttributes); + + lastAttributesSet + " -> " + currentAttributes); addDebugMessage("Publishing new set of attributes to node repo: {" + - lastAttributesSet + "} -> {" + currentAttributes + "}"); + lastAttributesSet + "} -> {" + currentAttributes + "}"); nodeRepository.updateNodeAttributes(hostname, currentAttributes); lastAttributesSet = currentAttributes; } } private void startContainerIfNeeded(final ContainerNodeSpec nodeSpec) { - if (! getContainer().isPresent()) { + if (!getContainer().isPresent()) { aclMaintainer.ifPresent(AclMaintainer::run); dockerOperations.startContainer(containerName, nodeSpec); metricReceiver.unsetMetricsForContainer(hostname); @@ -268,7 +270,7 @@ public class NodeAgentImpl implements NodeAgent { configureContainerMetrics(nodeSpec); addDebugMessage("startContainerIfNeeded: containerState " + containerState + " -> " + - RUNNING_HOWEVER_RESUME_SCRIPT_NOT_RUN); + RUNNING_HOWEVER_RESUME_SCRIPT_NOT_RUN); containerState = RUNNING_HOWEVER_RESUME_SCRIPT_NOT_RUN; logger.info("Container successfully started, new containerState is " + containerState); } @@ -279,13 +281,13 @@ public class NodeAgentImpl implements NodeAgent { shouldRestartServices(nodeSpec).ifPresent(restartReason -> { logger.info("Will restart services for container " + existingContainer + ": " + restartReason); restartServices(nodeSpec, existingContainer); - })); + })); } private Optional<String> shouldRestartServices(ContainerNodeSpec nodeSpec) { - if ( ! nodeSpec.wantedRestartGeneration.isPresent()) return Optional.empty(); + if (!nodeSpec.wantedRestartGeneration.isPresent()) return Optional.empty(); - if (! nodeSpec.currentRestartGeneration.isPresent() || + if (!nodeSpec.currentRestartGeneration.isPresent() || nodeSpec.currentRestartGeneration.get() < nodeSpec.wantedRestartGeneration.get()) { return Optional.of("Restart requested - wanted restart generation has been bumped: " + nodeSpec.currentRestartGeneration.get() + " -> " + nodeSpec.wantedRestartGeneration.get()); @@ -317,7 +319,7 @@ public class NodeAgentImpl implements NodeAgent { } if (nodeSpec.wantedDockerImage.isPresent() && !nodeSpec.wantedDockerImage.get().equals(existingContainer.image)) { return Optional.of("The node is supposed to run a new Docker image: " - + existingContainer + " -> " + nodeSpec.wantedDockerImage.get()); + + existingContainer + " -> " + nodeSpec.wantedDockerImage.get()); } if (!existingContainer.state.isRunning()) { return Optional.of("Container no longer running"); @@ -372,7 +374,7 @@ public class NodeAgentImpl implements NodeAgent { private void signalWorkToBeDone() { synchronized (monitor) { - if (! workToDoNow) { + if (!workToDoNow) { workToDoNow = true; addDebugMessage("Signaling work to be done"); monitor.notifyAll(); @@ -383,7 +385,7 @@ public class NodeAgentImpl implements NodeAgent { void tick() { boolean isFrozenCopy; synchronized (monitor) { - while (! workToDoNow) { + while (!workToDoNow) { long remainder = delaysBetweenEachConvergeMillis - Duration.between(lastConverge, clock.instant()).toMillis(); if (remainder > 0) { try { @@ -526,7 +528,7 @@ public class NodeAgentImpl implements NodeAgent { // The remaining metrics require container to exists and be running if (containerState == ABSENT) return; Optional<Docker.ContainerStats> containerStats = dockerOperations.getContainerStats(containerName); - if ( ! containerStats.isPresent()) return; + if (!containerStats.isPresent()) return; Docker.ContainerStats stats = containerStats.get(); @@ -570,6 +572,15 @@ public class NodeAgentImpl implements NodeAgent { metricReceiver.declareGauge(MetricReceiverWrapper.APPLICATION_HOST_LIFE, dimensions, "uptime").sample(lastCpuMetric.getUptime()); metricReceiver.declareGauge(MetricReceiverWrapper.APPLICATION_HOST_LIFE, dimensions, "alive").sample(1); + + // Push metrics to the metrics proxy in each container - give it maximum 1 seconds to complete + try { + //TODO The command here is almost a dummy command until we have the proper RPC method in place + // Remember proper argument encoding + dockerOperations.executeCommandInContainerAsRoot(containerName, 1L, "sh", "-c", "'echo " + metricReceiver.toString() + "'"); + } catch (DockerExecTimeoutException e) { + logger.warning("Unable to push metrics to container: " + containerName, e); + } } @SuppressWarnings("unchecked") @@ -578,7 +589,7 @@ public class NodeAgentImpl implements NodeAgent { if (metricsMap == null || !metricsMap.containsKey(metricName)) return; try { metricReceiver.declareGauge(MetricReceiverWrapper.APPLICATION_DOCKER, dimensions, yamasName) - .sample(((Number) metricsMap.get(metricName)).doubleValue()); + .sample(((Number) metricsMap.get(metricName)).doubleValue()); } catch (Throwable e) { logger.warning("Failed to update " + yamasName + " metric with value " + metricsMap.get(metricName), e); } @@ -607,7 +618,7 @@ public class NodeAgentImpl implements NodeAgent { } private void configureContainerMetrics(ContainerNodeSpec nodeSpec) { - if (! storageMaintainer.isPresent()) return; + if (!storageMaintainer.isPresent()) return; final Path yamasAgentFolder = environment.pathInNodeAdminFromPathInNode(containerName, "/etc/yamas-agent/"); Path vespaCheckPath = Paths.get(getDefaults().underVespaHome("libexec/yms/yms_check_vespa")); @@ -633,7 +644,7 @@ public class NodeAgentImpl implements NodeAgent { try { scheduleMaker.writeTo(yamasAgentFolder); - final String[] restartYamasAgent = new String[] {"service" , "yamas-agent", "restart"}; + final String[] restartYamasAgent = new String[]{"service", "yamas-agent", "restart"}; dockerOperations.executeCommandInContainerAsRoot(containerName, restartYamasAgent); } catch (IOException e) { throw new RuntimeException("Failed to write secret-agent schedules for " + containerName, e); diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/provider/ComponentsProviderImpl.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/provider/ComponentsProviderImpl.java index 39b479c8ce8..487d1845c62 100644 --- a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/provider/ComponentsProviderImpl.java +++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/provider/ComponentsProviderImpl.java @@ -83,7 +83,7 @@ public class ComponentsProviderImpl implements ComponentsProvider { public ComponentsProviderImpl(final NodeAdminConfig config, final Docker docker, final MetricReceiverWrapper metricReceiver) { this(docker, metricReceiver, new Environment(), config.isRunningLocally()); - if (! config.isRunningLocally()) { + if (!config.isRunningLocally()) { setCorePattern(docker); initializeNodeAgentSecretAgent(docker); } diff --git a/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/docker/DockerOperationsImplTest.java b/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/docker/DockerOperationsImplTest.java index 8d867a16c05..2822926945b 100644 --- a/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/docker/DockerOperationsImplTest.java +++ b/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/docker/DockerOperationsImplTest.java @@ -40,7 +40,7 @@ public class DockerOperationsImplTest { final ContainerName containerName = new ContainerName("container-name"); final ProcessResult actualResult = new ProcessResult(0, "output", "errors"); final String programPath = "/bin/command"; - final String[] command = new String[] {programPath, "arg"}; + final String[] command = new String[]{programPath, "arg"}; when(docker.executeInContainerAsRoot(any(), anyVararg())) .thenReturn(actualResult); // output from node program @@ -56,12 +56,12 @@ public class DockerOperationsImplTest { assertThat(result, is(actualResult)); } - @Test(expected=RuntimeException.class) + @Test(expected = RuntimeException.class) public void processResultFromNodeProgramWhenNonZeroExitCode() throws Exception { final ContainerName containerName = new ContainerName("container-name"); final ProcessResult actualResult = new ProcessResult(3, "output", "errors"); final String programPath = "/bin/command"; - final String[] command = new String[] {programPath, "arg"}; + final String[] command = new String[]{programPath, "arg"}; when(docker.executeInContainerAsRoot(any(), anyVararg())) .thenReturn(actualResult); // output from node program diff --git a/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/integrationTests/DockerMock.java b/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/integrationTests/DockerMock.java index bc3ce6ce5bb..67c2de924b3 100644 --- a/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/integrationTests/DockerMock.java +++ b/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/integrationTests/DockerMock.java @@ -159,6 +159,14 @@ public class DockerMock implements Docker { return new ProcessResult(0, null, ""); } + @Override + public ProcessResult executeInContainerAsRoot(ContainerName containerName, Long timeoutSeconds, String... args) { + synchronized (monitor) { + callOrderVerifier.add("executeInContainerAsRoot with " + containerName + ", args: " + Arrays.toString(args)); + } + return new ProcessResult(0, null, ""); + } + public static class StartContainerCommandMock implements CreateContainerCommand { @Override @@ -212,10 +220,14 @@ public class DockerMock implements Docker { } @Override - public CreateContainerCommand withAddCapability(String capabilityName) {return this; } + public CreateContainerCommand withAddCapability(String capabilityName) { + return this; + } @Override - public CreateContainerCommand withDropCapability(String capabilityName) {return this; } + public CreateContainerCommand withDropCapability(String capabilityName) { + return this; + } @Override public void create() { diff --git a/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/maintenance/acl/AclMaintainerTest.java b/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/maintenance/acl/AclMaintainerTest.java index eea72619032..f3cc352a3b8 100644 --- a/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/maintenance/acl/AclMaintainerTest.java +++ b/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/maintenance/acl/AclMaintainerTest.java @@ -16,7 +16,6 @@ import java.util.List; import java.util.stream.Collectors; import java.util.stream.IntStream; -import static org.mockito.AdditionalMatchers.aryEq; import static org.mockito.Matchers.any; import static org.mockito.Matchers.eq; import static org.mockito.Mockito.doThrow; @@ -102,7 +101,9 @@ public class AclMaintainerTest { verify(dockerOperations).executeCommandInNetworkNamespace( eq(container.name), - aryEq(new String[]{"ip6tables", "-P", "INPUT", "ACCEPT"}) + eq("ip6tables"), + eq("-F"), + eq("INPUT") ); } @@ -114,40 +115,80 @@ public class AclMaintainerTest { VerificationMode verificationMode) { verify(dockerOperations, verificationMode).executeCommandInNetworkNamespace( eq(containerName), - aryEq(new String[]{"ip6tables", "-F", "INPUT"}) + eq("ip6tables"), + eq("-F"), + eq("INPUT") ); verify(dockerOperations, verificationMode).executeCommandInNetworkNamespace( eq(containerName), - aryEq(new String[]{"ip6tables", "-P", "INPUT", "DROP"}) + eq("ip6tables"), + eq("-P"), + eq("INPUT"), + eq("DROP") ); verify(dockerOperations, verificationMode).executeCommandInNetworkNamespace( eq(containerName), - aryEq(new String[]{"ip6tables", "-P", "FORWARD", "DROP"}) + eq("ip6tables"), + eq("-P"), + eq("FORWARD"), + eq("DROP") ); verify(dockerOperations, verificationMode).executeCommandInNetworkNamespace( eq(containerName), - aryEq(new String[]{"ip6tables", "-P", "OUTPUT", "ACCEPT"}) + eq("ip6tables"), + eq("-P"), + eq("OUTPUT"), + eq("ACCEPT") ); verify(dockerOperations, verificationMode).executeCommandInNetworkNamespace( eq(containerName), - aryEq(new String[]{"ip6tables", "-A", "INPUT", "-m", "state", "--state", "RELATED,ESTABLISHED", "-j", - "ACCEPT"}) + eq("ip6tables"), + eq("-A"), + eq("INPUT"), + eq("-m"), + eq("state"), + eq("--state"), + eq("RELATED,ESTABLISHED"), + eq("-j"), + eq("ACCEPT") ); verify(dockerOperations, verificationMode).executeCommandInNetworkNamespace( eq(containerName), - aryEq(new String[]{"ip6tables", "-A", "INPUT", "-i", "lo", "-j", "ACCEPT"}) + eq("ip6tables"), + eq("-A"), + eq("INPUT"), + eq("-i"), + eq("lo"), + eq("-j"), + eq("ACCEPT") ); verify(dockerOperations, verificationMode).executeCommandInNetworkNamespace( eq(containerName), - aryEq(new String[]{"ip6tables", "-A", "INPUT", "-p", "ipv6-icmp", "-j", "ACCEPT"}) + eq("ip6tables"), + eq("-A"), + eq("INPUT"), + eq("-p"), + eq("ipv6-icmp"), + eq("-j"), + eq("ACCEPT") ); containerAclSpecs.forEach(aclSpec -> verify(dockerOperations, verificationMode).executeCommandInNetworkNamespace( eq(containerName), - aryEq(new String[]{"ip6tables", "-A", "INPUT", "-s", aclSpec.ipAddress() + "/128", "-j", "ACCEPT"}) + eq("ip6tables"), + eq("-A"), + eq("INPUT"), + eq("-s"), + eq(aclSpec.ipAddress() + "/128"), + eq("-j"), + eq("ACCEPT") )); verify(dockerOperations, verificationMode).executeCommandInNetworkNamespace( eq(containerName), - aryEq(new String[]{"ip6tables", "-A", "INPUT", "-j", "REJECT"}) + eq("ip6tables"), + eq("-A"), + eq("INPUT"), + eq("-j"), + eq("REJECT") ); } |