diff options
Diffstat (limited to 'node-admin/src/main/java')
14 files changed, 245 insertions, 212 deletions
diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/docker/DockerOperationsImpl.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/docker/DockerOperationsImpl.java index f6fd8c3bd18..a4118ebe9ff 100644 --- a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/docker/DockerOperationsImpl.java +++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/docker/DockerOperationsImpl.java @@ -22,6 +22,7 @@ import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.Optional; +import java.util.regex.Pattern; import java.util.stream.Stream; import static com.yahoo.vespa.defaults.Defaults.getDefaults; @@ -39,6 +40,8 @@ public class DockerOperationsImpl implements DockerOperations { private static final String[] RESTART_VESPA_ON_NODE_COMMAND = new String[]{NODE_PROGRAM, "restart-vespa"}; private static final String[] STOP_NODE_COMMAND = new String[]{NODE_PROGRAM, "stop"}; + private static final Pattern VESPA_VERSION_PATTERN = Pattern.compile("^(\\S*)$", Pattern.MULTILINE); + private static final String MANAGER_NAME = "node-admin"; // Map of directories to mount and whether they should be writable by everyone diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeadmin/NodeAdmin.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeadmin/NodeAdmin.java index cf70963eee1..e02f81e8f30 100644 --- a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeadmin/NodeAdmin.java +++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeadmin/NodeAdmin.java @@ -62,5 +62,5 @@ public interface NodeAdmin { /** * Stop the NodeAgent. Will not delete the storage or stop the container. */ - void stop(); + void shutdown(); } diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeadmin/NodeAdminImpl.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeadmin/NodeAdminImpl.java index f227a166034..e39abb47788 100644 --- a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeadmin/NodeAdminImpl.java +++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeadmin/NodeAdminImpl.java @@ -55,6 +55,8 @@ public class NodeAdminImpl implements NodeAdmin { private final Map<ContainerName, NodeAgent> nodeAgents = new ConcurrentHashMap<>(); + private final int nodeAgentScanIntervalMillis; + private final GaugeWrapper numberOfContainersInLoadImageState; private final CounterWrapper numberOfUnhandledExceptionsInNodeAgent; @@ -62,11 +64,13 @@ public class NodeAdminImpl implements NodeAdmin { final Function<String, NodeAgent> nodeAgentFactory, final StorageMaintainer storageMaintainer, final AclMaintainer aclMaintainer, + final int nodeAgentScanIntervalMillis, final MetricReceiverWrapper metricReceiver, final Clock clock) { this.dockerOperations = dockerOperations; this.nodeAgentFactory = nodeAgentFactory; this.storageMaintainer = storageMaintainer; + this.nodeAgentScanIntervalMillis = nodeAgentScanIntervalMillis; this.clock = clock; this.previousWantFrozen = true; @@ -179,21 +183,24 @@ public class NodeAdminImpl implements NodeAdmin { } @Override - public void stop() { + public void shutdown() { metricsScheduler.shutdown(); aclScheduler.shutdown(); - - // Stop all node-agents in parallel, will block until the last NodeAgent is stopped - nodeAgents.values().parallelStream().forEach(NodeAgent::stop); - - do { - try { - metricsScheduler.awaitTermination(Long.MAX_VALUE, TimeUnit.NANOSECONDS); - aclScheduler.awaitTermination(Long.MAX_VALUE, TimeUnit.NANOSECONDS); - } catch (InterruptedException e) { - logger.info("Was interrupted while waiting for metricsScheduler and aclScheduler to shutdown"); + try { + boolean metricsSchedulerShutdown = metricsScheduler.awaitTermination(30, TimeUnit.SECONDS); + boolean aclSchedulerShutdown = aclScheduler.awaitTermination(30, TimeUnit.SECONDS); + if (! (metricsSchedulerShutdown && aclSchedulerShutdown)) { + throw new RuntimeException("Failed shutting down all scheduler(s), shutdown status:\n" + + "\tMetrics Scheduler: " + metricsSchedulerShutdown + "\n" + + "\tACL Scheduler: " + aclSchedulerShutdown); } - } while (!metricsScheduler.isTerminated() || !aclScheduler.isTerminated()); + } catch (InterruptedException e) { + throw new RuntimeException(e); + } + + for (NodeAgent nodeAgent : nodeAgents.values()) { + nodeAgent.stop(); + } } // Set-difference. Returns minuend minus subtrahend. @@ -250,7 +257,7 @@ public class NodeAdminImpl implements NodeAdmin { } final NodeAgent agent = nodeAgentFactory.apply(hostname); - agent.start(); + agent.start(nodeAgentScanIntervalMillis); nodeAgents.put(containerName, agent); try { Thread.sleep(1000); diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeadmin/NodeAdminStateUpdater.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeadmin/NodeAdminStateUpdater.java index d1f23b13e6c..a848dae9388 100644 --- a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeadmin/NodeAdminStateUpdater.java +++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeadmin/NodeAdminStateUpdater.java @@ -1,6 +1,7 @@ // Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. package com.yahoo.vespa.hosted.node.admin.nodeadmin; +import com.yahoo.component.AbstractComponent; import com.yahoo.concurrent.ThreadFactoryFactory; import com.yahoo.log.LogLevel; import com.yahoo.vespa.hosted.node.admin.ContainerNodeSpec; @@ -35,7 +36,7 @@ import static com.yahoo.vespa.hosted.node.admin.nodeadmin.NodeAdminStateUpdater. * * @author dybis, stiankri */ -public class NodeAdminStateUpdater { +public class NodeAdminStateUpdater extends AbstractComponent { static final Duration FREEZE_CONVERGENCE_TIMEOUT = Duration.ofMinutes(5); private final AtomicBoolean terminated = new AtomicBoolean(false); @@ -51,31 +52,35 @@ public class NodeAdminStateUpdater { private Thread loopThread; private final NodeRepository nodeRepository; - private final Orchestrator orchestrator; - private final StorageMaintainer storageMaintainer; private final NodeAdmin nodeAdmin; private final Clock clock; + private final Orchestrator orchestrator; private final String dockerHostHostName; - private final Duration nodeAdminConvergeStateInterval; + private long delaysBetweenEachTickMillis = 30_000; private Instant lastTick; public NodeAdminStateUpdater( - NodeRepository nodeRepository, - Orchestrator orchestrator, + final NodeRepository nodeRepository, + final NodeAdmin nodeAdmin, StorageMaintainer storageMaintainer, - NodeAdmin nodeAdmin, - String dockerHostHostName, Clock clock, - Duration nodeAdminConvergeStateInterval) { + Orchestrator orchestrator, + String dockerHostHostName) { + log.log(LogLevel.INFO, objectToString() + ": Creating object"); this.nodeRepository = nodeRepository; - this.orchestrator = orchestrator; - this.storageMaintainer = storageMaintainer; this.nodeAdmin = nodeAdmin; - this.dockerHostHostName = dockerHostHostName; this.clock = clock; - this.nodeAdminConvergeStateInterval = nodeAdminConvergeStateInterval; + this.orchestrator = orchestrator; + this.dockerHostHostName = dockerHostHostName; this.lastTick = clock.instant(); + + specVerifierScheduler.scheduleWithFixedDelay(() -> + updateHardwareDivergence(storageMaintainer), 5, 60, TimeUnit.MINUTES); + } + + private String objectToString() { + return this.getClass().getSimpleName() + "@" + Integer.toString(System.identityHashCode(this)); } public enum State { RESUMED, SUSPENDED_NODE_ADMIN, SUSPENDED} @@ -128,8 +133,7 @@ public class NodeAdminStateUpdater { State wantedStateCopy; synchronized (monitor) { while (! workToDoNow) { - Duration timeSinceLastConverge = Duration.between(lastTick, clock.instant()); - long remainder = nodeAdminConvergeStateInterval.minus(timeSinceLastConverge).toMillis(); + long remainder = delaysBetweenEachTickMillis - Duration.between(lastTick, clock.instant()).toMillis(); if (remainder > 0) { try { monitor.wait(remainder); @@ -227,7 +231,7 @@ public class NodeAdminStateUpdater { } final List<ContainerNodeSpec> containersToRun; try { - containersToRun = nodeRepository.getContainersToRun(dockerHostHostName); + containersToRun = nodeRepository.getContainersToRun(); } catch (Exception e) { log.log(LogLevel.WARNING, "Failed fetching container info from node repository", e); return; @@ -246,7 +250,7 @@ public class NodeAdminStateUpdater { private List<String> getNodesInActiveState() { try { - return nodeRepository.getContainersToRun(dockerHostHostName) + return nodeRepository.getContainersToRun() .stream() .filter(nodespec -> nodespec.nodeState == Node.State.active) .map(nodespec -> nodespec.hostname) @@ -256,7 +260,8 @@ public class NodeAdminStateUpdater { } } - public void start() { + public void start(long stateConvergeInterval) { + delaysBetweenEachTickMillis = stateConvergeInterval; if (loopThread != null) { throw new RuntimeException("Can not restart NodeAdminStateUpdater"); } @@ -266,30 +271,24 @@ public class NodeAdminStateUpdater { }); loopThread.setName("tick-NodeAdminStateUpdater"); loopThread.start(); - - specVerifierScheduler.scheduleWithFixedDelay(() -> - updateHardwareDivergence(storageMaintainer), 5, 60, TimeUnit.MINUTES); } - public void stop() { - specVerifierScheduler.shutdown(); + @Override + public void deconstruct() { if (!terminated.compareAndSet(false, true)) { throw new RuntimeException("Can not re-stop a node agent."); } - - // First we need to stop NodeAdminStateUpdater thread to make sure no new NodeAgents are spawned + log.log(LogLevel.INFO, objectToString() + ": Deconstruct called"); signalWorkToBeDone(); - - do { - try { - loopThread.join(); - specVerifierScheduler.awaitTermination(Long.MAX_VALUE, TimeUnit.NANOSECONDS); - } catch (InterruptedException e1) { - log.info("Interrupted while waiting for NodeAdminStateUpdater thread and specVerfierScheduler to shutdown"); + try { + loopThread.join(10000); + if (loopThread.isAlive()) { + log.log(LogLevel.ERROR, "Could not stop tick thread"); } - } while (loopThread.isAlive() || !specVerifierScheduler.isTerminated()); - - // Finally, stop NodeAdmin and all the NodeAgents - nodeAdmin.stop(); + } catch (InterruptedException e1) { + log.log(LogLevel.ERROR, "Interrupted; Could not stop thread"); + } + nodeAdmin.shutdown(); + log.log(LogLevel.INFO, objectToString() + ": Deconstruct complete"); } } diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgent.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgent.java index 92c44969d5e..5d31c10fcc1 100644 --- a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgent.java +++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgent.java @@ -29,7 +29,7 @@ public interface NodeAgent { * Starts the agent. After this method is called, the agent will asynchronously maintain the node, continuously * striving to make the current state equal to the wanted state. */ - void start(); + void start(int intervalMillis); /** * Signals to the agent that the node is at the end of its lifecycle and no longer needs a managing agent. diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImpl.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImpl.java index 77348a9dc45..6ea65be6799 100644 --- a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImpl.java +++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImpl.java @@ -64,20 +64,20 @@ public class NodeAgentImpl implements NodeAgent { private final PrefixLogger logger; private DockerImage imageBeingDownloaded = null; - private final ContainerName containerName; private final String hostname; + private final ContainerName containerName; private final NodeRepository nodeRepository; private final Orchestrator orchestrator; private final DockerOperations dockerOperations; private final StorageMaintainer storageMaintainer; - private final AclMaintainer aclMaintainer; private final Environment environment; private final Clock clock; - private final Duration timeBetweenEachConverge; + private final AclMaintainer aclMaintainer; private final SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss"); private final LinkedList<String> debugMessages = new LinkedList<>(); + private long delaysBetweenEachConvergeMillis = 30_000; private int numberOfUnhandledException = 0; private Instant lastConverge; @@ -85,7 +85,7 @@ public class NodeAgentImpl implements NodeAgent { private final ScheduledExecutorService filebeatRestarter = Executors.newScheduledThreadPool(1, ThreadFactoryFactory.getDaemonThreadFactory("filebeatrestarter")); - private Consumer<String> serviceRestarter; + private final Consumer<String> serviceRestarter; private Future<?> currentFilebeatRestarter; private boolean resumeScriptRun = false; @@ -117,8 +117,7 @@ public class NodeAgentImpl implements NodeAgent { final StorageMaintainer storageMaintainer, final AclMaintainer aclMaintainer, final Environment environment, - final Clock clock, - final Duration timeBetweenEachConverge) { + final Clock clock) { this.containerName = ContainerName.fromHostname(hostName); this.logger = PrefixLogger.getNodeAgentLogger(NodeAgentImpl.class, containerName); this.hostname = hostName; @@ -129,8 +128,19 @@ public class NodeAgentImpl implements NodeAgent { this.aclMaintainer = aclMaintainer; this.environment = environment; this.clock = clock; - this.timeBetweenEachConverge = timeBetweenEachConverge; this.lastConverge = clock.instant(); + this.serviceRestarter = service -> { + try { + ProcessResult processResult = dockerOperations.executeCommandInContainerAsRoot( + containerName, "service", service, "restart"); + + if (!processResult.isSuccess()) { + logger.error("Failed to restart service " + service + ": " + processResult); + } + } catch (Exception e) { + logger.error("Failed to restart service " + service, e); + } + }; } @Override @@ -173,11 +183,11 @@ public class NodeAgentImpl implements NodeAgent { } @Override - public void start() { - String message = "Starting with interval " + timeBetweenEachConverge.toMillis() + " ms"; + public void start(int intervalMillis) { + String message = "Starting with interval " + intervalMillis + " ms"; logger.info(message); addDebugMessage(message); - + delaysBetweenEachConvergeMillis = intervalMillis; if (loopThread != null) { throw new RuntimeException("Can not restart a node agent."); } @@ -187,19 +197,6 @@ public class NodeAgentImpl implements NodeAgent { }); loopThread.setName("tick-" + hostname); loopThread.start(); - - serviceRestarter = service -> { - try { - ProcessResult processResult = dockerOperations.executeCommandInContainerAsRoot( - containerName, "service", service, "restart"); - - if (!processResult.isSuccess()) { - logger.error("Failed to restart service " + service + ": " + processResult); - } - } catch (Exception e) { - logger.error("Failed to restart service " + service, e); - } - }; } @Override @@ -210,15 +207,19 @@ public class NodeAgentImpl implements NodeAgent { throw new RuntimeException("Can not re-stop a node agent."); } signalWorkToBeDone(); - - do { - try { - loopThread.join(); - filebeatRestarter.awaitTermination(Long.MAX_VALUE, TimeUnit.NANOSECONDS); - } catch (InterruptedException e) { - logger.error("Interrupted while waiting for converge thread and filebeatRestarter scheduler to shutdown"); + try { + loopThread.join(10000); + if (loopThread.isAlive()) { + logger.error("Could not stop host thread " + hostname); } - } while (loopThread.isAlive() || !filebeatRestarter.isTerminated()); + } catch (InterruptedException e1) { + logger.error("Interrupted; Could not stop host thread " + hostname); + } + try { + filebeatRestarter.awaitTermination(10, TimeUnit.SECONDS); + } catch (InterruptedException e) { + logger.error("Interrupted; Could not stop filebeatrestarter thread"); + } logger.info("Stopped"); } @@ -374,7 +375,7 @@ public class NodeAgentImpl implements NodeAgent { boolean isFrozenCopy; synchronized (monitor) { while (!workToDoNow) { - long remainder = timeBetweenEachConverge.minus(Duration.between(lastConverge, clock.instant())).toMillis(); + long remainder = delaysBetweenEachConvergeMillis - Duration.between(lastConverge, clock.instant()).toMillis(); if (remainder > 0) { try { monitor.wait(remainder); diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/noderepository/NodeRepository.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/noderepository/NodeRepository.java index 9f4c6916b48..d14cd2f1330 100644 --- a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/noderepository/NodeRepository.java +++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/noderepository/NodeRepository.java @@ -13,7 +13,7 @@ import java.util.Optional; * @author stiankri */ public interface NodeRepository { - List<ContainerNodeSpec> getContainersToRun(String baseHostName) throws IOException; + List<ContainerNodeSpec> getContainersToRun() throws IOException; Optional<ContainerNodeSpec> getContainerNodeSpec(String hostName); diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/noderepository/NodeRepositoryImpl.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/noderepository/NodeRepositoryImpl.java index 08957a489b6..7d73d05ca36 100644 --- a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/noderepository/NodeRepositoryImpl.java +++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/noderepository/NodeRepositoryImpl.java @@ -28,17 +28,18 @@ import java.util.stream.Collectors; */ public class NodeRepositoryImpl implements NodeRepository { private static final PrefixLogger NODE_ADMIN_LOGGER = PrefixLogger.getNodeAdminLogger(NodeRepositoryImpl.class); - - private final ConfigServerHttpRequestExecutor requestExecutor; + private final String baseHostName; private final int port; + private final ConfigServerHttpRequestExecutor requestExecutor; - public NodeRepositoryImpl(ConfigServerHttpRequestExecutor requestExecutor, int port) { + public NodeRepositoryImpl(ConfigServerHttpRequestExecutor requestExecutor, int configPort, String baseHostName) { + this.baseHostName = baseHostName; + this.port = configPort; this.requestExecutor = requestExecutor; - this.port = port; } @Override - public List<ContainerNodeSpec> getContainersToRun(String baseHostName) throws IOException { + public List<ContainerNodeSpec> getContainersToRun() throws IOException { try { final GetNodesResponse nodesForHost = requestExecutor.get( "/nodes/v2/node/?parentHost=" + baseHostName + "&recursive=true", diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/orchestrator/OrchestratorImpl.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/orchestrator/OrchestratorImpl.java index bd9df486e7b..5117a1bb079 100644 --- a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/orchestrator/OrchestratorImpl.java +++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/orchestrator/OrchestratorImpl.java @@ -1,6 +1,8 @@ // Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. package com.yahoo.vespa.hosted.node.admin.orchestrator; +import com.yahoo.vespa.defaults.Defaults; + import com.yahoo.vespa.hosted.node.admin.util.ConfigServerHttpRequestExecutor; import com.yahoo.vespa.orchestrator.restapi.HostApi; @@ -17,6 +19,7 @@ import java.util.Optional; * @author dybis */ public class OrchestratorImpl implements Orchestrator { + static final int WEB_SERVICE_PORT = Defaults.getDefaults().vespaWebServicePort(); // TODO: Find a way to avoid duplicating this (present in orchestrator's services.xml also). private static final String ORCHESTRATOR_PATH_PREFIX = "/orchestrator"; static final String ORCHESTRATOR_PATH_PREFIX_HOST_API @@ -25,11 +28,9 @@ public class OrchestratorImpl implements Orchestrator { = ORCHESTRATOR_PATH_PREFIX + HostSuspensionApi.PATH_PREFIX; private final ConfigServerHttpRequestExecutor requestExecutor; - private final int port; - public OrchestratorImpl(ConfigServerHttpRequestExecutor requestExecutor, int port) { + public OrchestratorImpl(ConfigServerHttpRequestExecutor requestExecutor) { this.requestExecutor = requestExecutor; - this.port = port; } @Override @@ -37,7 +38,7 @@ public class OrchestratorImpl implements Orchestrator { UpdateHostResponse response; try { response = requestExecutor.put(getSuspendPath(hostName), - port, + WEB_SERVICE_PORT, Optional.empty(), /* body */ UpdateHostResponse.class); } catch (ConfigServerHttpRequestExecutor.NotFoundException n) { @@ -57,7 +58,7 @@ public class OrchestratorImpl implements Orchestrator { try { batchOperationResult = requestExecutor.put( ORCHESTRATOR_PATH_PREFIX_HOST_SUSPENSION_API, - port, + WEB_SERVICE_PORT, Optional.of(new BatchHostSuspendRequest(parentHostName, hostNames)), BatchOperationResult.class); } catch (Exception e) { @@ -74,7 +75,7 @@ public class OrchestratorImpl implements Orchestrator { UpdateHostResponse response; try { String path = getSuspendPath(hostName); - response = requestExecutor.delete(path, port, UpdateHostResponse.class); + response = requestExecutor.delete(path, WEB_SERVICE_PORT, UpdateHostResponse.class); } catch (ConfigServerHttpRequestExecutor.NotFoundException n) { throw new OrchestratorNotFoundException("Failed to resume " + hostName + ", host not found"); } catch (Exception e) { diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/provider/ComponentsProvider.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/provider/ComponentsProvider.java new file mode 100644 index 00000000000..3211e4feb56 --- /dev/null +++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/provider/ComponentsProvider.java @@ -0,0 +1,16 @@ +// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.vespa.hosted.node.admin.provider; + +import com.yahoo.vespa.hosted.dockerapi.metrics.MetricReceiverWrapper; +import com.yahoo.vespa.hosted.node.admin.nodeadmin.NodeAdminStateUpdater; + +/** + * Class for setting up instances of classes; enables testing. + * + * @author dybis + */ +public interface ComponentsProvider { + NodeAdminStateUpdater getNodeAdminStateUpdater(); + + MetricReceiverWrapper getMetricReceiverWrapper(); +} diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/provider/ComponentsProviderImpl.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/provider/ComponentsProviderImpl.java new file mode 100644 index 00000000000..98d7593ef69 --- /dev/null +++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/provider/ComponentsProviderImpl.java @@ -0,0 +1,118 @@ +// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.vespa.hosted.node.admin.provider; + +import com.google.inject.Inject; +import com.yahoo.net.HostName; +import static com.yahoo.vespa.defaults.Defaults.getDefaults; + +import com.yahoo.system.ProcessExecuter; +import com.yahoo.vespa.hosted.dockerapi.ContainerName; +import com.yahoo.vespa.hosted.dockerapi.Docker; +import com.yahoo.vespa.hosted.dockerapi.metrics.MetricReceiverWrapper; +import com.yahoo.vespa.hosted.node.admin.docker.DockerOperations; +import com.yahoo.vespa.hosted.node.admin.maintenance.acl.AclMaintainer; +import com.yahoo.vespa.hosted.node.admin.maintenance.StorageMaintainer; +import com.yahoo.vespa.hosted.node.admin.nodeadmin.NodeAdmin; +import com.yahoo.vespa.hosted.node.admin.nodeadmin.NodeAdminImpl; +import com.yahoo.vespa.hosted.node.admin.nodeadmin.NodeAdminStateUpdater; +import com.yahoo.vespa.hosted.node.admin.nodeagent.NodeAgent; +import com.yahoo.vespa.hosted.node.admin.nodeagent.NodeAgentImpl; +import com.yahoo.vespa.hosted.node.admin.docker.DockerOperationsImpl; +import com.yahoo.vespa.hosted.node.admin.noderepository.NodeRepository; +import com.yahoo.vespa.hosted.node.admin.noderepository.NodeRepositoryImpl; +import com.yahoo.vespa.hosted.node.admin.orchestrator.Orchestrator; +import com.yahoo.vespa.hosted.node.admin.orchestrator.OrchestratorImpl; +import com.yahoo.vespa.hosted.node.admin.util.ConfigServerHttpRequestExecutor; +import com.yahoo.vespa.hosted.node.admin.util.Environment; +import com.yahoo.vespa.hosted.node.admin.util.SecretAgentScheduleMaker; + +import java.io.IOException; +import java.nio.file.Path; +import java.nio.file.Paths; +import java.time.Clock; +import java.util.Set; +import java.util.function.Function; + +/** + * Set up node admin for production. + * + * @author dybis + */ +public class ComponentsProviderImpl implements ComponentsProvider { + private static final ContainerName NODE_ADMIN_CONTAINER_NAME = new ContainerName("node-admin"); + + private final NodeAdminStateUpdater nodeAdminStateUpdater; + private final MetricReceiverWrapper metricReceiverWrapper; + + private static final int NODE_AGENT_SCAN_INTERVAL_MILLIS = 30000; + private static final int WEB_SERVICE_PORT = getDefaults().vespaWebServicePort(); + + // Converge towards desired node admin state every 30 seconds + private static final int NODE_ADMIN_CONVERGE_STATE_INTERVAL_MILLIS = 30000; + + @Inject + public ComponentsProviderImpl(Docker docker, MetricReceiverWrapper metricReceiver) { + String baseHostName = HostName.getLocalhost(); + Environment environment = new Environment(); + Set<String> configServerHosts = environment.getConfigServerHosts(); + if (configServerHosts.isEmpty()) { + throw new IllegalStateException("Environment setting for config servers missing or empty."); + } + + Clock clock = Clock.systemUTC(); + ProcessExecuter processExecuter = new ProcessExecuter(); + ConfigServerHttpRequestExecutor requestExecutor = ConfigServerHttpRequestExecutor.create(configServerHosts); + Orchestrator orchestrator = new OrchestratorImpl(requestExecutor); + NodeRepository nodeRepository = new NodeRepositoryImpl(requestExecutor, WEB_SERVICE_PORT, baseHostName); + DockerOperations dockerOperations = new DockerOperationsImpl(docker, environment, processExecuter); + + StorageMaintainer storageMaintainer = new StorageMaintainer(docker, processExecuter, metricReceiver, environment, clock); + AclMaintainer aclMaintainer = new AclMaintainer(dockerOperations, nodeRepository, baseHostName); + + Function<String, NodeAgent> nodeAgentFactory = + (hostName) -> new NodeAgentImpl(hostName, nodeRepository, orchestrator, dockerOperations, + storageMaintainer, aclMaintainer, environment, clock); + NodeAdmin nodeAdmin = new NodeAdminImpl(dockerOperations, nodeAgentFactory, storageMaintainer, aclMaintainer, + NODE_AGENT_SCAN_INTERVAL_MILLIS, metricReceiver, clock); + nodeAdminStateUpdater = new NodeAdminStateUpdater(nodeRepository, nodeAdmin, storageMaintainer, clock, orchestrator, baseHostName); + nodeAdminStateUpdater.start(NODE_ADMIN_CONVERGE_STATE_INTERVAL_MILLIS); + + metricReceiverWrapper = metricReceiver; + + setCorePattern(docker); + initializeNodeAgentSecretAgent(docker); + } + + @Override + public NodeAdminStateUpdater getNodeAdminStateUpdater() { + return nodeAdminStateUpdater; + } + + @Override + public MetricReceiverWrapper getMetricReceiverWrapper() { + return metricReceiverWrapper; + } + + + private void setCorePattern(Docker docker) { + final String[] sysctlCorePattern = {"sysctl", "-w", "kernel.core_pattern=" + + getDefaults().underVespaHome("var/crash/%e.core.%p")}; + docker.executeInContainerAsRoot(NODE_ADMIN_CONTAINER_NAME, sysctlCorePattern); + } + + private void initializeNodeAgentSecretAgent(Docker docker) { + final Path yamasAgentFolder = Paths.get("/etc/yamas-agent/"); + docker.executeInContainerAsRoot(NODE_ADMIN_CONTAINER_NAME, "chmod", "a+w", yamasAgentFolder.toString()); + + Path nodeAdminCheckPath = Paths.get("/usr/bin/curl"); + SecretAgentScheduleMaker scheduleMaker = new SecretAgentScheduleMaker("node-admin", 60, nodeAdminCheckPath, + "localhost:4080/rest/metrics"); + + try { + scheduleMaker.writeTo(yamasAgentFolder); + docker.executeInContainerAsRoot(NODE_ADMIN_CONTAINER_NAME, "service", "yamas-agent", "restart"); + } catch (IOException e) { + throw new RuntimeException("Failed to write secret-agent schedules for node-admin", e); + } + } +} diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/provider/NodeAdminProvider.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/provider/NodeAdminProvider.java deleted file mode 100644 index 5536ee1551b..00000000000 --- a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/provider/NodeAdminProvider.java +++ /dev/null @@ -1,105 +0,0 @@ -// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -package com.yahoo.vespa.hosted.node.admin.provider; - -import com.google.inject.Inject; -import com.yahoo.concurrent.lock.Lock; -import com.yahoo.concurrent.lock.Locking; -import com.yahoo.container.di.componentgraph.Provider; -import com.yahoo.log.LogLevel; -import com.yahoo.net.HostName; - -import com.yahoo.system.ProcessExecuter; -import com.yahoo.vespa.hosted.dockerapi.Docker; -import com.yahoo.vespa.hosted.dockerapi.metrics.MetricReceiverWrapper; -import com.yahoo.vespa.hosted.node.admin.docker.DockerOperations; -import com.yahoo.vespa.hosted.node.admin.maintenance.acl.AclMaintainer; -import com.yahoo.vespa.hosted.node.admin.maintenance.StorageMaintainer; -import com.yahoo.vespa.hosted.node.admin.nodeadmin.NodeAdmin; -import com.yahoo.vespa.hosted.node.admin.nodeadmin.NodeAdminImpl; -import com.yahoo.vespa.hosted.node.admin.nodeadmin.NodeAdminStateUpdater; -import com.yahoo.vespa.hosted.node.admin.nodeagent.NodeAgent; -import com.yahoo.vespa.hosted.node.admin.nodeagent.NodeAgentImpl; -import com.yahoo.vespa.hosted.node.admin.docker.DockerOperationsImpl; -import com.yahoo.vespa.hosted.node.admin.noderepository.NodeRepository; -import com.yahoo.vespa.hosted.node.admin.noderepository.NodeRepositoryImpl; -import com.yahoo.vespa.hosted.node.admin.orchestrator.Orchestrator; -import com.yahoo.vespa.hosted.node.admin.orchestrator.OrchestratorImpl; -import com.yahoo.vespa.hosted.node.admin.util.ConfigServerHttpRequestExecutor; -import com.yahoo.vespa.hosted.node.admin.util.Environment; - -import java.time.Clock; -import java.time.Duration; -import java.util.function.Function; -import java.util.logging.Logger; - -import static com.yahoo.vespa.defaults.Defaults.getDefaults; - -/** - * Set up node admin for production. - * - * @author dybis - */ -public class NodeAdminProvider implements Provider<NodeAdminStateUpdater> { - private static final int WEB_SERVICE_PORT = getDefaults().vespaWebServicePort(); - private static final Duration NODE_AGENT_SCAN_INTERVAL = Duration.ofSeconds(30); - private static final Duration NODE_ADMIN_CONVERGE_STATE_INTERVAL = Duration.ofSeconds(30); - - private final Logger log = Logger.getLogger(NodeAdminProvider.class.getName()); - private final NodeAdminStateUpdater nodeAdminStateUpdater; - private final Lock classLock; - - @Inject - public NodeAdminProvider(Docker docker, MetricReceiverWrapper metricReceiver, Locking locking) { - log.log(LogLevel.INFO, objectToString() + ": Creating object, acquiring lock..."); - classLock = locking.lock(this.getClass()); - try { - log.log(LogLevel.INFO, objectToString() + ": Lock acquired"); - - Clock clock = Clock.systemUTC(); - String dockerHostHostName = HostName.getLocalhost(); - ProcessExecuter processExecuter = new ProcessExecuter(); - Environment environment = new Environment(); - - ConfigServerHttpRequestExecutor requestExecutor = ConfigServerHttpRequestExecutor.create(environment.getConfigServerHosts()); - NodeRepository nodeRepository = new NodeRepositoryImpl(requestExecutor, WEB_SERVICE_PORT); - Orchestrator orchestrator = new OrchestratorImpl(requestExecutor, WEB_SERVICE_PORT); - DockerOperations dockerOperations = new DockerOperationsImpl(docker, environment, processExecuter); - - StorageMaintainer storageMaintainer = new StorageMaintainer(docker, processExecuter, metricReceiver, environment, clock); - AclMaintainer aclMaintainer = new AclMaintainer(dockerOperations, nodeRepository, dockerHostHostName); - - Function<String, NodeAgent> nodeAgentFactory = - (hostName) -> new NodeAgentImpl(hostName, nodeRepository, orchestrator, dockerOperations, - storageMaintainer, aclMaintainer, environment, clock, NODE_AGENT_SCAN_INTERVAL); - NodeAdmin nodeAdmin = new NodeAdminImpl(dockerOperations, nodeAgentFactory, storageMaintainer, aclMaintainer, - metricReceiver, clock); - - nodeAdminStateUpdater = new NodeAdminStateUpdater(nodeRepository, orchestrator, storageMaintainer, nodeAdmin, - dockerHostHostName, clock, NODE_ADMIN_CONVERGE_STATE_INTERVAL); - nodeAdminStateUpdater.start(); - } catch (Exception e) { - classLock.close(); - throw e; - } - } - - @Override - public NodeAdminStateUpdater get() { - return nodeAdminStateUpdater; - } - - @Override - public void deconstruct() { - log.log(LogLevel.INFO, objectToString() + ": Stop called"); - - nodeAdminStateUpdater.stop(); - log.log(LogLevel.INFO, objectToString() + ": Stop complete"); - - classLock.close(); - log.log(LogLevel.INFO, objectToString() + ": Lock released"); - } - - private String objectToString() { - return this.getClass().getSimpleName() + "@" + Integer.toString(System.identityHashCode(this)); - } -} diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/restapi/RestApiHandler.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/restapi/RestApiHandler.java index adfb937b8d7..ff6ac9ce1e7 100644 --- a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/restapi/RestApiHandler.java +++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/restapi/RestApiHandler.java @@ -3,7 +3,6 @@ package com.yahoo.vespa.hosted.node.admin.restapi; import com.fasterxml.jackson.databind.ObjectMapper; import com.fasterxml.jackson.databind.node.ObjectNode; -import com.yahoo.container.di.componentgraph.Provider; import com.yahoo.container.jdisc.HttpRequest; import com.yahoo.container.jdisc.HttpResponse; import com.yahoo.container.jdisc.LoggingRequestHandler; @@ -11,8 +10,8 @@ import com.yahoo.container.logging.AccessLog; import com.yahoo.vespa.hosted.dockerapi.metrics.DimensionMetrics; import com.yahoo.vespa.hosted.dockerapi.metrics.MetricReceiverWrapper; import com.yahoo.vespa.hosted.node.admin.nodeadmin.NodeAdminStateUpdater; +import com.yahoo.vespa.hosted.node.admin.provider.ComponentsProvider; -import javax.inject.Inject; import javax.ws.rs.core.MediaType; import java.io.IOException; import java.io.OutputStream; @@ -37,13 +36,10 @@ public class RestApiHandler extends LoggingRequestHandler{ private final NodeAdminStateUpdater refresher; private final MetricReceiverWrapper metricReceiverWrapper; - @Inject - public RestApiHandler(Executor executor, AccessLog accessLog, - Provider<NodeAdminStateUpdater> componentsProvider, - MetricReceiverWrapper metricReceiverWrapper) { + public RestApiHandler(Executor executor, AccessLog accessLog, ComponentsProvider componentsProvider) { super(executor, accessLog); - this.refresher = componentsProvider.get(); - this.metricReceiverWrapper = metricReceiverWrapper; + this.refresher = componentsProvider.getNodeAdminStateUpdater(); + this.metricReceiverWrapper = componentsProvider.getMetricReceiverWrapper(); } @Override diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/util/ConfigServerHttpRequestExecutor.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/util/ConfigServerHttpRequestExecutor.java index 9c8dc198388..4434213989f 100644 --- a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/util/ConfigServerHttpRequestExecutor.java +++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/util/ConfigServerHttpRequestExecutor.java @@ -52,10 +52,6 @@ public class ConfigServerHttpRequestExecutor { } public static ConfigServerHttpRequestExecutor create(Set<String> configServerHosts) { - if (configServerHosts.isEmpty()) { - throw new IllegalStateException("Environment setting for config servers missing or empty."); - } - PoolingHttpClientConnectionManager cm = new PoolingHttpClientConnectionManager(); // Increase max total connections to 200, which should be enough cm.setMaxTotal(200); |