summary refs log tree commit diff stats
path: root/node-admin/src/main/java/com/yahoo
diff options
context:
space:
mode:
Diffstat (limited to 'node-admin/src/main/java/com/yahoo')
-rw-r--r-- node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/docker/DockerOperationsImpl.java 3
-rw-r--r-- node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeadmin/NodeAdmin.java 2
-rw-r--r-- node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeadmin/NodeAdminImpl.java 33
-rw-r--r-- node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeadmin/NodeAdminStateUpdater.java 71
-rw-r--r-- node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgent.java 2
-rw-r--r-- node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImpl.java 65
-rw-r--r-- node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/noderepository/NodeRepository.java 2
-rw-r--r-- node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/noderepository/NodeRepositoryImpl.java 11
-rw-r--r-- node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/orchestrator/OrchestratorImpl.java 13
-rw-r--r-- node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/provider/ComponentsProvider.java 16
-rw-r--r-- node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/provider/ComponentsProviderImpl.java 118
-rw-r--r-- node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/provider/NodeAdminProvider.java 105
-rw-r--r-- node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/restapi/RestApiHandler.java 12
-rw-r--r-- node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/util/ConfigServerHttpRequestExecutor.java 4
14 files changed, 245 insertions, 212 deletions
diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/docker/DockerOperationsImpl.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/docker/DockerOperationsImpl.java
index f6fd8c3bd18..a4118ebe9ff 100644
--- a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/docker/DockerOperationsImpl.java
+++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/docker/DockerOperationsImpl.java
@@ -22,6 +22,7 @@ import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Optional;
+import java.util.regex.Pattern;
import java.util.stream.Stream;
import static com.yahoo.vespa.defaults.Defaults.getDefaults;
@@ -39,6 +40,8 @@ public class DockerOperationsImpl implements DockerOperations {
private static final String[] RESTART_VESPA_ON_NODE_COMMAND = new String[]{NODE_PROGRAM, "restart-vespa"};
private static final String[] STOP_NODE_COMMAND = new String[]{NODE_PROGRAM, "stop"};
+ private static final Pattern VESPA_VERSION_PATTERN = Pattern.compile("^(\\S*)$", Pattern.MULTILINE);
+
private static final String MANAGER_NAME = "node-admin";
// Map of directories to mount and whether they should be writable by everyone
diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeadmin/NodeAdmin.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeadmin/NodeAdmin.java
index cf70963eee1..e02f81e8f30 100644
--- a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeadmin/NodeAdmin.java
+++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeadmin/NodeAdmin.java
@@ -62,5 +62,5 @@ public interface NodeAdmin {
/**
* Stop the NodeAgent. Will not delete the storage or stop the container.
*/
- void stop();
+ void shutdown();
}
diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeadmin/NodeAdminImpl.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeadmin/NodeAdminImpl.java
index f227a166034..e39abb47788 100644
--- a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeadmin/NodeAdminImpl.java
+++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeadmin/NodeAdminImpl.java
@@ -55,6 +55,8 @@ public class NodeAdminImpl implements NodeAdmin {
private final Map<ContainerName, NodeAgent> nodeAgents = new ConcurrentHashMap<>();
+ private final int nodeAgentScanIntervalMillis;
+
private final GaugeWrapper numberOfContainersInLoadImageState;
private final CounterWrapper numberOfUnhandledExceptionsInNodeAgent;
@@ -62,11 +64,13 @@ public class NodeAdminImpl implements NodeAdmin {
final Function<String, NodeAgent> nodeAgentFactory,
final StorageMaintainer storageMaintainer,
final AclMaintainer aclMaintainer,
+ final int nodeAgentScanIntervalMillis,
final MetricReceiverWrapper metricReceiver,
final Clock clock) {
this.dockerOperations = dockerOperations;
this.nodeAgentFactory = nodeAgentFactory;
this.storageMaintainer = storageMaintainer;
+ this.nodeAgentScanIntervalMillis = nodeAgentScanIntervalMillis;
this.clock = clock;
this.previousWantFrozen = true;
@@ -179,21 +183,24 @@ public class NodeAdminImpl implements NodeAdmin {
}
@Override
- public void stop() {
+ public void shutdown() {
metricsScheduler.shutdown();
aclScheduler.shutdown();
-
- // Stop all node-agents in parallel, will block until the last NodeAgent is stopped
- nodeAgents.values().parallelStream().forEach(NodeAgent::stop);
-
- do {
- try {
- metricsScheduler.awaitTermination(Long.MAX_VALUE, TimeUnit.NANOSECONDS);
- aclScheduler.awaitTermination(Long.MAX_VALUE, TimeUnit.NANOSECONDS);
- } catch (InterruptedException e) {
- logger.info("Was interrupted while waiting for metricsScheduler and aclScheduler to shutdown");
+ try {
+ boolean metricsSchedulerShutdown = metricsScheduler.awaitTermination(30, TimeUnit.SECONDS);
+ boolean aclSchedulerShutdown = aclScheduler.awaitTermination(30, TimeUnit.SECONDS);
+ if (! (metricsSchedulerShutdown && aclSchedulerShutdown)) {
+ throw new RuntimeException("Failed shutting down all scheduler(s), shutdown status:\n" +
+ "\tMetrics Scheduler: " + metricsSchedulerShutdown + "\n" +
+ "\tACL Scheduler: " + aclSchedulerShutdown);
}
- } while (!metricsScheduler.isTerminated() || !aclScheduler.isTerminated());
+ } catch (InterruptedException e) {
+ throw new RuntimeException(e);
+ }
+
+ for (NodeAgent nodeAgent : nodeAgents.values()) {
+ nodeAgent.stop();
+ }
}
// Set-difference. Returns minuend minus subtrahend.
@@ -250,7 +257,7 @@ public class NodeAdminImpl implements NodeAdmin {
}
final NodeAgent agent = nodeAgentFactory.apply(hostname);
- agent.start();
+ agent.start(nodeAgentScanIntervalMillis);
nodeAgents.put(containerName, agent);
try {
Thread.sleep(1000);
diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeadmin/NodeAdminStateUpdater.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeadmin/NodeAdminStateUpdater.java
index d1f23b13e6c..a848dae9388 100644
--- a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeadmin/NodeAdminStateUpdater.java
+++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeadmin/NodeAdminStateUpdater.java
@@ -1,6 +1,7 @@
// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.vespa.hosted.node.admin.nodeadmin;
+import com.yahoo.component.AbstractComponent;
import com.yahoo.concurrent.ThreadFactoryFactory;
import com.yahoo.log.LogLevel;
import com.yahoo.vespa.hosted.node.admin.ContainerNodeSpec;
@@ -35,7 +36,7 @@ import static com.yahoo.vespa.hosted.node.admin.nodeadmin.NodeAdminStateUpdater.
*
* @author dybis, stiankri
*/
-public class NodeAdminStateUpdater {
+public class NodeAdminStateUpdater extends AbstractComponent {
static final Duration FREEZE_CONVERGENCE_TIMEOUT = Duration.ofMinutes(5);
private final AtomicBoolean terminated = new AtomicBoolean(false);
@@ -51,31 +52,35 @@ public class NodeAdminStateUpdater {
private Thread loopThread;
private final NodeRepository nodeRepository;
- private final Orchestrator orchestrator;
- private final StorageMaintainer storageMaintainer;
private final NodeAdmin nodeAdmin;
private final Clock clock;
+ private final Orchestrator orchestrator;
private final String dockerHostHostName;
- private final Duration nodeAdminConvergeStateInterval;
+ private long delaysBetweenEachTickMillis = 30_000;
private Instant lastTick;
public NodeAdminStateUpdater(
- NodeRepository nodeRepository,
- Orchestrator orchestrator,
+ final NodeRepository nodeRepository,
+ final NodeAdmin nodeAdmin,
StorageMaintainer storageMaintainer,
- NodeAdmin nodeAdmin,
- String dockerHostHostName,
Clock clock,
- Duration nodeAdminConvergeStateInterval) {
+ Orchestrator orchestrator,
+ String dockerHostHostName) {
+ log.log(LogLevel.INFO, objectToString() + ": Creating object");
this.nodeRepository = nodeRepository;
- this.orchestrator = orchestrator;
- this.storageMaintainer = storageMaintainer;
this.nodeAdmin = nodeAdmin;
- this.dockerHostHostName = dockerHostHostName;
this.clock = clock;
- this.nodeAdminConvergeStateInterval = nodeAdminConvergeStateInterval;
+ this.orchestrator = orchestrator;
+ this.dockerHostHostName = dockerHostHostName;
this.lastTick = clock.instant();
+
+ specVerifierScheduler.scheduleWithFixedDelay(() ->
+ updateHardwareDivergence(storageMaintainer), 5, 60, TimeUnit.MINUTES);
+ }
+
+ private String objectToString() {
+ return this.getClass().getSimpleName() + "@" + Integer.toString(System.identityHashCode(this));
}
public enum State { RESUMED, SUSPENDED_NODE_ADMIN, SUSPENDED}
@@ -128,8 +133,7 @@ public class NodeAdminStateUpdater {
State wantedStateCopy;
synchronized (monitor) {
while (! workToDoNow) {
- Duration timeSinceLastConverge = Duration.between(lastTick, clock.instant());
- long remainder = nodeAdminConvergeStateInterval.minus(timeSinceLastConverge).toMillis();
+ long remainder = delaysBetweenEachTickMillis - Duration.between(lastTick, clock.instant()).toMillis();
if (remainder > 0) {
try {
monitor.wait(remainder);
@@ -227,7 +231,7 @@ public class NodeAdminStateUpdater {
}
final List<ContainerNodeSpec> containersToRun;
try {
- containersToRun = nodeRepository.getContainersToRun(dockerHostHostName);
+ containersToRun = nodeRepository.getContainersToRun();
} catch (Exception e) {
log.log(LogLevel.WARNING, "Failed fetching container info from node repository", e);
return;
@@ -246,7 +250,7 @@ public class NodeAdminStateUpdater {
private List<String> getNodesInActiveState() {
try {
- return nodeRepository.getContainersToRun(dockerHostHostName)
+ return nodeRepository.getContainersToRun()
.stream()
.filter(nodespec -> nodespec.nodeState == Node.State.active)
.map(nodespec -> nodespec.hostname)
@@ -256,7 +260,8 @@ public class NodeAdminStateUpdater {
}
}
- public void start() {
+ public void start(long stateConvergeInterval) {
+ delaysBetweenEachTickMillis = stateConvergeInterval;
if (loopThread != null) {
throw new RuntimeException("Can not restart NodeAdminStateUpdater");
}
@@ -266,30 +271,24 @@ public class NodeAdminStateUpdater {
});
loopThread.setName("tick-NodeAdminStateUpdater");
loopThread.start();
-
- specVerifierScheduler.scheduleWithFixedDelay(() ->
- updateHardwareDivergence(storageMaintainer), 5, 60, TimeUnit.MINUTES);
}
- public void stop() {
- specVerifierScheduler.shutdown();
+ @Override
+ public void deconstruct() {
if (!terminated.compareAndSet(false, true)) {
throw new RuntimeException("Can not re-stop a node agent.");
}
-
- // First we need to stop NodeAdminStateUpdater thread to make sure no new NodeAgents are spawned
+ log.log(LogLevel.INFO, objectToString() + ": Deconstruct called");
signalWorkToBeDone();
-
- do {
- try {
- loopThread.join();
- specVerifierScheduler.awaitTermination(Long.MAX_VALUE, TimeUnit.NANOSECONDS);
- } catch (InterruptedException e1) {
- log.info("Interrupted while waiting for NodeAdminStateUpdater thread and specVerfierScheduler to shutdown");
+ try {
+ loopThread.join(10000);
+ if (loopThread.isAlive()) {
+ log.log(LogLevel.ERROR, "Could not stop tick thread");
}
- } while (loopThread.isAlive() || !specVerifierScheduler.isTerminated());
-
- // Finally, stop NodeAdmin and all the NodeAgents
- nodeAdmin.stop();
+ } catch (InterruptedException e1) {
+ log.log(LogLevel.ERROR, "Interrupted; Could not stop thread");
+ }
+ nodeAdmin.shutdown();
+ log.log(LogLevel.INFO, objectToString() + ": Deconstruct complete");
}
}
diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgent.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgent.java
index 92c44969d5e..5d31c10fcc1 100644
--- a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgent.java
+++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgent.java
@@ -29,7 +29,7 @@ public interface NodeAgent {
* Starts the agent. After this method is called, the agent will asynchronously maintain the node, continuously
* striving to make the current state equal to the wanted state.
*/
- void start();
+ void start(int intervalMillis);
/**
* Signals to the agent that the node is at the end of its lifecycle and no longer needs a managing agent.
diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImpl.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImpl.java
index 77348a9dc45..6ea65be6799 100644
--- a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImpl.java
+++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImpl.java
@@ -64,20 +64,20 @@ public class NodeAgentImpl implements NodeAgent {
private final PrefixLogger logger;
private DockerImage imageBeingDownloaded = null;
- private final ContainerName containerName;
private final String hostname;
+ private final ContainerName containerName;
private final NodeRepository nodeRepository;
private final Orchestrator orchestrator;
private final DockerOperations dockerOperations;
private final StorageMaintainer storageMaintainer;
- private final AclMaintainer aclMaintainer;
private final Environment environment;
private final Clock clock;
- private final Duration timeBetweenEachConverge;
+ private final AclMaintainer aclMaintainer;
private final SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
private final LinkedList<String> debugMessages = new LinkedList<>();
+ private long delaysBetweenEachConvergeMillis = 30_000;
private int numberOfUnhandledException = 0;
private Instant lastConverge;
@@ -85,7 +85,7 @@ public class NodeAgentImpl implements NodeAgent {
private final ScheduledExecutorService filebeatRestarter =
Executors.newScheduledThreadPool(1, ThreadFactoryFactory.getDaemonThreadFactory("filebeatrestarter"));
- private Consumer<String> serviceRestarter;
+ private final Consumer<String> serviceRestarter;
private Future<?> currentFilebeatRestarter;
private boolean resumeScriptRun = false;
@@ -117,8 +117,7 @@ public class NodeAgentImpl implements NodeAgent {
final StorageMaintainer storageMaintainer,
final AclMaintainer aclMaintainer,
final Environment environment,
- final Clock clock,
- final Duration timeBetweenEachConverge) {
+ final Clock clock) {
this.containerName = ContainerName.fromHostname(hostName);
this.logger = PrefixLogger.getNodeAgentLogger(NodeAgentImpl.class, containerName);
this.hostname = hostName;
@@ -129,8 +128,19 @@ public class NodeAgentImpl implements NodeAgent {
this.aclMaintainer = aclMaintainer;
this.environment = environment;
this.clock = clock;
- this.timeBetweenEachConverge = timeBetweenEachConverge;
this.lastConverge = clock.instant();
+ this.serviceRestarter = service -> {
+ try {
+ ProcessResult processResult = dockerOperations.executeCommandInContainerAsRoot(
+ containerName, "service", service, "restart");
+
+ if (!processResult.isSuccess()) {
+ logger.error("Failed to restart service " + service + ": " + processResult);
+ }
+ } catch (Exception e) {
+ logger.error("Failed to restart service " + service, e);
+ }
+ };
}
@Override
@@ -173,11 +183,11 @@ public class NodeAgentImpl implements NodeAgent {
}
@Override
- public void start() {
- String message = "Starting with interval " + timeBetweenEachConverge.toMillis() + " ms";
+ public void start(int intervalMillis) {
+ String message = "Starting with interval " + intervalMillis + " ms";
logger.info(message);
addDebugMessage(message);
-
+ delaysBetweenEachConvergeMillis = intervalMillis;
if (loopThread != null) {
throw new RuntimeException("Can not restart a node agent.");
}
@@ -187,19 +197,6 @@ public class NodeAgentImpl implements NodeAgent {
});
loopThread.setName("tick-" + hostname);
loopThread.start();
-
- serviceRestarter = service -> {
- try {
- ProcessResult processResult = dockerOperations.executeCommandInContainerAsRoot(
- containerName, "service", service, "restart");
-
- if (!processResult.isSuccess()) {
- logger.error("Failed to restart service " + service + ": " + processResult);
- }
- } catch (Exception e) {
- logger.error("Failed to restart service " + service, e);
- }
- };
}
@Override
@@ -210,15 +207,19 @@ public class NodeAgentImpl implements NodeAgent {
throw new RuntimeException("Can not re-stop a node agent.");
}
signalWorkToBeDone();
-
- do {
- try {
- loopThread.join();
- filebeatRestarter.awaitTermination(Long.MAX_VALUE, TimeUnit.NANOSECONDS);
- } catch (InterruptedException e) {
- logger.error("Interrupted while waiting for converge thread and filebeatRestarter scheduler to shutdown");
+ try {
+ loopThread.join(10000);
+ if (loopThread.isAlive()) {
+ logger.error("Could not stop host thread " + hostname);
}
- } while (loopThread.isAlive() || !filebeatRestarter.isTerminated());
+ } catch (InterruptedException e1) {
+ logger.error("Interrupted; Could not stop host thread " + hostname);
+ }
+ try {
+ filebeatRestarter.awaitTermination(10, TimeUnit.SECONDS);
+ } catch (InterruptedException e) {
+ logger.error("Interrupted; Could not stop filebeatrestarter thread");
+ }
logger.info("Stopped");
}
@@ -374,7 +375,7 @@ public class NodeAgentImpl implements NodeAgent {
boolean isFrozenCopy;
synchronized (monitor) {
while (!workToDoNow) {
- long remainder = timeBetweenEachConverge.minus(Duration.between(lastConverge, clock.instant())).toMillis();
+ long remainder = delaysBetweenEachConvergeMillis - Duration.between(lastConverge, clock.instant()).toMillis();
if (remainder > 0) {
try {
monitor.wait(remainder);
diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/noderepository/NodeRepository.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/noderepository/NodeRepository.java
index 9f4c6916b48..d14cd2f1330 100644
--- a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/noderepository/NodeRepository.java
+++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/noderepository/NodeRepository.java
@@ -13,7 +13,7 @@ import java.util.Optional;
* @author stiankri
*/
public interface NodeRepository {
- List<ContainerNodeSpec> getContainersToRun(String baseHostName) throws IOException;
+ List<ContainerNodeSpec> getContainersToRun() throws IOException;
Optional<ContainerNodeSpec> getContainerNodeSpec(String hostName);
diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/noderepository/NodeRepositoryImpl.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/noderepository/NodeRepositoryImpl.java
index 08957a489b6..7d73d05ca36 100644
--- a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/noderepository/NodeRepositoryImpl.java
+++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/noderepository/NodeRepositoryImpl.java
@@ -28,17 +28,18 @@ import java.util.stream.Collectors;
*/
public class NodeRepositoryImpl implements NodeRepository {
private static final PrefixLogger NODE_ADMIN_LOGGER = PrefixLogger.getNodeAdminLogger(NodeRepositoryImpl.class);
-
- private final ConfigServerHttpRequestExecutor requestExecutor;
+ private final String baseHostName;
private final int port;
+ private final ConfigServerHttpRequestExecutor requestExecutor;
- public NodeRepositoryImpl(ConfigServerHttpRequestExecutor requestExecutor, int port) {
+ public NodeRepositoryImpl(ConfigServerHttpRequestExecutor requestExecutor, int configPort, String baseHostName) {
+ this.baseHostName = baseHostName;
+ this.port = configPort;
this.requestExecutor = requestExecutor;
- this.port = port;
}
@Override
- public List<ContainerNodeSpec> getContainersToRun(String baseHostName) throws IOException {
+ public List<ContainerNodeSpec> getContainersToRun() throws IOException {
try {
final GetNodesResponse nodesForHost = requestExecutor.get(
"/nodes/v2/node/?parentHost=" + baseHostName + "&recursive=true",
diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/orchestrator/OrchestratorImpl.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/orchestrator/OrchestratorImpl.java
index bd9df486e7b..5117a1bb079 100644
--- a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/orchestrator/OrchestratorImpl.java
+++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/orchestrator/OrchestratorImpl.java
@@ -1,6 +1,8 @@
// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.vespa.hosted.node.admin.orchestrator;
+import com.yahoo.vespa.defaults.Defaults;
+
import com.yahoo.vespa.hosted.node.admin.util.ConfigServerHttpRequestExecutor;
import com.yahoo.vespa.orchestrator.restapi.HostApi;
@@ -17,6 +19,7 @@ import java.util.Optional;
* @author dybis
*/
public class OrchestratorImpl implements Orchestrator {
+ static final int WEB_SERVICE_PORT = Defaults.getDefaults().vespaWebServicePort();
// TODO: Find a way to avoid duplicating this (present in orchestrator's services.xml also).
private static final String ORCHESTRATOR_PATH_PREFIX = "/orchestrator";
static final String ORCHESTRATOR_PATH_PREFIX_HOST_API
@@ -25,11 +28,9 @@ public class OrchestratorImpl implements Orchestrator {
= ORCHESTRATOR_PATH_PREFIX + HostSuspensionApi.PATH_PREFIX;
private final ConfigServerHttpRequestExecutor requestExecutor;
- private final int port;
- public OrchestratorImpl(ConfigServerHttpRequestExecutor requestExecutor, int port) {
+ public OrchestratorImpl(ConfigServerHttpRequestExecutor requestExecutor) {
this.requestExecutor = requestExecutor;
- this.port = port;
}
@Override
@@ -37,7 +38,7 @@ public class OrchestratorImpl implements Orchestrator {
UpdateHostResponse response;
try {
response = requestExecutor.put(getSuspendPath(hostName),
- port,
+ WEB_SERVICE_PORT,
Optional.empty(), /* body */
UpdateHostResponse.class);
} catch (ConfigServerHttpRequestExecutor.NotFoundException n) {
@@ -57,7 +58,7 @@ public class OrchestratorImpl implements Orchestrator {
try {
batchOperationResult = requestExecutor.put(
ORCHESTRATOR_PATH_PREFIX_HOST_SUSPENSION_API,
- port,
+ WEB_SERVICE_PORT,
Optional.of(new BatchHostSuspendRequest(parentHostName, hostNames)),
BatchOperationResult.class);
} catch (Exception e) {
@@ -74,7 +75,7 @@ public class OrchestratorImpl implements Orchestrator {
UpdateHostResponse response;
try {
String path = getSuspendPath(hostName);
- response = requestExecutor.delete(path, port, UpdateHostResponse.class);
+ response = requestExecutor.delete(path, WEB_SERVICE_PORT, UpdateHostResponse.class);
} catch (ConfigServerHttpRequestExecutor.NotFoundException n) {
throw new OrchestratorNotFoundException("Failed to resume " + hostName + ", host not found");
} catch (Exception e) {
diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/provider/ComponentsProvider.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/provider/ComponentsProvider.java
new file mode 100644
index 00000000000..3211e4feb56
--- /dev/null
+++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/provider/ComponentsProvider.java
@@ -0,0 +1,16 @@
+// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.hosted.node.admin.provider;
+
+import com.yahoo.vespa.hosted.dockerapi.metrics.MetricReceiverWrapper;
+import com.yahoo.vespa.hosted.node.admin.nodeadmin.NodeAdminStateUpdater;
+
+/**
+ * Class for setting up instances of classes; enables testing.
+ *
+ * @author dybis
+ */
+public interface ComponentsProvider {
+ NodeAdminStateUpdater getNodeAdminStateUpdater();
+
+ MetricReceiverWrapper getMetricReceiverWrapper();
+}
diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/provider/ComponentsProviderImpl.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/provider/ComponentsProviderImpl.java
new file mode 100644
index 00000000000..98d7593ef69
--- /dev/null
+++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/provider/ComponentsProviderImpl.java
@@ -0,0 +1,118 @@
+// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.hosted.node.admin.provider;
+
+import com.google.inject.Inject;
+import com.yahoo.net.HostName;
+import static com.yahoo.vespa.defaults.Defaults.getDefaults;
+
+import com.yahoo.system.ProcessExecuter;
+import com.yahoo.vespa.hosted.dockerapi.ContainerName;
+import com.yahoo.vespa.hosted.dockerapi.Docker;
+import com.yahoo.vespa.hosted.dockerapi.metrics.MetricReceiverWrapper;
+import com.yahoo.vespa.hosted.node.admin.docker.DockerOperations;
+import com.yahoo.vespa.hosted.node.admin.maintenance.acl.AclMaintainer;
+import com.yahoo.vespa.hosted.node.admin.maintenance.StorageMaintainer;
+import com.yahoo.vespa.hosted.node.admin.nodeadmin.NodeAdmin;
+import com.yahoo.vespa.hosted.node.admin.nodeadmin.NodeAdminImpl;
+import com.yahoo.vespa.hosted.node.admin.nodeadmin.NodeAdminStateUpdater;
+import com.yahoo.vespa.hosted.node.admin.nodeagent.NodeAgent;
+import com.yahoo.vespa.hosted.node.admin.nodeagent.NodeAgentImpl;
+import com.yahoo.vespa.hosted.node.admin.docker.DockerOperationsImpl;
+import com.yahoo.vespa.hosted.node.admin.noderepository.NodeRepository;
+import com.yahoo.vespa.hosted.node.admin.noderepository.NodeRepositoryImpl;
+import com.yahoo.vespa.hosted.node.admin.orchestrator.Orchestrator;
+import com.yahoo.vespa.hosted.node.admin.orchestrator.OrchestratorImpl;
+import com.yahoo.vespa.hosted.node.admin.util.ConfigServerHttpRequestExecutor;
+import com.yahoo.vespa.hosted.node.admin.util.Environment;
+import com.yahoo.vespa.hosted.node.admin.util.SecretAgentScheduleMaker;
+
+import java.io.IOException;
+import java.nio.file.Path;
+import java.nio.file.Paths;
+import java.time.Clock;
+import java.util.Set;
+import java.util.function.Function;
+
+/**
+ * Set up node admin for production.
+ *
+ * @author dybis
+ */
+public class ComponentsProviderImpl implements ComponentsProvider {
+ private static final ContainerName NODE_ADMIN_CONTAINER_NAME = new ContainerName("node-admin");
+
+ private final NodeAdminStateUpdater nodeAdminStateUpdater;
+ private final MetricReceiverWrapper metricReceiverWrapper;
+
+ private static final int NODE_AGENT_SCAN_INTERVAL_MILLIS = 30000;
+ private static final int WEB_SERVICE_PORT = getDefaults().vespaWebServicePort();
+
+ // Converge towards desired node admin state every 30 seconds
+ private static final int NODE_ADMIN_CONVERGE_STATE_INTERVAL_MILLIS = 30000;
+
+ @Inject
+ public ComponentsProviderImpl(Docker docker, MetricReceiverWrapper metricReceiver) {
+ String baseHostName = HostName.getLocalhost();
+ Environment environment = new Environment();
+ Set<String> configServerHosts = environment.getConfigServerHosts();
+ if (configServerHosts.isEmpty()) {
+ throw new IllegalStateException("Environment setting for config servers missing or empty.");
+ }
+
+ Clock clock = Clock.systemUTC();
+ ProcessExecuter processExecuter = new ProcessExecuter();
+ ConfigServerHttpRequestExecutor requestExecutor = ConfigServerHttpRequestExecutor.create(configServerHosts);
+ Orchestrator orchestrator = new OrchestratorImpl(requestExecutor);
+ NodeRepository nodeRepository = new NodeRepositoryImpl(requestExecutor, WEB_SERVICE_PORT, baseHostName);
+ DockerOperations dockerOperations = new DockerOperationsImpl(docker, environment, processExecuter);
+
+ StorageMaintainer storageMaintainer = new StorageMaintainer(docker, processExecuter, metricReceiver, environment, clock);
+ AclMaintainer aclMaintainer = new AclMaintainer(dockerOperations, nodeRepository, baseHostName);
+
+ Function<String, NodeAgent> nodeAgentFactory =
+ (hostName) -> new NodeAgentImpl(hostName, nodeRepository, orchestrator, dockerOperations,
+ storageMaintainer, aclMaintainer, environment, clock);
+ NodeAdmin nodeAdmin = new NodeAdminImpl(dockerOperations, nodeAgentFactory, storageMaintainer, aclMaintainer,
+ NODE_AGENT_SCAN_INTERVAL_MILLIS, metricReceiver, clock);
+ nodeAdminStateUpdater = new NodeAdminStateUpdater(nodeRepository, nodeAdmin, storageMaintainer, clock, orchestrator, baseHostName);
+ nodeAdminStateUpdater.start(NODE_ADMIN_CONVERGE_STATE_INTERVAL_MILLIS);
+
+ metricReceiverWrapper = metricReceiver;
+
+ setCorePattern(docker);
+ initializeNodeAgentSecretAgent(docker);
+ }
+
+ @Override
+ public NodeAdminStateUpdater getNodeAdminStateUpdater() {
+ return nodeAdminStateUpdater;
+ }
+
+ @Override
+ public MetricReceiverWrapper getMetricReceiverWrapper() {
+ return metricReceiverWrapper;
+ }
+
+
+ private void setCorePattern(Docker docker) {
+ final String[] sysctlCorePattern = {"sysctl", "-w", "kernel.core_pattern=" +
+ getDefaults().underVespaHome("var/crash/%e.core.%p")};
+ docker.executeInContainerAsRoot(NODE_ADMIN_CONTAINER_NAME, sysctlCorePattern);
+ }
+
+ private void initializeNodeAgentSecretAgent(Docker docker) {
+ final Path yamasAgentFolder = Paths.get("/etc/yamas-agent/");
+ docker.executeInContainerAsRoot(NODE_ADMIN_CONTAINER_NAME, "chmod", "a+w", yamasAgentFolder.toString());
+
+ Path nodeAdminCheckPath = Paths.get("/usr/bin/curl");
+ SecretAgentScheduleMaker scheduleMaker = new SecretAgentScheduleMaker("node-admin", 60, nodeAdminCheckPath,
+ "localhost:4080/rest/metrics");
+
+ try {
+ scheduleMaker.writeTo(yamasAgentFolder);
+ docker.executeInContainerAsRoot(NODE_ADMIN_CONTAINER_NAME, "service", "yamas-agent", "restart");
+ } catch (IOException e) {
+ throw new RuntimeException("Failed to write secret-agent schedules for node-admin", e);
+ }
+ }
+}
diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/provider/NodeAdminProvider.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/provider/NodeAdminProvider.java
deleted file mode 100644
index 5536ee1551b..00000000000
--- a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/provider/NodeAdminProvider.java
+++ /dev/null
@@ -1,105 +0,0 @@
-// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
-package com.yahoo.vespa.hosted.node.admin.provider;
-
-import com.google.inject.Inject;
-import com.yahoo.concurrent.lock.Lock;
-import com.yahoo.concurrent.lock.Locking;
-import com.yahoo.container.di.componentgraph.Provider;
-import com.yahoo.log.LogLevel;
-import com.yahoo.net.HostName;
-
-import com.yahoo.system.ProcessExecuter;
-import com.yahoo.vespa.hosted.dockerapi.Docker;
-import com.yahoo.vespa.hosted.dockerapi.metrics.MetricReceiverWrapper;
-import com.yahoo.vespa.hosted.node.admin.docker.DockerOperations;
-import com.yahoo.vespa.hosted.node.admin.maintenance.acl.AclMaintainer;
-import com.yahoo.vespa.hosted.node.admin.maintenance.StorageMaintainer;
-import com.yahoo.vespa.hosted.node.admin.nodeadmin.NodeAdmin;
-import com.yahoo.vespa.hosted.node.admin.nodeadmin.NodeAdminImpl;
-import com.yahoo.vespa.hosted.node.admin.nodeadmin.NodeAdminStateUpdater;
-import com.yahoo.vespa.hosted.node.admin.nodeagent.NodeAgent;
-import com.yahoo.vespa.hosted.node.admin.nodeagent.NodeAgentImpl;
-import com.yahoo.vespa.hosted.node.admin.docker.DockerOperationsImpl;
-import com.yahoo.vespa.hosted.node.admin.noderepository.NodeRepository;
-import com.yahoo.vespa.hosted.node.admin.noderepository.NodeRepositoryImpl;
-import com.yahoo.vespa.hosted.node.admin.orchestrator.Orchestrator;
-import com.yahoo.vespa.hosted.node.admin.orchestrator.OrchestratorImpl;
-import com.yahoo.vespa.hosted.node.admin.util.ConfigServerHttpRequestExecutor;
-import com.yahoo.vespa.hosted.node.admin.util.Environment;
-
-import java.time.Clock;
-import java.time.Duration;
-import java.util.function.Function;
-import java.util.logging.Logger;
-
-import static com.yahoo.vespa.defaults.Defaults.getDefaults;
-
-/**
- * Set up node admin for production.
- *
- * @author dybis
- */
-public class NodeAdminProvider implements Provider<NodeAdminStateUpdater> {
- private static final int WEB_SERVICE_PORT = getDefaults().vespaWebServicePort();
- private static final Duration NODE_AGENT_SCAN_INTERVAL = Duration.ofSeconds(30);
- private static final Duration NODE_ADMIN_CONVERGE_STATE_INTERVAL = Duration.ofSeconds(30);
-
- private final Logger log = Logger.getLogger(NodeAdminProvider.class.getName());
- private final NodeAdminStateUpdater nodeAdminStateUpdater;
- private final Lock classLock;
-
- @Inject
- public NodeAdminProvider(Docker docker, MetricReceiverWrapper metricReceiver, Locking locking) {
- log.log(LogLevel.INFO, objectToString() + ": Creating object, acquiring lock...");
- classLock = locking.lock(this.getClass());
- try {
- log.log(LogLevel.INFO, objectToString() + ": Lock acquired");
-
- Clock clock = Clock.systemUTC();
- String dockerHostHostName = HostName.getLocalhost();
- ProcessExecuter processExecuter = new ProcessExecuter();
- Environment environment = new Environment();
-
- ConfigServerHttpRequestExecutor requestExecutor = ConfigServerHttpRequestExecutor.create(environment.getConfigServerHosts());
- NodeRepository nodeRepository = new NodeRepositoryImpl(requestExecutor, WEB_SERVICE_PORT);
- Orchestrator orchestrator = new OrchestratorImpl(requestExecutor, WEB_SERVICE_PORT);
- DockerOperations dockerOperations = new DockerOperationsImpl(docker, environment, processExecuter);
-
- StorageMaintainer storageMaintainer = new StorageMaintainer(docker, processExecuter, metricReceiver, environment, clock);
- AclMaintainer aclMaintainer = new AclMaintainer(dockerOperations, nodeRepository, dockerHostHostName);
-
- Function<String, NodeAgent> nodeAgentFactory =
- (hostName) -> new NodeAgentImpl(hostName, nodeRepository, orchestrator, dockerOperations,
- storageMaintainer, aclMaintainer, environment, clock, NODE_AGENT_SCAN_INTERVAL);
- NodeAdmin nodeAdmin = new NodeAdminImpl(dockerOperations, nodeAgentFactory, storageMaintainer, aclMaintainer,
- metricReceiver, clock);
-
- nodeAdminStateUpdater = new NodeAdminStateUpdater(nodeRepository, orchestrator, storageMaintainer, nodeAdmin,
- dockerHostHostName, clock, NODE_ADMIN_CONVERGE_STATE_INTERVAL);
- nodeAdminStateUpdater.start();
- } catch (Exception e) {
- classLock.close();
- throw e;
- }
- }
-
- @Override
- public NodeAdminStateUpdater get() {
- return nodeAdminStateUpdater;
- }
-
- @Override
- public void deconstruct() {
- log.log(LogLevel.INFO, objectToString() + ": Stop called");
-
- nodeAdminStateUpdater.stop();
- log.log(LogLevel.INFO, objectToString() + ": Stop complete");
-
- classLock.close();
- log.log(LogLevel.INFO, objectToString() + ": Lock released");
- }
-
- private String objectToString() {
- return this.getClass().getSimpleName() + "@" + Integer.toString(System.identityHashCode(this));
- }
-}
diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/restapi/RestApiHandler.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/restapi/RestApiHandler.java
index adfb937b8d7..ff6ac9ce1e7 100644
--- a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/restapi/RestApiHandler.java
+++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/restapi/RestApiHandler.java
@@ -3,7 +3,6 @@ package com.yahoo.vespa.hosted.node.admin.restapi;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.fasterxml.jackson.databind.node.ObjectNode;
-import com.yahoo.container.di.componentgraph.Provider;
import com.yahoo.container.jdisc.HttpRequest;
import com.yahoo.container.jdisc.HttpResponse;
import com.yahoo.container.jdisc.LoggingRequestHandler;
@@ -11,8 +10,8 @@ import com.yahoo.container.logging.AccessLog;
import com.yahoo.vespa.hosted.dockerapi.metrics.DimensionMetrics;
import com.yahoo.vespa.hosted.dockerapi.metrics.MetricReceiverWrapper;
import com.yahoo.vespa.hosted.node.admin.nodeadmin.NodeAdminStateUpdater;
+import com.yahoo.vespa.hosted.node.admin.provider.ComponentsProvider;
-import javax.inject.Inject;
import javax.ws.rs.core.MediaType;
import java.io.IOException;
import java.io.OutputStream;
@@ -37,13 +36,10 @@ public class RestApiHandler extends LoggingRequestHandler{
private final NodeAdminStateUpdater refresher;
private final MetricReceiverWrapper metricReceiverWrapper;
- @Inject
- public RestApiHandler(Executor executor, AccessLog accessLog,
- Provider<NodeAdminStateUpdater> componentsProvider,
- MetricReceiverWrapper metricReceiverWrapper) {
+ public RestApiHandler(Executor executor, AccessLog accessLog, ComponentsProvider componentsProvider) {
super(executor, accessLog);
- this.refresher = componentsProvider.get();
- this.metricReceiverWrapper = metricReceiverWrapper;
+ this.refresher = componentsProvider.getNodeAdminStateUpdater();
+ this.metricReceiverWrapper = componentsProvider.getMetricReceiverWrapper();
}
@Override
diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/util/ConfigServerHttpRequestExecutor.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/util/ConfigServerHttpRequestExecutor.java
index 9c8dc198388..4434213989f 100644
--- a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/util/ConfigServerHttpRequestExecutor.java
+++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/util/ConfigServerHttpRequestExecutor.java
@@ -52,10 +52,6 @@ public class ConfigServerHttpRequestExecutor {
}
public static ConfigServerHttpRequestExecutor create(Set<String> configServerHosts) {
- if (configServerHosts.isEmpty()) {
- throw new IllegalStateException("Environment setting for config servers missing or empty.");
- }
-
PoolingHttpClientConnectionManager cm = new PoolingHttpClientConnectionManager();
// Increase max total connections to 200, which should be enough
cm.setMaxTotal(200);