summary | refs | log | tree | commit | diff | stats
path: root/node-admin/src
diff options
context:
space:
mode:
author: Valerij Fredriksen <valerij92@gmail.com> 2019-02-09 11:01:23 +0100
committer: Valerij Fredriksen <valerij92@gmail.com> 2019-02-09 11:07:51 +0100
commit: 40dcf524be4ec1bda36631447cfb70d30e7f5654 (patch)
tree: c1a8389ad4b21958379e049230c7d73cc7de7005 /node-admin/src
parent: 7bf67793bac909c047f994e922f19d647a464fec (diff)
Do not run metricsscheduler when suspended
Diffstat (limited to 'node-admin/src')
-rw-r--r-- node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeadmin/NodeAdmin.java | 3
-rw-r--r-- node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeadmin/NodeAdminImpl.java | 19
-rw-r--r-- node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeadmin/NodeAdminStateUpdater.java | 37
-rw-r--r-- node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/integrationTests/DockerTester.java | 2
4 files changed, 44 insertions, 17 deletions
diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeadmin/NodeAdmin.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeadmin/NodeAdmin.java
index 37d79d97e74..456391c65c2 100644
--- a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeadmin/NodeAdmin.java
+++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeadmin/NodeAdmin.java
@@ -18,6 +18,9 @@ public interface NodeAdmin {
*/
void refreshContainersToRun(final List<NodeSpec> containersToRun);
+ /** Gather node agent and its docker container metrics and forward them to the {@code MetricReceiverWrapper} */
+ void updateNodeAgentMetrics();
+
/**
* Attempts to freeze/unfreeze all NodeAgents and itself. To freeze a NodeAgent means that
* they will not pick up any changes from NodeRepository.
diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeadmin/NodeAdminImpl.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeadmin/NodeAdminImpl.java
index 288003ade3c..2b37dcdf69c 100644
--- a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeadmin/NodeAdminImpl.java
+++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeadmin/NodeAdminImpl.java
@@ -42,8 +42,6 @@ public class NodeAdminImpl implements NodeAdmin {
private final ScheduledExecutorService aclScheduler =
Executors.newScheduledThreadPool(1, ThreadFactoryFactory.getDaemonThreadFactory("aclscheduler"));
- private final ScheduledExecutorService metricsScheduler =
- Executors.newScheduledThreadPool(1, ThreadFactoryFactory.getDaemonThreadFactory("metricsscheduler"));
private final NodeAgentWithSchedulerFactory nodeAgentWithSchedulerFactory;
private final NodeAgentContextFactory nodeAgentContextFactory;
@@ -121,13 +119,15 @@ public class NodeAdminImpl implements NodeAdmin {
}
}
- private void updateNodeAgentMetrics() {
+ @Override
+ public void updateNodeAgentMetrics() {
int numberContainersWaitingImage = 0;
int numberOfNewUnhandledExceptions = 0;
for (NodeAgentWithScheduler nodeAgentWithScheduler : nodeAgentWithSchedulerByHostname.values()) {
if (nodeAgentWithScheduler.isDownloadingImage()) numberContainersWaitingImage++;
numberOfNewUnhandledExceptions += nodeAgentWithScheduler.getAndResetNumberOfUnhandledExceptions();
+ nodeAgentWithScheduler.updateContainerNodeMetrics();
}
numberOfContainersInLoadImageState.sample(numberContainersWaitingImage);
@@ -186,15 +186,6 @@ public class NodeAdminImpl implements NodeAdmin {
@Override
public void start() {
- metricsScheduler.scheduleAtFixedRate(() -> {
- try {
- updateNodeAgentMetrics();
- nodeAgentWithSchedulerByHostname.values().forEach(NodeAgent::updateContainerNodeMetrics);
- } catch (Throwable e) {
- logger.warning("Metric fetcher scheduler failed", e);
- }
- }, 10, 55, TimeUnit.SECONDS);
-
aclMaintainer.ifPresent(maintainer -> {
int delay = 120; // WARNING: Reducing this will increase the load on config servers.
aclScheduler.scheduleWithFixedDelay(() -> {
@@ -205,7 +196,6 @@ public class NodeAdminImpl implements NodeAdmin {
@Override
public void stop() {
- metricsScheduler.shutdown();
aclScheduler.shutdown();
// Stop all node-agents in parallel, will block until the last NodeAgent is stopped
@@ -213,12 +203,11 @@ public class NodeAdminImpl implements NodeAdmin {
do {
try {
- metricsScheduler.awaitTermination(Long.MAX_VALUE, TimeUnit.NANOSECONDS);
aclScheduler.awaitTermination(Long.MAX_VALUE, TimeUnit.NANOSECONDS);
} catch (InterruptedException e) {
logger.info("Was interrupted while waiting for metricsScheduler and aclScheduler to shutdown");
}
- } while (!metricsScheduler.isTerminated() || !aclScheduler.isTerminated());
+ } while (!aclScheduler.isTerminated());
}
// Set-difference. Returns minuend minus subtrahend.
diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeadmin/NodeAdminStateUpdater.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeadmin/NodeAdminStateUpdater.java
index 13d3f3307d2..18c3a836e41 100644
--- a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeadmin/NodeAdminStateUpdater.java
+++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeadmin/NodeAdminStateUpdater.java
@@ -1,6 +1,7 @@
// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.vespa.hosted.node.admin.nodeadmin;
+import com.yahoo.concurrent.ThreadFactoryFactory;
import com.yahoo.config.provision.HostName;
import com.yahoo.log.LogLevel;
import com.yahoo.vespa.hosted.node.admin.configserver.noderepository.NodeSpec;
@@ -10,11 +11,17 @@ import com.yahoo.vespa.hosted.provision.Node;
import java.time.Duration;
import java.util.ArrayList;
+import java.util.EnumSet;
import java.util.List;
+import java.util.concurrent.Executors;
+import java.util.concurrent.ScheduledExecutorService;
+import java.util.concurrent.TimeUnit;
+import java.util.logging.Level;
import java.util.logging.Logger;
import java.util.stream.Collectors;
import static com.yahoo.vespa.hosted.node.admin.nodeadmin.NodeAdminStateUpdater.State.RESUMED;
+import static com.yahoo.vespa.hosted.node.admin.nodeadmin.NodeAdminStateUpdater.State.SUSPENDED;
import static com.yahoo.vespa.hosted.node.admin.nodeadmin.NodeAdminStateUpdater.State.SUSPENDED_NODE_ADMIN;
import static com.yahoo.vespa.hosted.node.admin.nodeadmin.NodeAdminStateUpdater.State.TRANSITIONING;
@@ -27,6 +34,9 @@ public class NodeAdminStateUpdater {
private static final Logger log = Logger.getLogger(NodeAdminStateUpdater.class.getName());
private static final Duration FREEZE_CONVERGENCE_TIMEOUT = Duration.ofMinutes(5);
+ private final ScheduledExecutorService metricsScheduler =
+ Executors.newScheduledThreadPool(1, ThreadFactoryFactory.getDaemonThreadFactory("metricsscheduler"));
+
private final NodeRepository nodeRepository;
private final Orchestrator orchestrator;
private final NodeAdmin nodeAdmin;
@@ -34,7 +44,7 @@ public class NodeAdminStateUpdater {
public enum State { TRANSITIONING, RESUMED, SUSPENDED_NODE_ADMIN, SUSPENDED }
- private State currentState = SUSPENDED_NODE_ADMIN;
+ private volatile State currentState = SUSPENDED_NODE_ADMIN;
public NodeAdminStateUpdater(
NodeRepository nodeRepository,
@@ -49,6 +59,31 @@ public class NodeAdminStateUpdater {
public void start() {
nodeAdmin.start();
+
+ EnumSet<State> suspendedStates = EnumSet.of(SUSPENDED_NODE_ADMIN, SUSPENDED);
+ metricsScheduler.scheduleAtFixedRate(() -> {
+ try {
+ if (suspendedStates.contains(currentState)) return;
+ nodeAdmin.updateNodeAgentMetrics();
+ } catch (Throwable e) {
+ log.log(Level.WARNING, "Metric fetcher scheduler failed", e);
+ }
+ }, 10, 55, TimeUnit.SECONDS);
+ }
+
+ public void stop() {
+ metricsScheduler.shutdown();
+
+ // Stop all node-agents in parallel, will block until the last NodeAgent is stopped
+ nodeAdmin.stop();
+
+ do {
+ try {
+ metricsScheduler.awaitTermination(Long.MAX_VALUE, TimeUnit.NANOSECONDS);
+ } catch (InterruptedException e) {
+ log.info("Was interrupted while waiting for metricsScheduler and shutdown");
+ }
+ } while (!metricsScheduler.isTerminated());
}
/**
diff --git a/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/integrationTests/DockerTester.java b/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/integrationTests/DockerTester.java
index e475e9a53c2..0254f58e7eb 100644
--- a/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/integrationTests/DockerTester.java
+++ b/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/integrationTests/DockerTester.java
@@ -139,7 +139,7 @@ public class DockerTester implements AutoCloseable {
@Override
public void close() {
// First, stop NodeAdmin and all the NodeAgents
- nodeAdmin.stop();
+ nodeAdminStateUpdater.stop();
terminated = true;
do {