diff options
author | valerijf <valerijf@yahoo-inc.com> | 2016-07-05 13:36:00 +0200 |
---|---|---|
committer | valerijf <valerijf@yahoo-inc.com> | 2016-07-05 13:36:00 +0200 |
commit | be6f11951d373cc86e33b341a7559114206d8ef6 (patch) | |
tree | 83b01cedd90978292165f3e51507e3bb01a37167 /node-admin | |
parent | 983dd5714698d929a14a124cc1de3e4039a7ef0c (diff) |
Improved the /rest/info page by showing more variables and keeping a short history for each node agent
Diffstat (limited to 'node-admin')
6 files changed, 86 insertions, 25 deletions
diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeadmin/NodeAdmin.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeadmin/NodeAdmin.java index fb65f03a57b..6fbabfc1ad7 100644 --- a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeadmin/NodeAdmin.java +++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeadmin/NodeAdmin.java @@ -5,6 +5,7 @@ import com.yahoo.vespa.applicationmodel.HostName; import com.yahoo.vespa.hosted.node.admin.ContainerNodeSpec; import java.util.List; +import java.util.Map; import java.util.Set; /** @@ -38,7 +39,7 @@ public interface NodeAdmin { /** * Return the state as a human readable string. Do not try to parse output or use in tests. */ - String debugInfo(); + Map debugInfo(); /** * Stop the NodeAgent. Will not delete the storage or stop the container. diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeadmin/NodeAdminImpl.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeadmin/NodeAdminImpl.java index ad60c74dd1b..a274131ed30 100644 --- a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeadmin/NodeAdminImpl.java +++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeadmin/NodeAdminImpl.java @@ -11,9 +11,11 @@ import com.yahoo.vespa.hosted.node.admin.nodeagent.NodeAgent; import java.io.IOException; import java.time.Duration; +import java.util.ArrayList; import java.util.Collections; import java.util.HashMap; import java.util.HashSet; +import java.util.LinkedHashMap; import java.util.List; import java.util.Map; import java.util.Optional; @@ -86,13 +88,15 @@ public class NodeAdminImpl implements NodeAdmin { } @Override - public String debugInfo() { - StringBuilder debug = new StringBuilder(); + public Map debugInfo() { + Map debug = new LinkedHashMap(); + List<Map> nodeAgentDebugs = new ArrayList<>(); + for (Map.Entry<HostName, NodeAgent> node : nodeAgents.entrySet()) { - debug.append("Node ").append(node.getKey().toString()); - debug.append(" state ").append(node.getValue().debugInfo()); + nodeAgentDebugs.add(node.getValue().debugInfo()); } - return debug.toString(); + debug.put("NodeAgents", nodeAgentDebugs); + return debug; } @Override diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeadmin/NodeAdminStateUpdater.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeadmin/NodeAdminStateUpdater.java index df4bd558922..c1567b909b5 100644 --- a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeadmin/NodeAdminStateUpdater.java +++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeadmin/NodeAdminStateUpdater.java @@ -7,7 +7,9 @@ import com.yahoo.vespa.hosted.node.admin.noderepository.NodeRepository; import com.yahoo.vespa.hosted.node.admin.orchestrator.Orchestrator; import java.util.ArrayList; +import java.util.LinkedHashMap; import java.util.List; +import java.util.Map; import java.util.Optional; import java.util.concurrent.Executors; import java.util.concurrent.ScheduledExecutorService; @@ -52,14 +54,14 @@ public class NodeAdminStateUpdater extends AbstractComponent { this.baseHostName = baseHostName; } - public String getDebugPage() { - StringBuilder info = new StringBuilder(); + public Map getDebugPage() { + Map debug = new LinkedHashMap(); synchronized (monitor) { - info.append("isRunningUpdates is " + isRunningUpdates+ ". "); - info.append("NodeAdmin: "); - info.append(nodeAdmin.debugInfo()); + debug.put("isRunningUpdates", isRunningUpdates); + debug.put("baseHostName", baseHostName); + debug.put("NodeAdmin", nodeAdmin.debugInfo()); } - return info.toString(); + return debug; } public enum State { RESUMED, SUSPENDED} diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgent.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgent.java index c030cfbe058..b721a49dbf7 100644 --- a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgent.java +++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgent.java @@ -1,6 +1,8 @@ // Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. package com.yahoo.vespa.hosted.node.admin.nodeagent; +import java.util.Map; + /** * Responsible for management of a single node over its lifecycle. * May own its own resources, threads etc. Runs independently, but receives signals @@ -33,7 +35,7 @@ public interface NodeAgent { /** * Human readable string for the state of the NodeAgent. */ - String debugInfo(); + Map debugInfo(); /** * Starts the agent. After this method is called, the agent will asynchronously maintain the node, continuously diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImpl.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImpl.java index 9ec9d455cc4..60f7357ab6a 100644 --- a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImpl.java +++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImpl.java @@ -9,8 +9,12 @@ import com.yahoo.vespa.hosted.node.admin.noderepository.NodeRepository; import com.yahoo.vespa.hosted.node.admin.orchestrator.Orchestrator; import java.io.IOException; +import java.text.SimpleDateFormat; +import java.util.Date; +import java.util.LinkedHashMap; +import java.util.LinkedList; +import java.util.Map; import java.util.concurrent.atomic.AtomicBoolean; -import java.util.concurrent.atomic.AtomicReference; import java.util.logging.Level; import java.util.logging.Logger; @@ -43,7 +47,8 @@ public class NodeAgentImpl implements NodeAgent { private final Object monitor = new Object(); - private AtomicReference<String> debugString = new AtomicReference<>("not started"); + private final LinkedList<String> debugMessages = new LinkedList<>(); + private final SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss"); private long delaysBetweenEachTickMillis; @@ -58,6 +63,7 @@ public class NodeAgentImpl implements NodeAgent { // The attributes of the last successful noderepo attribute update for this node. Used to avoid redundant calls. private NodeAttributes lastAttributesSet = null; + private ContainerNodeSpec lastNodeSpec = null; public NodeAgentImpl( final HostName hostName, @@ -73,12 +79,18 @@ public class NodeAgentImpl implements NodeAgent { @Override public void freeze() { + if (!wantFrozen.get()) { + addDebugMessage("Freezing"); + } wantFrozen.set(true); signalWorkToBeDone(); } @Override public void unfreeze() { + if (wantFrozen.get()) { + addDebugMessage("Unfreezing"); + } wantFrozen.set(false); signalWorkToBeDone(); } @@ -88,13 +100,33 @@ public class NodeAgentImpl implements NodeAgent { return isFrozen.get(); } + private void addDebugMessage(String message) { + synchronized (monitor) { + while (debugMessages.size() > 100) { + debugMessages.pop(); + } + + debugMessages.add("[" + sdf.format(new Date()) + "] " + message); + } + } + @Override - public String debugInfo() { - return debugString.get(); + public Map debugInfo() { + Map debug = new LinkedHashMap(); + debug.put("Hostname", hostname); + debug.put("isFrozen", isFrozen()); + debug.put("wantFrozen", wantFrozen.get()); + debug.put("terminated", terminated.get()); + debug.put("workToDoNow", workToDoNow.get()); + synchronized (monitor) { + debug.put("History", new LinkedList<>(debugMessages)); + } + return debug; } @Override public void start(int intervalMillis) { + addDebugMessage("Starting with interval " + intervalMillis + "ms"); delaysBetweenEachTickMillis = intervalMillis; if (loopThread != null) { throw new RuntimeException("Can not restart a node agent."); @@ -106,6 +138,7 @@ public class NodeAgentImpl implements NodeAgent { @Override public void stop() { + addDebugMessage("Stopping"); if (!terminated.compareAndSet(false, true)) { throw new RuntimeException("Can not re-stop a node agent."); } @@ -124,6 +157,7 @@ public class NodeAgentImpl implements NodeAgent { if (containerState != RUNNING_HOWEVER_RESUME_SCRIPT_NOT_RUN) { return; } + addDebugMessage("Starting optional node program resume command"); logger.log(Level.INFO, logPrefix + "Starting optional node program resume command"); dockerOperations.executeResume(nodeSpec.containerName);//, RESUME_NODE_COMMAND); containerState = RUNNING; @@ -139,6 +173,8 @@ public class NodeAgentImpl implements NodeAgent { if (!currentAttributes.equals(lastAttributesSet)) { logger.log(Level.INFO, logPrefix + "Publishing new set of attributes to node repo: " + lastAttributesSet + " -> " + currentAttributes); + addDebugMessage("Publishing new set of attributes to node repo: {" + + lastAttributesSet + "} -> {" + currentAttributes + "}"); nodeRepository.updateNodeAttributes( nodeSpec.hostname, currentAttributes.restartGeneration, @@ -152,10 +188,14 @@ public class NodeAgentImpl implements NodeAgent { private void startContainerIfNeeded(final ContainerNodeSpec nodeSpec) { if (dockerOperations.startContainerIfNeeded(nodeSpec)) { + addDebugMessage("startContainerIfNeeded: containerState " + containerState + " -> " + + RUNNING_HOWEVER_RESUME_SCRIPT_NOT_RUN); containerState = RUNNING_HOWEVER_RESUME_SCRIPT_NOT_RUN; } else { // In case container was already running on startup, we found the container, but should call if (containerState == ABSENT) { + addDebugMessage("startContainerIfNeeded: was already running, containerState set to " + + RUNNING_HOWEVER_RESUME_SCRIPT_NOT_RUN); containerState = RUNNING_HOWEVER_RESUME_SCRIPT_NOT_RUN; } } @@ -163,6 +203,7 @@ public class NodeAgentImpl implements NodeAgent { private void removeContainerIfNeededUpdateContainerState(ContainerNodeSpec nodeSpec) throws Exception { if (dockerOperations.removeContainerIfNeeded(nodeSpec, hostname, orchestrator)) { + addDebugMessage("removeContainerIfNeededUpdateContainerState: containerState " + containerState + " -> ABSENT"); containerState = ABSENT; } } @@ -183,6 +224,9 @@ public class NodeAgentImpl implements NodeAgent { @Override public void signalWorkToBeDone() { + if (!workToDoNow.get()) { + addDebugMessage("Signaling work to be done"); + } workToDoNow.set(true); synchronized (monitor) { monitor.notifyAll(); @@ -204,13 +248,13 @@ public class NodeAgentImpl implements NodeAgent { workToDoNow.set(false); isFrozen.set(wantFrozen.get()); if (isFrozen.get()) { - debugString.set(hostname + " frozen"); + addDebugMessage("loop: isFrozen"); } else { try { tick(); } catch (Exception e) { logger.log(LogLevel.ERROR, logPrefix + "Unhandled exception, ignoring.", e); - debugString.set(hostname + " " + e.getMessage()); + addDebugMessage(e.getMessage()); } catch (Throwable t) { logger.log(LogLevel.ERROR, logPrefix + "Unhandled throwable, taking down system.", t); System.exit(234); @@ -221,11 +265,15 @@ public class NodeAgentImpl implements NodeAgent { // For testing public void tick() throws Exception { - StringBuilder debugStringBuilder = new StringBuilder(hostname.toString()); final ContainerNodeSpec nodeSpec = nodeRepository.getContainerNodeSpec(hostname) .orElseThrow(() -> new IllegalStateException(String.format("Node '%s' missing from node repository.", hostname))); - debugStringBuilder.append("Loaded node spec: ").append(nodeSpec.toString()); + + if (!nodeSpec.equals(lastNodeSpec)) { + addDebugMessage("Loading new node spec: " + nodeSpec.toString()); + lastNodeSpec = nodeSpec; + } + switch (nodeSpec.nodeState) { case PROVISIONED: removeContainerIfNeededUpdateContainerState(nodeSpec); @@ -242,8 +290,7 @@ public class NodeAgentImpl implements NodeAgent { case ACTIVE: scheduleDownLoadIfNeeded(nodeSpec); if (imageBeingDownloaded != null) { - debugStringBuilder.append("Waiting for image to download " + imageBeingDownloaded.asString()); - debugString.set(debugStringBuilder.toString()); + addDebugMessage("Waiting for image to download " + imageBeingDownloaded.asString()); return; } removeContainerIfNeededUpdateContainerState(nodeSpec); @@ -278,6 +325,5 @@ public class NodeAgentImpl implements NodeAgent { default: throw new RuntimeException("UNKNOWN STATE " + nodeSpec.nodeState.name()); } - debugString.set(debugStringBuilder.toString()); } } diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/restapi/RestApiHandler.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/restapi/RestApiHandler.java index 261d6db5ff4..082f651731c 100644 --- a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/restapi/RestApiHandler.java +++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/restapi/RestApiHandler.java @@ -53,7 +53,13 @@ public class RestApiHandler extends LoggingRequestHandler{ private HttpResponse handleGet(HttpRequest request) { String path = request.getUri().getPath(); if (path.endsWith("/info")) { - return new SimpleResponse(200, refresher.getDebugPage()); + HttpResponse response = new HttpResponse(200) { + @Override + public void render(OutputStream outputStream) throws IOException { + objectMapper.writeValue(outputStream, refresher.getDebugPage()); + } + }; + return response; } return new SimpleResponse(400, "unknown path" + path); } |