diff options
author | Haakon Dybdahl <dybis@users.noreply.github.com> | 2016-07-05 15:19:55 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2016-07-05 15:19:55 +0200 |
commit | 2baf746a77e66b95afbfa4db8c223a3c2e9060cb (patch) | |
tree | d4e130fd981a45bc8e54c156f14c4efc3e26ff3a | |
parent | 9465d4a498d7ee32eee2f1dda8e9fdebdf43815f (diff) | |
parent | 956ed615fc1871f1099a081d70e5978cfae21fc5 (diff) |
Merge pull request #309 from yahoo/freva/improve-rest/info-debug-page
Improved the /rest/info page by showing more variables and keeping a …
7 files changed, 91 insertions, 27 deletions
diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeadmin/NodeAdmin.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeadmin/NodeAdmin.java index fb65f03a57b..aed8fddecb8 100644 --- a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeadmin/NodeAdmin.java +++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeadmin/NodeAdmin.java @@ -5,6 +5,7 @@ import com.yahoo.vespa.applicationmodel.HostName; import com.yahoo.vespa.hosted.node.admin.ContainerNodeSpec; import java.util.List; +import java.util.Map; import java.util.Set; /** @@ -36,9 +37,10 @@ public interface NodeAdmin { Set<HostName> getListOfHosts(); /** - * Return the state as a human readable string. Do not try to parse output or use in tests. + * Returns a map containing all relevant NodeAdmin variables and their current values. + * Do not try to parse output or use in tests. */ - String debugInfo(); + Map<String, Object> debugInfo(); /** * Stop the NodeAgent. Will not delete the storage or stop the container. diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeadmin/NodeAdminImpl.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeadmin/NodeAdminImpl.java index ad60c74dd1b..cde906dce45 100644 --- a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeadmin/NodeAdminImpl.java +++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeadmin/NodeAdminImpl.java @@ -11,9 +11,11 @@ import com.yahoo.vespa.hosted.node.admin.nodeagent.NodeAgent; import java.io.IOException; import java.time.Duration; +import java.util.ArrayList; import java.util.Collections; import java.util.HashMap; import java.util.HashSet; +import java.util.LinkedHashMap; import java.util.List; import java.util.Map; import java.util.Optional; @@ -86,13 +88,15 @@ public class NodeAdminImpl implements NodeAdmin { } @Override - public String debugInfo() { - StringBuilder debug = new StringBuilder(); + public Map<String, Object> debugInfo() { + Map<String, Object> debug = new LinkedHashMap<>(); + List<Map<String, Object>> nodeAgentDebugs = new ArrayList<>(); + for (Map.Entry<HostName, NodeAgent> node : nodeAgents.entrySet()) { - debug.append("Node ").append(node.getKey().toString()); - debug.append(" state ").append(node.getValue().debugInfo()); + nodeAgentDebugs.add(node.getValue().debugInfo()); } - return debug.toString(); + debug.put("NodeAgents", nodeAgentDebugs); + return debug; } @Override diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeadmin/NodeAdminStateUpdater.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeadmin/NodeAdminStateUpdater.java index df4bd558922..aad0c97fdd7 100644 --- a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeadmin/NodeAdminStateUpdater.java +++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeadmin/NodeAdminStateUpdater.java @@ -7,7 +7,9 @@ import com.yahoo.vespa.hosted.node.admin.noderepository.NodeRepository; import com.yahoo.vespa.hosted.node.admin.orchestrator.Orchestrator; import java.util.ArrayList; +import java.util.LinkedHashMap; import java.util.List; +import java.util.Map; import java.util.Optional; import java.util.concurrent.Executors; import java.util.concurrent.ScheduledExecutorService; @@ -52,14 +54,14 @@ public class NodeAdminStateUpdater extends AbstractComponent { this.baseHostName = baseHostName; } - public String getDebugPage() { - StringBuilder info = new StringBuilder(); + public Map<String, Object> getDebugPage() { + Map<String, Object> debug = new LinkedHashMap<>(); synchronized (monitor) { - info.append("isRunningUpdates is " + isRunningUpdates+ ". "); - info.append("NodeAdmin: "); - info.append(nodeAdmin.debugInfo()); + debug.put("isRunningUpdates", isRunningUpdates); + debug.put("baseHostName", baseHostName); + debug.put("NodeAdmin", nodeAdmin.debugInfo()); } - return info.toString(); + return debug; } public enum State { RESUMED, SUSPENDED} diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgent.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgent.java index c030cfbe058..e16322af25c 100644 --- a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgent.java +++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgent.java @@ -1,6 +1,8 @@ // Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. package com.yahoo.vespa.hosted.node.admin.nodeagent; +import java.util.Map; + /** * Responsible for management of a single node over its lifecycle. * May own its own resources, threads etc. Runs independently, but receives signals @@ -31,9 +33,9 @@ public interface NodeAgent { boolean isFrozen(); /** - * Human readable string for the state of the NodeAgent. + * Returns a map containing all relevant NodeAgent variables and their current values. */ - String debugInfo(); + Map<String, Object> debugInfo(); /** * Starts the agent. After this method is called, the agent will asynchronously maintain the node, continuously diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImpl.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImpl.java index 2690a8f3fb8..249563146d3 100644 --- a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImpl.java +++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImpl.java @@ -9,10 +9,14 @@ import com.yahoo.vespa.hosted.node.admin.noderepository.NodeRepository; import com.yahoo.vespa.hosted.node.admin.orchestrator.Orchestrator; import java.io.IOException; +import java.text.SimpleDateFormat; +import java.util.Date; +import java.util.LinkedHashMap; +import java.util.LinkedList; +import java.util.Map; import java.time.Duration; import java.time.Instant; import java.util.concurrent.atomic.AtomicBoolean; -import java.util.concurrent.atomic.AtomicReference; import java.util.logging.Level; import java.util.logging.Logger; @@ -45,7 +49,8 @@ public class NodeAgentImpl implements NodeAgent { private final Object monitor = new Object(); - private AtomicReference<String> debugString = new AtomicReference<>("not started"); + private final LinkedList<String> debugMessages = new LinkedList<>(); + private final SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss"); private long delaysBetweenEachTickMillis; @@ -60,6 +65,7 @@ public class NodeAgentImpl implements NodeAgent { // The attributes of the last successful noderepo attribute update for this node. Used to avoid redundant calls. private NodeAttributes lastAttributesSet = null; + private ContainerNodeSpec lastNodeSpec = null; public NodeAgentImpl( final HostName hostName, @@ -75,12 +81,18 @@ public class NodeAgentImpl implements NodeAgent { @Override public void freeze() { + if (!wantFrozen.get()) { + addDebugMessage("Freezing"); + } wantFrozen.set(true); signalWorkToBeDone(); } @Override public void unfreeze() { + if (wantFrozen.get()) { + addDebugMessage("Unfreezing"); + } wantFrozen.set(false); signalWorkToBeDone(); } @@ -90,13 +102,33 @@ public class NodeAgentImpl implements NodeAgent { return isFrozen.get(); } + private void addDebugMessage(String message) { + synchronized (monitor) { + while (debugMessages.size() > 100) { + debugMessages.pop(); + } + + debugMessages.add("[" + sdf.format(new Date()) + "] " + message); + } + } + @Override - public String debugInfo() { - return debugString.get(); + public Map<String, Object> debugInfo() { + Map<String, Object> debug = new LinkedHashMap<>(); + debug.put("Hostname", hostname); + debug.put("isFrozen", isFrozen()); + debug.put("wantFrozen", wantFrozen.get()); + debug.put("terminated", terminated.get()); + debug.put("workToDoNow", workToDoNow); + synchronized (monitor) { + debug.put("History", new LinkedList<>(debugMessages)); + } + return debug; } @Override public void start(int intervalMillis) { + addDebugMessage("Starting with interval " + intervalMillis + "ms"); delaysBetweenEachTickMillis = intervalMillis; if (loopThread != null) { throw new RuntimeException("Can not restart a node agent."); @@ -108,6 +140,7 @@ public class NodeAgentImpl implements NodeAgent { @Override public void stop() { + addDebugMessage("Stopping"); if (!terminated.compareAndSet(false, true)) { throw new RuntimeException("Can not re-stop a node agent."); } @@ -126,6 +159,7 @@ public class NodeAgentImpl implements NodeAgent { if (containerState != RUNNING_HOWEVER_RESUME_SCRIPT_NOT_RUN) { return; } + addDebugMessage("Starting optional node program resume command"); logger.log(Level.INFO, logPrefix + "Starting optional node program resume command"); dockerOperations.executeResume(nodeSpec.containerName);//, RESUME_NODE_COMMAND); containerState = RUNNING; @@ -141,6 +175,8 @@ public class NodeAgentImpl implements NodeAgent { if (!currentAttributes.equals(lastAttributesSet)) { logger.log(Level.INFO, logPrefix + "Publishing new set of attributes to node repo: " + lastAttributesSet + " -> " + currentAttributes); + addDebugMessage("Publishing new set of attributes to node repo: {" + + lastAttributesSet + "} -> {" + currentAttributes + "}"); nodeRepository.updateNodeAttributes( nodeSpec.hostname, currentAttributes.restartGeneration, @@ -154,10 +190,14 @@ public class NodeAgentImpl implements NodeAgent { private void startContainerIfNeeded(final ContainerNodeSpec nodeSpec) { if (dockerOperations.startContainerIfNeeded(nodeSpec)) { + addDebugMessage("startContainerIfNeeded: containerState " + containerState + " -> " + + RUNNING_HOWEVER_RESUME_SCRIPT_NOT_RUN); containerState = RUNNING_HOWEVER_RESUME_SCRIPT_NOT_RUN; } else { // In case container was already running on startup, we found the container, but should call if (containerState == ABSENT) { + addDebugMessage("startContainerIfNeeded: was already running, containerState set to " + + RUNNING_HOWEVER_RESUME_SCRIPT_NOT_RUN); containerState = RUNNING_HOWEVER_RESUME_SCRIPT_NOT_RUN; } } @@ -165,6 +205,7 @@ public class NodeAgentImpl implements NodeAgent { private void removeContainerIfNeededUpdateContainerState(ContainerNodeSpec nodeSpec) throws Exception { if (dockerOperations.removeContainerIfNeeded(nodeSpec, hostname, orchestrator)) { + addDebugMessage("removeContainerIfNeededUpdateContainerState: containerState " + containerState + " -> ABSENT"); containerState = ABSENT; } } @@ -185,6 +226,10 @@ public class NodeAgentImpl implements NodeAgent { @Override public void signalWorkToBeDone() { + if (!workToDoNow) { + addDebugMessage("Signaling work to be done"); + } + synchronized (monitor) { workToDoNow = true; monitor.notifyAll(); @@ -209,13 +254,13 @@ public class NodeAgentImpl implements NodeAgent { } isFrozen.set(wantFrozen.get()); if (isFrozen.get()) { - debugString.set(hostname + " frozen"); + addDebugMessage("loop: isFrozen"); } else { try { tick(); } catch (Exception e) { logger.log(LogLevel.ERROR, logPrefix + "Unhandled exception, ignoring.", e); - debugString.set(hostname + " " + e.getMessage()); + addDebugMessage(e.getMessage()); } catch (Throwable t) { logger.log(LogLevel.ERROR, logPrefix + "Unhandled throwable, taking down system.", t); System.exit(234); @@ -226,11 +271,15 @@ public class NodeAgentImpl implements NodeAgent { // For testing public void tick() throws Exception { - StringBuilder debugStringBuilder = new StringBuilder(hostname.toString()); final ContainerNodeSpec nodeSpec = nodeRepository.getContainerNodeSpec(hostname) .orElseThrow(() -> new IllegalStateException(String.format("Node '%s' missing from node repository.", hostname))); - debugStringBuilder.append("Loaded node spec: ").append(nodeSpec.toString()); + + if (!nodeSpec.equals(lastNodeSpec)) { + addDebugMessage("Loading new node spec: " + nodeSpec.toString()); + lastNodeSpec = nodeSpec; + } + switch (nodeSpec.nodeState) { case PROVISIONED: removeContainerIfNeededUpdateContainerState(nodeSpec); @@ -247,8 +296,7 @@ public class NodeAgentImpl implements NodeAgent { case ACTIVE: scheduleDownLoadIfNeeded(nodeSpec); if (imageBeingDownloaded != null) { - debugStringBuilder.append("Waiting for image to download " + imageBeingDownloaded.asString()); - debugString.set(debugStringBuilder.toString()); + addDebugMessage("Waiting for image to download " + imageBeingDownloaded.asString()); return; } removeContainerIfNeededUpdateContainerState(nodeSpec); @@ -283,6 +331,5 @@ public class NodeAgentImpl implements NodeAgent { default: throw new RuntimeException("UNKNOWN STATE " + nodeSpec.nodeState.name()); } - debugString.set(debugStringBuilder.toString()); } } diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/restapi/RestApiHandler.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/restapi/RestApiHandler.java index 261d6db5ff4..2ce4151f497 100644 --- a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/restapi/RestApiHandler.java +++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/restapi/RestApiHandler.java @@ -53,7 +53,12 @@ public class RestApiHandler extends LoggingRequestHandler{ private HttpResponse handleGet(HttpRequest request) { String path = request.getUri().getPath(); if (path.endsWith("/info")) { - return new SimpleResponse(200, refresher.getDebugPage()); + return new HttpResponse(200) { + @Override + public void render(OutputStream outputStream) throws IOException { + objectMapper.writeValue(outputStream, refresher.getDebugPage()); + } + }; } return new SimpleResponse(400, "unknown path" + path); } diff --git a/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/integrationTests/RunInContainerTest.java b/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/integrationTests/RunInContainerTest.java index 51445afcea9..5cbf80fb3c5 100644 --- a/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/integrationTests/RunInContainerTest.java +++ b/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/integrationTests/RunInContainerTest.java @@ -114,6 +114,8 @@ public class RunInContainerTest { OrchestratorMock.setForceGroupSuspendResponse(Optional.of("Denied")); assertThat(doPutCall("suspend"), is(false)); assertThat(OrchestratorMock.getRequests(), is("Suspend with parent: localhost and hostnames: [] - Forced response: Optional[Denied]\n")); + + assertThat(doGetInfoCall(), is("{\"isRunningUpdates\":false,\"baseHostName\":\"localhost\",\"NodeAdmin\":{\"NodeAgents\":[]}}")); } |