diff options
author | Jon Bratseth <jonbratseth@yahoo.com> | 2017-02-06 13:31:42 +0100 |
---|---|---|
committer | GitHub <noreply@github.com> | 2017-02-06 13:31:42 +0100 |
commit | 030f6a8b659ed0e5ca5b2a42b4ba9761dfae3121 (patch) | |
tree | c1c986e09b2b1794b12b49cec03f9e1d0e5dbeb2 /container-search | |
parent | f4ace23e3314e4dd3708044c38453c1ac15c2a5e (diff) | |
parent | d4c44d4a41d477182630ca0e8821c2e466adf06b (diff) |
Merge pull request #1686 from yahoo/arnej/assume-node-not-working
assume node down until proven otherwise
Diffstat (limited to 'container-search')
-rw-r--r-- | container-search/src/main/java/com/yahoo/prelude/cluster/ClusterMonitor.java | 1 | ||||
-rw-r--r-- | container-search/src/main/java/com/yahoo/prelude/cluster/NodeMonitor.java | 27 |
2 files changed, 12 insertions, 16 deletions
```diff
diff --git a/container-search/src/main/java/com/yahoo/prelude/cluster/ClusterMonitor.java b/container-search/src/main/java/com/yahoo/prelude/cluster/ClusterMonitor.java
index 871ecc37ea5..0312db914df 100644
--- a/container-search/src/main/java/com/yahoo/prelude/cluster/ClusterMonitor.java
+++ b/container-search/src/main/java/com/yahoo/prelude/cluster/ClusterMonitor.java
@@ -67,6 +67,7 @@ public class ClusterMonitor implements Runnable, Freezable {
         if (isFrozen())
             throw new IllegalStateException("Can not add new nodes after ClusterMonitor has been frozen.");
         nodeMonitors.put(node, new NodeMonitor(node));
+        updateVipStatus();
     }
 
     /** Called from ClusterSearcher/NodeManager when a node failed */
diff --git a/container-search/src/main/java/com/yahoo/prelude/cluster/NodeMonitor.java b/container-search/src/main/java/com/yahoo/prelude/cluster/NodeMonitor.java
index c06b7fe04ba..ab52bab08a0 100644
--- a/container-search/src/main/java/com/yahoo/prelude/cluster/NodeMonitor.java
+++ b/container-search/src/main/java/com/yahoo/prelude/cluster/NodeMonitor.java
@@ -15,6 +15,7 @@ import com.yahoo.search.result.ErrorMessage;
  * <ul>
  * <li>A node is taken out of operation if it gives no response in 10 s</li>
  * <li>A node is put back in operation when it responds correctly again</li>
+ * <li>A node is initially considered not in operation until we have some data from it</li>
  * </ul>
  *
  * @author bratseth
@@ -27,13 +28,13 @@ public class NodeMonitor {
     /** The object representing the monitored node */
     private final VespaBackEndSearcher node;
 
-    private boolean isWorking = true;
+    private boolean isWorking = false;
 
     /** The last time this node responded successfully */
     private long succeededAt = 0;
 
     /** Whether it is assumed the node has documents available to serve */
-    private boolean searchNodesOnline = true;
+    private boolean searchNodesOnline = false;
 
     /**
      * Creates a new node monitor for a node
@@ -66,7 +67,8 @@ public class NodeMonitor {
         long respondedAt = System.currentTimeMillis();
 
         if (error.getCode() == BACKEND_COMMUNICATION_ERROR.code
-            || error.getCode() == NO_ANSWER_WHEN_PINGING_NODE.code) {
+            || error.getCode() == NO_ANSWER_WHEN_PINGING_NODE.code)
+        {
             // Only count not being able to talk to backend at all
             // as errors we care about
             if ((respondedAt - succeededAt) > 10000) {
@@ -83,26 +85,19 @@ public class NodeMonitor {
     public void responded(boolean searchNodesOnline) {
         succeededAt = System.currentTimeMillis();
         this.searchNodesOnline = searchNodesOnline;
-        atStartUp = false;
-
-        if ( ! isWorking)
+        if (! isWorking)
             setWorking(true, "Responds correctly");
+        atStartUp = false;
     }
 
     /** Changes the state of this node if required */
     private void setWorking(boolean working, String explanation) {
         if (isWorking == working) return; // Old news
 
-        String explanationToLog;
-        if (explanation == null)
-            explanationToLog = "";
-        else
-            explanationToLog = ": " + explanation;
-
-        if (working)
-            log.info("Putting " + node + " in service" + explanationToLog);
-        else if ( ! atStartUp)
-            log.info("Taking " + node + " out of service" + explanationToLog);
+        if (working && ! atStartUp)
+            log.info("Putting " + node + " in service:" + explanation);
+        else if (! atStartUp)
+            log.info("Taking " + node + " out of service:" + explanation);
 
         isWorking = working;
     }
```