diff options
author | Arne H Juul <arnej@yahoo-inc.com> | 2017-02-03 15:07:02 +0100 |
---|---|---|
committer | Arne H Juul <arnej@yahoo-inc.com> | 2017-02-06 09:27:44 +0000 |
commit | d4c44d4a41d477182630ca0e8821c2e466adf06b (patch) | |
tree | 49b06f33c28041ab88bbb4eb9711389c97bd521b /container-search | |
parent | 5355427111df0ef2700b594022a8054a6f034d6f (diff) |
assume node down until proven otherwise
* Change the initializer values in NodeMonitor to assume
the node is down and has no search nodes online
until we see some data from it. Also, update the VIP
status as soon as we have some nodes attached.
This is to avoid the container believing it should be in
service at startup and then changing its mind when it
discovers that underlying services are down.
Diffstat (limited to 'container-search')
-rw-r--r-- | container-search/src/main/java/com/yahoo/prelude/cluster/ClusterMonitor.java | 1 | ||||
-rw-r--r-- | container-search/src/main/java/com/yahoo/prelude/cluster/NodeMonitor.java | 27 |
2 files changed, 12 insertions, 16 deletions
diff --git a/container-search/src/main/java/com/yahoo/prelude/cluster/ClusterMonitor.java b/container-search/src/main/java/com/yahoo/prelude/cluster/ClusterMonitor.java index 871ecc37ea5..0312db914df 100644 --- a/container-search/src/main/java/com/yahoo/prelude/cluster/ClusterMonitor.java +++ b/container-search/src/main/java/com/yahoo/prelude/cluster/ClusterMonitor.java @@ -67,6 +67,7 @@ public class ClusterMonitor implements Runnable, Freezable { if (isFrozen()) throw new IllegalStateException("Can not add new nodes after ClusterMonitor has been frozen."); nodeMonitors.put(node, new NodeMonitor(node)); + updateVipStatus(); } /** Called from ClusterSearcher/NodeManager when a node failed */ diff --git a/container-search/src/main/java/com/yahoo/prelude/cluster/NodeMonitor.java b/container-search/src/main/java/com/yahoo/prelude/cluster/NodeMonitor.java index c06b7fe04ba..ab52bab08a0 100644 --- a/container-search/src/main/java/com/yahoo/prelude/cluster/NodeMonitor.java +++ b/container-search/src/main/java/com/yahoo/prelude/cluster/NodeMonitor.java @@ -15,6 +15,7 @@ import com.yahoo.search.result.ErrorMessage; * <ul> * <li>A node is taken out of operation if it gives no response in 10 s</li> * <li>A node is put back in operation when it responds correctly again</li> + * <li>A node is initially considered not in operation until we have some data from it</li> * </ul> * * @author bratseth @@ -27,13 +28,13 @@ public class NodeMonitor { /** The object representing the monitored node */ private final VespaBackEndSearcher node; - private boolean isWorking = true; + private boolean isWorking = false; /** The last time this node responded successfully */ private long succeededAt = 0; /** Whether it is assumed the node has documents available to serve */ - private boolean searchNodesOnline = true; + private boolean searchNodesOnline = false; /** * Creates a new node monitor for a node @@ -66,7 +67,8 @@ public class NodeMonitor { long respondedAt = System.currentTimeMillis(); if 
(error.getCode() == BACKEND_COMMUNICATION_ERROR.code - || error.getCode() == NO_ANSWER_WHEN_PINGING_NODE.code) { + || error.getCode() == NO_ANSWER_WHEN_PINGING_NODE.code) + { // Only count not being able to talk to backend at all // as errors we care about if ((respondedAt - succeededAt) > 10000) { @@ -83,26 +85,19 @@ public class NodeMonitor { public void responded(boolean searchNodesOnline) { succeededAt = System.currentTimeMillis(); this.searchNodesOnline = searchNodesOnline; - atStartUp = false; - - if ( ! isWorking) + if (! isWorking) setWorking(true, "Responds correctly"); + atStartUp = false; } /** Changes the state of this node if required */ private void setWorking(boolean working, String explanation) { if (isWorking == working) return; // Old news - String explanationToLog; - if (explanation == null) - explanationToLog = ""; - else - explanationToLog = ": " + explanation; - - if (working) - log.info("Putting " + node + " in service" + explanationToLog); - else if ( ! atStartUp) - log.info("Taking " + node + " out of service" + explanationToLog); + if (working && ! atStartUp) + log.info("Putting " + node + " in service:" + explanation); + else if (! atStartUp) + log.info("Taking " + node + " out of service:" + explanation); isWorking = working; } |