summary | refs | log | tree | commit | diff | stats
path: root/container-search
diff options
context:
space:
mode:
author: Arne H Juul <arnej@yahoo-inc.com> 2017-02-03 15:07:02 +0100
committer: Arne H Juul <arnej@yahoo-inc.com> 2017-02-06 09:27:44 +0000
commit: d4c44d4a41d477182630ca0e8821c2e466adf06b (patch)
tree: 49b06f33c28041ab88bbb4eb9711389c97bd521b /container-search
parent: 5355427111df0ef2700b594022a8054a6f034d6f (diff)
assume node down until proven otherwise
* Change the initializer values in NodeMonitor so that a node is assumed to be down, with no search nodes online, until we see some data from it. Also, update the VIP status as soon as we have some nodes attached. This avoids the container believing it should be in service at startup and then changing its mind when it discovers that the underlying services are down.
Diffstat (limited to 'container-search')
-rw-r--r-- container-search/src/main/java/com/yahoo/prelude/cluster/ClusterMonitor.java 1
-rw-r--r-- container-search/src/main/java/com/yahoo/prelude/cluster/NodeMonitor.java 27
2 files changed, 12 insertions, 16 deletions
diff --git a/container-search/src/main/java/com/yahoo/prelude/cluster/ClusterMonitor.java b/container-search/src/main/java/com/yahoo/prelude/cluster/ClusterMonitor.java
index 871ecc37ea5..0312db914df 100644
--- a/container-search/src/main/java/com/yahoo/prelude/cluster/ClusterMonitor.java
+++ b/container-search/src/main/java/com/yahoo/prelude/cluster/ClusterMonitor.java
@@ -67,6 +67,7 @@ public class ClusterMonitor implements Runnable, Freezable {
if (isFrozen())
throw new IllegalStateException("Can not add new nodes after ClusterMonitor has been frozen.");
nodeMonitors.put(node, new NodeMonitor(node));
+ updateVipStatus();
}
/** Called from ClusterSearcher/NodeManager when a node failed */
diff --git a/container-search/src/main/java/com/yahoo/prelude/cluster/NodeMonitor.java b/container-search/src/main/java/com/yahoo/prelude/cluster/NodeMonitor.java
index c06b7fe04ba..ab52bab08a0 100644
--- a/container-search/src/main/java/com/yahoo/prelude/cluster/NodeMonitor.java
+++ b/container-search/src/main/java/com/yahoo/prelude/cluster/NodeMonitor.java
@@ -15,6 +15,7 @@ import com.yahoo.search.result.ErrorMessage;
* <ul>
* <li>A node is taken out of operation if it gives no response in 10 s</li>
* <li>A node is put back in operation when it responds correctly again</li>
+ * <li>A node is initially considered not in operation until we have some data from it</li>
* </ul>
*
* @author bratseth
@@ -27,13 +28,13 @@ public class NodeMonitor {
/** The object representing the monitored node */
private final VespaBackEndSearcher node;
- private boolean isWorking = true;
+ private boolean isWorking = false;
/** The last time this node responded successfully */
private long succeededAt = 0;
/** Whether it is assumed the node has documents available to serve */
- private boolean searchNodesOnline = true;
+ private boolean searchNodesOnline = false;
/**
* Creates a new node monitor for a node
@@ -66,7 +67,8 @@ public class NodeMonitor {
long respondedAt = System.currentTimeMillis();
if (error.getCode() == BACKEND_COMMUNICATION_ERROR.code
- || error.getCode() == NO_ANSWER_WHEN_PINGING_NODE.code) {
+ || error.getCode() == NO_ANSWER_WHEN_PINGING_NODE.code)
+ {
// Only count not being able to talk to backend at all
// as errors we care about
if ((respondedAt - succeededAt) > 10000) {
@@ -83,26 +85,19 @@ public class NodeMonitor {
public void responded(boolean searchNodesOnline) {
succeededAt = System.currentTimeMillis();
this.searchNodesOnline = searchNodesOnline;
- atStartUp = false;
-
- if ( ! isWorking)
+ if (! isWorking)
setWorking(true, "Responds correctly");
+ atStartUp = false;
}
/** Changes the state of this node if required */
private void setWorking(boolean working, String explanation) {
if (isWorking == working) return; // Old news
- String explanationToLog;
- if (explanation == null)
- explanationToLog = "";
- else
- explanationToLog = ": " + explanation;
-
- if (working)
- log.info("Putting " + node + " in service" + explanationToLog);
- else if ( ! atStartUp)
- log.info("Taking " + node + " out of service" + explanationToLog);
+ if (working && ! atStartUp)
+ log.info("Putting " + node + " in service:" + explanation);
+ else if (! atStartUp)
+ log.info("Taking " + node + " out of service:" + explanation);
isWorking = working;
}