aboutsummaryrefslogtreecommitdiffstats
path: root/container-search
diff options
context:
space:
mode:
authorJon Bratseth <bratseth@oath.com>2018-03-02 13:06:58 +0100
committerJon Bratseth <bratseth@oath.com>2018-03-02 13:06:58 +0100
commit80fdf13668620793ab8d888be01c6f15304ac3bc (patch)
tree0a76b59626d09ad9e37d0f39482c8849ece314ba /container-search
parente6972d509c3cb3b7f1d53fde9e99ab4bed688fde (diff)
Don't change VIP status when creating a new ClusterSearcher
This should fix the problem where a container cluster briefly goes offline when changing the flavor of the cluster. Such a flavor change leads to a new ClusterSearcher instance being constructed. This instance will start issuing ping requests to all downstream nodes. If the first response is from a failing node - which may happen here because the newly added nodes (with the new flavor) are not online yet - the cluster monitor will conclude that no nodes are up and take itself offline. This PR defers any decision-making about VIP status until we have status information from all nodes in the cluster.
Diffstat (limited to 'container-search')
-rw-r--r--container-search/src/main/java/com/yahoo/prelude/cluster/ClusterMonitor.java29
-rw-r--r--container-search/src/main/java/com/yahoo/prelude/cluster/ClusterSearcher.java6
-rw-r--r--container-search/src/main/java/com/yahoo/prelude/cluster/NodeMonitor.java21
3 files changed, 33 insertions, 23 deletions
diff --git a/container-search/src/main/java/com/yahoo/prelude/cluster/ClusterMonitor.java b/container-search/src/main/java/com/yahoo/prelude/cluster/ClusterMonitor.java
index 22890f781fe..4e708e32a2d 100644
--- a/container-search/src/main/java/com/yahoo/prelude/cluster/ClusterMonitor.java
+++ b/container-search/src/main/java/com/yahoo/prelude/cluster/ClusterMonitor.java
@@ -37,6 +37,7 @@ public class ClusterMonitor implements Runnable, Freezable {
/** A map from Node to corresponding MonitoredNode */
private final Map<VespaBackEndSearcher, NodeMonitor> nodeMonitors = new java.util.IdentityHashMap<>();
+
private ScheduledFuture<?> future;
private boolean isFrozen = false;
@@ -96,21 +97,31 @@ public class ClusterMonitor implements Runnable, Freezable {
private void updateVipStatus() {
if ( ! vipStatus.isPresent()) return;
+ if ( ! hasInformationAboutAllNodes()) return;
- boolean hasWorkingNodesWithDocumentsOnline = false;
- for (NodeMonitor node : nodeMonitors.values()) {
- if (node.isWorking() && node.searchNodesOnline()) {
- hasWorkingNodesWithDocumentsOnline = true;
- break;
- }
- }
- if (hasWorkingNodesWithDocumentsOnline) {
+ if (hasWorkingNodesWithDocumentsOnline()) {
vipStatus.get().addToRotation(this);
} else {
vipStatus.get().removeFromRotation(this);
}
}
+ private boolean hasInformationAboutAllNodes() {
+ for (NodeMonitor monitor : nodeMonitors.values()) {
+ if ( ! monitor.statusIsKnown())
+ return false;
+ }
+ return true;
+ }
+
+ private boolean hasWorkingNodesWithDocumentsOnline() {
+ for (NodeMonitor node : nodeMonitors.values()) {
+ if (node.isWorking() && node.searchNodesOnline())
+ return true;
+ }
+ return false;
+ }
+
/**
* Ping all nodes which needs pinging to discover state changes
*/
@@ -130,7 +141,7 @@ public class ClusterMonitor implements Runnable, Freezable {
}
}
- public void shutdown() throws InterruptedException {
+ public void shutdown() {
if (future != null) {
future.cancel(true);
}
diff --git a/container-search/src/main/java/com/yahoo/prelude/cluster/ClusterSearcher.java b/container-search/src/main/java/com/yahoo/prelude/cluster/ClusterSearcher.java
index ca1afbd4171..89f58bc944b 100644
--- a/container-search/src/main/java/com/yahoo/prelude/cluster/ClusterSearcher.java
+++ b/container-search/src/main/java/com/yahoo/prelude/cluster/ClusterSearcher.java
@@ -591,11 +591,7 @@ public class ClusterSearcher extends Searcher {
@Override
public void deconstruct() {
- try {
- monitor.shutdown();
- } catch (final InterruptedException e) {
- Thread.currentThread().interrupt();
- }
+ monitor.shutdown();
}
ExecutorService getExecutor() {
diff --git a/container-search/src/main/java/com/yahoo/prelude/cluster/NodeMonitor.java b/container-search/src/main/java/com/yahoo/prelude/cluster/NodeMonitor.java
index b60aecc2e51..5ccaff0f198 100644
--- a/container-search/src/main/java/com/yahoo/prelude/cluster/NodeMonitor.java
+++ b/container-search/src/main/java/com/yahoo/prelude/cluster/NodeMonitor.java
@@ -52,7 +52,7 @@ public class NodeMonitor {
}
// Whether or not dispatch has ever responded successfully
- private boolean atStartUp = true;
+ private boolean statusIsKnown = false;
public VespaBackEndSearcher getNode() {
return node;
@@ -88,23 +88,26 @@ public class NodeMonitor {
this.searchNodesOnline = searchNodesOnline;
if (! isWorking)
setWorking(true, "Responds correctly");
- atStartUp = false;
+ statusIsKnown = true;
}
/** Changes the state of this node if required */
private void setWorking(boolean working, String explanation) {
if (isWorking == working) return; // Old news
- if (working && ! atStartUp)
- log.info("Putting " + node + " in service: " + explanation);
- else if (! atStartUp)
- log.info("Taking " + node + " out of service: " + explanation);
+ if (statusIsKnown) {
+ if (working)
+ log.info("Putting " + node + " in service: " + explanation);
+ else
+ log.info("Taking " + node + " out of service: " + explanation);
+ }
isWorking = working;
}
- boolean searchNodesOnline() {
- return searchNodesOnline;
- }
+ boolean searchNodesOnline() { return searchNodesOnline; }
+
+ /** Returns true if we have had enough time to determine the status of this node since creating the monitor */
+ boolean statusIsKnown() { return statusIsKnown; }
}