diff options
author | Jon Bratseth <bratseth@oath.com> | 2018-03-02 13:06:58 +0100 |
---|---|---|
committer | Jon Bratseth <bratseth@oath.com> | 2018-03-02 13:06:58 +0100 |
commit | 80fdf13668620793ab8d888be01c6f15304ac3bc (patch) | |
tree | 0a76b59626d09ad9e37d0f39482c8849ece314ba /container-search | |
parent | e6972d509c3cb3b7f1d53fde9e99ab4bed688fde (diff) |
Don't change VIP status when creating a new ClusterSearcher
This should fix the problem where a container cluster briefly goes offline
when changing flavor of the cluster.
Such a flavor change leads to a new ClusterSearcher instance being constructed.
This instance will start issuing ping requests to all downstream nodes.
If the first response is from a failing node - which may happen here
because the newly added nodes (with new flavor) are not online yet,
the cluster monitor will conclude that no nodes are up and take itself
offline.
This PR defers any decision-making about VIP status until
we have status information from all nodes in the cluster.
Diffstat (limited to 'container-search')
3 files changed, 33 insertions, 23 deletions
diff --git a/container-search/src/main/java/com/yahoo/prelude/cluster/ClusterMonitor.java b/container-search/src/main/java/com/yahoo/prelude/cluster/ClusterMonitor.java index 22890f781fe..4e708e32a2d 100644 --- a/container-search/src/main/java/com/yahoo/prelude/cluster/ClusterMonitor.java +++ b/container-search/src/main/java/com/yahoo/prelude/cluster/ClusterMonitor.java @@ -37,6 +37,7 @@ public class ClusterMonitor implements Runnable, Freezable { /** A map from Node to corresponding MonitoredNode */ private final Map<VespaBackEndSearcher, NodeMonitor> nodeMonitors = new java.util.IdentityHashMap<>(); + private ScheduledFuture<?> future; private boolean isFrozen = false; @@ -96,21 +97,31 @@ public class ClusterMonitor implements Runnable, Freezable { private void updateVipStatus() { if ( ! vipStatus.isPresent()) return; + if ( ! hasInformationAboutAllNodes()) return; - boolean hasWorkingNodesWithDocumentsOnline = false; - for (NodeMonitor node : nodeMonitors.values()) { - if (node.isWorking() && node.searchNodesOnline()) { - hasWorkingNodesWithDocumentsOnline = true; - break; - } - } - if (hasWorkingNodesWithDocumentsOnline) { + if (hasWorkingNodesWithDocumentsOnline()) { vipStatus.get().addToRotation(this); } else { vipStatus.get().removeFromRotation(this); } } + private boolean hasInformationAboutAllNodes() { + for (NodeMonitor monitor : nodeMonitors.values()) { + if ( ! 
monitor.statusIsKnown()) + return false; + } + return true; + } + + private boolean hasWorkingNodesWithDocumentsOnline() { + for (NodeMonitor node : nodeMonitors.values()) { + if (node.isWorking() && node.searchNodesOnline()) + return true; + } + return false; + } + /** * Ping all nodes which needs pinging to discover state changes */ @@ -130,7 +141,7 @@ public class ClusterMonitor implements Runnable, Freezable { } } - public void shutdown() throws InterruptedException { + public void shutdown() { if (future != null) { future.cancel(true); } diff --git a/container-search/src/main/java/com/yahoo/prelude/cluster/ClusterSearcher.java b/container-search/src/main/java/com/yahoo/prelude/cluster/ClusterSearcher.java index ca1afbd4171..89f58bc944b 100644 --- a/container-search/src/main/java/com/yahoo/prelude/cluster/ClusterSearcher.java +++ b/container-search/src/main/java/com/yahoo/prelude/cluster/ClusterSearcher.java @@ -591,11 +591,7 @@ public class ClusterSearcher extends Searcher { @Override public void deconstruct() { - try { - monitor.shutdown(); - } catch (final InterruptedException e) { - Thread.currentThread().interrupt(); - } + monitor.shutdown(); } ExecutorService getExecutor() { diff --git a/container-search/src/main/java/com/yahoo/prelude/cluster/NodeMonitor.java b/container-search/src/main/java/com/yahoo/prelude/cluster/NodeMonitor.java index b60aecc2e51..5ccaff0f198 100644 --- a/container-search/src/main/java/com/yahoo/prelude/cluster/NodeMonitor.java +++ b/container-search/src/main/java/com/yahoo/prelude/cluster/NodeMonitor.java @@ -52,7 +52,7 @@ public class NodeMonitor { } // Whether or not dispatch has ever responded successfully - private boolean atStartUp = true; + private boolean statusIsKnown = false; public VespaBackEndSearcher getNode() { return node; @@ -88,23 +88,26 @@ public class NodeMonitor { this.searchNodesOnline = searchNodesOnline; if (! 
isWorking) setWorking(true, "Responds correctly"); - atStartUp = false; + statusIsKnown = true; } /** Changes the state of this node if required */ private void setWorking(boolean working, String explanation) { if (isWorking == working) return; // Old news - if (working && ! atStartUp) - log.info("Putting " + node + " in service: " + explanation); - else if (! atStartUp) - log.info("Taking " + node + " out of service: " + explanation); + if (statusIsKnown) { + if (working) + log.info("Putting " + node + " in service: " + explanation); + else + log.info("Taking " + node + " out of service: " + explanation); + } isWorking = working; } - boolean searchNodesOnline() { - return searchNodesOnline; - } + boolean searchNodesOnline() { return searchNodesOnline; } + + /** Returns true if we have had enough time to determine the status of this node since creating the monitor */ + boolean statusIsKnown() { return statusIsKnown; } } |