aboutsummaryrefslogtreecommitdiffstats
path: root/container-search
diff options
context:
space:
mode:
authorHenning Baldersheim <balder@yahoo-inc.com>2019-09-19 23:52:12 +0200
committerGitHub <noreply@github.com>2019-09-19 23:52:12 +0200
commitfcd8c24722ad701a37929b1362153aff327c7354 (patch)
tree49355601ca1c8f339e2a03ba27d5b13f3d0770ef /container-search
parent1cf473012facbf3c185a91ae9c9f80f90f5988c4 (diff)
parentee0100b9d505ab9238ea48d21be6326a26d90936 (diff)
Merge pull request #10739 from vespa-engine/bratseth/vip-logic-take-2
Bratseth/vip logic take 2
Diffstat (limited to 'container-search')
-rw-r--r--container-search/abi-spec.json1
-rw-r--r--container-search/src/main/java/com/yahoo/search/cluster/BaseNodeMonitor.java16
-rw-r--r--container-search/src/main/java/com/yahoo/search/cluster/ClusterMonitor.java23
-rw-r--r--container-search/src/main/java/com/yahoo/search/cluster/TrafficNodeMonitor.java10
-rw-r--r--container-search/src/main/java/com/yahoo/search/dispatch/Dispatcher.java5
-rw-r--r--container-search/src/main/java/com/yahoo/search/dispatch/InvokerFactory.java13
-rw-r--r--container-search/src/main/java/com/yahoo/search/dispatch/searchcluster/Group.java13
-rw-r--r--container-search/src/main/java/com/yahoo/search/dispatch/searchcluster/Node.java9
-rw-r--r--container-search/src/main/java/com/yahoo/search/dispatch/searchcluster/PingFactory.java2
-rw-r--r--container-search/src/main/java/com/yahoo/search/dispatch/searchcluster/SearchCluster.java134
-rw-r--r--container-search/src/test/java/com/yahoo/search/cluster/test/ClusteredConnectionTestCase.java2
-rw-r--r--container-search/src/test/java/com/yahoo/search/dispatch/DispatcherTest.java2
-rw-r--r--container-search/src/test/java/com/yahoo/search/dispatch/MockSearchCluster.java9
-rw-r--r--container-search/src/test/java/com/yahoo/search/dispatch/searchcluster/SearchClusterTest.java279
14 files changed, 417 insertions, 101 deletions
diff --git a/container-search/abi-spec.json b/container-search/abi-spec.json
index 47c27a52e0b..facf894272d 100644
--- a/container-search/abi-spec.json
+++ b/container-search/abi-spec.json
@@ -2080,6 +2080,7 @@
"public java.lang.Object getNode()",
"public void failed(com.yahoo.search.result.ErrorMessage)",
"public void responded()",
+ "public java.lang.Boolean isKnownWorking()",
"protected synchronized void setWorking(boolean, java.lang.String)"
],
"fields": []
diff --git a/container-search/src/main/java/com/yahoo/search/cluster/BaseNodeMonitor.java b/container-search/src/main/java/com/yahoo/search/cluster/BaseNodeMonitor.java
index 59674d25402..dd01d895963 100644
--- a/container-search/src/main/java/com/yahoo/search/cluster/BaseNodeMonitor.java
+++ b/container-search/src/main/java/com/yahoo/search/cluster/BaseNodeMonitor.java
@@ -26,25 +26,25 @@ public abstract class BaseNodeMonitor<T> {
/** The object representing the monitored node */
protected T node;
- protected boolean isWorking=true;
+ protected boolean isWorking = true;
/** Whether this node is quarantined for unstability */
- protected boolean isQuarantined=false;
+ protected boolean isQuarantined = false;
/** The last time this node failed, in ms */
- protected long failedAt=0;
+ protected long failedAt = 0;
/** The last time this node responded (failed or succeeded), in ms */
- protected long respondedAt=0;
+ protected long respondedAt = 0;
/** The last time this node responded successfully */
- protected long succeededAt=0;
+ protected long succeededAt = 0;
/** The configuration of this monitor */
protected MonitorConfiguration configuration;
/** Is the node we monitor part of an internal Vespa cluster or not */
- private boolean internal=false;
+ private boolean internal;
public BaseNodeMonitor(boolean internal) {
this.internal=internal;
@@ -54,10 +54,12 @@ public abstract class BaseNodeMonitor<T> {
/**
* Returns whether this node is currently in a state suitable
- * for receiving traffic. As far as we know, that is
+ * for receiving traffic (default true)
*/
public boolean isWorking() { return isWorking; }
+ /** @deprecated Not used */
+ @Deprecated // TODO: Remove on Vespa 8
public boolean isQuarantined() { return isQuarantined; }
/**
diff --git a/container-search/src/main/java/com/yahoo/search/cluster/ClusterMonitor.java b/container-search/src/main/java/com/yahoo/search/cluster/ClusterMonitor.java
index ac0c8375f04..22c7f59872c 100644
--- a/container-search/src/main/java/com/yahoo/search/cluster/ClusterMonitor.java
+++ b/container-search/src/main/java/com/yahoo/search/cluster/ClusterMonitor.java
@@ -22,9 +22,9 @@ import java.util.logging.Logger;
*/
public class ClusterMonitor<T> {
- private MonitorConfiguration configuration = new MonitorConfiguration();
+ private final MonitorConfiguration configuration = new MonitorConfiguration();
- private static Logger log=Logger.getLogger(ClusterMonitor.class.getName());
+ private static Logger log = Logger.getLogger(ClusterMonitor.class.getName());
private NodeManager<T> nodeManager;
@@ -33,7 +33,7 @@ public class ClusterMonitor<T> {
private volatile boolean shutdown = false;
/** A map from Node to corresponding MonitoredNode */
- private final Map<T, BaseNodeMonitor<T>> nodeMonitors = Collections.synchronizedMap(new java.util.LinkedHashMap<>());
+ private final Map<T, TrafficNodeMonitor<T>> nodeMonitors = Collections.synchronizedMap(new java.util.LinkedHashMap<>());
public ClusterMonitor(NodeManager<T> manager) {
nodeManager = manager;
@@ -56,8 +56,7 @@ public class ClusterMonitor<T> {
* @param internal whether or not this node is internal to this cluster
*/
public void add(T node, boolean internal) {
- BaseNodeMonitor<T> monitor = new TrafficNodeMonitor<>(node, configuration, internal);
- nodeMonitors.put(node, monitor);
+ nodeMonitors.put(node, new TrafficNodeMonitor<>(node, configuration, internal));
}
/**
@@ -69,22 +68,20 @@ public class ClusterMonitor<T> {
/** Called from ClusterSearcher/NodeManager when a node failed */
public synchronized void failed(T node, ErrorMessage error) {
- BaseNodeMonitor<T> monitor = nodeMonitors.get(node);
- boolean wasWorking = monitor.isWorking();
+ TrafficNodeMonitor<T> monitor = nodeMonitors.get(node);
+ Boolean wasWorking = monitor.isKnownWorking();
monitor.failed(error);
- if (wasWorking && !monitor.isWorking()) {
+ if (wasWorking != monitor.isKnownWorking())
nodeManager.failed(node);
- }
}
/** Called when a node responded */
public synchronized void responded(T node) {
- BaseNodeMonitor<T> monitor = nodeMonitors.get(node);
- boolean wasFailing =! monitor.isWorking();
+ TrafficNodeMonitor<T> monitor = nodeMonitors.get(node);
+ Boolean wasWorking = monitor.isKnownWorking();
monitor.responded();
- if (wasFailing && monitor.isWorking()) {
+ if (wasWorking != monitor.isKnownWorking())
nodeManager.working(monitor.getNode());
- }
}
/**
diff --git a/container-search/src/main/java/com/yahoo/search/cluster/TrafficNodeMonitor.java b/container-search/src/main/java/com/yahoo/search/cluster/TrafficNodeMonitor.java
index ea881ad8b48..830014bca46 100644
--- a/container-search/src/main/java/com/yahoo/search/cluster/TrafficNodeMonitor.java
+++ b/container-search/src/main/java/com/yahoo/search/cluster/TrafficNodeMonitor.java
@@ -36,6 +36,7 @@ public class TrafficNodeMonitor<T> extends BaseNodeMonitor<T> {
@Override
public void failed(ErrorMessage error) {
respondedAt = now();
+ atStartUp = false;
if (error.getCode() == Error.BACKEND_COMMUNICATION_ERROR.code) {
setWorking(false, "Connection failure: " + error.toString());
@@ -60,9 +61,14 @@ public class TrafficNodeMonitor<T> extends BaseNodeMonitor<T> {
setWorking(true,"Responds correctly");
}
+ /**
+ * Returns whether this node is currently is a state suitable for receiving traffic, or null if not known
+ */
+ public Boolean isKnownWorking() { return atStartUp ? null : isWorking; }
+
/** Thread-safely changes the state of this node if required */
- protected synchronized void setWorking(boolean working,String explanation) {
- if (this.isWorking==working) return; // Old news
+ protected synchronized void setWorking(boolean working, String explanation) {
+ if (this.isWorking == working) return; // Old news
if (explanation==null) {
explanation="";
diff --git a/container-search/src/main/java/com/yahoo/search/dispatch/Dispatcher.java b/container-search/src/main/java/com/yahoo/search/dispatch/Dispatcher.java
index 58f73ea52cc..af34fc3e106 100644
--- a/container-search/src/main/java/com/yahoo/search/dispatch/Dispatcher.java
+++ b/container-search/src/main/java/com/yahoo/search/dispatch/Dispatcher.java
@@ -139,7 +139,7 @@ public class Dispatcher extends AbstractComponent {
return invoker;
}
- // build invoker based on searchpath
+ /** Builds an invoker based on searchpath */
private Optional<SearchInvoker> getSearchPathInvoker(Query query, VespaBackEndSearcher searcher) {
String searchPath = query.getModel().getSearchPath();
if (searchPath == null) return Optional.empty();
@@ -156,7 +156,7 @@ public class Dispatcher extends AbstractComponent {
}
private Optional<SearchInvoker> getInternalInvoker(Query query, VespaBackEndSearcher searcher) {
- Optional<Node> directNode = searchCluster.directDispatchTarget();
+ Optional<Node> directNode = searchCluster.localCorpusDispatchTarget();
if (directNode.isPresent()) {
Node node = directNode.get();
query.trace(false, 2, "Dispatching directly to ", node);
@@ -202,4 +202,5 @@ public class Dispatcher extends AbstractComponent {
metric.add(INTERNAL_METRIC, 1, metricContext);
}
}
+
}
diff --git a/container-search/src/main/java/com/yahoo/search/dispatch/InvokerFactory.java b/container-search/src/main/java/com/yahoo/search/dispatch/InvokerFactory.java
index 31af74a39b2..78c50254a84 100644
--- a/container-search/src/main/java/com/yahoo/search/dispatch/InvokerFactory.java
+++ b/container-search/src/main/java/com/yahoo/search/dispatch/InvokerFactory.java
@@ -48,21 +48,24 @@ public abstract class InvokerFactory {
* @return Optional containing the SearchInvoker or <i>empty</i> if some node in the
* list is invalid and the remaining coverage is not sufficient
*/
- public Optional<SearchInvoker> createSearchInvoker(VespaBackEndSearcher searcher, Query query, OptionalInt groupId, List<Node> nodes,
- boolean acceptIncompleteCoverage) {
+ public Optional<SearchInvoker> createSearchInvoker(VespaBackEndSearcher searcher,
+ Query query,
+ OptionalInt groupId,
+ List<Node> nodes,
+ boolean acceptIncompleteCoverage) {
List<SearchInvoker> invokers = new ArrayList<>(nodes.size());
Set<Integer> failed = null;
for (Node node : nodes) {
boolean nodeAdded = false;
- if (node.isWorking()) {
+ if (node.isWorking() != Boolean.FALSE) {
Optional<SearchInvoker> invoker = createNodeSearchInvoker(searcher, query, node);
- if(invoker.isPresent()) {
+ if (invoker.isPresent()) {
invokers.add(invoker.get());
nodeAdded = true;
}
}
- if (!nodeAdded) {
+ if ( ! nodeAdded) {
if (failed == null) {
failed = new HashSet<>();
}
diff --git a/container-search/src/main/java/com/yahoo/search/dispatch/searchcluster/Group.java b/container-search/src/main/java/com/yahoo/search/dispatch/searchcluster/Group.java
index bafc72b9b43..0e4e87b9a6a 100644
--- a/container-search/src/main/java/com/yahoo/search/dispatch/searchcluster/Group.java
+++ b/container-search/src/main/java/com/yahoo/search/dispatch/searchcluster/Group.java
@@ -54,7 +54,7 @@ public class Group {
public int workingNodes() {
int nodesUp = 0;
for (Node node : nodes) {
- if (node.isWorking()) {
+ if (node.isWorking() == Boolean.TRUE) {
nodesUp++;
}
}
@@ -64,7 +64,7 @@ public class Group {
void aggregateActiveDocuments() {
long activeDocumentsInGroup = 0;
for (Node node : nodes) {
- if (node.isWorking()) {
+ if (node.isWorking() == Boolean.TRUE) {
activeDocumentsInGroup += node.getActiveDocuments();
}
}
@@ -77,6 +77,11 @@ public class Group {
return this.activeDocuments.get();
}
+ public boolean isFullCoverageStatusChanged(boolean hasFullCoverageNow) {
+ boolean previousState = hasFullCoverage.getAndSet(hasFullCoverageNow);
+ return previousState != hasFullCoverageNow;
+ }
+
@Override
public String toString() { return "search group " + id; }
@@ -90,8 +95,4 @@ public class Group {
return ((Group) other).id == this.id;
}
- public boolean isFullCoverageStatusChanged(boolean hasFullCoverageNow) {
- boolean previousState = hasFullCoverage.getAndSet(hasFullCoverageNow);
- return previousState != hasFullCoverageNow;
- }
}
diff --git a/container-search/src/main/java/com/yahoo/search/dispatch/searchcluster/Node.java b/container-search/src/main/java/com/yahoo/search/dispatch/searchcluster/Node.java
index a71ce0354f9..b47f2fefa5b 100644
--- a/container-search/src/main/java/com/yahoo/search/dispatch/searchcluster/Node.java
+++ b/container-search/src/main/java/com/yahoo/search/dispatch/searchcluster/Node.java
@@ -19,6 +19,7 @@ public class Node {
private final int fs4port;
final int group;
+ private final AtomicBoolean statusIsKnown = new AtomicBoolean(false);
private final AtomicBoolean working = new AtomicBoolean(true);
private final AtomicLong activeDocuments = new AtomicLong(0);
@@ -46,11 +47,14 @@ public class Node {
public int group() { return group; }
public void setWorking(boolean working) {
+ this.statusIsKnown.lazySet(true);
this.working.lazySet(working);
}
- /** Returns whether this node is currently responding to requests */
- public boolean isWorking() { return working.get(); }
+ /** Returns whether this node is currently responding to requests, or null if status is not known */
+ public Boolean isWorking() {
+ return statusIsKnown.get() ? working.get() : null;
+ }
/** Updates the active documents on this node */
public void setActiveDocuments(long activeDocuments) {
@@ -77,4 +81,5 @@ public class Node {
@Override
public String toString() { return "search node " + hostname + ":" + fs4port + " in group " + group; }
+
}
diff --git a/container-search/src/main/java/com/yahoo/search/dispatch/searchcluster/PingFactory.java b/container-search/src/main/java/com/yahoo/search/dispatch/searchcluster/PingFactory.java
index c9f722ef79b..285a1fcd57e 100644
--- a/container-search/src/main/java/com/yahoo/search/dispatch/searchcluster/PingFactory.java
+++ b/container-search/src/main/java/com/yahoo/search/dispatch/searchcluster/PingFactory.java
@@ -6,5 +6,7 @@ import com.yahoo.search.cluster.ClusterMonitor;
import java.util.concurrent.Callable;
public interface PingFactory {
+
Callable<Pong> createPinger(Node node, ClusterMonitor<Node> monitor);
+
}
diff --git a/container-search/src/main/java/com/yahoo/search/dispatch/searchcluster/SearchCluster.java b/container-search/src/main/java/com/yahoo/search/dispatch/searchcluster/SearchCluster.java
index c091e211aca..c17212b2481 100644
--- a/container-search/src/main/java/com/yahoo/search/dispatch/searchcluster/SearchCluster.java
+++ b/container-search/src/main/java/com/yahoo/search/dispatch/searchcluster/SearchCluster.java
@@ -55,7 +55,7 @@ public class SearchCluster implements NodeManager<Node> {
* if it only queries this cluster when the local node cannot be used, to avoid unnecessary
* cross-node network traffic.
*/
- private final Optional<Node> directDispatchTarget;
+ private final Optional<Node> localCorpusDispatchTarget;
public SearchCluster(String clusterId, DispatchConfig dispatchConfig, int containerClusterSize, VipStatus vipStatus) {
this.clusterId = clusterId;
@@ -82,8 +82,11 @@ public class SearchCluster implements NodeManager<Node> {
nodesByHostBuilder.put(node.hostname(), node);
this.nodesByHost = nodesByHostBuilder.build();
- this.directDispatchTarget = findDirectDispatchTarget(HostName.getLocalhost(), size, containerClusterSize,
- nodesByHost, groups);
+ this.localCorpusDispatchTarget = findLocalCorpusDispatchTarget(HostName.getLocalhost(),
+ size,
+ containerClusterSize,
+ nodesByHost,
+ groups);
this.clusterMonitor = new ClusterMonitor<>(this);
}
@@ -92,21 +95,18 @@ public class SearchCluster implements NodeManager<Node> {
this.pingFactory = pingFactory;
for (var group : orderedGroups) {
- for (var node : group.nodes()) {
- // cluster monitor will only call working() when the
- // node transitions from down to up, so we need to
- // register the initial (working) state here:
- working(node);
+ for (var node : group.nodes())
clusterMonitor.add(node, true);
- }
}
}
- private static Optional<Node> findDirectDispatchTarget(String selfHostname,
- int searchClusterSize,
- int containerClusterSize,
- ImmutableMultimap<String, Node> nodesByHost,
- ImmutableMap<Integer, Group> groups) {
+ ClusterMonitor<Node> clusterMonitor() { return clusterMonitor; }
+
+ private static Optional<Node> findLocalCorpusDispatchTarget(String selfHostname,
+ int searchClusterSize,
+ int containerClusterSize,
+ ImmutableMultimap<String, Node> nodesByHost,
+ ImmutableMap<Integer, Group> groups) {
// A search node in the search cluster in question is configured on the same host as the currently running container.
// It has all the data <==> No other nodes in the search cluster have the same group id as this node.
// That local search node responds.
@@ -186,70 +186,90 @@ public class SearchCluster implements NodeManager<Node> {
}
/**
- * Returns the recipient we should dispatch queries directly to (bypassing fdispatch),
+ * Returns the single, local node we should dispatch queries directly to,
* or empty if we should not dispatch directly.
*/
- public Optional<Node> directDispatchTarget() {
- if ( directDispatchTarget.isEmpty()) return Optional.empty();
+ public Optional<Node> localCorpusDispatchTarget() {
+ if ( localCorpusDispatchTarget.isEmpty()) return Optional.empty();
// Only use direct dispatch if the local group has sufficient coverage
- Group localSearchGroup = groups.get(directDispatchTarget.get().group());
+ Group localSearchGroup = groups.get(localCorpusDispatchTarget.get().group());
if ( ! localSearchGroup.hasSufficientCoverage()) return Optional.empty();
- // Only use direct dispatch if the local search node is up
- if ( ! directDispatchTarget.get().isWorking()) return Optional.empty();
+ // Only use direct dispatch if the local search node is not down
+ if ( localCorpusDispatchTarget.get().isWorking() == Boolean.FALSE) return Optional.empty();
- return directDispatchTarget;
+ return localCorpusDispatchTarget;
}
- /** Used by the cluster monitor to manage node status */
+ /** Called by the cluster monitor when node state changes to working */
@Override
public void working(Node node) {
node.setWorking(true);
-
- if (usesDirectDispatchTo(node))
- vipStatus.addToRotation(clusterId);
+ updateVipStatusOnNodeChange(node, true);
}
- /** Used by the cluster monitor to manage node status */
+ /** Called by the cluster monitor when node state changes to failed */
@Override
public void failed(Node node) {
node.setWorking(false);
-
- // Take ourselves out if we usually dispatch only to our own host
- if (usesDirectDispatchTo(node))
- vipStatus.removeFromRotation(clusterId);
+ updateVipStatusOnNodeChange(node, true);
}
private void updateSufficientCoverage(Group group, boolean sufficientCoverage) {
- // update VIP status if we direct dispatch to this group and coverage status changed
- boolean isInRotation = vipStatus.isInRotation();
- boolean hasChanged = sufficientCoverage != group.hasSufficientCoverage();
- boolean isDirectDispatchGroupAndChange = usesDirectDispatchTo(group) && hasChanged;
+ if (sufficientCoverage == group.hasSufficientCoverage()) return; // no change
+
group.setHasSufficientCoverage(sufficientCoverage);
- if ((!isInRotation || isDirectDispatchGroupAndChange) && sufficientCoverage) {
- // We will set this cluster in rotation if
- // - not already in rotation and one group has sufficient coverage.
+ updateVipStatusOnCoverageChange(group, sufficientCoverage);
+ }
+
+ private void updateVipStatusOnNodeChange(Node node, boolean nodeIsWorking) {
+ if (localCorpusDispatchTarget.isEmpty()) { // consider entire cluster
+ if (hasInformationAboutAllNodes())
+ setInRotationOnlyIf(hasWorkingNodes());
+ }
+ else if (usesLocalCorpusIn(node)) { // follow the status of this node
+ setInRotationOnlyIf(nodeIsWorking);
+ }
+ }
+
+ private void updateVipStatusOnCoverageChange(Group group, boolean sufficientCoverage) {
+ if ( localCorpusDispatchTarget.isEmpty()) { // consider entire cluster
+ // VIP status does not depend on coverage
+ }
+ else if (usesLocalCorpusIn(group)) { // follow the status of this group
+ setInRotationOnlyIf(sufficientCoverage);
+ }
+ }
+
+ private void setInRotationOnlyIf(boolean inRotation) {
+ if (inRotation)
vipStatus.addToRotation(clusterId);
- } else if (isDirectDispatchGroupAndChange) {
- // We will take it out of rotation if the group is mandatory (direct dispatch to this group)
+ else
vipStatus.removeFromRotation(clusterId);
- }
}
- private boolean usesDirectDispatchTo(Node node) {
- return directDispatchTarget.isPresent() && directDispatchTarget.get().equals(node);
+ private boolean hasInformationAboutAllNodes() {
+ return nodesByHost.values().stream().allMatch(node -> node.isWorking() != null);
+ }
+
+ private boolean hasWorkingNodes() {
+ return nodesByHost.values().stream().anyMatch(node -> node.isWorking() != Boolean.FALSE );
+ }
+
+ private boolean usesLocalCorpusIn(Node node) {
+ return localCorpusDispatchTarget.isPresent() && localCorpusDispatchTarget.get().equals(node);
}
- private boolean usesDirectDispatchTo(Group group) {
- return directDispatchTarget.isPresent() && directDispatchTarget.get().group() == group.id();
+ private boolean usesLocalCorpusIn(Group group) {
+ return localCorpusDispatchTarget.isPresent() && localCorpusDispatchTarget.get().group() == group.id();
}
/** Used by the cluster monitor to manage node status */
@Override
public void ping(Node node, Executor executor) {
- if (pingFactory == null) // not initialized yet
- return;
+ if (pingFactory == null) return; // not initialized yet
+
FutureTask<Pong> futurePong = new FutureTask<>(pingFactory.createPinger(node, clusterMonitor));
executor.execute(futurePong);
Pong pong = getPong(futurePong, node);
@@ -272,9 +292,10 @@ public class SearchCluster implements NodeManager<Node> {
// group will always be marked sufficient for use.
updateSufficientCoverage(group, true);
boolean fullCoverage = isGroupCoverageSufficient(group.workingNodes(), group.nodes().size(), group.getActiveDocuments(),
- group.getActiveDocuments());
+ group.getActiveDocuments());
trackGroupCoverageChanges(0, group, fullCoverage, group.getActiveDocuments());
}
+
private void pingIterationCompletedMultipleGroups() {
int numGroups = orderedGroups.size();
// Update active documents per group and use it to decide if the group should be active
@@ -299,13 +320,7 @@ public class SearchCluster implements NodeManager<Node> {
trackGroupCoverageChanges(i, group, sufficientCoverage, averageDocumentsInOtherGroups);
}
}
- private boolean areAllNodesDownInAllgroups() {
- for(int i = 0; i < groups.size(); i++) {
- Group group = orderedGroups.get(i);
- if (group.workingNodes() > 0) return false;
- }
- return true;
- }
+
/**
* Update statistics after a round of issuing pings.
* Note that this doesn't wait for pings to return, so it will typically accumulate data from
@@ -319,9 +334,6 @@ public class SearchCluster implements NodeManager<Node> {
} else {
pingIterationCompletedMultipleGroups();
}
- if ( areAllNodesDownInAllgroups() ) {
- vipStatus.removeFromRotation(clusterId);
- }
}
private boolean isGroupCoverageSufficient(int workingNodes, int nodesInGroup, long activeDocuments, long averageDocumentsInOtherGroups) {
@@ -397,11 +409,11 @@ public class SearchCluster implements NodeManager<Node> {
if (changed) {
int requiredNodes = groupSize() - dispatchConfig.maxNodesDownPerGroup();
if (fullCoverage) {
- log.info(() -> String.format("Group %d is now good again (%d/%d active docs, coverage %d/%d)", index,
- group.getActiveDocuments(), averageDocuments, group.workingNodes(), groupSize()));
+ log.info(() -> String.format("Group %d is now good again (%d/%d active docs, coverage %d/%d)",
+ index, group.getActiveDocuments(), averageDocuments, group.workingNodes(), groupSize()));
} else {
- log.warning(() -> String.format("Coverage of group %d is only %d/%d (requires %d)", index, group.workingNodes(), groupSize(),
- requiredNodes));
+ log.warning(() -> String.format("Coverage of group %d is only %d/%d (requires %d)",
+ index, group.workingNodes(), groupSize(), requiredNodes));
}
}
}
diff --git a/container-search/src/test/java/com/yahoo/search/cluster/test/ClusteredConnectionTestCase.java b/container-search/src/test/java/com/yahoo/search/cluster/test/ClusteredConnectionTestCase.java
index 84c10991293..aa57e7903f9 100644
--- a/container-search/src/test/java/com/yahoo/search/cluster/test/ClusteredConnectionTestCase.java
+++ b/container-search/src/test/java/com/yahoo/search/cluster/test/ClusteredConnectionTestCase.java
@@ -114,7 +114,7 @@ public class ClusteredConnectionTestCase {
connection0.setInService(false);
forcePing(myBackend);
r=new Execution(myBackend, Execution.Context.createContextStub()).search(new SimpleQuery(0));
- assertEquals("No backends in service. Try later",r.hits().getError().getMessage());
+ assertEquals("No backends in service. Try later", r.hits().getError().getMessage());
connection2.setInService(true);
connection1.setInService(true);
diff --git a/container-search/src/test/java/com/yahoo/search/dispatch/DispatcherTest.java b/container-search/src/test/java/com/yahoo/search/dispatch/DispatcherTest.java
index 03a417c9bbb..310f536f961 100644
--- a/container-search/src/test/java/com/yahoo/search/dispatch/DispatcherTest.java
+++ b/container-search/src/test/java/com/yahoo/search/dispatch/DispatcherTest.java
@@ -69,7 +69,7 @@ public class DispatcherTest {
public void requireThatDispatcherSupportsSingleNodeDirectDispatch() {
SearchCluster cl = new MockSearchCluster("1", 0, 0) {
@Override
- public Optional<Node> directDispatchTarget() {
+ public Optional<Node> localCorpusDispatchTarget() {
return Optional.of(new Node(1, "test", 123, 1));
}
};
diff --git a/container-search/src/test/java/com/yahoo/search/dispatch/MockSearchCluster.java b/container-search/src/test/java/com/yahoo/search/dispatch/MockSearchCluster.java
index 7ee62ae9978..e3ff54102d4 100644
--- a/container-search/src/test/java/com/yahoo/search/dispatch/MockSearchCluster.java
+++ b/container-search/src/test/java/com/yahoo/search/dispatch/MockSearchCluster.java
@@ -10,6 +10,7 @@ import com.yahoo.search.dispatch.searchcluster.SearchCluster;
import com.yahoo.vespa.config.search.DispatchConfig;
import java.util.ArrayList;
+import java.util.Arrays;
import java.util.List;
import java.util.Optional;
@@ -89,7 +90,7 @@ public class MockSearchCluster extends SearchCluster {
}
@Override
- public Optional<Node> directDispatchTarget() {
+ public Optional<Node> localCorpusDispatchTarget() {
return Optional.empty();
}
@@ -106,8 +107,14 @@ public class MockSearchCluster extends SearchCluster {
public static DispatchConfig createDispatchConfig(Node... nodes) {
return createDispatchConfig(100.0, nodes);
}
+ public static DispatchConfig createDispatchConfig(List<Node> nodes) {
+ return createDispatchConfig(100.0, nodes);
+ }
public static DispatchConfig createDispatchConfig(double minSearchCoverage, Node... nodes) {
+ return createDispatchConfig(minSearchCoverage, Arrays.asList(nodes));
+ }
+ public static DispatchConfig createDispatchConfig(double minSearchCoverage, List<Node> nodes) {
DispatchConfig.Builder builder = new DispatchConfig.Builder();
builder.minActivedocsPercentage(88.0);
builder.minGroupCoverage(99.0);
diff --git a/container-search/src/test/java/com/yahoo/search/dispatch/searchcluster/SearchClusterTest.java b/container-search/src/test/java/com/yahoo/search/dispatch/searchcluster/SearchClusterTest.java
new file mode 100644
index 00000000000..bde0a3c6c02
--- /dev/null
+++ b/container-search/src/test/java/com/yahoo/search/dispatch/searchcluster/SearchClusterTest.java
@@ -0,0 +1,279 @@
+package com.yahoo.search.dispatch.searchcluster;
+
+import com.yahoo.container.QrSearchersConfig;
+import com.yahoo.container.handler.ClustersStatus;
+import com.yahoo.container.handler.VipStatus;
+import com.yahoo.net.HostName;
+import com.yahoo.prelude.Pong;
+import com.yahoo.search.cluster.ClusterMonitor;
+import com.yahoo.search.dispatch.MockSearchCluster;
+import com.yahoo.search.result.ErrorMessage;
+import org.jetbrains.annotations.NotNull;
+import org.junit.Test;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+import java.util.concurrent.Callable;
+import java.util.concurrent.Executor;
+import java.util.concurrent.ExecutorService;
+import java.util.concurrent.Executors;
+import java.util.concurrent.TimeUnit;
+import java.util.concurrent.atomic.AtomicInteger;
+
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertTrue;
+
+/**
+ * @author baldersheim
+ */
+public class SearchClusterTest {
+
+ static class State {
+ class MyExecutor implements Executor {
+ private final List<Runnable> list = new ArrayList<>();
+ @Override
+ public void execute(@NotNull Runnable command) {
+ list.add(command);
+ }
+ void run() {
+ for (Runnable runnable : list) {
+ runnable.run();
+ }
+ list.clear();
+ }
+ }
+ final String clusterId;
+ final int nodesPerGroup;
+ final VipStatus vipStatus;
+ final SearchCluster searchCluster;
+ final List<AtomicInteger> numDocsPerNode;
+ List<AtomicInteger> pingCounts;
+ State(String clusterId, int nodesPergroup, String ... nodeNames) {
+ this(clusterId, nodesPergroup, Arrays.asList(nodeNames));
+ }
+ State(String clusterId, int nodesPergroup, List<String> nodeNames) {
+ this.clusterId = clusterId;
+ this.nodesPerGroup = nodesPergroup;
+ vipStatus = new VipStatus(new QrSearchersConfig.Builder().searchcluster(new QrSearchersConfig.Searchcluster.Builder().name(clusterId)).build(), new ClustersStatus());
+ numDocsPerNode = new ArrayList<>(nodeNames.size());
+ pingCounts = new ArrayList<>(nodeNames.size());
+ List<Node> nodes = new ArrayList<>(nodeNames.size());
+
+ for (String name : nodeNames) {
+ int key = nodes.size() % nodesPergroup;
+ int group = nodes.size() / nodesPergroup;
+ nodes.add(new Node(key, name, 13333, group));
+ numDocsPerNode.add(new AtomicInteger(1));
+ pingCounts.add(new AtomicInteger(0));
+ }
+ searchCluster = new SearchCluster(clusterId, MockSearchCluster.createDispatchConfig(nodes), nodes.size() / nodesPergroup, vipStatus);
+ }
+ void startMonitoring() {
+ searchCluster.startClusterMonitoring(new Factory(nodesPerGroup, numDocsPerNode, pingCounts));
+ }
+ static private int getMaxValue(List<AtomicInteger> list) {
+ int max = list.get(0).get();
+ for (AtomicInteger v : list) {
+ if (v.get() > max) {
+ max = v.get();
+ }
+ }
+ return max;
+ }
+ private static int getMinValue(List<AtomicInteger> list) {
+ int min = list.get(0).get();
+ for (AtomicInteger v : list) {
+ if (v.get() < min) {
+ min = v.get();
+ }
+ }
+ return min;
+ }
+ private void waitAtLeast(int atLeast, List<AtomicInteger> list) {
+ while (getMinValue(list) < atLeast) {
+ ExecutorService executor = Executors.newCachedThreadPool();
+ searchCluster.clusterMonitor().ping(executor);
+ executor.shutdown();
+ try {
+ executor.awaitTermination(60, TimeUnit.SECONDS);
+ } catch (InterruptedException e) {}
+ }
+ }
+ void waitOneFullPingRound() {
+ waitAtLeast(getMaxValue(pingCounts) + 1, pingCounts);
+ }
+ static class Factory implements PingFactory {
+ static class Pinger implements Callable<Pong> {
+ private final AtomicInteger numDocs;
+ private final AtomicInteger pingCount;
+ Pinger(AtomicInteger numDocs, AtomicInteger pingCount) {
+ this.numDocs = numDocs;
+ this.pingCount = pingCount;
+ }
+ @Override
+ public Pong call() throws Exception {
+ int docs = numDocs.get();
+ pingCount.incrementAndGet();
+ return (docs < 0)
+ ? new Pong(ErrorMessage.createBackendCommunicationError("Negative numDocs = " + docs))
+ : new Pong(docs);
+ }
+ }
+
+ private final List<AtomicInteger> activeDocs;
+ private final List<AtomicInteger> pingCounts;
+ private final int numPerGroup;
+ Factory(int numPerGroup, List<AtomicInteger> activeDocs, List<AtomicInteger> pingCounts) {
+ this.numPerGroup = numPerGroup;
+ this.activeDocs = activeDocs;
+ this.pingCounts = pingCounts;
+ }
+
+ @Override
+ public Callable<Pong> createPinger(Node node, ClusterMonitor<Node> monitor) {
+ int index = node.group*numPerGroup + node.key();
+ return new Pinger(activeDocs.get(index), pingCounts.get(index));
+ }
+ }
+ }
+
+ @Test
+ public void requireThatVipStatusIsDefaultDownButComesUpAfterPinging() {
+ State test = new State("cluster.1", 2, "a", "b");
+ assertTrue(test.searchCluster.localCorpusDispatchTarget().isEmpty());
+
+ assertFalse(test.vipStatus.isInRotation());
+ test.startMonitoring();
+ test.waitOneFullPingRound();
+ assertTrue(test.vipStatus.isInRotation());
+ }
+
+ @Test
+ public void requireThatZeroDocsAreFine() {
+ State test = new State("cluster.1", 2,"a", "b");
+ test.startMonitoring();
+ test.waitOneFullPingRound();
+
+ assertTrue(test.vipStatus.isInRotation());
+ assertTrue(test.searchCluster.localCorpusDispatchTarget().isEmpty());
+
+ test.numDocsPerNode.get(0).set(-1);
+ test.numDocsPerNode.get(1).set(-1);
+ test.waitOneFullPingRound();
+ assertFalse(test.vipStatus.isInRotation());
+ test.numDocsPerNode.get(0).set(0);
+ test.waitOneFullPingRound();
+ assertTrue(test.vipStatus.isInRotation());
+ }
+
+ @Test
+ public void requireThatVipStatusIsDefaultDownWithLocalDispatch() {
+ State test = new State("cluster.1", 1, HostName.getLocalhost(), "b");
+ assertTrue(test.searchCluster.localCorpusDispatchTarget().isPresent());
+
+ assertFalse(test.vipStatus.isInRotation());
+ test.startMonitoring();
+ test.waitOneFullPingRound();
+ assertTrue(test.vipStatus.isInRotation());
+ }
+
+ @Test
+ public void requireThatVipStatusDownWhenLocalIsDown() {
+ State test = new State("cluster.1",1,HostName.getLocalhost(), "b");
+
+ test.startMonitoring();
+ test.waitOneFullPingRound();
+ assertTrue(test.vipStatus.isInRotation());
+ assertTrue(test.searchCluster.localCorpusDispatchTarget().isPresent());
+
+ test.waitOneFullPingRound();
+ assertTrue(test.vipStatus.isInRotation());
+ test.numDocsPerNode.get(0).set(-1);
+ test.waitOneFullPingRound();
+ assertFalse(test.vipStatus.isInRotation());
+
+ test.numDocsPerNode.get(0).set(1);
+ test.waitOneFullPingRound();
+ assertTrue(test.vipStatus.isInRotation());
+
+ test.numDocsPerNode.get(1).set(-1);
+ test.waitOneFullPingRound();
+ assertTrue(test.vipStatus.isInRotation());
+
+ test.numDocsPerNode.get(0).set(-1);
+ test.numDocsPerNode.get(1).set(-1);
+ test.waitOneFullPingRound();
+ assertFalse(test.vipStatus.isInRotation());
+ test.numDocsPerNode.get(1).set(1);
+ test.waitOneFullPingRound();
+ assertFalse(test.vipStatus.isInRotation());
+ test.numDocsPerNode.get(0).set(1);
+ test.waitOneFullPingRound();
+ assertTrue(test.vipStatus.isInRotation());
+ }
+
+ private void verifyThatVipStatusDownRequireAllNodesDown(int numGroups, int nodesPerGroup) {
+ List<String> nodeNames = generateNodeNames(numGroups, nodesPerGroup);
+ State test = new State("cluster.1", nodesPerGroup, nodeNames);
+ test.startMonitoring();
+ test.waitOneFullPingRound();
+ assertTrue(test.vipStatus.isInRotation());
+ assertTrue(test.searchCluster.localCorpusDispatchTarget().isEmpty());
+
+ test.waitOneFullPingRound();
+ assertTrue(test.vipStatus.isInRotation());
+
+ for (int i=0; i < test.numDocsPerNode.size()-1; i++) {
+ test.numDocsPerNode.get(i).set(-1);
+ }
+ test.waitOneFullPingRound();
+ assertTrue(test.vipStatus.isInRotation());
+ test.numDocsPerNode.get(test.numDocsPerNode.size()-1).set(-1);
+ test.waitOneFullPingRound();
+ assertFalse(test.vipStatus.isInRotation());
+ }
+ @Test
+ public void requireThatVipStatusDownRequireAllNodesDown() {
+ verifyThatVipStatusDownRequireAllNodesDown(1,2);
+ verifyThatVipStatusDownRequireAllNodesDown(3, 3);
+ }
+
+ static private List<String> generateNodeNames(int numGroups, int nodesPerGroup) {
+ List<String> nodeNames = new ArrayList<>(numGroups*nodesPerGroup);
+ for (int g = 0; g < numGroups; g++) {
+ for (int n=0; n < nodesPerGroup; n++) {
+ nodeNames.add(new StringBuilder("node.").append(g).append('.').append(n).toString());
+ }
+ }
+ return nodeNames;
+ }
+
+ private void verifyThatVipStatusUpRequireOnlyOneOnlineNode(int numGroups, int nodesPerGroup) {
+ List<String> nodeNames = generateNodeNames(numGroups, nodesPerGroup);
+ State test = new State("cluster.1", nodesPerGroup, nodeNames);
+ test.startMonitoring();
+ test.waitOneFullPingRound();
+ assertTrue(test.vipStatus.isInRotation());
+ assertTrue(test.searchCluster.localCorpusDispatchTarget().isEmpty());
+
+ for (int i=0; i < test.numDocsPerNode.size()-1; i++) {
+ test.numDocsPerNode.get(i).set(-1);
+ }
+ test.waitOneFullPingRound();
+ assertTrue(test.vipStatus.isInRotation());
+ test.numDocsPerNode.get(test.numDocsPerNode.size()-1).set(-1);
+ test.waitOneFullPingRound();
+ assertFalse(test.vipStatus.isInRotation());
+
+ test.numDocsPerNode.get(0).set(0);
+ test.waitOneFullPingRound();
+ assertTrue(test.vipStatus.isInRotation());
+ }
+ @Test
+ public void requireThatVipStatusUpRequireOnlyOneOnlineNode() {
+ verifyThatVipStatusUpRequireOnlyOneOnlineNode(1, 2);
+ verifyThatVipStatusUpRequireOnlyOneOnlineNode(3, 3);
+ }
+
+}