summaryrefslogtreecommitdiffstats
path: root/container-search/src
diff options
context:
space:
mode:
authorJon Bratseth <bratseth@verizonmedia.com>2019-09-19 21:19:48 +0200
committerJon Bratseth <bratseth@verizonmedia.com>2019-09-19 21:19:48 +0200
commit13b9f16b13a370ddaea0460d8d3721e5f2ef1c8f (patch)
tree3300800914affdcd56f4753dbd6c817ed943ce89 /container-search/src
parent7b64e79d96cf9b3f61bd385f0a32e9454a4ef3d2 (diff)
Transition from down to up initially
- Use tri-state logic for working/failing/unknown - Be initially down in test and verify we come up
Diffstat (limited to 'container-search/src')
-rw-r--r--container-search/src/main/java/com/yahoo/search/cluster/BaseNodeMonitor.java4
-rw-r--r--container-search/src/main/java/com/yahoo/search/cluster/ClusterMonitor.java21
-rw-r--r--container-search/src/main/java/com/yahoo/search/cluster/NodeManager.java3
-rw-r--r--container-search/src/main/java/com/yahoo/search/cluster/TrafficNodeMonitor.java9
-rw-r--r--container-search/src/main/java/com/yahoo/search/dispatch/InvokerFactory.java13
-rw-r--r--container-search/src/main/java/com/yahoo/search/dispatch/searchcluster/Group.java4
-rw-r--r--container-search/src/main/java/com/yahoo/search/dispatch/searchcluster/Node.java17
-rw-r--r--container-search/src/main/java/com/yahoo/search/dispatch/searchcluster/SearchCluster.java23
-rw-r--r--container-search/src/main/java/com/yahoo/search/result/ErrorMessage.java1
-rw-r--r--container-search/src/test/java/com/yahoo/search/cluster/test/ClusteredConnectionTestCase.java3
-rw-r--r--container-search/src/test/java/com/yahoo/search/dispatch/searchcluster/SearchClusterTest.java51
11 files changed, 78 insertions, 71 deletions
diff --git a/container-search/src/main/java/com/yahoo/search/cluster/BaseNodeMonitor.java b/container-search/src/main/java/com/yahoo/search/cluster/BaseNodeMonitor.java
index d21ef35bcc2..dd01d895963 100644
--- a/container-search/src/main/java/com/yahoo/search/cluster/BaseNodeMonitor.java
+++ b/container-search/src/main/java/com/yahoo/search/cluster/BaseNodeMonitor.java
@@ -54,10 +54,12 @@ public abstract class BaseNodeMonitor<T> {
/**
* Returns whether this node is currently in a state suitable
- * for receiving traffic. As far as we know, that is
+ * for receiving traffic (default true)
*/
public boolean isWorking() { return isWorking; }
+ /** @deprecated Not used */
+ @Deprecated // TODO: Remove on Vespa 8
public boolean isQuarantined() { return isQuarantined; }
/**
diff --git a/container-search/src/main/java/com/yahoo/search/cluster/ClusterMonitor.java b/container-search/src/main/java/com/yahoo/search/cluster/ClusterMonitor.java
index b79f6f49c19..015adcf3490 100644
--- a/container-search/src/main/java/com/yahoo/search/cluster/ClusterMonitor.java
+++ b/container-search/src/main/java/com/yahoo/search/cluster/ClusterMonitor.java
@@ -33,7 +33,7 @@ public class ClusterMonitor<T> {
private volatile boolean shutdown = false;
/** A map from Node to corresponding MonitoredNode */
- private final Map<T, BaseNodeMonitor<T>> nodeMonitors = Collections.synchronizedMap(new java.util.LinkedHashMap<>());
+ private final Map<T, TrafficNodeMonitor<T>> nodeMonitors = Collections.synchronizedMap(new java.util.LinkedHashMap<>());
public ClusterMonitor(NodeManager<T> manager) {
nodeManager = manager;
@@ -56,8 +56,7 @@ public class ClusterMonitor<T> {
* @param internal whether or not this node is internal to this cluster
*/
public void add(T node, boolean internal) {
- BaseNodeMonitor<T> monitor = new TrafficNodeMonitor<>(node, configuration, internal);
- nodeMonitors.put(node, monitor);
+ nodeMonitors.put(node, new TrafficNodeMonitor<>(node, configuration, internal));
}
/**
@@ -69,24 +68,20 @@ public class ClusterMonitor<T> {
/** Called from ClusterSearcher/NodeManager when a node failed */
public synchronized void failed(T node, ErrorMessage error) {
- nodeManager.statusIsKnown(node);
- BaseNodeMonitor<T> monitor = nodeMonitors.get(node);
- boolean wasWorking = monitor.isWorking();
+ TrafficNodeMonitor<T> monitor = nodeMonitors.get(node);
+ Boolean wasWorking = monitor.isKnownWorking();
monitor.failed(error);
- if (wasWorking && !monitor.isWorking()) {
+ if (wasWorking != monitor.isKnownWorking())
nodeManager.failed(node);
- }
}
/** Called when a node responded */
public synchronized void responded(T node) {
- nodeManager.statusIsKnown(node);
- BaseNodeMonitor<T> monitor = nodeMonitors.get(node);
- boolean wasFailing =! monitor.isWorking();
+ TrafficNodeMonitor<T> monitor = nodeMonitors.get(node);
+ Boolean wasWorking = monitor.isKnownWorking();
monitor.responded();
- if (wasFailing && monitor.isWorking()) {
+ if (wasWorking != monitor.isKnownWorking())
nodeManager.working(monitor.getNode());
- }
}
/**
diff --git a/container-search/src/main/java/com/yahoo/search/cluster/NodeManager.java b/container-search/src/main/java/com/yahoo/search/cluster/NodeManager.java
index ef10680a4ae..9b20139e3c5 100644
--- a/container-search/src/main/java/com/yahoo/search/cluster/NodeManager.java
+++ b/container-search/src/main/java/com/yahoo/search/cluster/NodeManager.java
@@ -11,9 +11,6 @@ import java.util.concurrent.Executor;
*/
public interface NodeManager<T> {
- /** Called when we gain evidence about whether or not a node is working */
- default void statusIsKnown(T node) { }
-
/** Called when a failed node is working (ready for production) again */
void working(T node);
diff --git a/container-search/src/main/java/com/yahoo/search/cluster/TrafficNodeMonitor.java b/container-search/src/main/java/com/yahoo/search/cluster/TrafficNodeMonitor.java
index ea881ad8b48..e484811ab38 100644
--- a/container-search/src/main/java/com/yahoo/search/cluster/TrafficNodeMonitor.java
+++ b/container-search/src/main/java/com/yahoo/search/cluster/TrafficNodeMonitor.java
@@ -60,9 +60,14 @@ public class TrafficNodeMonitor<T> extends BaseNodeMonitor<T> {
setWorking(true,"Responds correctly");
}
+ /**
+ * Returns whether this node is currently is a state suitable for receiving traffic, or null if not known
+ */
+ public Boolean isKnownWorking() { return atStartUp ? null : isWorking; }
+
/** Thread-safely changes the state of this node if required */
- protected synchronized void setWorking(boolean working,String explanation) {
- if (this.isWorking==working) return; // Old news
+ protected synchronized void setWorking(boolean working, String explanation) {
+ if (this.isWorking == working) return; // Old news
if (explanation==null) {
explanation="";
diff --git a/container-search/src/main/java/com/yahoo/search/dispatch/InvokerFactory.java b/container-search/src/main/java/com/yahoo/search/dispatch/InvokerFactory.java
index 31af74a39b2..78c50254a84 100644
--- a/container-search/src/main/java/com/yahoo/search/dispatch/InvokerFactory.java
+++ b/container-search/src/main/java/com/yahoo/search/dispatch/InvokerFactory.java
@@ -48,21 +48,24 @@ public abstract class InvokerFactory {
* @return Optional containing the SearchInvoker or <i>empty</i> if some node in the
* list is invalid and the remaining coverage is not sufficient
*/
- public Optional<SearchInvoker> createSearchInvoker(VespaBackEndSearcher searcher, Query query, OptionalInt groupId, List<Node> nodes,
- boolean acceptIncompleteCoverage) {
+ public Optional<SearchInvoker> createSearchInvoker(VespaBackEndSearcher searcher,
+ Query query,
+ OptionalInt groupId,
+ List<Node> nodes,
+ boolean acceptIncompleteCoverage) {
List<SearchInvoker> invokers = new ArrayList<>(nodes.size());
Set<Integer> failed = null;
for (Node node : nodes) {
boolean nodeAdded = false;
- if (node.isWorking()) {
+ if (node.isWorking() != Boolean.FALSE) {
Optional<SearchInvoker> invoker = createNodeSearchInvoker(searcher, query, node);
- if(invoker.isPresent()) {
+ if (invoker.isPresent()) {
invokers.add(invoker.get());
nodeAdded = true;
}
}
- if (!nodeAdded) {
+ if ( ! nodeAdded) {
if (failed == null) {
failed = new HashSet<>();
}
diff --git a/container-search/src/main/java/com/yahoo/search/dispatch/searchcluster/Group.java b/container-search/src/main/java/com/yahoo/search/dispatch/searchcluster/Group.java
index 93cbd77cef7..0e4e87b9a6a 100644
--- a/container-search/src/main/java/com/yahoo/search/dispatch/searchcluster/Group.java
+++ b/container-search/src/main/java/com/yahoo/search/dispatch/searchcluster/Group.java
@@ -54,7 +54,7 @@ public class Group {
public int workingNodes() {
int nodesUp = 0;
for (Node node : nodes) {
- if (node.isWorking()) {
+ if (node.isWorking() == Boolean.TRUE) {
nodesUp++;
}
}
@@ -64,7 +64,7 @@ public class Group {
void aggregateActiveDocuments() {
long activeDocumentsInGroup = 0;
for (Node node : nodes) {
- if (node.isWorking()) {
+ if (node.isWorking() == Boolean.TRUE) {
activeDocumentsInGroup += node.getActiveDocuments();
}
}
diff --git a/container-search/src/main/java/com/yahoo/search/dispatch/searchcluster/Node.java b/container-search/src/main/java/com/yahoo/search/dispatch/searchcluster/Node.java
index 0dd933fc24e..b47f2fefa5b 100644
--- a/container-search/src/main/java/com/yahoo/search/dispatch/searchcluster/Node.java
+++ b/container-search/src/main/java/com/yahoo/search/dispatch/searchcluster/Node.java
@@ -46,16 +46,15 @@ public class Node {
/** Returns the id of this group this node belongs to */
public int group() { return group; }
- /** Note that we know the status of this node */
- public void setStatusIsKnown() { statusIsKnown.lazySet(true); }
-
- /** Returns whether we know the status of this node */
- public boolean getStatusIsKnown() { return statusIsKnown.get(); }
-
- public void setWorking(boolean working) { this.working.lazySet(working); }
+ public void setWorking(boolean working) {
+ this.statusIsKnown.lazySet(true);
+ this.working.lazySet(working);
+ }
- /** Returns whether this node is currently responding to requests */
- public boolean isWorking() { return working.get(); }
+ /** Returns whether this node is currently responding to requests, or null if status is not known */
+ public Boolean isWorking() {
+ return statusIsKnown.get() ? working.get() : null;
+ }
/** Updates the active documents on this node */
public void setActiveDocuments(long activeDocuments) {
diff --git a/container-search/src/main/java/com/yahoo/search/dispatch/searchcluster/SearchCluster.java b/container-search/src/main/java/com/yahoo/search/dispatch/searchcluster/SearchCluster.java
index e2719545a6a..277252d14f0 100644
--- a/container-search/src/main/java/com/yahoo/search/dispatch/searchcluster/SearchCluster.java
+++ b/container-search/src/main/java/com/yahoo/search/dispatch/searchcluster/SearchCluster.java
@@ -95,13 +95,8 @@ public class SearchCluster implements NodeManager<Node> {
this.pingFactory = pingFactory;
for (var group : orderedGroups) {
- for (var node : group.nodes()) {
- // cluster monitor will only call working() when the
- // node transitions from down to up, so we need to
- // register the initial (working) state here:
- working(node);
+ for (var node : group.nodes())
clusterMonitor.add(node, true);
- }
}
}
@@ -199,18 +194,12 @@ public class SearchCluster implements NodeManager<Node> {
Group localSearchGroup = groups.get(localCorpusDispatchTarget.get().group());
if ( ! localSearchGroup.hasSufficientCoverage()) return Optional.empty();
- // Only use direct dispatch if the local search node is up
- if ( ! localCorpusDispatchTarget.get().isWorking()) return Optional.empty();
+ // Only use direct dispatch if the local search node is not down
+ if ( localCorpusDispatchTarget.get().isWorking() == Boolean.FALSE) return Optional.empty();
return localCorpusDispatchTarget;
}
- /** Called by the cluster monitor whenever we get information (positive or negative) about a node */
- @Override
- public void statusIsKnown(Node node) {
- node.setStatusIsKnown();
- }
-
/** Called by the cluster monitor when node state changes to working */
@Override
public void working(Node node) {
@@ -244,7 +233,7 @@ public class SearchCluster implements NodeManager<Node> {
private void updateVipStatusOnCoverageChange(Group group, boolean sufficientCoverage) {
if ( localCorpusDispatchTarget.isEmpty()) { // consider entire cluster
- if (vipStatus.isInRotation() && sufficientCoverage)
+ if ( ! vipStatus.isInRotation() && sufficientCoverage)
vipStatus.addToRotation(clusterId);
}
else if (usesLocalCorpusIn(group)) { // follow the status of this group
@@ -260,11 +249,11 @@ public class SearchCluster implements NodeManager<Node> {
}
private boolean hasInformationAboutAllNodes() {
- return nodesByHost.values().stream().allMatch(Node::getStatusIsKnown);
+ return nodesByHost.values().stream().allMatch(node -> node.isWorking() != null);
}
private boolean hasWorkingNodes() {
- return nodesByHost.values().stream().anyMatch(Node::isWorking);
+ return nodesByHost.values().stream().anyMatch(node -> node.isWorking() != Boolean.FALSE );
}
private boolean usesLocalCorpusIn(Node node) {
diff --git a/container-search/src/main/java/com/yahoo/search/result/ErrorMessage.java b/container-search/src/main/java/com/yahoo/search/result/ErrorMessage.java
index a58db5a542e..47221ee00f5 100644
--- a/container-search/src/main/java/com/yahoo/search/result/ErrorMessage.java
+++ b/container-search/src/main/java/com/yahoo/search/result/ErrorMessage.java
@@ -103,6 +103,7 @@ public class ErrorMessage extends com.yahoo.processing.request.ErrorMessage {
/** Creates an error indicating that there was a general error communicating with a backend service. */
public static ErrorMessage createBackendCommunicationError(String detailedMessage) {
+ if (1==1) throw new RuntimeException();
return new ErrorMessage(BACKEND_COMMUNICATION_ERROR.code, "Backend communication error", detailedMessage);
}
diff --git a/container-search/src/test/java/com/yahoo/search/cluster/test/ClusteredConnectionTestCase.java b/container-search/src/test/java/com/yahoo/search/cluster/test/ClusteredConnectionTestCase.java
index 84c10991293..79e96a7c5a2 100644
--- a/container-search/src/test/java/com/yahoo/search/cluster/test/ClusteredConnectionTestCase.java
+++ b/container-search/src/test/java/com/yahoo/search/cluster/test/ClusteredConnectionTestCase.java
@@ -114,7 +114,8 @@ public class ClusteredConnectionTestCase {
connection0.setInService(false);
forcePing(myBackend);
r=new Execution(myBackend, Execution.Context.createContextStub()).search(new SimpleQuery(0));
- assertEquals("No backends in service. Try later",r.hits().getError().getMessage());
+ System.out.println(r.hits().getError().getDetailedMessage());
+ assertEquals("No backends in service. Try later", r.hits().getError().getMessage());
connection2.setInService(true);
connection1.setInService(true);
diff --git a/container-search/src/test/java/com/yahoo/search/dispatch/searchcluster/SearchClusterTest.java b/container-search/src/test/java/com/yahoo/search/dispatch/searchcluster/SearchClusterTest.java
index bbaf512534a..df1049f499d 100644
--- a/container-search/src/test/java/com/yahoo/search/dispatch/searchcluster/SearchClusterTest.java
+++ b/container-search/src/test/java/com/yahoo/search/dispatch/searchcluster/SearchClusterTest.java
@@ -19,13 +19,16 @@ import java.util.concurrent.atomic.AtomicInteger;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertTrue;
+/**
+ * @author baldersheim
+ */
public class SearchClusterTest {
static class State {
final String clusterId;
final int nodesPerGroup;
final VipStatus vipStatus;
- final SearchCluster sc;
+ final SearchCluster searchCluster;
final List<AtomicInteger> numDocsPerNode;
List<AtomicInteger> pingCounts;
State(String clusterId, int nodesPergroup, String ... nodeNames) {
@@ -35,9 +38,6 @@ public class SearchClusterTest {
this.clusterId = clusterId;
this.nodesPerGroup = nodesPergroup;
vipStatus = new VipStatus(new QrSearchersConfig.Builder().searchcluster(new QrSearchersConfig.Searchcluster.Builder().name(clusterId)).build(), new ClustersStatus());
- assertFalse(vipStatus.isInRotation());
- vipStatus.addToRotation(clusterId);
- assertTrue(vipStatus.isInRotation());
numDocsPerNode = new ArrayList<>(nodeNames.size());
pingCounts = new ArrayList<>(nodeNames.size());
List<Node> nodes = new ArrayList<>(nodeNames.size());
@@ -49,10 +49,10 @@ public class SearchClusterTest {
numDocsPerNode.add(new AtomicInteger(1));
pingCounts.add(new AtomicInteger(0));
}
- sc = new SearchCluster(clusterId, MockSearchCluster.createDispatchConfig(nodes),nodes.size() / nodesPergroup, vipStatus);
+ searchCluster = new SearchCluster(clusterId, MockSearchCluster.createDispatchConfig(nodes), nodes.size() / nodesPergroup, vipStatus);
}
void startMonitoring() {
- sc.startClusterMonitoring(new Factory(nodesPerGroup, numDocsPerNode, pingCounts));
+ searchCluster.startClusterMonitoring(new Factory(nodesPerGroup, numDocsPerNode, pingCounts));
}
private static int getMaxValue(List<AtomicInteger> list) {
int max = list.get(0).get();
@@ -118,19 +118,25 @@ public class SearchClusterTest {
}
@Test
- public void requireThatVipStatusIsDefaultUp() {
+ public void requireThatVipStatusIsDefaultDownButComesUpAfterPinging() {
State test = new State("cluster.1", 2, "a", "b");
+ assertTrue(test.searchCluster.localCorpusDispatchTarget().isEmpty());
+
+ assertFalse(test.vipStatus.isInRotation());
+ test.startMonitoring();
+ test.waitOneFullPingRound();
assertTrue(test.vipStatus.isInRotation());
- assertTrue(test.sc.localCorpusDispatchTarget().isEmpty());
}
@Test
public void requireThatZeroDocsAreFine() {
State test = new State("cluster.1", 2,"a", "b");
+ test.startMonitoring();
+ test.waitOneFullPingRound();
+
assertTrue(test.vipStatus.isInRotation());
- assertTrue(test.sc.localCorpusDispatchTarget().isEmpty());
+ assertTrue(test.searchCluster.localCorpusDispatchTarget().isEmpty());
- test.startMonitoring();
test.numDocsPerNode.get(0).set(-1);
test.numDocsPerNode.get(1).set(-1);
test.waitOneFullPingRound();
@@ -141,21 +147,27 @@ public class SearchClusterTest {
}
@Test
- public void requireThatVipStatusIsDefaultUpWithLocalDispatch() {
+ public void requireThatVipStatusIsDefaultDownWithLocalDispatch() {
State test = new State("cluster.1", 1, HostName.getLocalhost(), "b");
+ assertTrue(test.searchCluster.localCorpusDispatchTarget().isPresent());
+
+ assertFalse(test.vipStatus.isInRotation());
+ test.startMonitoring();
+ test.waitOneFullPingRound();
assertTrue(test.vipStatus.isInRotation());
- assertTrue(test.sc.localCorpusDispatchTarget().isPresent());
}
@Test
public void requireThatVipStatusDownWhenLocalIsDown() {
State test = new State("cluster.1",1,HostName.getLocalhost(), "b");
- assertTrue(test.vipStatus.isInRotation());
- assertTrue(test.sc.localCorpusDispatchTarget().isPresent());
test.startMonitoring();
test.waitOneFullPingRound();
assertTrue(test.vipStatus.isInRotation());
+ assertTrue(test.searchCluster.localCorpusDispatchTarget().isPresent());
+
+ test.waitOneFullPingRound();
+ assertTrue(test.vipStatus.isInRotation());
test.numDocsPerNode.get(0).set(-1);
test.waitOneFullPingRound();
assertFalse(test.vipStatus.isInRotation());
@@ -183,10 +195,11 @@ public class SearchClusterTest {
private void verifyThatVipStatusDownRequireAllNodesDown(int numGroups, int nodesPerGroup) {
List<String> nodeNames = generateNodeNames(numGroups, nodesPerGroup);
State test = new State("cluster.1", nodesPerGroup, nodeNames);
+ test.startMonitoring();
+ test.waitOneFullPingRound();
assertTrue(test.vipStatus.isInRotation());
- assertTrue(test.sc.localCorpusDispatchTarget().isEmpty());
+ assertTrue(test.searchCluster.localCorpusDispatchTarget().isEmpty());
- test.startMonitoring();
test.waitOneFullPingRound();
assertTrue(test.vipStatus.isInRotation());
@@ -214,13 +227,15 @@ public class SearchClusterTest {
}
return nodeNames;
}
+
private void verifyThatVipStatusUpRequireOnlyOneOnlineNode(int numGroups, int nodesPerGroup) {
List<String> nodeNames = generateNodeNames(numGroups, nodesPerGroup);
State test = new State("cluster.1", nodesPerGroup, nodeNames);
+ test.startMonitoring();
+ test.waitOneFullPingRound();
assertTrue(test.vipStatus.isInRotation());
- assertTrue(test.sc.localCorpusDispatchTarget().isEmpty());
+ assertTrue(test.searchCluster.localCorpusDispatchTarget().isEmpty());
- test.startMonitoring();
for (int i=0; i < test.numDocsPerNode.size()-1; i++) {
test.numDocsPerNode.get(i).set(-1);
}