From d9a0deeb28c911ccd90d2b618602cec96196d254 Mon Sep 17 00:00:00 2001 From: Jon Bratseth Date: Thu, 19 Sep 2019 19:15:19 +0200 Subject: Revert "Merge pull request #10737 from vespa-engine/revert-10717-bratseth/vip-logic" This reverts commit ae30a47174b67ca78dc33d21770903b3ee626240, reversing changes made to 992b73092f0d14beb3ae380904d27886fe4dbc89. --- .../com/yahoo/search/cluster/BaseNodeMonitor.java | 12 +- .../com/yahoo/search/cluster/ClusterMonitor.java | 4 +- .../java/com/yahoo/search/cluster/NodeManager.java | 3 + .../java/com/yahoo/search/dispatch/Dispatcher.java | 5 +- .../yahoo/search/dispatch/searchcluster/Group.java | 9 +- .../yahoo/search/dispatch/searchcluster/Node.java | 12 +- .../search/dispatch/searchcluster/PingFactory.java | 2 + .../dispatch/searchcluster/SearchCluster.java | 136 ++++++++++++--------- .../com/yahoo/search/dispatch/DispatcherTest.java | 2 +- .../yahoo/search/dispatch/MockSearchCluster.java | 2 +- 10 files changed, 114 insertions(+), 73 deletions(-) (limited to 'container-search/src') diff --git a/container-search/src/main/java/com/yahoo/search/cluster/BaseNodeMonitor.java b/container-search/src/main/java/com/yahoo/search/cluster/BaseNodeMonitor.java index 59674d25402..d21ef35bcc2 100644 --- a/container-search/src/main/java/com/yahoo/search/cluster/BaseNodeMonitor.java +++ b/container-search/src/main/java/com/yahoo/search/cluster/BaseNodeMonitor.java @@ -26,25 +26,25 @@ public abstract class BaseNodeMonitor { /** The object representing the monitored node */ protected T node; - protected boolean isWorking=true; + protected boolean isWorking = true; /** Whether this node is quarantined for unstability */ - protected boolean isQuarantined=false; + protected boolean isQuarantined = false; /** The last time this node failed, in ms */ - protected long failedAt=0; + protected long failedAt = 0; /** The last time this node responded (failed or succeeded), in ms */ - protected long respondedAt=0; + protected long respondedAt = 0; /** The last time this node responded successfully */ - protected long succeededAt=0; + protected long succeededAt = 0; /** The configuration of this monitor */ protected MonitorConfiguration configuration; /** Is the node we monitor part of an internal Vespa cluster or not */ - private boolean internal=false; + private boolean internal; public BaseNodeMonitor(boolean internal) { this.internal=internal; diff --git a/container-search/src/main/java/com/yahoo/search/cluster/ClusterMonitor.java b/container-search/src/main/java/com/yahoo/search/cluster/ClusterMonitor.java index ac0c8375f04..b79f6f49c19 100644 --- a/container-search/src/main/java/com/yahoo/search/cluster/ClusterMonitor.java +++ b/container-search/src/main/java/com/yahoo/search/cluster/ClusterMonitor.java @@ -24,7 +24,7 @@ public class ClusterMonitor { private MonitorConfiguration configuration = new MonitorConfiguration(); - private static Logger log=Logger.getLogger(ClusterMonitor.class.getName()); + private static Logger log = Logger.getLogger(ClusterMonitor.class.getName()); private NodeManager nodeManager; @@ -69,6 +69,7 @@ public class ClusterMonitor { /** Called from ClusterSearcher/NodeManager when a node failed */ public synchronized void failed(T node, ErrorMessage error) { + nodeManager.statusIsKnown(node); BaseNodeMonitor monitor = nodeMonitors.get(node); boolean wasWorking = monitor.isWorking(); monitor.failed(error); @@ -79,6 +80,7 @@ public class ClusterMonitor { /** Called when a node responded */ public synchronized void responded(T node) { + nodeManager.statusIsKnown(node); BaseNodeMonitor monitor = nodeMonitors.get(node); boolean wasFailing =! monitor.isWorking(); monitor.responded(); diff --git a/container-search/src/main/java/com/yahoo/search/cluster/NodeManager.java b/container-search/src/main/java/com/yahoo/search/cluster/NodeManager.java index 9b20139e3c5..ef10680a4ae 100644 --- a/container-search/src/main/java/com/yahoo/search/cluster/NodeManager.java +++ b/container-search/src/main/java/com/yahoo/search/cluster/NodeManager.java @@ -11,6 +11,9 @@ import java.util.concurrent.Executor; */ public interface NodeManager { + /** Called when we gain evidence about whether or not a node is working */ + default void statusIsKnown(T node) { } + /** Called when a failed node is working (ready for production) again */ void working(T node); diff --git a/container-search/src/main/java/com/yahoo/search/dispatch/Dispatcher.java b/container-search/src/main/java/com/yahoo/search/dispatch/Dispatcher.java index 58f73ea52cc..af34fc3e106 100644 --- a/container-search/src/main/java/com/yahoo/search/dispatch/Dispatcher.java +++ b/container-search/src/main/java/com/yahoo/search/dispatch/Dispatcher.java @@ -139,7 +139,7 @@ public class Dispatcher extends AbstractComponent { return invoker; } - // build invoker based on searchpath + /** Builds an invoker based on searchpath */ private Optional getSearchPathInvoker(Query query, VespaBackEndSearcher searcher) { String searchPath = query.getModel().getSearchPath(); if (searchPath == null) return Optional.empty(); @@ -156,7 +156,7 @@ public class Dispatcher extends AbstractComponent { } private Optional getInternalInvoker(Query query, VespaBackEndSearcher searcher) { - Optional directNode = searchCluster.directDispatchTarget(); + Optional directNode = searchCluster.localCorpusDispatchTarget(); if (directNode.isPresent()) { Node node = directNode.get(); query.trace(false, 2, "Dispatching directly to ", node); @@ -202,4 +202,5 @@ public class Dispatcher extends AbstractComponent { metric.add(INTERNAL_METRIC, 1, metricContext); } } + } diff --git a/container-search/src/main/java/com/yahoo/search/dispatch/searchcluster/Group.java b/container-search/src/main/java/com/yahoo/search/dispatch/searchcluster/Group.java index bafc72b9b43..93cbd77cef7 100644 --- a/container-search/src/main/java/com/yahoo/search/dispatch/searchcluster/Group.java +++ b/container-search/src/main/java/com/yahoo/search/dispatch/searchcluster/Group.java @@ -77,6 +77,11 @@ public class Group { return this.activeDocuments.get(); } + public boolean isFullCoverageStatusChanged(boolean hasFullCoverageNow) { + boolean previousState = hasFullCoverage.getAndSet(hasFullCoverageNow); + return previousState != hasFullCoverageNow; + } + @Override public String toString() { return "search group " + id; } @@ -90,8 +95,4 @@ public class Group { return ((Group) other).id == this.id; } - public boolean isFullCoverageStatusChanged(boolean hasFullCoverageNow) { - boolean previousState = hasFullCoverage.getAndSet(hasFullCoverageNow); - return previousState != hasFullCoverageNow; - } } diff --git a/container-search/src/main/java/com/yahoo/search/dispatch/searchcluster/Node.java b/container-search/src/main/java/com/yahoo/search/dispatch/searchcluster/Node.java index a71ce0354f9..0dd933fc24e 100644 --- a/container-search/src/main/java/com/yahoo/search/dispatch/searchcluster/Node.java +++ b/container-search/src/main/java/com/yahoo/search/dispatch/searchcluster/Node.java @@ -19,6 +19,7 @@ public class Node { private final int fs4port; final int group; + private final AtomicBoolean statusIsKnown = new AtomicBoolean(false); private final AtomicBoolean working = new AtomicBoolean(true); private final AtomicLong activeDocuments = new AtomicLong(0); @@ -45,9 +46,13 @@ public class Node { /** Returns the id of this group this node belongs to */ public int group() { return group; } - public void setWorking(boolean working) { - this.working.lazySet(working); - } + /** Note that we know the status of this node */ + public void setStatusIsKnown() { statusIsKnown.lazySet(true); } + + /** Returns whether we know the status of this node */ + public boolean getStatusIsKnown() { return statusIsKnown.get(); } + + public void setWorking(boolean working) { this.working.lazySet(working); } /** Returns whether this node is currently responding to requests */ public boolean isWorking() { return working.get(); } @@ -77,4 +82,5 @@ public class Node { @Override public String toString() { return "search node " + hostname + ":" + fs4port + " in group " + group; } + } diff --git a/container-search/src/main/java/com/yahoo/search/dispatch/searchcluster/PingFactory.java b/container-search/src/main/java/com/yahoo/search/dispatch/searchcluster/PingFactory.java index c9f722ef79b..285a1fcd57e 100644 --- a/container-search/src/main/java/com/yahoo/search/dispatch/searchcluster/PingFactory.java +++ b/container-search/src/main/java/com/yahoo/search/dispatch/searchcluster/PingFactory.java @@ -6,5 +6,7 @@ import com.yahoo.search.cluster.ClusterMonitor; import java.util.concurrent.Callable; public interface PingFactory { + Callable createPinger(Node node, ClusterMonitor monitor); + } diff --git a/container-search/src/main/java/com/yahoo/search/dispatch/searchcluster/SearchCluster.java b/container-search/src/main/java/com/yahoo/search/dispatch/searchcluster/SearchCluster.java index c091e211aca..2bcac6e2ce4 100644 --- a/container-search/src/main/java/com/yahoo/search/dispatch/searchcluster/SearchCluster.java +++ b/container-search/src/main/java/com/yahoo/search/dispatch/searchcluster/SearchCluster.java @@ -55,7 +55,7 @@ public class SearchCluster implements NodeManager { * if it only queries this cluster when the local node cannot be used, to avoid unnecessary * cross-node network traffic. */ - private final Optional directDispatchTarget; + private final Optional localCorpusDispatchTarget; public SearchCluster(String clusterId, DispatchConfig dispatchConfig, int containerClusterSize, VipStatus vipStatus) { this.clusterId = clusterId; @@ -82,8 +82,11 @@ public class SearchCluster implements NodeManager { nodesByHostBuilder.put(node.hostname(), node); this.nodesByHost = nodesByHostBuilder.build(); - this.directDispatchTarget = findDirectDispatchTarget(HostName.getLocalhost(), size, containerClusterSize, - nodesByHost, groups); + this.localCorpusDispatchTarget = findLocalCorpusDispatchTarget(HostName.getLocalhost(), + size, + containerClusterSize, + nodesByHost, + groups); this.clusterMonitor = new ClusterMonitor<>(this); } @@ -102,11 +105,11 @@ public class SearchCluster implements NodeManager { } } - private static Optional findDirectDispatchTarget(String selfHostname, - int searchClusterSize, - int containerClusterSize, - ImmutableMultimap nodesByHost, - ImmutableMap groups) { + private static Optional findLocalCorpusDispatchTarget(String selfHostname, + int searchClusterSize, + int containerClusterSize, + ImmutableMultimap nodesByHost, + ImmutableMap groups) { // A search node in the search cluster in question is configured on the same host as the currently running container. // It has all the data <==> No other nodes in the search cluster have the same group id as this node. // That local search node responds. @@ -186,70 +189,101 @@ public class SearchCluster implements NodeManager { } /** - * Returns the recipient we should dispatch queries directly to (bypassing fdispatch), + * Returns the single, local node we should dispatch queries directly to, * or empty if we should not dispatch directly. */ - public Optional directDispatchTarget() { - if ( directDispatchTarget.isEmpty()) return Optional.empty(); + public Optional localCorpusDispatchTarget() { + if ( localCorpusDispatchTarget.isEmpty()) return Optional.empty(); // Only use direct dispatch if the local group has sufficient coverage - Group localSearchGroup = groups.get(directDispatchTarget.get().group()); + Group localSearchGroup = groups.get(localCorpusDispatchTarget.get().group()); if ( ! localSearchGroup.hasSufficientCoverage()) return Optional.empty(); // Only use direct dispatch if the local search node is up - if ( ! directDispatchTarget.get().isWorking()) return Optional.empty(); + if ( ! localCorpusDispatchTarget.get().isWorking()) return Optional.empty(); - return directDispatchTarget; + return localCorpusDispatchTarget; } - /** Used by the cluster monitor to manage node status */ + /** Called by the cluster monitor whenever we get information (positive or negative) about a node */ + @Override + public void statusIsKnown(Node node) { + node.setStatusIsKnown(); + } + + /** Called by the cluster monitor when node state changes to working */ @Override public void working(Node node) { node.setWorking(true); - - if (usesDirectDispatchTo(node)) - vipStatus.addToRotation(clusterId); + updateVipStatusOnNodeChange(node, true); } - /** Used by the cluster monitor to manage node status */ + /** Called by the cluster monitor when node state changes to failed */ @Override public void failed(Node node) { node.setWorking(false); - - // Take ourselves out if we usually dispatch only to our own host - if (usesDirectDispatchTo(node)) - vipStatus.removeFromRotation(clusterId); + updateVipStatusOnNodeChange(node, true); } private void updateSufficientCoverage(Group group, boolean sufficientCoverage) { - // update VIP status if we direct dispatch to this group and coverage status changed - boolean isInRotation = vipStatus.isInRotation(); - boolean hasChanged = sufficientCoverage != group.hasSufficientCoverage(); - boolean isDirectDispatchGroupAndChange = usesDirectDispatchTo(group) && hasChanged; + if (sufficientCoverage == group.hasSufficientCoverage()) return; // no change + group.setHasSufficientCoverage(sufficientCoverage); - if ((!isInRotation || isDirectDispatchGroupAndChange) && sufficientCoverage) { - // We will set this cluster in rotation if - // - not already in rotation and one group has sufficient coverage. - vipStatus.addToRotation(clusterId); - } else if (isDirectDispatchGroupAndChange) { - // We will take it out of rotation if the group is mandatory (direct dispatch to this group) - vipStatus.removeFromRotation(clusterId); + updateVipStatusOnCoverageChange(group, sufficientCoverage); + } + + private void updateVipStatusOnNodeChange(Node node, boolean working) { + if (usesLocalCorpusIn(node)) { // follow the status of the local corpus + if (working) + vipStatus.addToRotation(clusterId); + else + vipStatus.removeFromRotation(clusterId); + } + else { + if ( ! hasInformationAboutAllNodes()) return; + + if (hasWorkingNodes()) + vipStatus.addToRotation(clusterId); + else + vipStatus.removeFromRotation(clusterId); } } - private boolean usesDirectDispatchTo(Node node) { - return directDispatchTarget.isPresent() && directDispatchTarget.get().equals(node); + private void updateVipStatusOnCoverageChange(Group group, boolean sufficientCoverage) { + boolean isInRotation = vipStatus.isInRotation(); + if (usesLocalCorpusIn(group)) { // follow the status of the local corpus + if (sufficientCoverage) + vipStatus.addToRotation(clusterId); + else + vipStatus.removeFromRotation(clusterId); + } + else { + if ( ! isInRotation && sufficientCoverage) + vipStatus.addToRotation(clusterId); + } + } + + private boolean hasInformationAboutAllNodes() { + return nodesByHost.values().stream().allMatch(Node::getStatusIsKnown); + } + + private boolean hasWorkingNodes() { + return nodesByHost.values().stream().anyMatch(Node::isWorking); + } + + private boolean usesLocalCorpusIn(Node node) { + return localCorpusDispatchTarget.isPresent() && localCorpusDispatchTarget.get().equals(node); } - private boolean usesDirectDispatchTo(Group group) { - return directDispatchTarget.isPresent() && directDispatchTarget.get().group() == group.id(); + private boolean usesLocalCorpusIn(Group group) { + return localCorpusDispatchTarget.isPresent() && localCorpusDispatchTarget.get().group() == group.id(); } /** Used by the cluster monitor to manage node status */ @Override public void ping(Node node, Executor executor) { - if (pingFactory == null) // not initialized yet - return; + if (pingFactory == null) return; // not initialized yet + FutureTask futurePong = new FutureTask<>(pingFactory.createPinger(node, clusterMonitor)); executor.execute(futurePong); Pong pong = getPong(futurePong, node); @@ -272,9 +306,10 @@ public class SearchCluster implements NodeManager { // group will always be marked sufficient for use. updateSufficientCoverage(group, true); boolean fullCoverage = isGroupCoverageSufficient(group.workingNodes(), group.nodes().size(), group.getActiveDocuments(), - group.getActiveDocuments()); + group.getActiveDocuments()); trackGroupCoverageChanges(0, group, fullCoverage, group.getActiveDocuments()); } + private void pingIterationCompletedMultipleGroups() { int numGroups = orderedGroups.size(); // Update active documents per group and use it to decide if the group should be active @@ -299,13 +334,7 @@ public class SearchCluster implements NodeManager { trackGroupCoverageChanges(i, group, sufficientCoverage, averageDocumentsInOtherGroups); } } - private boolean areAllNodesDownInAllgroups() { - for(int i = 0; i < groups.size(); i++) { - Group group = orderedGroups.get(i); - if (group.workingNodes() > 0) return false; - } - return true; - } + /** * Update statistics after a round of issuing pings. * Note that this doesn't wait for pings to return, so it will typically accumulate data from @@ -319,9 +348,6 @@ public class SearchCluster implements NodeManager { } else { pingIterationCompletedMultipleGroups(); } - if ( areAllNodesDownInAllgroups() ) { - vipStatus.removeFromRotation(clusterId); - } } private boolean isGroupCoverageSufficient(int workingNodes, int nodesInGroup, long activeDocuments, long averageDocumentsInOtherGroups) { @@ -397,11 +423,11 @@ public class SearchCluster implements NodeManager { if (changed) { int requiredNodes = groupSize() - dispatchConfig.maxNodesDownPerGroup(); if (fullCoverage) { - log.info(() -> String.format("Group %d is now good again (%d/%d active docs, coverage %d/%d)", index, - group.getActiveDocuments(), averageDocuments, group.workingNodes(), groupSize())); + log.info(() -> String.format("Group %d is now good again (%d/%d active docs, coverage %d/%d)", + index, group.getActiveDocuments(), averageDocuments, group.workingNodes(), groupSize())); } else { - log.warning(() -> String.format("Coverage of group %d is only %d/%d (requires %d)", index, group.workingNodes(), groupSize(), - requiredNodes)); + log.warning(() -> String.format("Coverage of group %d is only %d/%d (requires %d)", + index, group.workingNodes(), groupSize(), requiredNodes)); } } } diff --git a/container-search/src/test/java/com/yahoo/search/dispatch/DispatcherTest.java b/container-search/src/test/java/com/yahoo/search/dispatch/DispatcherTest.java index 03a417c9bbb..310f536f961 100644 --- a/container-search/src/test/java/com/yahoo/search/dispatch/DispatcherTest.java +++ b/container-search/src/test/java/com/yahoo/search/dispatch/DispatcherTest.java @@ -69,7 +69,7 @@ public class DispatcherTest { public void requireThatDispatcherSupportsSingleNodeDirectDispatch() { SearchCluster cl = new MockSearchCluster("1", 0, 0) { @Override - public Optional directDispatchTarget() { + public Optional localCorpusDispatchTarget() { return Optional.of(new Node(1, "test", 123, 1)); } }; diff --git a/container-search/src/test/java/com/yahoo/search/dispatch/MockSearchCluster.java b/container-search/src/test/java/com/yahoo/search/dispatch/MockSearchCluster.java index 7ee62ae9978..f4bfa766328 100644 --- a/container-search/src/test/java/com/yahoo/search/dispatch/MockSearchCluster.java +++ b/container-search/src/test/java/com/yahoo/search/dispatch/MockSearchCluster.java @@ -89,7 +89,7 @@ public class MockSearchCluster extends SearchCluster { } @Override - public Optional directDispatchTarget() { + public Optional localCorpusDispatchTarget() { return Optional.empty(); } -- cgit v1.2.3 From 8d34d79266b612da690a347608653b59f23b46c6 Mon Sep 17 00:00:00 2001 From: Jon Bratseth Date: Thu, 19 Sep 2019 19:15:50 +0200 Subject: Revert "Merge pull request #10736 from vespa-engine/revert-10727-balder/add-searchcluster-test-with-local" This reverts commit 992b73092f0d14beb3ae380904d27886fe4dbc89, reversing changes made to 925ad2648e24ca0db15054beb7450f209712e404. --- .../dispatch/searchcluster/SearchCluster.java | 8 +- .../yahoo/search/dispatch/MockSearchCluster.java | 7 + .../dispatch/searchcluster/SearchClusterTest.java | 243 +++++++++++++++++++++ 3 files changed, 253 insertions(+), 5 deletions(-) create mode 100644 container-search/src/test/java/com/yahoo/search/dispatch/searchcluster/SearchClusterTest.java (limited to 'container-search/src') diff --git a/container-search/src/main/java/com/yahoo/search/dispatch/searchcluster/SearchCluster.java b/container-search/src/main/java/com/yahoo/search/dispatch/searchcluster/SearchCluster.java index 2bcac6e2ce4..6f775e7218e 100644 --- a/container-search/src/main/java/com/yahoo/search/dispatch/searchcluster/SearchCluster.java +++ b/container-search/src/main/java/com/yahoo/search/dispatch/searchcluster/SearchCluster.java @@ -239,9 +239,7 @@ public class SearchCluster implements NodeManager { else vipStatus.removeFromRotation(clusterId); } - else { - if ( ! hasInformationAboutAllNodes()) return; - + else if (localCorpusDispatchTarget.isEmpty() && hasInformationAboutAllNodes()) { if (hasWorkingNodes()) vipStatus.addToRotation(clusterId); else @@ -257,8 +255,8 @@ public class SearchCluster implements NodeManager { else vipStatus.removeFromRotation(clusterId); } - else { - if ( ! isInRotation && sufficientCoverage) + else if ( localCorpusDispatchTarget.isEmpty()) { + if (isInRotation && sufficientCoverage) vipStatus.addToRotation(clusterId); } } diff --git a/container-search/src/test/java/com/yahoo/search/dispatch/MockSearchCluster.java b/container-search/src/test/java/com/yahoo/search/dispatch/MockSearchCluster.java index f4bfa766328..e3ff54102d4 100644 --- a/container-search/src/test/java/com/yahoo/search/dispatch/MockSearchCluster.java +++ b/container-search/src/test/java/com/yahoo/search/dispatch/MockSearchCluster.java @@ -10,6 +10,7 @@ import com.yahoo.search.dispatch.searchcluster.SearchCluster; import com.yahoo.vespa.config.search.DispatchConfig; import java.util.ArrayList; +import java.util.Arrays; import java.util.List; import java.util.Optional; @@ -106,8 +107,14 @@ public class MockSearchCluster extends SearchCluster { public static DispatchConfig createDispatchConfig(Node... nodes) { return createDispatchConfig(100.0, nodes); } + public static DispatchConfig createDispatchConfig(List nodes) { + return createDispatchConfig(100.0, nodes); + } public static DispatchConfig createDispatchConfig(double minSearchCoverage, Node... nodes) { + return createDispatchConfig(minSearchCoverage, Arrays.asList(nodes)); + } + public static DispatchConfig createDispatchConfig(double minSearchCoverage, List nodes) { DispatchConfig.Builder builder = new DispatchConfig.Builder(); builder.minActivedocsPercentage(88.0); builder.minGroupCoverage(99.0); diff --git a/container-search/src/test/java/com/yahoo/search/dispatch/searchcluster/SearchClusterTest.java b/container-search/src/test/java/com/yahoo/search/dispatch/searchcluster/SearchClusterTest.java new file mode 100644 index 00000000000..bbaf512534a --- /dev/null +++ b/container-search/src/test/java/com/yahoo/search/dispatch/searchcluster/SearchClusterTest.java @@ -0,0 +1,243 @@ +package com.yahoo.search.dispatch.searchcluster; + +import com.yahoo.container.QrSearchersConfig; +import com.yahoo.container.handler.ClustersStatus; +import com.yahoo.container.handler.VipStatus; +import com.yahoo.net.HostName; +import com.yahoo.prelude.Pong; +import com.yahoo.search.cluster.ClusterMonitor; +import com.yahoo.search.dispatch.MockSearchCluster; +import com.yahoo.search.result.ErrorMessage; +import org.junit.Test; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; +import java.util.concurrent.Callable; +import java.util.concurrent.atomic.AtomicInteger; + +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertTrue; + +public class SearchClusterTest { + + static class State { + final String clusterId; + final int nodesPerGroup; + final VipStatus vipStatus; + final SearchCluster sc; + final List numDocsPerNode; + List pingCounts; + State(String clusterId, int nodesPergroup, String ... nodeNames) { + this(clusterId, nodesPergroup, Arrays.asList(nodeNames)); + } + State(String clusterId, int nodesPergroup, List nodeNames) { + this.clusterId = clusterId; + this.nodesPerGroup = nodesPergroup; + vipStatus = new VipStatus(new QrSearchersConfig.Builder().searchcluster(new QrSearchersConfig.Searchcluster.Builder().name(clusterId)).build(), new ClustersStatus()); + assertFalse(vipStatus.isInRotation()); + vipStatus.addToRotation(clusterId); + assertTrue(vipStatus.isInRotation()); + numDocsPerNode = new ArrayList<>(nodeNames.size()); + pingCounts = new ArrayList<>(nodeNames.size()); + List nodes = new ArrayList<>(nodeNames.size()); + + for (String name : nodeNames) { + int key = nodes.size() % nodesPergroup; + int group = nodes.size() / nodesPergroup; + nodes.add(new Node(key, name, 13333, group)); + numDocsPerNode.add(new AtomicInteger(1)); + pingCounts.add(new AtomicInteger(0)); + } + sc = new SearchCluster(clusterId, MockSearchCluster.createDispatchConfig(nodes),nodes.size() / nodesPergroup, vipStatus); + } + void startMonitoring() { + sc.startClusterMonitoring(new Factory(nodesPerGroup, numDocsPerNode, pingCounts)); + } + private static int getMaxValue(List list) { + int max = list.get(0).get(); + for (AtomicInteger v : list) { + if (v.get() > max) { + max = v.get(); + } + } + return max; + } + private static int getMinValue(List list) { + int min = list.get(0).get(); + for (AtomicInteger v : list) { + if (v.get() < min) { + min = v.get(); + } + } + return min; + } + private static void waitAtLeast(int atLeast, List list) { + while (getMinValue(list) < atLeast) { + try { + Thread.sleep(100); + } catch (InterruptedException e) {} + } + } + void waitOneFullPingRound() { + waitAtLeast(getMaxValue(pingCounts) + 1, pingCounts); + } + static class Factory implements PingFactory { + static class Pinger implements Callable { + private final AtomicInteger numDocs; + private final AtomicInteger pingCount; + Pinger(AtomicInteger numDocs, AtomicInteger pingCount) { + this.numDocs = numDocs; + this.pingCount = pingCount; + } + @Override + public Pong call() throws Exception { + int docs = numDocs.get(); + pingCount.incrementAndGet(); + return (docs < 0) + ? new Pong(ErrorMessage.createBackendCommunicationError("Negative numDocs = " + docs)) + : new Pong(docs); + } + } + + private final List activeDocs; + private final List pingCounts; + private final int numPerGroup; + Factory(int numPerGroup, List activeDocs, List pingCounts) { + this.numPerGroup = numPerGroup; + this.activeDocs = activeDocs; + this.pingCounts = pingCounts; + } + + @Override + public Callable createPinger(Node node, ClusterMonitor monitor) { + int index = node.group*numPerGroup + node.key(); + return new Pinger(activeDocs.get(index), pingCounts.get(index)); + } + } + } + + @Test + public void requireThatVipStatusIsDefaultUp() { + State test = new State("cluster.1", 2, "a", "b"); + assertTrue(test.vipStatus.isInRotation()); + assertTrue(test.sc.localCorpusDispatchTarget().isEmpty()); + } + + @Test + public void requireThatZeroDocsAreFine() { + State test = new State("cluster.1", 2,"a", "b"); + assertTrue(test.vipStatus.isInRotation()); + assertTrue(test.sc.localCorpusDispatchTarget().isEmpty()); + + test.startMonitoring(); + test.numDocsPerNode.get(0).set(-1); + test.numDocsPerNode.get(1).set(-1); + test.waitOneFullPingRound(); + assertFalse(test.vipStatus.isInRotation()); + test.numDocsPerNode.get(0).set(0); + test.waitOneFullPingRound(); + assertTrue(test.vipStatus.isInRotation()); + } + + @Test + public void requireThatVipStatusIsDefaultUpWithLocalDispatch() { + State test = new State("cluster.1", 1, HostName.getLocalhost(), "b"); + assertTrue(test.vipStatus.isInRotation()); + assertTrue(test.sc.localCorpusDispatchTarget().isPresent()); + } + + @Test + public void requireThatVipStatusDownWhenLocalIsDown() { + State test = new State("cluster.1",1,HostName.getLocalhost(), "b"); + assertTrue(test.vipStatus.isInRotation()); + assertTrue(test.sc.localCorpusDispatchTarget().isPresent()); + + test.startMonitoring(); + test.waitOneFullPingRound(); + assertTrue(test.vipStatus.isInRotation()); + test.numDocsPerNode.get(0).set(-1); + test.waitOneFullPingRound(); + assertFalse(test.vipStatus.isInRotation()); + + test.numDocsPerNode.get(0).set(1); + test.waitOneFullPingRound(); + assertTrue(test.vipStatus.isInRotation()); + + test.numDocsPerNode.get(1).set(-1); + test.waitOneFullPingRound(); + assertTrue(test.vipStatus.isInRotation()); + + test.numDocsPerNode.get(0).set(-1); + test.numDocsPerNode.get(1).set(-1); + test.waitOneFullPingRound(); + assertFalse(test.vipStatus.isInRotation()); + test.numDocsPerNode.get(1).set(1); + test.waitOneFullPingRound(); + assertFalse(test.vipStatus.isInRotation()); + test.numDocsPerNode.get(0).set(1); + test.waitOneFullPingRound(); + assertTrue(test.vipStatus.isInRotation()); + } + + private void verifyThatVipStatusDownRequireAllNodesDown(int numGroups, int nodesPerGroup) { + List nodeNames = generateNodeNames(numGroups, nodesPerGroup); + State test = new State("cluster.1", nodesPerGroup, nodeNames); + assertTrue(test.vipStatus.isInRotation()); + assertTrue(test.sc.localCorpusDispatchTarget().isEmpty()); + + test.startMonitoring(); + test.waitOneFullPingRound(); + assertTrue(test.vipStatus.isInRotation()); + + for (int i=0; i < test.numDocsPerNode.size()-1; i++) { + test.numDocsPerNode.get(i).set(-1); + } + test.waitOneFullPingRound(); + assertTrue(test.vipStatus.isInRotation()); + test.numDocsPerNode.get(test.numDocsPerNode.size()-1).set(-1); + test.waitOneFullPingRound(); + assertFalse(test.vipStatus.isInRotation()); + } + @Test + public void requireThatVipStatusDownRequireAllNodesDown() { + verifyThatVipStatusDownRequireAllNodesDown(1,2); + verifyThatVipStatusDownRequireAllNodesDown(3, 3); + } + + static private List generateNodeNames(int numGroups, int nodesPerGroup) { + List nodeNames = new ArrayList<>(numGroups*nodesPerGroup); + for (int g = 0; g < numGroups; g++) { + for (int n=0; n < nodesPerGroup; n++) { + nodeNames.add(new StringBuilder("node.").append(g).append('.').append(n).toString()); + } + } + return nodeNames; + } + private void verifyThatVipStatusUpRequireOnlyOneOnlineNode(int numGroups, int nodesPerGroup) { + List nodeNames = generateNodeNames(numGroups, nodesPerGroup); + State test = new State("cluster.1", nodesPerGroup, nodeNames); + assertTrue(test.vipStatus.isInRotation()); + assertTrue(test.sc.localCorpusDispatchTarget().isEmpty()); + + test.startMonitoring(); + for (int i=0; i < test.numDocsPerNode.size()-1; i++) { + test.numDocsPerNode.get(i).set(-1); + } + test.waitOneFullPingRound(); + assertTrue(test.vipStatus.isInRotation()); + test.numDocsPerNode.get(test.numDocsPerNode.size()-1).set(-1); + test.waitOneFullPingRound(); + assertFalse(test.vipStatus.isInRotation()); + + test.numDocsPerNode.get(0).set(0); + test.waitOneFullPingRound(); + assertTrue(test.vipStatus.isInRotation()); + } + @Test + public void requireThatVipStatusUpRequireOnlyOneOnlineNode() { + verifyThatVipStatusUpRequireOnlyOneOnlineNode(1, 2); + verifyThatVipStatusUpRequireOnlyOneOnlineNode(3, 3); + } + +} -- cgit v1.2.3 From 7b64e79d96cf9b3f61bd385f0a32e9454a4ef3d2 Mon Sep 17 00:00:00 2001 From: Jon Bratseth Date: Thu, 19 Sep 2019 19:16:26 +0200 Subject: Revert "Merge pull request #10735 from vespa-engine/revert-10732-bratseth/clarify" This reverts commit 925ad2648e24ca0db15054beb7450f209712e404, reversing changes made to 71988ae7d715c91464776176c616a46b459f81f4. --- .../dispatch/searchcluster/SearchCluster.java | 36 ++++++++++------------ 1 file changed, 17 insertions(+), 19 deletions(-) (limited to 'container-search/src') diff --git a/container-search/src/main/java/com/yahoo/search/dispatch/searchcluster/SearchCluster.java b/container-search/src/main/java/com/yahoo/search/dispatch/searchcluster/SearchCluster.java index 6f775e7218e..e2719545a6a 100644 --- a/container-search/src/main/java/com/yahoo/search/dispatch/searchcluster/SearchCluster.java +++ b/container-search/src/main/java/com/yahoo/search/dispatch/searchcluster/SearchCluster.java @@ -232,35 +232,33 @@ public class SearchCluster implements NodeManager { updateVipStatusOnCoverageChange(group, sufficientCoverage); } - private void updateVipStatusOnNodeChange(Node node, boolean working) { - if (usesLocalCorpusIn(node)) { // follow the status of the local corpus - if (working) - vipStatus.addToRotation(clusterId); - else - vipStatus.removeFromRotation(clusterId); + private void updateVipStatusOnNodeChange(Node node, boolean nodeIsWorking) { + if (localCorpusDispatchTarget.isEmpty()) { // consider entire cluster + if (hasInformationAboutAllNodes()) + setInRotationOnlyIf(hasWorkingNodes()); } - else if (localCorpusDispatchTarget.isEmpty() && hasInformationAboutAllNodes()) { - if (hasWorkingNodes()) - vipStatus.addToRotation(clusterId); - else - vipStatus.removeFromRotation(clusterId); + else if (usesLocalCorpusIn(node)) { // follow the status of this node + setInRotationOnlyIf(nodeIsWorking); } } private void updateVipStatusOnCoverageChange(Group group, boolean sufficientCoverage) { - boolean isInRotation = vipStatus.isInRotation(); - if (usesLocalCorpusIn(group)) { // follow the status of the local corpus - if (sufficientCoverage) + if ( localCorpusDispatchTarget.isEmpty()) { // consider entire cluster + if (vipStatus.isInRotation() && sufficientCoverage) vipStatus.addToRotation(clusterId); - else - vipStatus.removeFromRotation(clusterId); } - else if ( localCorpusDispatchTarget.isEmpty()) { - if (isInRotation && sufficientCoverage) - vipStatus.addToRotation(clusterId); + else if (usesLocalCorpusIn(group)) { // follow the status of this group + setInRotationOnlyIf(sufficientCoverage); } } + private void setInRotationOnlyIf(boolean inRotation) { + if (inRotation) + vipStatus.addToRotation(clusterId); + else + vipStatus.removeFromRotation(clusterId); + } + private boolean hasInformationAboutAllNodes() { return nodesByHost.values().stream().allMatch(Node::getStatusIsKnown); } -- cgit v1.2.3 From 13b9f16b13a370ddaea0460d8d3721e5f2ef1c8f Mon Sep 17 00:00:00 2001 From: Jon Bratseth Date: Thu, 19 Sep 2019 21:19:48 +0200 Subject: Transition from down to up initially - Use tri-state logic for working/failing/unknown - Be initially down in test and verify we come up --- .../com/yahoo/search/cluster/BaseNodeMonitor.java | 4 +- .../com/yahoo/search/cluster/ClusterMonitor.java | 21 ++++----- .../java/com/yahoo/search/cluster/NodeManager.java | 3 -- .../yahoo/search/cluster/TrafficNodeMonitor.java | 9 +++- .../com/yahoo/search/dispatch/InvokerFactory.java | 13 +++--- .../yahoo/search/dispatch/searchcluster/Group.java | 4 +- .../yahoo/search/dispatch/searchcluster/Node.java | 17 ++++---- .../dispatch/searchcluster/SearchCluster.java | 23 +++------- .../java/com/yahoo/search/result/ErrorMessage.java | 1 + .../cluster/test/ClusteredConnectionTestCase.java | 3 +- .../dispatch/searchcluster/SearchClusterTest.java | 51 ++++++++++++++-------- 11 files changed, 78 insertions(+), 71 deletions(-) (limited to 'container-search/src') diff --git a/container-search/src/main/java/com/yahoo/search/cluster/BaseNodeMonitor.java b/container-search/src/main/java/com/yahoo/search/cluster/BaseNodeMonitor.java index d21ef35bcc2..dd01d895963 100644 --- a/container-search/src/main/java/com/yahoo/search/cluster/BaseNodeMonitor.java +++ b/container-search/src/main/java/com/yahoo/search/cluster/BaseNodeMonitor.java @@ -54,10 +54,12 @@ public abstract class BaseNodeMonitor { /** * Returns whether this node is currently in a state suitable - * for receiving traffic. As far as we know, that is + * for receiving traffic (default true) */ public boolean isWorking() { return isWorking; } + /** @deprecated Not used */ + @Deprecated // TODO: Remove on Vespa 8 public boolean isQuarantined() { return isQuarantined; } /** diff --git a/container-search/src/main/java/com/yahoo/search/cluster/ClusterMonitor.java b/container-search/src/main/java/com/yahoo/search/cluster/ClusterMonitor.java index b79f6f49c19..015adcf3490 100644 --- a/container-search/src/main/java/com/yahoo/search/cluster/ClusterMonitor.java +++ b/container-search/src/main/java/com/yahoo/search/cluster/ClusterMonitor.java @@ -33,7 +33,7 @@ public class ClusterMonitor { private volatile boolean shutdown = false; /** A map from Node to corresponding MonitoredNode */ - private final Map> nodeMonitors = Collections.synchronizedMap(new java.util.LinkedHashMap<>()); + private final Map> nodeMonitors = Collections.synchronizedMap(new java.util.LinkedHashMap<>()); public ClusterMonitor(NodeManager manager) { nodeManager = manager; @@ -56,8 +56,7 @@ public class ClusterMonitor { * @param internal whether or not this node is internal to this cluster */ public void add(T node, boolean internal) { - BaseNodeMonitor monitor = new TrafficNodeMonitor<>(node, configuration, internal); - nodeMonitors.put(node, monitor); + nodeMonitors.put(node, new TrafficNodeMonitor<>(node, configuration, internal)); } /** @@ -69,24 +68,20 @@ public class ClusterMonitor { /** Called from ClusterSearcher/NodeManager when a node failed */ public synchronized void failed(T node, ErrorMessage error) { - nodeManager.statusIsKnown(node); - BaseNodeMonitor monitor = nodeMonitors.get(node); - boolean wasWorking = monitor.isWorking(); + TrafficNodeMonitor monitor = nodeMonitors.get(node); + Boolean wasWorking = monitor.isKnownWorking(); monitor.failed(error); - if (wasWorking && !monitor.isWorking()) { + if (wasWorking != monitor.isKnownWorking()) nodeManager.failed(node); - } } /** Called when a node responded */ public synchronized void responded(T node) { - nodeManager.statusIsKnown(node); - BaseNodeMonitor monitor = nodeMonitors.get(node); - boolean wasFailing =! monitor.isWorking(); + TrafficNodeMonitor monitor = nodeMonitors.get(node); + Boolean wasWorking = monitor.isKnownWorking(); monitor.responded(); - if (wasFailing && monitor.isWorking()) { + if (wasWorking != monitor.isKnownWorking()) nodeManager.working(monitor.getNode()); - } } /** diff --git a/container-search/src/main/java/com/yahoo/search/cluster/NodeManager.java b/container-search/src/main/java/com/yahoo/search/cluster/NodeManager.java index ef10680a4ae..9b20139e3c5 100644 --- a/container-search/src/main/java/com/yahoo/search/cluster/NodeManager.java +++ b/container-search/src/main/java/com/yahoo/search/cluster/NodeManager.java @@ -11,9 +11,6 @@ import java.util.concurrent.Executor; */ public interface NodeManager { - /** Called when we gain evidence about whether or not a node is working */ - default void statusIsKnown(T node) { } - /** Called when a failed node is working (ready for production) again */ void working(T node); diff --git a/container-search/src/main/java/com/yahoo/search/cluster/TrafficNodeMonitor.java b/container-search/src/main/java/com/yahoo/search/cluster/TrafficNodeMonitor.java index ea881ad8b48..e484811ab38 100644 --- a/container-search/src/main/java/com/yahoo/search/cluster/TrafficNodeMonitor.java +++ b/container-search/src/main/java/com/yahoo/search/cluster/TrafficNodeMonitor.java @@ -60,9 +60,14 @@ public class TrafficNodeMonitor extends BaseNodeMonitor { setWorking(true,"Responds correctly"); } + /** + * Returns whether this node is currently is a state suitable for receiving traffic, or null if not known + */ + public Boolean isKnownWorking() { return atStartUp ? null : isWorking; } + /** Thread-safely changes the state of this node if required */ - protected synchronized void setWorking(boolean working,String explanation) { - if (this.isWorking==working) return; // Old news + protected synchronized void setWorking(boolean working, String explanation) { + if (this.isWorking == working) return; // Old news if (explanation==null) { explanation=""; diff --git a/container-search/src/main/java/com/yahoo/search/dispatch/InvokerFactory.java b/container-search/src/main/java/com/yahoo/search/dispatch/InvokerFactory.java index 31af74a39b2..78c50254a84 100644 --- a/container-search/src/main/java/com/yahoo/search/dispatch/InvokerFactory.java +++ b/container-search/src/main/java/com/yahoo/search/dispatch/InvokerFactory.java @@ -48,21 +48,24 @@ public abstract class InvokerFactory { * @return Optional containing the SearchInvoker or empty if some node in the * list is invalid and the remaining coverage is not sufficient */ - public Optional createSearchInvoker(VespaBackEndSearcher searcher, Query query, OptionalInt groupId, List nodes, - boolean acceptIncompleteCoverage) { + public Optional createSearchInvoker(VespaBackEndSearcher searcher, + Query query, + OptionalInt groupId, + List nodes, + boolean acceptIncompleteCoverage) { List invokers = new ArrayList<>(nodes.size()); Set failed = null; for (Node node : nodes) { boolean nodeAdded = false; - if (node.isWorking()) { + if (node.isWorking() != Boolean.FALSE) { Optional invoker = createNodeSearchInvoker(searcher, query, node); - if(invoker.isPresent()) { + if (invoker.isPresent()) { invokers.add(invoker.get()); nodeAdded = true; } } - if (!nodeAdded) { + if ( ! nodeAdded) { if (failed == null) { failed = new HashSet<>(); } diff --git a/container-search/src/main/java/com/yahoo/search/dispatch/searchcluster/Group.java b/container-search/src/main/java/com/yahoo/search/dispatch/searchcluster/Group.java index 93cbd77cef7..0e4e87b9a6a 100644 --- a/container-search/src/main/java/com/yahoo/search/dispatch/searchcluster/Group.java +++ b/container-search/src/main/java/com/yahoo/search/dispatch/searchcluster/Group.java @@ -54,7 +54,7 @@ public class Group { public int workingNodes() { int nodesUp = 0; for (Node node : nodes) { - if (node.isWorking()) { + if (node.isWorking() == Boolean.TRUE) { nodesUp++; } } @@ -64,7 +64,7 @@ public class Group { void aggregateActiveDocuments() { long activeDocumentsInGroup = 0; for (Node node : nodes) { - if (node.isWorking()) { + if (node.isWorking() == Boolean.TRUE) { activeDocumentsInGroup += node.getActiveDocuments(); } } diff --git a/container-search/src/main/java/com/yahoo/search/dispatch/searchcluster/Node.java b/container-search/src/main/java/com/yahoo/search/dispatch/searchcluster/Node.java index 0dd933fc24e..b47f2fefa5b 100644 --- a/container-search/src/main/java/com/yahoo/search/dispatch/searchcluster/Node.java +++ b/container-search/src/main/java/com/yahoo/search/dispatch/searchcluster/Node.java @@ -46,16 +46,15 @@ public class Node { /** Returns the id of this group this node belongs to */ public int group() { return group; } - /** Note that we know the status of this node */ - public void setStatusIsKnown() { statusIsKnown.lazySet(true); } - - /** Returns whether we know the status of this node */ - public boolean getStatusIsKnown() { return statusIsKnown.get(); } - - public void setWorking(boolean working) { this.working.lazySet(working); } + public void setWorking(boolean working) { + this.statusIsKnown.lazySet(true); + this.working.lazySet(working); + } - /** Returns whether this node is currently responding to requests */ - public boolean isWorking() { return working.get(); } + /** Returns whether this node is currently responding to requests, or null if status is not known */ + public Boolean isWorking() { + return statusIsKnown.get() ? working.get() : null; + } /** Updates the active documents on this node */ public void setActiveDocuments(long activeDocuments) { diff --git a/container-search/src/main/java/com/yahoo/search/dispatch/searchcluster/SearchCluster.java b/container-search/src/main/java/com/yahoo/search/dispatch/searchcluster/SearchCluster.java index e2719545a6a..277252d14f0 100644 --- a/container-search/src/main/java/com/yahoo/search/dispatch/searchcluster/SearchCluster.java +++ b/container-search/src/main/java/com/yahoo/search/dispatch/searchcluster/SearchCluster.java @@ -95,13 +95,8 @@ public class SearchCluster implements NodeManager { this.pingFactory = pingFactory; for (var group : orderedGroups) { - for (var node : group.nodes()) { - // cluster monitor will only call working() when the - // node transitions from down to up, so we need to - // register the initial (working) state here: - working(node); + for (var node : group.nodes()) clusterMonitor.add(node, true); - } } } @@ -199,18 +194,12 @@ public class SearchCluster implements NodeManager { Group localSearchGroup = groups.get(localCorpusDispatchTarget.get().group()); if ( ! localSearchGroup.hasSufficientCoverage()) return Optional.empty(); - // Only use direct dispatch if the local search node is up - if ( ! localCorpusDispatchTarget.get().isWorking()) return Optional.empty(); + // Only use direct dispatch if the local search node is not down + if ( localCorpusDispatchTarget.get().isWorking() == Boolean.FALSE) return Optional.empty(); return localCorpusDispatchTarget; } - /** Called by the cluster monitor whenever we get information (positive or negative) about a node */ - @Override - public void statusIsKnown(Node node) { - node.setStatusIsKnown(); - } - /** Called by the cluster monitor when node state changes to working */ @Override public void working(Node node) { @@ -244,7 +233,7 @@ public class SearchCluster implements NodeManager { private void updateVipStatusOnCoverageChange(Group group, boolean sufficientCoverage) { if ( localCorpusDispatchTarget.isEmpty()) { // consider entire cluster - if (vipStatus.isInRotation() && sufficientCoverage) + if ( ! vipStatus.isInRotation() && sufficientCoverage) vipStatus.addToRotation(clusterId); } else if (usesLocalCorpusIn(group)) { // follow the status of this group @@ -260,11 +249,11 @@ public class SearchCluster implements NodeManager { } private boolean hasInformationAboutAllNodes() { - return nodesByHost.values().stream().allMatch(Node::getStatusIsKnown); + return nodesByHost.values().stream().allMatch(node -> node.isWorking() != null); } private boolean hasWorkingNodes() { - return nodesByHost.values().stream().anyMatch(Node::isWorking); + return nodesByHost.values().stream().anyMatch(node -> node.isWorking() != Boolean.FALSE ); } private boolean usesLocalCorpusIn(Node node) { diff --git a/container-search/src/main/java/com/yahoo/search/result/ErrorMessage.java b/container-search/src/main/java/com/yahoo/search/result/ErrorMessage.java index a58db5a542e..47221ee00f5 100644 --- a/container-search/src/main/java/com/yahoo/search/result/ErrorMessage.java +++ b/container-search/src/main/java/com/yahoo/search/result/ErrorMessage.java @@ -103,6 +103,7 @@ public class ErrorMessage extends com.yahoo.processing.request.ErrorMessage { /** Creates an error indicating that there was a general error communicating with a backend service. */ public static ErrorMessage createBackendCommunicationError(String detailedMessage) { + if (1==1) throw new RuntimeException(); return new ErrorMessage(BACKEND_COMMUNICATION_ERROR.code, "Backend communication error", detailedMessage); } diff --git a/container-search/src/test/java/com/yahoo/search/cluster/test/ClusteredConnectionTestCase.java b/container-search/src/test/java/com/yahoo/search/cluster/test/ClusteredConnectionTestCase.java index 84c10991293..79e96a7c5a2 100644 --- a/container-search/src/test/java/com/yahoo/search/cluster/test/ClusteredConnectionTestCase.java +++ b/container-search/src/test/java/com/yahoo/search/cluster/test/ClusteredConnectionTestCase.java @@ -114,7 +114,8 @@ public class ClusteredConnectionTestCase { connection0.setInService(false); forcePing(myBackend); r=new Execution(myBackend, Execution.Context.createContextStub()).search(new SimpleQuery(0)); - assertEquals("No backends in service. Try later",r.hits().getError().getMessage()); + System.out.println(r.hits().getError().getDetailedMessage()); + assertEquals("No backends in service. Try later", r.hits().getError().getMessage()); connection2.setInService(true); connection1.setInService(true); diff --git a/container-search/src/test/java/com/yahoo/search/dispatch/searchcluster/SearchClusterTest.java b/container-search/src/test/java/com/yahoo/search/dispatch/searchcluster/SearchClusterTest.java index bbaf512534a..df1049f499d 100644 --- a/container-search/src/test/java/com/yahoo/search/dispatch/searchcluster/SearchClusterTest.java +++ b/container-search/src/test/java/com/yahoo/search/dispatch/searchcluster/SearchClusterTest.java @@ -19,13 +19,16 @@ import java.util.concurrent.atomic.AtomicInteger; import static org.junit.Assert.assertFalse; import static org.junit.Assert.assertTrue; +/** + * @author baldersheim + */ public class SearchClusterTest { static class State { final String clusterId; final int nodesPerGroup; final VipStatus vipStatus; - final SearchCluster sc; + final SearchCluster searchCluster; final List numDocsPerNode; List pingCounts; State(String clusterId, int nodesPergroup, String ... nodeNames) { @@ -35,9 +38,6 @@ public class SearchClusterTest { this.clusterId = clusterId; this.nodesPerGroup = nodesPergroup; vipStatus = new VipStatus(new QrSearchersConfig.Builder().searchcluster(new QrSearchersConfig.Searchcluster.Builder().name(clusterId)).build(), new ClustersStatus()); - assertFalse(vipStatus.isInRotation()); - vipStatus.addToRotation(clusterId); - assertTrue(vipStatus.isInRotation()); numDocsPerNode = new ArrayList<>(nodeNames.size()); pingCounts = new ArrayList<>(nodeNames.size()); List nodes = new ArrayList<>(nodeNames.size()); @@ -49,10 +49,10 @@ public class SearchClusterTest { numDocsPerNode.add(new AtomicInteger(1)); pingCounts.add(new AtomicInteger(0)); } - sc = new SearchCluster(clusterId, MockSearchCluster.createDispatchConfig(nodes),nodes.size() / nodesPergroup, vipStatus); + searchCluster = new SearchCluster(clusterId, MockSearchCluster.createDispatchConfig(nodes), nodes.size() / nodesPergroup, vipStatus); } void startMonitoring() { - sc.startClusterMonitoring(new Factory(nodesPerGroup, numDocsPerNode, pingCounts)); + searchCluster.startClusterMonitoring(new Factory(nodesPerGroup, numDocsPerNode, pingCounts)); } private static int getMaxValue(List list) { int max = list.get(0).get(); @@ -118,19 +118,25 @@ public class SearchClusterTest { } @Test - public void requireThatVipStatusIsDefaultUp() { + public void requireThatVipStatusIsDefaultDownButComesUpAfterPinging() { State test = new State("cluster.1", 2, "a", "b"); + assertTrue(test.searchCluster.localCorpusDispatchTarget().isEmpty()); + + assertFalse(test.vipStatus.isInRotation()); + test.startMonitoring(); + test.waitOneFullPingRound(); assertTrue(test.vipStatus.isInRotation()); - assertTrue(test.sc.localCorpusDispatchTarget().isEmpty()); } @Test public void requireThatZeroDocsAreFine() { State test = new State("cluster.1", 2,"a", "b"); + test.startMonitoring(); + test.waitOneFullPingRound(); + assertTrue(test.vipStatus.isInRotation()); - assertTrue(test.sc.localCorpusDispatchTarget().isEmpty()); + assertTrue(test.searchCluster.localCorpusDispatchTarget().isEmpty()); - test.startMonitoring(); test.numDocsPerNode.get(0).set(-1); test.numDocsPerNode.get(1).set(-1); test.waitOneFullPingRound(); @@ -141,19 +147,25 @@ public class SearchClusterTest { } @Test - public void requireThatVipStatusIsDefaultUpWithLocalDispatch() { + public void requireThatVipStatusIsDefaultDownWithLocalDispatch() { State test = new State("cluster.1", 1, HostName.getLocalhost(), "b"); + assertTrue(test.searchCluster.localCorpusDispatchTarget().isPresent()); + + assertFalse(test.vipStatus.isInRotation()); + test.startMonitoring(); + test.waitOneFullPingRound(); assertTrue(test.vipStatus.isInRotation()); - assertTrue(test.sc.localCorpusDispatchTarget().isPresent()); } @Test public void requireThatVipStatusDownWhenLocalIsDown() { State test = new State("cluster.1",1,HostName.getLocalhost(), "b"); - assertTrue(test.vipStatus.isInRotation()); - assertTrue(test.sc.localCorpusDispatchTarget().isPresent()); test.startMonitoring(); + test.waitOneFullPingRound(); + assertTrue(test.vipStatus.isInRotation()); + assertTrue(test.searchCluster.localCorpusDispatchTarget().isPresent()); + test.waitOneFullPingRound(); assertTrue(test.vipStatus.isInRotation()); test.numDocsPerNode.get(0).set(-1); @@ -183,10 +195,11 @@ public class SearchClusterTest { private void verifyThatVipStatusDownRequireAllNodesDown(int numGroups, int nodesPerGroup) { List nodeNames = generateNodeNames(numGroups, nodesPerGroup); State test = new State("cluster.1", nodesPerGroup, nodeNames); + test.startMonitoring(); + test.waitOneFullPingRound(); assertTrue(test.vipStatus.isInRotation()); - assertTrue(test.sc.localCorpusDispatchTarget().isEmpty()); + assertTrue(test.searchCluster.localCorpusDispatchTarget().isEmpty()); - test.startMonitoring(); test.waitOneFullPingRound(); assertTrue(test.vipStatus.isInRotation()); @@ -214,13 +227,15 @@ public class SearchClusterTest { } return nodeNames; } + private void verifyThatVipStatusUpRequireOnlyOneOnlineNode(int numGroups, int nodesPerGroup) { List nodeNames = generateNodeNames(numGroups, nodesPerGroup); State test = new State("cluster.1", nodesPerGroup, nodeNames); + test.startMonitoring(); + test.waitOneFullPingRound(); assertTrue(test.vipStatus.isInRotation()); - assertTrue(test.sc.localCorpusDispatchTarget().isEmpty()); + assertTrue(test.searchCluster.localCorpusDispatchTarget().isEmpty()); - test.startMonitoring(); for (int i=0; i < test.numDocsPerNode.size()-1; i++) { test.numDocsPerNode.get(i).set(-1); } -- cgit v1.2.3 From 07e81bc908de70196f83578a9dc99c9fc7cc0df7 Mon Sep 17 00:00:00 2001 From: Jon Bratseth Date: Thu, 19 Sep 2019 21:39:25 +0200 Subject: Not at startup after we get failing info --- .../src/main/java/com/yahoo/search/cluster/ClusterSearcher.java | 1 + .../src/main/java/com/yahoo/search/cluster/TrafficNodeMonitor.java | 1 + container-search/src/main/java/com/yahoo/search/result/ErrorMessage.java | 1 - .../java/com/yahoo/search/cluster/test/ClusteredConnectionTestCase.java | 1 - 4 files changed, 2 insertions(+), 2 deletions(-) (limited to 'container-search/src') diff --git a/container-search/src/main/java/com/yahoo/search/cluster/ClusterSearcher.java b/container-search/src/main/java/com/yahoo/search/cluster/ClusterSearcher.java index e99199d85f7..cabe3acdacd 100644 --- a/container-search/src/main/java/com/yahoo/search/cluster/ClusterSearcher.java +++ b/container-search/src/main/java/com/yahoo/search/cluster/ClusterSearcher.java @@ -82,6 +82,7 @@ public abstract class ClusterSearcher extends PingableSearcher implements Nod pong = new Pong(ErrorMessage.createUnspecifiedError("Ping was interrupted: " + p)); logThrowable = e; } catch (ExecutionException e) { + e.printStackTrace(); pong = new Pong(ErrorMessage.createUnspecifiedError("Execution was interrupted: " + p)); logThrowable = e; } catch (LinkageError e) { // Typically Osgi woes diff --git a/container-search/src/main/java/com/yahoo/search/cluster/TrafficNodeMonitor.java b/container-search/src/main/java/com/yahoo/search/cluster/TrafficNodeMonitor.java index e484811ab38..830014bca46 100644 --- a/container-search/src/main/java/com/yahoo/search/cluster/TrafficNodeMonitor.java +++ b/container-search/src/main/java/com/yahoo/search/cluster/TrafficNodeMonitor.java @@ -36,6 +36,7 @@ public class TrafficNodeMonitor extends BaseNodeMonitor { @Override public void failed(ErrorMessage error) { respondedAt = now(); + atStartUp = false; if (error.getCode() == Error.BACKEND_COMMUNICATION_ERROR.code) { setWorking(false, "Connection failure: " + error.toString()); diff --git a/container-search/src/main/java/com/yahoo/search/result/ErrorMessage.java b/container-search/src/main/java/com/yahoo/search/result/ErrorMessage.java index 47221ee00f5..a58db5a542e 100644 --- a/container-search/src/main/java/com/yahoo/search/result/ErrorMessage.java +++ b/container-search/src/main/java/com/yahoo/search/result/ErrorMessage.java @@ -103,7 +103,6 @@ public class ErrorMessage extends com.yahoo.processing.request.ErrorMessage { /** Creates an error indicating that there was a general error communicating with a backend service. */ public static ErrorMessage createBackendCommunicationError(String detailedMessage) { - if (1==1) throw new RuntimeException(); return new ErrorMessage(BACKEND_COMMUNICATION_ERROR.code, "Backend communication error", detailedMessage); } diff --git a/container-search/src/test/java/com/yahoo/search/cluster/test/ClusteredConnectionTestCase.java b/container-search/src/test/java/com/yahoo/search/cluster/test/ClusteredConnectionTestCase.java index 79e96a7c5a2..aa57e7903f9 100644 --- a/container-search/src/test/java/com/yahoo/search/cluster/test/ClusteredConnectionTestCase.java +++ b/container-search/src/test/java/com/yahoo/search/cluster/test/ClusteredConnectionTestCase.java @@ -114,7 +114,6 @@ public class ClusteredConnectionTestCase { connection0.setInService(false); forcePing(myBackend); r=new Execution(myBackend, Execution.Context.createContextStub()).search(new SimpleQuery(0)); - System.out.println(r.hits().getError().getDetailedMessage()); assertEquals("No backends in service. Try later", r.hits().getError().getMessage()); connection2.setInService(true); -- cgit v1.2.3 From 9b401405b4f9f02b6ebd04d240cd2412c4054597 Mon Sep 17 00:00:00 2001 From: Jon Bratseth Date: Thu, 19 Sep 2019 21:43:30 +0200 Subject: No need to set up depending on coverage --- .../java/com/yahoo/search/dispatch/searchcluster/SearchCluster.java | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'container-search/src') diff --git a/container-search/src/main/java/com/yahoo/search/dispatch/searchcluster/SearchCluster.java b/container-search/src/main/java/com/yahoo/search/dispatch/searchcluster/SearchCluster.java index 277252d14f0..44df0656361 100644 --- a/container-search/src/main/java/com/yahoo/search/dispatch/searchcluster/SearchCluster.java +++ b/container-search/src/main/java/com/yahoo/search/dispatch/searchcluster/SearchCluster.java @@ -233,8 +233,7 @@ public class SearchCluster implements NodeManager { private void updateVipStatusOnCoverageChange(Group group, boolean sufficientCoverage) { if ( localCorpusDispatchTarget.isEmpty()) { // consider entire cluster - if ( ! vipStatus.isInRotation() && sufficientCoverage) - vipStatus.addToRotation(clusterId); + // VIP status does not depend on coverage } else if (usesLocalCorpusIn(group)) { // follow the status of this group setInRotationOnlyIf(sufficientCoverage); -- cgit v1.2.3 From 3da089f71cfc38431ef4508a99d9b075b955fbc0 Mon Sep 17 00:00:00 2001 From: Jon Bratseth Date: Thu, 19 Sep 2019 22:09:32 +0200 Subject: Remove printStaxkTrace --- .../src/main/java/com/yahoo/search/cluster/ClusterSearcher.java | 1 - 1 file changed, 1 deletion(-) (limited to 'container-search/src') diff --git a/container-search/src/main/java/com/yahoo/search/cluster/ClusterSearcher.java b/container-search/src/main/java/com/yahoo/search/cluster/ClusterSearcher.java index cabe3acdacd..e99199d85f7 100644 --- a/container-search/src/main/java/com/yahoo/search/cluster/ClusterSearcher.java +++ b/container-search/src/main/java/com/yahoo/search/cluster/ClusterSearcher.java @@ -82,7 +82,6 @@ public abstract class ClusterSearcher extends PingableSearcher implements Nod pong = new Pong(ErrorMessage.createUnspecifiedError("Ping was interrupted: " + p)); logThrowable = e; } catch (ExecutionException e) { - e.printStackTrace(); pong = new Pong(ErrorMessage.createUnspecifiedError("Execution was interrupted: " + p)); logThrowable = e; } catch (LinkageError e) { // Typically Osgi woes -- cgit v1.2.3 From ee0100b9d505ab9238ea48d21be6326a26d90936 Mon Sep 17 00:00:00 2001 From: Henning Baldersheim Date: Thu, 19 Sep 2019 23:37:34 +0200 Subject: Drive the ping ourselves to avoid waiting for the monitor thread. --- .../com/yahoo/search/cluster/ClusterMonitor.java | 2 +- .../dispatch/searchcluster/SearchCluster.java | 2 ++ .../dispatch/searchcluster/SearchClusterTest.java | 27 +++++++++++++++++++--- 3 files changed, 27 insertions(+), 4 deletions(-) (limited to 'container-search/src') diff --git a/container-search/src/main/java/com/yahoo/search/cluster/ClusterMonitor.java b/container-search/src/main/java/com/yahoo/search/cluster/ClusterMonitor.java index 015adcf3490..22c7f59872c 100644 --- a/container-search/src/main/java/com/yahoo/search/cluster/ClusterMonitor.java +++ b/container-search/src/main/java/com/yahoo/search/cluster/ClusterMonitor.java @@ -22,7 +22,7 @@ import java.util.logging.Logger; */ public class ClusterMonitor { - private MonitorConfiguration configuration = new MonitorConfiguration(); + private final MonitorConfiguration configuration = new MonitorConfiguration(); private static Logger log = Logger.getLogger(ClusterMonitor.class.getName()); diff --git a/container-search/src/main/java/com/yahoo/search/dispatch/searchcluster/SearchCluster.java b/container-search/src/main/java/com/yahoo/search/dispatch/searchcluster/SearchCluster.java index 44df0656361..c17212b2481 100644 --- a/container-search/src/main/java/com/yahoo/search/dispatch/searchcluster/SearchCluster.java +++ b/container-search/src/main/java/com/yahoo/search/dispatch/searchcluster/SearchCluster.java @@ -100,6 +100,8 @@ public class SearchCluster implements NodeManager { } } + ClusterMonitor clusterMonitor() { return clusterMonitor; } + private static Optional findLocalCorpusDispatchTarget(String selfHostname, int searchClusterSize, int containerClusterSize, diff --git a/container-search/src/test/java/com/yahoo/search/dispatch/searchcluster/SearchClusterTest.java b/container-search/src/test/java/com/yahoo/search/dispatch/searchcluster/SearchClusterTest.java index df1049f499d..bde0a3c6c02 100644 --- a/container-search/src/test/java/com/yahoo/search/dispatch/searchcluster/SearchClusterTest.java +++ b/container-search/src/test/java/com/yahoo/search/dispatch/searchcluster/SearchClusterTest.java @@ -8,12 +8,17 @@ import com.yahoo.prelude.Pong; import com.yahoo.search.cluster.ClusterMonitor; import com.yahoo.search.dispatch.MockSearchCluster; import com.yahoo.search.result.ErrorMessage; +import org.jetbrains.annotations.NotNull; import org.junit.Test; import java.util.ArrayList; import java.util.Arrays; import java.util.List; import java.util.concurrent.Callable; +import java.util.concurrent.Executor; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; +import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicInteger; import static org.junit.Assert.assertFalse; @@ -25,6 +30,19 @@ import static org.junit.Assert.assertTrue; public class SearchClusterTest { static class State { + class MyExecutor implements Executor { + private final List list = new ArrayList<>(); + @Override + public void execute(@NotNull Runnable command) { + list.add(command); + } + void run() { + for (Runnable runnable : list) { + runnable.run(); + } + list.clear(); + } + } final String clusterId; final int nodesPerGroup; final VipStatus vipStatus; @@ -54,7 +72,7 @@ public class SearchClusterTest { void startMonitoring() { searchCluster.startClusterMonitoring(new Factory(nodesPerGroup, numDocsPerNode, pingCounts)); } - private static int getMaxValue(List list) { + static private int getMaxValue(List list) { int max = list.get(0).get(); for (AtomicInteger v : list) { if (v.get() > max) { @@ -72,10 +90,13 @@ public class SearchClusterTest { } return min; } - private static void waitAtLeast(int atLeast, List list) { + private void waitAtLeast(int atLeast, List list) { while (getMinValue(list) < atLeast) { + ExecutorService executor = Executors.newCachedThreadPool(); + searchCluster.clusterMonitor().ping(executor); + executor.shutdown(); try { - Thread.sleep(100); + executor.awaitTermination(60, TimeUnit.SECONDS); } catch (InterruptedException e) {} } } -- cgit v1.2.3