diff options
author | Olli Virtanen <olli.virtanen@oath.com> | 2019-03-22 09:51:14 +0100 |
---|---|---|
committer | Olli Virtanen <olli.virtanen@oath.com> | 2019-03-22 09:51:14 +0100 |
commit | 37bf79fdedacad3b46796667e29d86a302f98fc0 (patch) | |
tree | e6024f6cdb1aaaa3ee5f888aef3da8473c5cd926 /container-search | |
parent | 519d59334ca8c3e314e71f83de618e375a7c2d6c (diff) |
Report partial group connection failures through trace, not error
Diffstat (limited to 'container-search')
7 files changed, 127 insertions, 122 deletions
diff --git a/container-search/src/main/java/com/yahoo/prelude/fastsearch/FS4InvokerFactory.java b/container-search/src/main/java/com/yahoo/prelude/fastsearch/FS4InvokerFactory.java index 66088faed79..ac99675c9c5 100644 --- a/container-search/src/main/java/com/yahoo/prelude/fastsearch/FS4InvokerFactory.java +++ b/container-search/src/main/java/com/yahoo/prelude/fastsearch/FS4InvokerFactory.java @@ -7,22 +7,18 @@ import com.yahoo.search.Query; import com.yahoo.search.Result; import com.yahoo.search.dispatch.FillInvoker; import com.yahoo.search.dispatch.InterleavedFillInvoker; -import com.yahoo.search.dispatch.InterleavedSearchInvoker; import com.yahoo.search.dispatch.InvokerFactory; import com.yahoo.search.dispatch.SearchInvoker; import com.yahoo.search.dispatch.searchcluster.Node; import com.yahoo.search.dispatch.searchcluster.SearchCluster; import com.yahoo.search.result.Hit; -import java.util.ArrayList; import java.util.Collection; import java.util.HashMap; import java.util.HashSet; import java.util.Iterator; -import java.util.List; import java.util.Map; import java.util.Optional; -import java.util.OptionalInt; import java.util.Set; /** @@ -33,84 +29,29 @@ import java.util.Set; */ public class FS4InvokerFactory extends InvokerFactory { private final FS4ResourcePool fs4ResourcePool; - private final SearchCluster searchCluster; private final ImmutableMap<Integer, Node> nodesByKey; public FS4InvokerFactory(FS4ResourcePool fs4ResourcePool, SearchCluster searchCluster) { + super(searchCluster); this.fs4ResourcePool = fs4ResourcePool; - this.searchCluster = searchCluster; ImmutableMap.Builder<Integer, Node> builder = ImmutableMap.builder(); searchCluster.groups().values().forEach(group -> group.nodes().forEach(node -> builder.put(node.key(), node))); this.nodesByKey = builder.build(); } - public SearchInvoker createSearchInvoker(VespaBackEndSearcher searcher, Query query, Node node) { + public SearchInvoker createDirectSearchInvoker(VespaBackEndSearcher searcher, Query query, Node node) { Backend backend = fs4ResourcePool.getBackend(node.hostname(), node.fs4port()); return new FS4SearchInvoker(searcher, query, backend.openChannel(), Optional.of(node)); } - /** - * Create a {@link SearchInvoker} for a list of content nodes. - * - * @param searcher - * the searcher processing the query - * @param query - * the search query being processed - * @param groupId - * the id of the node group to which the nodes belong - * @param nodes - * pre-selected list of content nodes - * @param acceptIncompleteCoverage - * if some of the nodes are unavailable and this parameter is - * <b>false</b>, verify that the remaining set of nodes has enough - * coverage - * @return Optional containing the SearchInvoker or <i>empty</i> if some node in the - * list is invalid and the remaining coverage is not sufficient - */ @Override - public Optional<SearchInvoker> createSearchInvoker(VespaBackEndSearcher searcher, Query query, OptionalInt groupId, List<Node> nodes, - boolean acceptIncompleteCoverage) { - List<SearchInvoker> invokers = new ArrayList<>(nodes.size()); - Set<Integer> failed = null; - for (Node node : nodes) { - boolean nodeAdded = false; - if (node.isWorking()) { - Backend backend = fs4ResourcePool.getBackend(node.hostname(), node.fs4port()); - if (backend.probeConnection()) { - invokers.add(new FS4SearchInvoker(searcher, query, backend.openChannel(), Optional.of(node))); - nodeAdded = true; - } - } - - if (!nodeAdded) { - if (failed == null) { - failed = new HashSet<>(); - } - failed.add(node.key()); - } - } - - if (failed != null) { - List<Node> success = new ArrayList<>(nodes.size() - failed.size()); - for (Node node : nodes) { - if (!failed.contains(node.key())) { - success.add(node); - } - } - if (!searchCluster.isPartialGroupCoverageSufficient(groupId, success)) { - if (acceptIncompleteCoverage) { - invokers.add(createCoverageErrorInvoker(nodes, failed)); - } else { - return Optional.empty(); - } - } - } - - if (invokers.size() == 1) { - return Optional.of(invokers.get(0)); + protected Optional<SearchInvoker> createNodeSearchInvoker(VespaBackEndSearcher searcher, Query query, Node node) { + Backend backend = fs4ResourcePool.getBackend(node.hostname(), node.fs4port()); + if (backend.probeConnection()) { + return Optional.of(new FS4SearchInvoker(searcher, query, backend.openChannel(), Optional.of(node))); } else { - return Optional.of(new InterleavedSearchInvoker(invokers, searcher, searchCluster)); + return Optional.empty(); } } diff --git a/container-search/src/main/java/com/yahoo/prelude/fastsearch/FastSearcher.java b/container-search/src/main/java/com/yahoo/prelude/fastsearch/FastSearcher.java index 68f8bd9d9ea..e61ed1715b6 100644 --- a/container-search/src/main/java/com/yahoo/prelude/fastsearch/FastSearcher.java +++ b/container-search/src/main/java/com/yahoo/prelude/fastsearch/FastSearcher.java @@ -213,7 +213,7 @@ public class FastSearcher extends VespaBackEndSearcher { Optional<Node> direct = getDirectNode(query); if(direct.isPresent()) { - return dispatcher.getFS4InvokerFactory().createSearchInvoker(this, query, direct.get()); + return dispatcher.getFS4InvokerFactory().createDirectSearchInvoker(this, query, direct.get()); } return new FS4SearchInvoker(this, query, dispatchBackend.openChannel(), Optional.empty()); } diff --git a/container-search/src/main/java/com/yahoo/search/dispatch/InterleavedSearchInvoker.java b/container-search/src/main/java/com/yahoo/search/dispatch/InterleavedSearchInvoker.java index 4c71290e5f9..927eeb9749a 100644 --- a/container-search/src/main/java/com/yahoo/search/dispatch/InterleavedSearchInvoker.java +++ b/container-search/src/main/java/com/yahoo/search/dispatch/InterleavedSearchInvoker.java @@ -40,6 +40,7 @@ public class InterleavedSearchInvoker extends SearchInvoker implements ResponseM private final VespaBackEndSearcher searcher; private final SearchCluster searchCluster; private final LinkedBlockingQueue<SearchInvoker> availableForProcessing; + private final Set<Integer> alreadyFailedNodes; private Query query; private boolean adaptiveTimeoutCalculated = false; @@ -59,13 +60,14 @@ public class InterleavedSearchInvoker extends SearchInvoker implements ResponseM private boolean trimResult = false; - public InterleavedSearchInvoker(Collection<SearchInvoker> invokers, VespaBackEndSearcher searcher, SearchCluster searchCluster) { + public InterleavedSearchInvoker(Collection<SearchInvoker> invokers, VespaBackEndSearcher searcher, SearchCluster searchCluster, Set<Integer> alreadyFailedNodes) { super(Optional.empty()); this.invokers = Collections.newSetFromMap(new IdentityHashMap<>()); this.invokers.addAll(invokers); this.searcher = searcher; this.searchCluster = searchCluster; this.availableForProcessing = newQueue(); + this.alreadyFailedNodes = alreadyFailedNodes; } /** @@ -116,7 +118,7 @@ public class InterleavedSearchInvoker extends SearchInvoker implements ResponseM if (result == null) { result = new Result(query); } - insertTimeoutErrors(); + insertNetworkErrors(); result.setCoverage(createCoverage()); trimResult(execution); Result ret = result; @@ -134,14 +136,34 @@ public class InterleavedSearchInvoker extends SearchInvoker implements ResponseM } } - private void insertTimeoutErrors() { + private void insertNetworkErrors() { + // Network errors will be reported as errors only when all nodes fail, otherwise they are just traced + boolean asErrors = answeredNodes == 0; + if (!invokers.isEmpty()) { String keys = invokers.stream().map(SearchInvoker::distributionKey).map(dk -> dk.map(i -> i.toString()).orElse("(unspecified)")) .collect(Collectors.joining(", ")); - result.hits().addError(ErrorMessage.createTimeout("Backend communication timeout on nodes with distribution-keys: " + keys)); + if (asErrors) { + result.hits().addError(ErrorMessage + .createTimeout("Backend communication timeout on all nodes in group (distribution-keys: " + keys + ")")); + } else { + query.trace("Backend communication timeout on nodes with distribution-keys: " + keys, 5); + } timedOut = true; } + if (alreadyFailedNodes != null) { + var message = "Connection failure on nodes with distribution-keys: " + + alreadyFailedNodes.stream().map(v -> Integer.toString(v)).collect(Collectors.joining(", ")); + if (asErrors) { + result.hits().addError(ErrorMessage.createBackendCommunicationError(message)); + } else { + query.trace(message, 5); + } + int failed = alreadyFailedNodes.size(); + askedNodes += failed; + answeredNodes += failed; + } } private long nextTimeout() { @@ -204,6 +226,7 @@ public class InterleavedSearchInvoker extends SearchInvoker implements ResponseM private Coverage createCoverage() { adjustDegradedCoverage(); + Coverage coverage = new Coverage(answeredDocs, answeredActiveDocs, answeredNodesParticipated, 1); coverage.setNodesTried(askedNodes); coverage.setSoonActive(answeredSoonActiveDocs); diff --git a/container-search/src/main/java/com/yahoo/search/dispatch/InvokerFactory.java b/container-search/src/main/java/com/yahoo/search/dispatch/InvokerFactory.java index ca471fb2baa..815a2a257ea 100644 --- a/container-search/src/main/java/com/yahoo/search/dispatch/InvokerFactory.java +++ b/container-search/src/main/java/com/yahoo/search/dispatch/InvokerFactory.java @@ -5,9 +5,12 @@ import com.yahoo.prelude.fastsearch.VespaBackEndSearcher; import com.yahoo.search.Query; import com.yahoo.search.Result; import com.yahoo.search.dispatch.searchcluster.Node; +import com.yahoo.search.dispatch.searchcluster.SearchCluster; import com.yahoo.search.result.Coverage; import com.yahoo.search.result.ErrorMessage; +import java.util.ArrayList; +import java.util.HashSet; import java.util.List; import java.util.Optional; import java.util.OptionalInt; @@ -17,11 +20,78 @@ import java.util.Set; * @author ollivir */ public abstract class InvokerFactory { - public abstract Optional<SearchInvoker> createSearchInvoker(VespaBackEndSearcher searcher, Query query, OptionalInt groupId, - List<Node> nodes, boolean acceptIncompleteCoverage); + protected final SearchCluster searchCluster; + + public InvokerFactory(SearchCluster searchCluster) { + this.searchCluster = searchCluster; + } + + protected abstract Optional<SearchInvoker> createNodeSearchInvoker(VespaBackEndSearcher searcher, Query query, Node node); public abstract Optional<FillInvoker> createFillInvoker(VespaBackEndSearcher searcher, Result result); + /** + * Create a {@link SearchInvoker} for a list of content nodes. + * + * @param searcher + * the searcher processing the query + * @param query + * the search query being processed + * @param groupId + * the id of the node group to which the nodes belong + * @param nodes + * pre-selected list of content nodes + * @param acceptIncompleteCoverage + * if some of the nodes are unavailable and this parameter is + * <b>false</b>, verify that the remaining set of nodes has enough + * coverage + * @return Optional containing the SearchInvoker or <i>empty</i> if some node in the + * list is invalid and the remaining coverage is not sufficient + */ + public Optional<SearchInvoker> createSearchInvoker(VespaBackEndSearcher searcher, Query query, OptionalInt groupId, List<Node> nodes, + boolean acceptIncompleteCoverage) { + List<SearchInvoker> invokers = new ArrayList<>(nodes.size()); + Set<Integer> failed = null; + for (Node node : nodes) { + boolean nodeAdded = false; + if (node.isWorking()) { + Optional<SearchInvoker> invoker = createNodeSearchInvoker(searcher, query, node); + if(invoker.isPresent()) { + invokers.add(invoker.get()); + nodeAdded = true; + } + } + + if (!nodeAdded) { + if (failed == null) { + failed = new HashSet<>(); + } + failed.add(node.key()); + } + } + + if (failed != null) { + List<Node> success = new ArrayList<>(nodes.size() - failed.size()); + for (Node node : nodes) { + if (!failed.contains(node.key())) { + success.add(node); + } + } + if (!searchCluster.isPartialGroupCoverageSufficient(groupId, success) && !acceptIncompleteCoverage) { + return Optional.empty(); + } + if(invokers.size() == 0) { + return Optional.of(createCoverageErrorInvoker(nodes, failed)); + } + } + + if (invokers.size() == 1 && failed == null) { + return Optional.of(invokers.get(0)); + } else { + return Optional.of(new InterleavedSearchInvoker(invokers, searcher, searchCluster, failed)); + } + } + protected static SearchInvoker createCoverageErrorInvoker(List<Node> nodes, Set<Integer> failed) { StringBuilder down = new StringBuilder("Connection failure on nodes with distribution-keys: "); int count = 0; diff --git a/container-search/src/main/java/com/yahoo/search/dispatch/rpc/RpcInvokerFactory.java b/container-search/src/main/java/com/yahoo/search/dispatch/rpc/RpcInvokerFactory.java index c8019278710..f17e7d63431 100644 --- a/container-search/src/main/java/com/yahoo/search/dispatch/rpc/RpcInvokerFactory.java +++ b/container-search/src/main/java/com/yahoo/search/dispatch/rpc/RpcInvokerFactory.java @@ -7,18 +7,12 @@ import com.yahoo.processing.request.CompoundName; import com.yahoo.search.Query; import com.yahoo.search.Result; import com.yahoo.search.dispatch.FillInvoker; -import com.yahoo.search.dispatch.InterleavedSearchInvoker; import com.yahoo.search.dispatch.InvokerFactory; import com.yahoo.search.dispatch.SearchInvoker; import com.yahoo.search.dispatch.searchcluster.Node; import com.yahoo.search.dispatch.searchcluster.SearchCluster; -import java.util.ArrayList; -import java.util.HashSet; -import java.util.List; import java.util.Optional; -import java.util.OptionalInt; -import java.util.Set; /** * @author ollivir @@ -28,50 +22,15 @@ public class RpcInvokerFactory extends InvokerFactory { private final static CompoundName dispatchSummaries = new CompoundName("dispatch.summaries"); private final RpcResourcePool rpcResourcePool; - private final SearchCluster searchCluster; public RpcInvokerFactory(RpcResourcePool rpcResourcePool, SearchCluster searchCluster) { + super(searchCluster); this.rpcResourcePool = rpcResourcePool; - this.searchCluster = searchCluster; } @Override - public Optional<SearchInvoker> createSearchInvoker(VespaBackEndSearcher searcher, Query query, OptionalInt groupId, List<Node> nodes, - boolean acceptIncompleteCoverage) { - List<SearchInvoker> invokers = new ArrayList<>(nodes.size()); - Set<Integer> failed = null; - for (Node node : nodes) { - if (node.isWorking()) { - invokers.add(new RpcSearchInvoker(searcher, node, rpcResourcePool)); - } else { - if (failed == null) { - failed = new HashSet<>(); - } - failed.add(node.key()); - } - } - - if (failed != null) { - List<Node> success = new ArrayList<>(nodes.size() - failed.size()); - for (Node node : nodes) { - if (!failed.contains(node.key())) { - success.add(node); - } - } - if (!searchCluster.isPartialGroupCoverageSufficient(groupId, success)) { - if (acceptIncompleteCoverage) { - invokers.add(createCoverageErrorInvoker(nodes, failed)); - } else { - return Optional.empty(); - } - } - } - - if (invokers.size() == 1) { - return Optional.of(invokers.get(0)); - } else { - return Optional.of(new InterleavedSearchInvoker(invokers, searcher, searchCluster)); - } + protected Optional<SearchInvoker> createNodeSearchInvoker(VespaBackEndSearcher searcher, Query query, Node node) { + return Optional.of(new RpcSearchInvoker(searcher, node, rpcResourcePool)); } @Override diff --git a/container-search/src/test/java/com/yahoo/prelude/fastsearch/FS4SearchInvokerTestCase.java b/container-search/src/test/java/com/yahoo/prelude/fastsearch/FS4SearchInvokerTestCase.java index 80f62f879ab..8fd3bc1f3eb 100644 --- a/container-search/src/test/java/com/yahoo/prelude/fastsearch/FS4SearchInvokerTestCase.java +++ b/container-search/src/test/java/com/yahoo/prelude/fastsearch/FS4SearchInvokerTestCase.java @@ -31,7 +31,7 @@ public class FS4SearchInvokerTestCase { var searcher = mockSearcher(); var cluster = new MockSearchCluster("?", 1, 1); var fs4invoker = new FS4SearchInvoker(searcher, query, mockFailingChannel(), Optional.empty()); - var interleave = new InterleavedSearchInvoker(Collections.singleton(fs4invoker), searcher, cluster); + var interleave = new InterleavedSearchInvoker(Collections.singleton(fs4invoker), searcher, cluster, null); long start = System.currentTimeMillis(); interleave.search(query, QueryPacket.create(null, null), null); diff --git a/container-search/src/test/java/com/yahoo/search/dispatch/InterleavedSearchInvokerTest.java b/container-search/src/test/java/com/yahoo/search/dispatch/InterleavedSearchInvokerTest.java index 8c5976a2815..f84f35020d2 100644 --- a/container-search/src/test/java/com/yahoo/search/dispatch/InterleavedSearchInvokerTest.java +++ b/container-search/src/test/java/com/yahoo/search/dispatch/InterleavedSearchInvokerTest.java @@ -15,8 +15,10 @@ import java.time.Instant; import java.util.ArrayList; import java.util.LinkedList; import java.util.List; +import java.util.Optional; import java.util.concurrent.LinkedBlockingQueue; import java.util.concurrent.TimeUnit; +import java.util.stream.StreamSupport; import static com.yahoo.container.handler.Coverage.DEGRADED_BY_MATCH_PHASE; import static com.yahoo.container.handler.Coverage.DEGRADED_BY_TIMEOUT; @@ -25,7 +27,7 @@ import static org.hamcrest.MatcherAssert.assertThat; import static org.hamcrest.Matchers.is; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertFalse; -import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.assertNull; import static org.junit.Assert.assertTrue; import static org.junit.Assert.fail; @@ -64,7 +66,9 @@ public class InterleavedSearchInvokerTest { Result result = invoker.search(query, null, null); assertTrue("All test scenario events processed", expectedEvents.isEmpty()); - assertNotNull("Result is marked as an error", result.hits().getErrorHit()); + assertNull("Result is not marked as an error", result.hits().getErrorHit()); + var message = findTrace(result, "Backend communication timeout"); + assertThat("Timeout should be reported in a trace message", message.isPresent()); assertTrue("Degradation reason is a normal timeout", result.getCoverage(false).isDegradedByTimeout()); } @@ -81,7 +85,9 @@ public class InterleavedSearchInvokerTest { Result result = invoker.search(query, null, null); assertTrue("All test scenario events processed", expectedEvents.isEmpty()); - assertNotNull("Result is marked as an error", result.hits().getErrorHit()); + assertNull("Result is not marked as an error", result.hits().getErrorHit()); + var message = findTrace(result, "Backend communication timeout"); + assertThat("Timeout should be reported in a trace message", message.isPresent()); assertTrue("Degradataion reason is an adaptive timeout", result.getCoverage(false).isDegradedByAdapativeTimeout()); } @@ -203,7 +209,7 @@ public class InterleavedSearchInvokerTest { invokers.add(new MockInvoker(i)); } - return new InterleavedSearchInvoker(invokers, null, searchCluster) { + return new InterleavedSearchInvoker(invokers, null, searchCluster, null) { @Override protected long currentTime() { return clock.millis(); @@ -235,6 +241,11 @@ public class InterleavedSearchInvokerTest { return coverage; } + private static Optional<String> findTrace(Result result, String prefix) { + var strings = result.getQuery().getContext(false).getTrace().traceNode().descendants(String.class).spliterator(); + return StreamSupport.stream(strings, false).filter(s -> s.startsWith(prefix)).findFirst(); + } + private class Event { Long expectedTimeout; long delay; @@ -270,6 +281,7 @@ public class InterleavedSearchInvokerTest { public TestQuery() { super(); setTimeout(5000); + setTraceLevel(5); } @Override |