diff options
author | Bjørn Christian Seime <bjorncs@yahooinc.com> | 2023-04-12 09:08:58 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2023-04-12 09:08:58 +0200 |
commit | 77cae36a5e25dc030911c68a3490d53b24258c90 (patch) | |
tree | 268347e1fa56b7570da938c2a084cca20fabfb0c | |
parent | d4eeb7d873937b00869cd92a3538184b9863bd86 (diff) | |
parent | 760169f47990044d9d767df527007e1f26a4f1cf (diff) |
Merge pull request #26707 from vespa-engine/bjorncs/revert
Bjorncs/revert
7 files changed, 116 insertions, 122 deletions
diff --git a/container-search/src/main/java/com/yahoo/prelude/IndexFacts.java b/container-search/src/main/java/com/yahoo/prelude/IndexFacts.java index 88a37ea5a02..92ce6abb319 100644 --- a/container-search/src/main/java/com/yahoo/prelude/IndexFacts.java +++ b/container-search/src/main/java/com/yahoo/prelude/IndexFacts.java @@ -6,11 +6,11 @@ import com.yahoo.search.Query; import java.util.ArrayList; import java.util.Collection; import java.util.HashMap; -import java.util.HashSet; import java.util.LinkedHashMap; import java.util.List; import java.util.Map; import java.util.Set; +import java.util.TreeSet; import static com.yahoo.text.Lowercase.toLowerCase; @@ -32,6 +32,16 @@ public class IndexFacts { private Map<String, List<String>> clusterByDocument; + private static class DocumentTypeListOffset { + public final int offset; + public final SearchDefinition searchDefinition; + + public DocumentTypeListOffset(int offset, SearchDefinition searchDefinition) { + this.offset = offset; + this.searchDefinition = searchDefinition; + } + } + /** A Map of all known search definitions indexed by name */ private Map<String, SearchDefinition> searchDefinitions = new LinkedHashMap<>(); @@ -100,32 +110,34 @@ public class IndexFacts { private boolean isIndexFromDocumentTypes(String indexName, List<String> documentTypes) { if ( ! isInitialized()) return true; - if (documentTypes.isEmpty()) return unionSearchDefinition.getIndex(indexName) != null; + if (documentTypes.isEmpty()) { + return unionSearchDefinition.getIndex(indexName) != null; + } - for (String docName : documentTypes) { - SearchDefinition sd = searchDefinitions.get(docName); - if (sd != null) { - Index index = sd.getIndex(indexName); - if (index != null) return true; + DocumentTypeListOffset sd = chooseSearchDefinition(documentTypes, 0); + while (sd != null) { + Index index = sd.searchDefinition.getIndex(indexName); + if (index != null) { + return true; } + sd = chooseSearchDefinition(documentTypes, sd.offset); } + return false; } private String getCanonicNameFromDocumentTypes(String indexName, List<String> documentTypes) { if (!isInitialized()) return indexName; - String lowerCased = toLowerCase(indexName); if (documentTypes.isEmpty()) { - Index index = unionSearchDefinition.getIndexByLowerCase(lowerCased); + Index index = unionSearchDefinition.getIndexByLowerCase(toLowerCase(indexName)); return index == null ? indexName : index.getName(); } - for (String docName : documentTypes) { - SearchDefinition sd = searchDefinitions.get(docName); - if (sd != null) { - Index index = sd.getIndexByLowerCase(lowerCased); - if (index != null) return index.getName(); - } + DocumentTypeListOffset sd = chooseSearchDefinition(documentTypes, 0); + while (sd != null) { + Index index = sd.searchDefinition.getIndexByLowerCase(toLowerCase(indexName)); + if (index != null) return index.getName(); + sd = chooseSearchDefinition(documentTypes, sd.offset); } return indexName; } @@ -146,12 +158,13 @@ public class IndexFacts { return index; } - for (String docName : documentTypes) { - SearchDefinition sd = searchDefinitions.get(docName); - if (sd != null) { - Index index = sd.getIndex(canonicName); - if (index != null) return index; - } + DocumentTypeListOffset sd = chooseSearchDefinition(documentTypes, 0); + + while (sd != null) { + Index index = sd.searchDefinition.getIndex(canonicName); + + if (index != null) return index; + sd = chooseSearchDefinition(documentTypes, sd.offset); } return Index.nullIndex; } @@ -174,7 +187,7 @@ public class IndexFacts { * Given a search list which is a mixture of document types and cluster * names, and a restrict list which is a list of document types, return a * set of all valid document types for this combination. Most use-cases for - * fetching index settings will involve calling this method with the + * fetching index settings will involve calling this method with the the * incoming query's {@link com.yahoo.search.query.Model#getSources()} and * {@link com.yahoo.search.query.Model#getRestrict()} as input parameters * before calling any other method of this class. @@ -183,20 +196,20 @@ public class IndexFacts { * @param restrict the restrict list for a query * @return a (possibly empty) set of valid document types */ - private Set<String> resolveDocumentTypes(Collection<String> sources, Set<String> restrict, + private Set<String> resolveDocumentTypes(Collection<String> sources, Collection<String> restrict, Set<String> candidateDocumentTypes) { sources = emptyCollectionIfNull(sources); - restrict = emptySetIfNull(restrict); + restrict = emptyCollectionIfNull(restrict); if (sources.isEmpty()) { if ( ! restrict.isEmpty()) { - return Set.copyOf(restrict); + return new TreeSet<>(restrict); } else { return candidateDocumentTypes; } } - Set<String> toSearch = new HashSet<>(); + Set<String> toSearch = new TreeSet<>(); for (String source : sources) { // source: a document type or a cluster containing them List<String> clusterDocTypes = clusters.get(source); if (clusterDocTypes == null) { // source was a document type @@ -222,8 +235,21 @@ public class IndexFacts { private Collection<String> emptyCollectionIfNull(Collection<String> collection) { return collection == null ? List.of() : collection; } - private Set<String> emptySetIfNull(Set<String> collection) { - return collection == null ? Set.of() : collection; + + /** + * Chooses the correct search definition, default if in doubt. + * + * @return the search definition to use + */ + private DocumentTypeListOffset chooseSearchDefinition(List<String> documentTypes, int index) { + while (index < documentTypes.size()) { + String docName = documentTypes.get(index++); + SearchDefinition sd = searchDefinitions.get(docName); + if (sd != null) { + return new DocumentTypeListOffset(index, sd); + } + } + return null; } /** @@ -253,6 +279,10 @@ public class IndexFacts { return frozen; } + private void ensureNotFrozen() { + if (frozen) throw new IllegalStateException("Tried to modify frozen IndexFacts instance."); + } + public String getDefaultPosition(String sdName) { SearchDefinition sd; if (sdName == null) { @@ -270,16 +300,12 @@ public class IndexFacts { return new Session(query); } - public Session newSession() { - return new Session(Set.of(), Set.of()); - } - - public Session newSession(Collection<String> sources, Set<String> restrict) { + public Session newSession(Collection<String> sources, Collection<String> restrict) { return new Session(sources, restrict); } public Session newSession(Collection<String> sources, - Set<String> restrict, + Collection<String> restrict, Set<String> candidateDocumentTypes) { return new Session(sources, restrict, candidateDocumentTypes); } @@ -297,12 +323,12 @@ public class IndexFacts { documentTypes = List.copyOf(resolveDocumentTypes(query)); } - private Session(Collection<String> sources, Set<String> restrict) { + private Session(Collection<String> sources, Collection<String> restrict) { // Assumption: Search definition name equals document name. documentTypes = List.copyOf(resolveDocumentTypes(sources, restrict, searchDefinitions.keySet())); } - private Session(Collection<String> sources, Set<String> restrict, Set<String> candidateDocumentTypes) { + private Session(Collection<String> sources, Collection<String> restrict, Set<String> candidateDocumentTypes) { documentTypes = List.copyOf(resolveDocumentTypes(sources, restrict, candidateDocumentTypes)); } diff --git a/container-search/src/main/java/com/yahoo/prelude/cluster/ClusterSearcher.java b/container-search/src/main/java/com/yahoo/prelude/cluster/ClusterSearcher.java index f0e3e3f3e44..46332d632fe 100644 --- a/container-search/src/main/java/com/yahoo/prelude/cluster/ClusterSearcher.java +++ b/container-search/src/main/java/com/yahoo/prelude/cluster/ClusterSearcher.java @@ -1,8 +1,8 @@ // Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. package com.yahoo.prelude.cluster; -import com.yahoo.component.ComponentId; import com.yahoo.component.annotation.Inject; +import com.yahoo.component.ComponentId; import com.yahoo.component.chain.dependencies.After; import com.yahoo.component.provider.ComponentRegistry; import com.yahoo.container.QrSearchersConfig; @@ -28,6 +28,10 @@ import com.yahoo.vespa.streamingvisitors.VdsStreamingSearcher; import com.yahoo.yolean.Exceptions; import java.util.ArrayList; +import java.util.Collection; +import java.util.Collections; +import java.util.HashSet; +import java.util.LinkedHashSet; import java.util.List; import java.util.Set; import java.util.UUID; @@ -53,7 +57,8 @@ public class ClusterSearcher extends Searcher { private final String searchClusterName; - private final SchemaResolver schemaResolver; + // The set of document types contained in this search cluster + private final Set<String> schemas; private final long maxQueryTimeout; // in milliseconds private final long maxQueryCacheTimeout; // in milliseconds @@ -79,7 +84,7 @@ public class ClusterSearcher extends Searcher { searchClusterName = clusterConfig.clusterName(); QrSearchersConfig.Searchcluster searchClusterConfig = getSearchClusterConfigFromClusterName(qrsConfig, searchClusterName); this.globalPhaseRanker = searchClusterConfig.globalphase() ? globalPhaseRanker : null; - this.schemaResolver = new SchemaResolver(documentDbConfig); + schemas = new LinkedHashSet<>(); maxQueryTimeout = ParameterParser.asMilliSeconds(clusterConfig.maxQueryTimeout(), DEFAULT_MAX_QUERY_TIMEOUT); maxQueryCacheTimeout = ParameterParser.asMilliSeconds(clusterConfig.maxQueryCacheTimeout(), DEFAULT_MAX_QUERY_CACHE_TIMEOUT); @@ -88,6 +93,9 @@ public class ClusterSearcher extends Searcher { .com().yahoo().prelude().fastsearch().FastSearcher().docsum() .defaultclass()); + for (DocumentdbInfoConfig.Documentdb docDb : documentDbConfig.documentdb()) + schemas.add(docDb.name()); + String uniqueServerId = UUID.randomUUID().toString(); if (searchClusterConfig.indexingmode() == STREAMING) { server = vdsCluster(uniqueServerId, searchClusterIndex, @@ -149,7 +157,7 @@ public class ClusterSearcher extends Searcher { /** Do not use, for internal testing purposes only. **/ ClusterSearcher(Set<String> schemas, VespaBackEndSearcher searcher, Executor executor) { - this.schemaResolver = new SchemaResolver(schemas); + this.schemas = schemas; searchClusterName = "testScenario"; maxQueryTimeout = DEFAULT_MAX_QUERY_TIMEOUT; maxQueryCacheTimeout = DEFAULT_MAX_QUERY_CACHE_TIMEOUT; @@ -221,9 +229,8 @@ public class ClusterSearcher extends Searcher { } private Result doSearch(Searcher searcher, Query query, Execution execution) { - var schemas = schemaResolver.resolve(query, execution); if (schemas.size() > 1) { - return searchMultipleDocumentTypes(searcher, query, execution, schemas); + return searchMultipleDocumentTypes(searcher, query, execution); } else { String docType = schemas.iterator().next(); query.getModel().setRestrict(docType); @@ -259,7 +266,8 @@ public class ClusterSearcher extends Searcher { } } - private Result searchMultipleDocumentTypes(Searcher searcher, Query query, Execution execution, Set<String> schemas) { + private Result searchMultipleDocumentTypes(Searcher searcher, Query query, Execution execution) { + Set<String> schemas = resolveSchemas(query, execution.context().getIndexFacts()); List<Query> queries = createQueries(query, schemas); if (queries.size() == 1) { return perSchemaSearch(searcher, queries.get(0), execution); @@ -293,7 +301,25 @@ public class ClusterSearcher extends Searcher { } Set<String> resolveSchemas(Query query, IndexFacts indexFacts) { - return schemaResolver.resolve(query, indexFacts); + Set<String> restrict = query.getModel().getRestrict(); + if (restrict == null || restrict.isEmpty()) { + Set<String> sources = query.getModel().getSources(); + return (sources == null || sources.isEmpty()) + ? schemas + : new HashSet<>(indexFacts.newSession(sources, Collections.emptyList(), schemas).documentTypes()); + } else { + return filterValidDocumentTypes(restrict); + } + } + + private Set<String> filterValidDocumentTypes(Collection<String> restrict) { + Set<String> retval = new LinkedHashSet<>(); + for (String docType : restrict) { + if (docType != null && schemas.contains(docType)) { + retval.add(docType); + } + } + return retval; } private List<Query> createQueries(Query query, Set<String> docTypes) { diff --git a/container-search/src/main/java/com/yahoo/prelude/cluster/SchemaResolver.java b/container-search/src/main/java/com/yahoo/prelude/cluster/SchemaResolver.java deleted file mode 100644 index 3a2125d1d38..00000000000 --- a/container-search/src/main/java/com/yahoo/prelude/cluster/SchemaResolver.java +++ /dev/null @@ -1,58 +0,0 @@ -// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. - -package com.yahoo.prelude.cluster; - -import com.yahoo.prelude.IndexFacts; -import com.yahoo.prelude.fastsearch.DocumentdbInfoConfig; -import com.yahoo.search.Query; -import com.yahoo.search.searchchain.Execution; - -import java.util.Collection; -import java.util.LinkedHashSet; -import java.util.Set; - -/** - * Resolves schemas from query and execution context - * - * @author bjorncs - */ -class SchemaResolver { - - private final Set<String> schemas; - - SchemaResolver(DocumentdbInfoConfig cfg) { - this(cfg.documentdb().stream().map(DocumentdbInfoConfig.Documentdb::name).toList()); - } - - SchemaResolver(Collection<String> schemas) { - this.schemas = new LinkedHashSet<>(schemas); - } - - Set<String> resolve(Query query, Execution execution) { - return resolve(query, execution.context().getIndexFacts()); - } - - Set<String> resolve(Query query, IndexFacts indexFacts) { - if (schemas.size() == 1) return Set.of(schemas.iterator().next()); - var restrict = query.getModel().getRestrict(); - if (restrict == null || restrict.isEmpty()) { - Set<String> sources = query.getModel().getSources(); - return (sources == null || sources.isEmpty()) - ? schemas - : new LinkedHashSet<>(indexFacts.newSession(sources, Set.of(), schemas).documentTypes()); - } else { - return filterValidDocumentTypes(restrict); - } - } - - private Set<String> filterValidDocumentTypes(Collection<String> restrict) { - Set<String> retval = new LinkedHashSet<>(); - for (String docType : restrict) { - if (docType != null && schemas.contains(docType)) { - retval.add(docType); - } - } - return retval; - } - -} diff --git a/container-search/src/main/java/com/yahoo/prelude/query/parser/CustomParser.java b/container-search/src/main/java/com/yahoo/prelude/query/parser/CustomParser.java index 2bd408220cd..e3b2278475b 100644 --- a/container-search/src/main/java/com/yahoo/prelude/query/parser/CustomParser.java +++ b/container-search/src/main/java/com/yahoo/prelude/query/parser/CustomParser.java @@ -6,6 +6,7 @@ import com.yahoo.prelude.IndexFacts; import com.yahoo.prelude.query.Item; import com.yahoo.search.query.parser.Parser; +import java.util.Collections; import java.util.Set; /** @@ -22,7 +23,7 @@ public interface CustomParser extends Parser { Set<String> toSearch, IndexFacts indexFacts, String defaultIndexName) { if (indexFacts == null) indexFacts = new IndexFacts(); - return parse(queryToParse, filterToParse, parsingLanguage, indexFacts.newSession(toSearch, Set.of()), defaultIndexName); + return parse(queryToParse, filterToParse, parsingLanguage, indexFacts.newSession(toSearch, Collections.emptySet()), defaultIndexName); } Item parse(String queryToParse, String filterToParse, Language parsingLanguage, diff --git a/container-search/src/main/java/com/yahoo/prelude/query/parser/Tokenizer.java b/container-search/src/main/java/com/yahoo/prelude/query/parser/Tokenizer.java index 9952ec64d13..c1d415b8e27 100644 --- a/container-search/src/main/java/com/yahoo/prelude/query/parser/Tokenizer.java +++ b/container-search/src/main/java/com/yahoo/prelude/query/parser/Tokenizer.java @@ -8,6 +8,7 @@ import com.yahoo.prelude.Index; import com.yahoo.prelude.IndexFacts; import com.yahoo.prelude.query.Substring; +import java.util.Collections; import java.util.List; import static com.yahoo.prelude.query.parser.Token.Kind.*; @@ -62,7 +63,7 @@ public final class Tokenizer { * @return a read-only list of tokens. This list can only be used by this thread */ public List<Token> tokenize(String string) { - return tokenize(string, new IndexFacts().newSession()); + return tokenize(string, new IndexFacts().newSession(Collections.emptySet(), Collections.emptySet())); } /** @@ -170,10 +171,13 @@ public final class Tokenizer { // this is a heuristic to check whether we probably have reached the end of an URL element for (int i = tokens.size() - 1; i >= 0; --i) { switch (tokens.get(i).kind) { - case COLON -> { if (i == indexLastExplicitlyChangedAt) return false; } - case SPACE -> { return true; } - default -> { } - // do nothing + case COLON: + if (i == indexLastExplicitlyChangedAt) return false; + break; + case SPACE: + return true; + default: + // do nothing } } // really not sure whether we should choose false instead, on cause of the guard at diff --git a/container-search/src/test/java/com/yahoo/prelude/query/parser/test/TokenizerTestCase.java b/container-search/src/test/java/com/yahoo/prelude/query/parser/test/TokenizerTestCase.java index 3a6be1521e2..1ff5574ec03 100644 --- a/container-search/src/test/java/com/yahoo/prelude/query/parser/test/TokenizerTestCase.java +++ b/container-search/src/test/java/com/yahoo/prelude/query/parser/test/TokenizerTestCase.java @@ -13,6 +13,7 @@ import com.yahoo.prelude.query.parser.Tokenizer; import org.junit.jupiter.api.Test; import java.util.ArrayList; +import java.util.Collections; import java.util.List; import static com.yahoo.prelude.query.parser.Token.Kind.COLON; @@ -28,9 +29,7 @@ import static com.yahoo.prelude.query.parser.Token.Kind.SPACE; import static com.yahoo.prelude.query.parser.Token.Kind.STAR; import static com.yahoo.prelude.query.parser.Token.Kind.UNDERSCORE; import static com.yahoo.prelude.query.parser.Token.Kind.WORD; -import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assertions.assertFalse; -import static org.junit.jupiter.api.Assertions.assertTrue; +import static org.junit.jupiter.api.Assertions.*; /** * Tests the tokenizer @@ -284,7 +283,7 @@ public class TokenizerTestCase { sd.addIndex(index2); IndexFacts facts = new IndexFacts(new IndexModel(sd)); - IndexFacts.Session session = facts.newSession(); + IndexFacts.Session session = facts.newSession(Collections.emptySet(), Collections.emptySet()); Tokenizer tokenizer = new Tokenizer(new SimpleLinguistics()); List<?> tokens = tokenizer.tokenize("normal a:b (normal testexact1:/,%#%&+-+ ) testexact2:ho_/&%&/()/aa*::*& b:c", "default", session); // tokenizer.print(); @@ -329,7 +328,7 @@ public class TokenizerTestCase { IndexFacts facts = new IndexFacts(new IndexModel(sd)); Tokenizer tokenizer = new Tokenizer(new SimpleLinguistics()); - IndexFacts.Session session = facts.newSession(); + IndexFacts.Session session = facts.newSession(Collections.emptySet(), Collections.emptySet()); List<?> tokens = tokenizer.tokenize("normal a:b (normal testexact1:/,%#%&+-+ ) testexact2:ho_/&%&/()/aa*::*&", session); assertEquals(new Token(WORD, "normal"), tokens.get(0)); assertEquals(new Token(SPACE, " "), tokens.get(1)); @@ -366,7 +365,7 @@ public class TokenizerTestCase { IndexFacts facts = new IndexFacts(new IndexModel(sd)); Tokenizer tokenizer = new Tokenizer(new SimpleLinguistics()); - IndexFacts.Session session = facts.newSession(); + IndexFacts.Session session = facts.newSession(Collections.emptySet(), Collections.emptySet()); List<?> tokens = tokenizer.tokenize("normal a:b (normal testexact1:/,%#%&+-+ ) testexact2:ho_/&%&/()/aa*::*", session); assertEquals(new Token(WORD, "normal"), tokens.get(0)); assertEquals(new Token(SPACE, " "), tokens.get(1)); @@ -403,7 +402,7 @@ public class TokenizerTestCase { IndexFacts facts = new IndexFacts(new IndexModel(sd)); Tokenizer tokenizer = new Tokenizer(new SimpleLinguistics()); - IndexFacts.Session session = facts.newSession(); + IndexFacts.Session session = facts.newSession(Collections.emptySet(), Collections.emptySet()); List<?> tokens = tokenizer.tokenize("normal a:b (normal testexact1:!/%#%&+-+ ) testexact2:ho_/&%&/()/aa*::*&b:", session); assertEquals(new Token(WORD, "normal"), tokens.get(0)); assertEquals(new Token(SPACE, " "), tokens.get(1)); @@ -440,7 +439,7 @@ public class TokenizerTestCase { sd.addIndex(index2); IndexFacts indexFacts = new IndexFacts(new IndexModel(sd)); - IndexFacts.Session facts = indexFacts.newSession(); + IndexFacts.Session facts = indexFacts.newSession(Collections.emptySet(), Collections.emptySet()); Tokenizer tokenizer = new Tokenizer(new SimpleLinguistics()); List<?> tokens = tokenizer.tokenize("normal a:b (normal testexact1:foo) testexact2:bar", facts); diff --git a/container-search/src/test/java/com/yahoo/prelude/test/IndexFactsTestCase.java b/container-search/src/test/java/com/yahoo/prelude/test/IndexFactsTestCase.java index dbcb393c922..e6c5a18c9da 100644 --- a/container-search/src/test/java/com/yahoo/prelude/test/IndexFactsTestCase.java +++ b/container-search/src/test/java/com/yahoo/prelude/test/IndexFactsTestCase.java @@ -15,12 +15,8 @@ import org.junit.jupiter.api.Test; import java.util.Collection; import java.util.List; import java.util.Map; -import java.util.Set; -import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assertions.assertFalse; -import static org.junit.jupiter.api.Assertions.assertNull; -import static org.junit.jupiter.api.Assertions.assertTrue; +import static org.junit.jupiter.api.Assertions.*; /** * Tests using synthetic index names for IndexFacts class. @@ -184,7 +180,7 @@ public class IndexFactsTestCase { query.getModel().getSources().add("one"); query.getModel().getRestrict().add("two"); - IndexFacts.Session indexFacts = createIndexFacts().newSession(List.of("clusterOne"), Set.of()); + IndexFacts.Session indexFacts = createIndexFacts().newSession(List.of("clusterOne"), List.of()); assertTrue(indexFacts.isIndex("a")); assertFalse(indexFacts.isIndex("b")); assertTrue(indexFacts.isIndex("d")); |