From 760169f47990044d9d767df527007e1f26a4f1cf Mon Sep 17 00:00:00 2001 From: Bjørn Christian Seime Date: Tue, 11 Apr 2023 16:13:31 +0200 Subject: Revert "- HashMap over TreeMap when order des not matter." This reverts commit b1733875a7303d71abfe384da2d6589af742d779. --- .../main/java/com/yahoo/prelude/IndexFacts.java | 98 ++++++++++++++-------- .../com/yahoo/prelude/cluster/ClusterSearcher.java | 3 +- .../yahoo/prelude/query/parser/CustomParser.java | 3 +- .../com/yahoo/prelude/query/parser/Tokenizer.java | 14 ++-- .../query/parser/test/TokenizerTestCase.java | 15 ++-- .../com/yahoo/prelude/test/IndexFactsTestCase.java | 8 +- 6 files changed, 84 insertions(+), 57 deletions(-) (limited to 'container-search') diff --git a/container-search/src/main/java/com/yahoo/prelude/IndexFacts.java b/container-search/src/main/java/com/yahoo/prelude/IndexFacts.java index 88a37ea5a02..92ce6abb319 100644 --- a/container-search/src/main/java/com/yahoo/prelude/IndexFacts.java +++ b/container-search/src/main/java/com/yahoo/prelude/IndexFacts.java @@ -6,11 +6,11 @@ import com.yahoo.search.Query; import java.util.ArrayList; import java.util.Collection; import java.util.HashMap; -import java.util.HashSet; import java.util.LinkedHashMap; import java.util.List; import java.util.Map; import java.util.Set; +import java.util.TreeSet; import static com.yahoo.text.Lowercase.toLowerCase; @@ -32,6 +32,16 @@ public class IndexFacts { private Map> clusterByDocument; + private static class DocumentTypeListOffset { + public final int offset; + public final SearchDefinition searchDefinition; + + public DocumentTypeListOffset(int offset, SearchDefinition searchDefinition) { + this.offset = offset; + this.searchDefinition = searchDefinition; + } + } + /** A Map of all known search definitions indexed by name */ private Map searchDefinitions = new LinkedHashMap<>(); @@ -100,32 +110,34 @@ public class IndexFacts { private boolean isIndexFromDocumentTypes(String indexName, List documentTypes) { if ( ! isInitialized()) return true; - if (documentTypes.isEmpty()) return unionSearchDefinition.getIndex(indexName) != null; + if (documentTypes.isEmpty()) { + return unionSearchDefinition.getIndex(indexName) != null; + } - for (String docName : documentTypes) { - SearchDefinition sd = searchDefinitions.get(docName); - if (sd != null) { - Index index = sd.getIndex(indexName); - if (index != null) return true; + DocumentTypeListOffset sd = chooseSearchDefinition(documentTypes, 0); + while (sd != null) { + Index index = sd.searchDefinition.getIndex(indexName); + if (index != null) { + return true; } + sd = chooseSearchDefinition(documentTypes, sd.offset); } + return false; } private String getCanonicNameFromDocumentTypes(String indexName, List documentTypes) { if (!isInitialized()) return indexName; - String lowerCased = toLowerCase(indexName); if (documentTypes.isEmpty()) { - Index index = unionSearchDefinition.getIndexByLowerCase(lowerCased); + Index index = unionSearchDefinition.getIndexByLowerCase(toLowerCase(indexName)); return index == null ? indexName : index.getName(); } - for (String docName : documentTypes) { - SearchDefinition sd = searchDefinitions.get(docName); - if (sd != null) { - Index index = sd.getIndexByLowerCase(lowerCased); - if (index != null) return index.getName(); - } + DocumentTypeListOffset sd = chooseSearchDefinition(documentTypes, 0); + while (sd != null) { + Index index = sd.searchDefinition.getIndexByLowerCase(toLowerCase(indexName)); + if (index != null) return index.getName(); + sd = chooseSearchDefinition(documentTypes, sd.offset); } return indexName; } @@ -146,12 +158,13 @@ public class IndexFacts { return index; } - for (String docName : documentTypes) { - SearchDefinition sd = searchDefinitions.get(docName); - if (sd != null) { - Index index = sd.getIndex(canonicName); - if (index != null) return index; - } + DocumentTypeListOffset sd = chooseSearchDefinition(documentTypes, 0); + + while (sd != null) { + Index index = sd.searchDefinition.getIndex(canonicName); + + if (index != null) return index; + sd = chooseSearchDefinition(documentTypes, sd.offset); } return Index.nullIndex; } @@ -174,7 +187,7 @@ public class IndexFacts { * Given a search list which is a mixture of document types and cluster * names, and a restrict list which is a list of document types, return a * set of all valid document types for this combination. Most use-cases for - * fetching index settings will involve calling this method with the + * fetching index settings will involve calling this method with the the * incoming query's {@link com.yahoo.search.query.Model#getSources()} and * {@link com.yahoo.search.query.Model#getRestrict()} as input parameters * before calling any other method of this class. @@ -183,20 +196,20 @@ public class IndexFacts { * @param restrict the restrict list for a query * @return a (possibly empty) set of valid document types */ - private Set resolveDocumentTypes(Collection sources, Set restrict, + private Set resolveDocumentTypes(Collection sources, Collection restrict, Set candidateDocumentTypes) { sources = emptyCollectionIfNull(sources); - restrict = emptySetIfNull(restrict); + restrict = emptyCollectionIfNull(restrict); if (sources.isEmpty()) { if ( ! restrict.isEmpty()) { - return Set.copyOf(restrict); + return new TreeSet<>(restrict); } else { return candidateDocumentTypes; } } - Set toSearch = new HashSet<>(); + Set toSearch = new TreeSet<>(); for (String source : sources) { // source: a document type or a cluster containing them List clusterDocTypes = clusters.get(source); if (clusterDocTypes == null) { // source was a document type @@ -222,8 +235,21 @@ public class IndexFacts { private Collection emptyCollectionIfNull(Collection collection) { return collection == null ? List.of() : collection; } - private Set emptySetIfNull(Set collection) { - return collection == null ? Set.of() : collection; + + /** + * Chooses the correct search definition, default if in doubt. + * + * @return the search definition to use + */ + private DocumentTypeListOffset chooseSearchDefinition(List documentTypes, int index) { + while (index < documentTypes.size()) { + String docName = documentTypes.get(index++); + SearchDefinition sd = searchDefinitions.get(docName); + if (sd != null) { + return new DocumentTypeListOffset(index, sd); + } + } + return null; } /** @@ -253,6 +279,10 @@ public class IndexFacts { return frozen; } + private void ensureNotFrozen() { + if (frozen) throw new IllegalStateException("Tried to modify frozen IndexFacts instance."); + } + public String getDefaultPosition(String sdName) { SearchDefinition sd; if (sdName == null) { @@ -270,16 +300,12 @@ public class IndexFacts { return new Session(query); } - public Session newSession() { - return new Session(Set.of(), Set.of()); - } - - public Session newSession(Collection sources, Set restrict) { + public Session newSession(Collection sources, Collection restrict) { return new Session(sources, restrict); } public Session newSession(Collection sources, - Set restrict, + Collection restrict, Set candidateDocumentTypes) { return new Session(sources, restrict, candidateDocumentTypes); } @@ -297,12 +323,12 @@ public class IndexFacts { documentTypes = List.copyOf(resolveDocumentTypes(query)); } - private Session(Collection sources, Set restrict) { + private Session(Collection sources, Collection restrict) { // Assumption: Search definition name equals document name. documentTypes = List.copyOf(resolveDocumentTypes(sources, restrict, searchDefinitions.keySet())); } - private Session(Collection sources, Set restrict, Set candidateDocumentTypes) { + private Session(Collection sources, Collection restrict, Set candidateDocumentTypes) { documentTypes = List.copyOf(resolveDocumentTypes(sources, restrict, candidateDocumentTypes)); } diff --git a/container-search/src/main/java/com/yahoo/prelude/cluster/ClusterSearcher.java b/container-search/src/main/java/com/yahoo/prelude/cluster/ClusterSearcher.java index 0bda1ce75ad..46332d632fe 100644 --- a/container-search/src/main/java/com/yahoo/prelude/cluster/ClusterSearcher.java +++ b/container-search/src/main/java/com/yahoo/prelude/cluster/ClusterSearcher.java @@ -29,6 +29,7 @@ import com.yahoo.yolean.Exceptions; import java.util.ArrayList; import java.util.Collection; +import java.util.Collections; import java.util.HashSet; import java.util.LinkedHashSet; import java.util.List; @@ -305,7 +306,7 @@ public class ClusterSearcher extends Searcher { Set sources = query.getModel().getSources(); return (sources == null || sources.isEmpty()) ? schemas - : new HashSet<>(indexFacts.newSession(sources, Set.of(), schemas).documentTypes()); + : new HashSet<>(indexFacts.newSession(sources, Collections.emptyList(), schemas).documentTypes()); } else { return filterValidDocumentTypes(restrict); } diff --git a/container-search/src/main/java/com/yahoo/prelude/query/parser/CustomParser.java b/container-search/src/main/java/com/yahoo/prelude/query/parser/CustomParser.java index 2bd408220cd..e3b2278475b 100644 --- a/container-search/src/main/java/com/yahoo/prelude/query/parser/CustomParser.java +++ b/container-search/src/main/java/com/yahoo/prelude/query/parser/CustomParser.java @@ -6,6 +6,7 @@ import com.yahoo.prelude.IndexFacts; import com.yahoo.prelude.query.Item; import com.yahoo.search.query.parser.Parser; +import java.util.Collections; import java.util.Set; /** @@ -22,7 +23,7 @@ public interface CustomParser extends Parser { Set toSearch, IndexFacts indexFacts, String defaultIndexName) { if (indexFacts == null) indexFacts = new IndexFacts(); - return parse(queryToParse, filterToParse, parsingLanguage, indexFacts.newSession(toSearch, Set.of()), defaultIndexName); + return parse(queryToParse, filterToParse, parsingLanguage, indexFacts.newSession(toSearch, Collections.emptySet()), defaultIndexName); } Item parse(String queryToParse, String filterToParse, Language parsingLanguage, diff --git a/container-search/src/main/java/com/yahoo/prelude/query/parser/Tokenizer.java b/container-search/src/main/java/com/yahoo/prelude/query/parser/Tokenizer.java index 9952ec64d13..c1d415b8e27 100644 --- a/container-search/src/main/java/com/yahoo/prelude/query/parser/Tokenizer.java +++ b/container-search/src/main/java/com/yahoo/prelude/query/parser/Tokenizer.java @@ -8,6 +8,7 @@ import com.yahoo.prelude.Index; import com.yahoo.prelude.IndexFacts; import com.yahoo.prelude.query.Substring; +import java.util.Collections; import java.util.List; import static com.yahoo.prelude.query.parser.Token.Kind.*; @@ -62,7 +63,7 @@ public final class Tokenizer { * @return a read-only list of tokens. This list can only be used by this thread */ public List tokenize(String string) { - return tokenize(string, new IndexFacts().newSession()); + return tokenize(string, new IndexFacts().newSession(Collections.emptySet(), Collections.emptySet())); } /** @@ -170,10 +171,13 @@ public final class Tokenizer { // this is a heuristic to check whether we probably have reached the end of an URL element for (int i = tokens.size() - 1; i >= 0; --i) { switch (tokens.get(i).kind) { - case COLON -> { if (i == indexLastExplicitlyChangedAt) return false; } - case SPACE -> { return true; } - default -> { } - // do nothing + case COLON: + if (i == indexLastExplicitlyChangedAt) return false; + break; + case SPACE: + return true; + default: + // do nothing } } // really not sure whether we should choose false instead, on cause of the guard at diff --git a/container-search/src/test/java/com/yahoo/prelude/query/parser/test/TokenizerTestCase.java b/container-search/src/test/java/com/yahoo/prelude/query/parser/test/TokenizerTestCase.java index 3a6be1521e2..1ff5574ec03 100644 --- a/container-search/src/test/java/com/yahoo/prelude/query/parser/test/TokenizerTestCase.java +++ b/container-search/src/test/java/com/yahoo/prelude/query/parser/test/TokenizerTestCase.java @@ -13,6 +13,7 @@ import com.yahoo.prelude.query.parser.Tokenizer; import org.junit.jupiter.api.Test; import java.util.ArrayList; +import java.util.Collections; import java.util.List; import static com.yahoo.prelude.query.parser.Token.Kind.COLON; @@ -28,9 +29,7 @@ import static com.yahoo.prelude.query.parser.Token.Kind.SPACE; import static com.yahoo.prelude.query.parser.Token.Kind.STAR; import static com.yahoo.prelude.query.parser.Token.Kind.UNDERSCORE; import static com.yahoo.prelude.query.parser.Token.Kind.WORD; -import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assertions.assertFalse; -import static org.junit.jupiter.api.Assertions.assertTrue; +import static org.junit.jupiter.api.Assertions.*; /** * Tests the tokenizer @@ -284,7 +283,7 @@ public class TokenizerTestCase { sd.addIndex(index2); IndexFacts facts = new IndexFacts(new IndexModel(sd)); - IndexFacts.Session session = facts.newSession(); + IndexFacts.Session session = facts.newSession(Collections.emptySet(), Collections.emptySet()); Tokenizer tokenizer = new Tokenizer(new SimpleLinguistics()); List tokens = tokenizer.tokenize("normal a:b (normal testexact1:/,%#%&+-+ ) testexact2:ho_/&%&/()/aa*::*& b:c", "default", session); // tokenizer.print(); @@ -329,7 +328,7 @@ public class TokenizerTestCase { IndexFacts facts = new IndexFacts(new IndexModel(sd)); Tokenizer tokenizer = new Tokenizer(new SimpleLinguistics()); - IndexFacts.Session session = facts.newSession(); + IndexFacts.Session session = facts.newSession(Collections.emptySet(), Collections.emptySet()); List tokens = tokenizer.tokenize("normal a:b (normal testexact1:/,%#%&+-+ ) testexact2:ho_/&%&/()/aa*::*&", session); assertEquals(new Token(WORD, "normal"), tokens.get(0)); assertEquals(new Token(SPACE, " "), tokens.get(1)); @@ -366,7 +365,7 @@ public class TokenizerTestCase { IndexFacts facts = new IndexFacts(new IndexModel(sd)); Tokenizer tokenizer = new Tokenizer(new SimpleLinguistics()); - IndexFacts.Session session = facts.newSession(); + IndexFacts.Session session = facts.newSession(Collections.emptySet(), Collections.emptySet()); List tokens = tokenizer.tokenize("normal a:b (normal testexact1:/,%#%&+-+ ) testexact2:ho_/&%&/()/aa*::*", session); assertEquals(new Token(WORD, "normal"), tokens.get(0)); assertEquals(new Token(SPACE, " "), tokens.get(1)); @@ -403,7 +402,7 @@ public class TokenizerTestCase { IndexFacts facts = new IndexFacts(new IndexModel(sd)); Tokenizer tokenizer = new Tokenizer(new SimpleLinguistics()); - IndexFacts.Session session = facts.newSession(); + IndexFacts.Session session = facts.newSession(Collections.emptySet(), Collections.emptySet()); List tokens = tokenizer.tokenize("normal a:b (normal testexact1:!/%#%&+-+ ) testexact2:ho_/&%&/()/aa*::*&b:", session); assertEquals(new Token(WORD, "normal"), tokens.get(0)); assertEquals(new Token(SPACE, " "), tokens.get(1)); @@ -440,7 +439,7 @@ public class TokenizerTestCase { sd.addIndex(index2); IndexFacts indexFacts = new IndexFacts(new IndexModel(sd)); - IndexFacts.Session facts = indexFacts.newSession(); + IndexFacts.Session facts = indexFacts.newSession(Collections.emptySet(), Collections.emptySet()); Tokenizer tokenizer = new Tokenizer(new SimpleLinguistics()); List tokens = tokenizer.tokenize("normal a:b (normal testexact1:foo) testexact2:bar", facts); diff --git a/container-search/src/test/java/com/yahoo/prelude/test/IndexFactsTestCase.java b/container-search/src/test/java/com/yahoo/prelude/test/IndexFactsTestCase.java index dbcb393c922..e6c5a18c9da 100644 --- a/container-search/src/test/java/com/yahoo/prelude/test/IndexFactsTestCase.java +++ b/container-search/src/test/java/com/yahoo/prelude/test/IndexFactsTestCase.java @@ -15,12 +15,8 @@ import org.junit.jupiter.api.Test; import java.util.Collection; import java.util.List; import java.util.Map; -import java.util.Set; -import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assertions.assertFalse; -import static org.junit.jupiter.api.Assertions.assertNull; -import static org.junit.jupiter.api.Assertions.assertTrue; +import static org.junit.jupiter.api.Assertions.*; /** * Tests using synthetic index names for IndexFacts class. @@ -184,7 +180,7 @@ public class IndexFactsTestCase { query.getModel().getSources().add("one"); query.getModel().getRestrict().add("two"); - IndexFacts.Session indexFacts = createIndexFacts().newSession(List.of("clusterOne"), Set.of()); + IndexFacts.Session indexFacts = createIndexFacts().newSession(List.of("clusterOne"), List.of()); assertTrue(indexFacts.isIndex("a")); assertFalse(indexFacts.isIndex("b")); assertTrue(indexFacts.isIndex("d")); -- cgit v1.2.3