Revert "- HashMap over TreeMap when order des not matter."

This reverts commit b1733875a7303d71abfe384da2d6589af742d779.
author: Bjørn Christian Seime <bjorncs@yahooinc.com> 2023-04-11 16:13:31 +0200
committer: Bjørn Christian Seime <bjorncs@yahooinc.com> 2023-04-11 16:13:31 +0200
commit: 760169f47990044d9d767df527007e1f26a4f1cf (patch)
tree: d6c0b8348aa7c9c7d5362f58d931d9f54087730a
parent: 724ab1e6c2852cd17ce0c0c01151309e34dcf18c (diff)
6 files changed, 84 insertions, 57 deletions
diff --git a/container-search/src/main/java/com/yahoo/prelude/IndexFacts.java b/container-search/src/main/java/com/yahoo/prelude/IndexFacts.java
index 88a37ea5a02..92ce6abb319 100644
--- a/container-search/src/main/java/com/yahoo/prelude/IndexFacts.java
+++ b/container-search/src/main/java/com/yahoo/prelude/IndexFacts.java
@@ -6,11 +6,11 @@ import com.yahoo.search.Query;
 import java.util.ArrayList;
 import java.util.Collection;
 import java.util.HashMap;
-import java.util.HashSet;
 import java.util.LinkedHashMap;
 import java.util.List;
 import java.util.Map;
 import java.util.Set;
+import java.util.TreeSet;
 
 import static com.yahoo.text.Lowercase.toLowerCase;
 
@@ -32,6 +32,16 @@ public class IndexFacts {
 
     private Map<String, List<String>> clusterByDocument;
 
+    private static class DocumentTypeListOffset {
+        public final int offset;
+        public final SearchDefinition searchDefinition;
+
+        public DocumentTypeListOffset(int offset, SearchDefinition searchDefinition) {
+            this.offset = offset;
+            this.searchDefinition = searchDefinition;
+        }
+    }
+
     /** A Map of all known search definitions indexed by name */
     private Map<String, SearchDefinition> searchDefinitions = new LinkedHashMap<>();
 
@@ -100,32 +110,34 @@ public class IndexFacts {
     private boolean isIndexFromDocumentTypes(String indexName, List<String> documentTypes) {
         if ( ! isInitialized()) return true;
 
-        if (documentTypes.isEmpty()) return unionSearchDefinition.getIndex(indexName) != null;
+        if (documentTypes.isEmpty()) {
+            return unionSearchDefinition.getIndex(indexName) != null;
+        }
 
-        for (String docName : documentTypes) {
-            SearchDefinition sd = searchDefinitions.get(docName);
-            if (sd != null) {
-                Index index = sd.getIndex(indexName);
-                if (index != null) return true;
+        DocumentTypeListOffset sd = chooseSearchDefinition(documentTypes, 0);
+        while (sd != null) {
+            Index index = sd.searchDefinition.getIndex(indexName);
+            if (index != null) {
+                return true;
             }
+            sd = chooseSearchDefinition(documentTypes, sd.offset);
         }
+
         return false;
     }
 
     private String getCanonicNameFromDocumentTypes(String indexName, List<String> documentTypes) {
         if (!isInitialized()) return indexName;
-        String lowerCased = toLowerCase(indexName);
 
         if (documentTypes.isEmpty()) {
-            Index index = unionSearchDefinition.getIndexByLowerCase(lowerCased);
+            Index index = unionSearchDefinition.getIndexByLowerCase(toLowerCase(indexName));
             return index == null ? indexName : index.getName();
         }
-        for (String docName : documentTypes) {
-            SearchDefinition sd = searchDefinitions.get(docName);
-            if (sd != null) {
-                Index index = sd.getIndexByLowerCase(lowerCased);
-                if (index != null) return index.getName();
-            }
+        DocumentTypeListOffset sd = chooseSearchDefinition(documentTypes, 0);
+        while (sd != null) {
+            Index index = sd.searchDefinition.getIndexByLowerCase(toLowerCase(indexName));
+            if (index != null) return index.getName();
+            sd = chooseSearchDefinition(documentTypes, sd.offset);
         }
         return indexName;
     }
@@ -146,12 +158,13 @@ public class IndexFacts {
             return index;
         }
 
-        for (String docName : documentTypes) {
-            SearchDefinition sd = searchDefinitions.get(docName);
-            if (sd != null) {
-                Index index = sd.getIndex(canonicName);
-                if (index != null) return index;
-            }
+        DocumentTypeListOffset sd = chooseSearchDefinition(documentTypes, 0);
+
+        while (sd != null) {
+            Index index = sd.searchDefinition.getIndex(canonicName);
+
+            if (index != null) return index;
+            sd = chooseSearchDefinition(documentTypes, sd.offset);
         }
         return Index.nullIndex;
     }
@@ -174,7 +187,7 @@ public class IndexFacts {
      * Given a search list which is a mixture of document types and cluster
      * names, and a restrict list which is a list of document types, return a
      * set of all valid document types for this combination. Most use-cases for
-     * fetching index settings will involve calling this method with the
+     * fetching index settings will involve calling this method with the the
      * incoming query's {@link com.yahoo.search.query.Model#getSources()} and
      * {@link com.yahoo.search.query.Model#getRestrict()} as input parameters
      * before calling any other method of this class.
@@ -183,20 +196,20 @@ public class IndexFacts {
      * @param restrict the restrict list for a query
      * @return a (possibly empty) set of valid document types
      */
-    private Set<String> resolveDocumentTypes(Collection<String> sources, Set<String> restrict,
+    private Set<String> resolveDocumentTypes(Collection<String> sources, Collection<String> restrict,
                                              Set<String> candidateDocumentTypes) {
         sources = emptyCollectionIfNull(sources);
-        restrict = emptySetIfNull(restrict);
+        restrict = emptyCollectionIfNull(restrict);
 
         if (sources.isEmpty()) {
             if ( ! restrict.isEmpty()) {
-                return Set.copyOf(restrict);
+                return new TreeSet<>(restrict);
             } else {
                 return candidateDocumentTypes;
             }
         }
 
-        Set<String> toSearch = new HashSet<>();
+        Set<String> toSearch = new TreeSet<>();
         for (String source : sources) { // source: a document type or a cluster containing them
             List<String> clusterDocTypes = clusters.get(source);
             if (clusterDocTypes == null) { // source was a document type
@@ -222,8 +235,21 @@ public class IndexFacts {
     private Collection<String> emptyCollectionIfNull(Collection<String> collection) {
         return collection == null ? List.of() : collection;
     }
-    private Set<String> emptySetIfNull(Set<String> collection) {
-        return collection == null ? Set.of() : collection;
+
+    /**
+     * Chooses the correct search definition, default if in doubt.
+     *
+     * @return the search definition to use
+     */
+    private DocumentTypeListOffset chooseSearchDefinition(List<String> documentTypes, int index) {
+        while (index < documentTypes.size()) {
+            String docName = documentTypes.get(index++);
+            SearchDefinition sd = searchDefinitions.get(docName);
+            if (sd != null) {
+                return new DocumentTypeListOffset(index, sd);
+            }
+        }
+        return null;
     }
 
     /**
@@ -253,6 +279,10 @@ public class IndexFacts {
         return frozen;
     }
 
+    private void ensureNotFrozen() {
+        if (frozen) throw new IllegalStateException("Tried to modify frozen IndexFacts instance.");
+    }
+
     public String getDefaultPosition(String sdName) {
         SearchDefinition sd;
         if (sdName == null) {
@@ -270,16 +300,12 @@ public class IndexFacts {
         return new Session(query);
     }
 
-    public Session newSession() {
-        return new Session(Set.of(), Set.of());
-    }
-
-    public Session newSession(Collection<String> sources, Set<String> restrict) {
+    public Session newSession(Collection<String> sources, Collection<String> restrict) {
         return new Session(sources, restrict);
     }
 
     public Session newSession(Collection<String> sources,
-                              Set<String> restrict,
+                              Collection<String> restrict,
                               Set<String> candidateDocumentTypes) {
         return new Session(sources, restrict, candidateDocumentTypes);
     }
@@ -297,12 +323,12 @@ public class IndexFacts {
             documentTypes = List.copyOf(resolveDocumentTypes(query));
         }
 
-        private Session(Collection<String> sources, Set<String> restrict) {
+        private Session(Collection<String> sources, Collection<String> restrict) {
             // Assumption: Search definition name equals document name.
             documentTypes = List.copyOf(resolveDocumentTypes(sources, restrict, searchDefinitions.keySet()));
         }
 
-        private Session(Collection<String> sources, Set<String> restrict, Set<String> candidateDocumentTypes) {
+        private Session(Collection<String> sources, Collection<String> restrict, Set<String> candidateDocumentTypes) {
             documentTypes = List.copyOf(resolveDocumentTypes(sources, restrict, candidateDocumentTypes));
         }
 
diff --git a/container-search/src/main/java/com/yahoo/prelude/cluster/ClusterSearcher.java b/container-search/src/main/java/com/yahoo/prelude/cluster/ClusterSearcher.java
index 0bda1ce75ad..46332d632fe 100644
--- a/container-search/src/main/java/com/yahoo/prelude/cluster/ClusterSearcher.java
+++ b/container-search/src/main/java/com/yahoo/prelude/cluster/ClusterSearcher.java
@@ -29,6 +29,7 @@ import com.yahoo.yolean.Exceptions;
 
 import java.util.ArrayList;
 import java.util.Collection;
+import java.util.Collections;
 import java.util.HashSet;
 import java.util.LinkedHashSet;
 import java.util.List;
@@ -305,7 +306,7 @@ public class ClusterSearcher extends Searcher {
             Set<String> sources = query.getModel().getSources();
             return (sources == null || sources.isEmpty())
                     ? schemas
-                    : new HashSet<>(indexFacts.newSession(sources, Set.of(), schemas).documentTypes());
+                    : new HashSet<>(indexFacts.newSession(sources, Collections.emptyList(), schemas).documentTypes());
         } else {
             return filterValidDocumentTypes(restrict);
         }
diff --git a/container-search/src/main/java/com/yahoo/prelude/query/parser/CustomParser.java b/container-search/src/main/java/com/yahoo/prelude/query/parser/CustomParser.java
index 2bd408220cd..e3b2278475b 100644
--- a/container-search/src/main/java/com/yahoo/prelude/query/parser/CustomParser.java
+++ b/container-search/src/main/java/com/yahoo/prelude/query/parser/CustomParser.java
@@ -6,6 +6,7 @@ import com.yahoo.prelude.IndexFacts;
 import com.yahoo.prelude.query.Item;
 import com.yahoo.search.query.parser.Parser;
 
+import java.util.Collections;
 import java.util.Set;
 
 /**
@@ -22,7 +23,7 @@ public interface CustomParser extends Parser {
                        Set<String> toSearch, IndexFacts indexFacts, String defaultIndexName) {
         if (indexFacts == null)
             indexFacts = new IndexFacts();
-        return parse(queryToParse, filterToParse, parsingLanguage, indexFacts.newSession(toSearch, Set.of()), defaultIndexName);
+        return parse(queryToParse, filterToParse, parsingLanguage, indexFacts.newSession(toSearch, Collections.emptySet()), defaultIndexName);
     }
 
     Item parse(String queryToParse, String filterToParse, Language parsingLanguage,
diff --git a/container-search/src/main/java/com/yahoo/prelude/query/parser/Tokenizer.java b/container-search/src/main/java/com/yahoo/prelude/query/parser/Tokenizer.java
index 9952ec64d13..c1d415b8e27 100644
--- a/container-search/src/main/java/com/yahoo/prelude/query/parser/Tokenizer.java
+++ b/container-search/src/main/java/com/yahoo/prelude/query/parser/Tokenizer.java
@@ -8,6 +8,7 @@ import com.yahoo.prelude.Index;
 import com.yahoo.prelude.IndexFacts;
 import com.yahoo.prelude.query.Substring;
 
+import java.util.Collections;
 import java.util.List;
 
 import static com.yahoo.prelude.query.parser.Token.Kind.*;
@@ -62,7 +63,7 @@ public final class Tokenizer {
      * @return a read-only list of tokens. This list can only be used by this thread
      */
     public List<Token> tokenize(String string) {
-        return tokenize(string, new IndexFacts().newSession());
+        return tokenize(string, new IndexFacts().newSession(Collections.emptySet(), Collections.emptySet()));
     }
 
     /**
@@ -170,10 +171,13 @@ public final class Tokenizer {
         // this is a heuristic to check whether we probably have reached the end of an URL element
         for (int i = tokens.size() - 1; i >= 0; --i) {
             switch (tokens.get(i).kind) {
-                case COLON -> { if (i == indexLastExplicitlyChangedAt) return false; }
-                case SPACE -> { return true; }
-                default -> { }
-                // do nothing
+                case COLON:
+                    if (i == indexLastExplicitlyChangedAt) return false;
+                    break;
+                case SPACE:
+                    return true;
+                default:
+                    // do nothing
             }
         }
         // really not sure whether we should choose false instead, on cause of the guard at
diff --git a/container-search/src/test/java/com/yahoo/prelude/query/parser/test/TokenizerTestCase.java b/container-search/src/test/java/com/yahoo/prelude/query/parser/test/TokenizerTestCase.java
index 3a6be1521e2..1ff5574ec03 100644
--- a/container-search/src/test/java/com/yahoo/prelude/query/parser/test/TokenizerTestCase.java
+++ b/container-search/src/test/java/com/yahoo/prelude/query/parser/test/TokenizerTestCase.java
@@ -13,6 +13,7 @@ import com.yahoo.prelude.query.parser.Tokenizer;
 import org.junit.jupiter.api.Test;
 
 import java.util.ArrayList;
+import java.util.Collections;
 import java.util.List;
 
 import static com.yahoo.prelude.query.parser.Token.Kind.COLON;
@@ -28,9 +29,7 @@ import static com.yahoo.prelude.query.parser.Token.Kind.SPACE;
 import static com.yahoo.prelude.query.parser.Token.Kind.STAR;
 import static com.yahoo.prelude.query.parser.Token.Kind.UNDERSCORE;
 import static com.yahoo.prelude.query.parser.Token.Kind.WORD;
-import static org.junit.jupiter.api.Assertions.assertEquals;
-import static org.junit.jupiter.api.Assertions.assertFalse;
-import static org.junit.jupiter.api.Assertions.assertTrue;
+import static org.junit.jupiter.api.Assertions.*;
 
 /**
  * Tests the tokenizer
@@ -284,7 +283,7 @@ public class TokenizerTestCase {
         sd.addIndex(index2);
 
         IndexFacts facts = new IndexFacts(new IndexModel(sd));
-        IndexFacts.Session session = facts.newSession();
+        IndexFacts.Session session = facts.newSession(Collections.emptySet(), Collections.emptySet());
         Tokenizer tokenizer = new Tokenizer(new SimpleLinguistics());
         List<?> tokens = tokenizer.tokenize("normal a:b (normal testexact1:/,%#%&+-+ ) testexact2:ho_/&%&/()/aa*::*& b:c", "default", session);
         // tokenizer.print();
@@ -329,7 +328,7 @@ public class TokenizerTestCase {
 
         IndexFacts facts = new IndexFacts(new IndexModel(sd));
         Tokenizer tokenizer = new Tokenizer(new SimpleLinguistics());
-        IndexFacts.Session session = facts.newSession();
+        IndexFacts.Session session = facts.newSession(Collections.emptySet(), Collections.emptySet());
         List<?> tokens = tokenizer.tokenize("normal a:b (normal testexact1:/,%#%&+-+ ) testexact2:ho_/&%&/()/aa*::*&", session);
         assertEquals(new Token(WORD, "normal"), tokens.get(0));
         assertEquals(new Token(SPACE, " "), tokens.get(1));
@@ -366,7 +365,7 @@ public class TokenizerTestCase {
 
         IndexFacts facts = new IndexFacts(new IndexModel(sd));
         Tokenizer tokenizer = new Tokenizer(new SimpleLinguistics());
-        IndexFacts.Session session = facts.newSession();
+        IndexFacts.Session session = facts.newSession(Collections.emptySet(), Collections.emptySet());
         List<?> tokens = tokenizer.tokenize("normal a:b (normal testexact1:/,%#%&+-+ ) testexact2:ho_/&%&/()/aa*::*", session);
         assertEquals(new Token(WORD, "normal"), tokens.get(0));
         assertEquals(new Token(SPACE, " "), tokens.get(1));
@@ -403,7 +402,7 @@ public class TokenizerTestCase {
 
         IndexFacts facts = new IndexFacts(new IndexModel(sd));
         Tokenizer tokenizer = new Tokenizer(new SimpleLinguistics());
-        IndexFacts.Session session = facts.newSession();
+        IndexFacts.Session session = facts.newSession(Collections.emptySet(), Collections.emptySet());
         List<?> tokens = tokenizer.tokenize("normal a:b (normal testexact1:!/%#%&+-+ ) testexact2:ho_/&%&/()/aa*::*&b:", session);
         assertEquals(new Token(WORD, "normal"), tokens.get(0));
         assertEquals(new Token(SPACE, " "), tokens.get(1));
@@ -440,7 +439,7 @@ public class TokenizerTestCase {
         sd.addIndex(index2);
 
         IndexFacts indexFacts = new IndexFacts(new IndexModel(sd));
-        IndexFacts.Session facts = indexFacts.newSession();
+        IndexFacts.Session facts = indexFacts.newSession(Collections.emptySet(), Collections.emptySet());
 
         Tokenizer tokenizer = new Tokenizer(new SimpleLinguistics());
         List<?> tokens = tokenizer.tokenize("normal a:b (normal testexact1:foo) testexact2:bar", facts);
diff --git a/container-search/src/test/java/com/yahoo/prelude/test/IndexFactsTestCase.java b/container-search/src/test/java/com/yahoo/prelude/test/IndexFactsTestCase.java
index dbcb393c922..e6c5a18c9da 100644
--- a/container-search/src/test/java/com/yahoo/prelude/test/IndexFactsTestCase.java
+++ b/container-search/src/test/java/com/yahoo/prelude/test/IndexFactsTestCase.java
@@ -15,12 +15,8 @@ import org.junit.jupiter.api.Test;
 import java.util.Collection;
 import java.util.List;
 import java.util.Map;
-import java.util.Set;
 
-import static org.junit.jupiter.api.Assertions.assertEquals;
-import static org.junit.jupiter.api.Assertions.assertFalse;
-import static org.junit.jupiter.api.Assertions.assertNull;
-import static org.junit.jupiter.api.Assertions.assertTrue;
+import static org.junit.jupiter.api.Assertions.*;
 
 /**
  * Tests using synthetic index names for IndexFacts class.
@@ -184,7 +180,7 @@ public class IndexFactsTestCase {
         query.getModel().getSources().add("one");
         query.getModel().getRestrict().add("two");
 
-        IndexFacts.Session indexFacts = createIndexFacts().newSession(List.of("clusterOne"), Set.of());
+        IndexFacts.Session indexFacts = createIndexFacts().newSession(List.of("clusterOne"), List.of());
         assertTrue(indexFacts.isIndex("a"));
         assertFalse(indexFacts.isIndex("b"));
         assertTrue(indexFacts.isIndex("d"));
author	Bjørn Christian Seime <bjorncs@yahooinc.com>	2023-04-11 16:13:31 +0200
committer	Bjørn Christian Seime <bjorncs@yahooinc.com>	2023-04-11 16:13:31 +0200
commit	760169f47990044d9d767df527007e1f26a4f1cf (patch)
tree	d6c0b8348aa7c9c7d5362f58d931d9f54087730a
parent	724ab1e6c2852cd17ce0c0c01151309e34dcf18c (diff)