summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Bjørn Christian Seime <bjorncs@yahooinc.com> 2023-04-11 16:13:31 +0200
committer: Bjørn Christian Seime <bjorncs@yahooinc.com> 2023-04-11 16:13:31 +0200
commit: 760169f47990044d9d767df527007e1f26a4f1cf (patch)
tree: d6c0b8348aa7c9c7d5362f58d931d9f54087730a
parent: 724ab1e6c2852cd17ce0c0c01151309e34dcf18c (diff)
Revert "- HashMap over TreeMap when order des not matter."
This reverts commit b1733875a7303d71abfe384da2d6589af742d779.
-rw-r--r-- container-search/src/main/java/com/yahoo/prelude/IndexFacts.java | 98
-rw-r--r-- container-search/src/main/java/com/yahoo/prelude/cluster/ClusterSearcher.java | 3
-rw-r--r-- container-search/src/main/java/com/yahoo/prelude/query/parser/CustomParser.java | 3
-rw-r--r-- container-search/src/main/java/com/yahoo/prelude/query/parser/Tokenizer.java | 14
-rw-r--r-- container-search/src/test/java/com/yahoo/prelude/query/parser/test/TokenizerTestCase.java | 15
-rw-r--r-- container-search/src/test/java/com/yahoo/prelude/test/IndexFactsTestCase.java | 8
6 files changed, 84 insertions, 57 deletions
diff --git a/container-search/src/main/java/com/yahoo/prelude/IndexFacts.java b/container-search/src/main/java/com/yahoo/prelude/IndexFacts.java
index 88a37ea5a02..92ce6abb319 100644
--- a/container-search/src/main/java/com/yahoo/prelude/IndexFacts.java
+++ b/container-search/src/main/java/com/yahoo/prelude/IndexFacts.java
@@ -6,11 +6,11 @@ import com.yahoo.search.Query;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
-import java.util.HashSet;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;
+import java.util.TreeSet;
import static com.yahoo.text.Lowercase.toLowerCase;
@@ -32,6 +32,16 @@ public class IndexFacts {
private Map<String, List<String>> clusterByDocument;
+ private static class DocumentTypeListOffset {
+ public final int offset;
+ public final SearchDefinition searchDefinition;
+
+ public DocumentTypeListOffset(int offset, SearchDefinition searchDefinition) {
+ this.offset = offset;
+ this.searchDefinition = searchDefinition;
+ }
+ }
+
/** A Map of all known search definitions indexed by name */
private Map<String, SearchDefinition> searchDefinitions = new LinkedHashMap<>();
@@ -100,32 +110,34 @@ public class IndexFacts {
private boolean isIndexFromDocumentTypes(String indexName, List<String> documentTypes) {
if ( ! isInitialized()) return true;
- if (documentTypes.isEmpty()) return unionSearchDefinition.getIndex(indexName) != null;
+ if (documentTypes.isEmpty()) {
+ return unionSearchDefinition.getIndex(indexName) != null;
+ }
- for (String docName : documentTypes) {
- SearchDefinition sd = searchDefinitions.get(docName);
- if (sd != null) {
- Index index = sd.getIndex(indexName);
- if (index != null) return true;
+ DocumentTypeListOffset sd = chooseSearchDefinition(documentTypes, 0);
+ while (sd != null) {
+ Index index = sd.searchDefinition.getIndex(indexName);
+ if (index != null) {
+ return true;
}
+ sd = chooseSearchDefinition(documentTypes, sd.offset);
}
+
return false;
}
private String getCanonicNameFromDocumentTypes(String indexName, List<String> documentTypes) {
if (!isInitialized()) return indexName;
- String lowerCased = toLowerCase(indexName);
if (documentTypes.isEmpty()) {
- Index index = unionSearchDefinition.getIndexByLowerCase(lowerCased);
+ Index index = unionSearchDefinition.getIndexByLowerCase(toLowerCase(indexName));
return index == null ? indexName : index.getName();
}
- for (String docName : documentTypes) {
- SearchDefinition sd = searchDefinitions.get(docName);
- if (sd != null) {
- Index index = sd.getIndexByLowerCase(lowerCased);
- if (index != null) return index.getName();
- }
+ DocumentTypeListOffset sd = chooseSearchDefinition(documentTypes, 0);
+ while (sd != null) {
+ Index index = sd.searchDefinition.getIndexByLowerCase(toLowerCase(indexName));
+ if (index != null) return index.getName();
+ sd = chooseSearchDefinition(documentTypes, sd.offset);
}
return indexName;
}
@@ -146,12 +158,13 @@ public class IndexFacts {
return index;
}
- for (String docName : documentTypes) {
- SearchDefinition sd = searchDefinitions.get(docName);
- if (sd != null) {
- Index index = sd.getIndex(canonicName);
- if (index != null) return index;
- }
+ DocumentTypeListOffset sd = chooseSearchDefinition(documentTypes, 0);
+
+ while (sd != null) {
+ Index index = sd.searchDefinition.getIndex(canonicName);
+
+ if (index != null) return index;
+ sd = chooseSearchDefinition(documentTypes, sd.offset);
}
return Index.nullIndex;
}
@@ -174,7 +187,7 @@ public class IndexFacts {
* Given a search list which is a mixture of document types and cluster
* names, and a restrict list which is a list of document types, return a
* set of all valid document types for this combination. Most use-cases for
- * fetching index settings will involve calling this method with the
+ * fetching index settings will involve calling this method with the the
* incoming query's {@link com.yahoo.search.query.Model#getSources()} and
* {@link com.yahoo.search.query.Model#getRestrict()} as input parameters
* before calling any other method of this class.
@@ -183,20 +196,20 @@ public class IndexFacts {
* @param restrict the restrict list for a query
* @return a (possibly empty) set of valid document types
*/
- private Set<String> resolveDocumentTypes(Collection<String> sources, Set<String> restrict,
+ private Set<String> resolveDocumentTypes(Collection<String> sources, Collection<String> restrict,
Set<String> candidateDocumentTypes) {
sources = emptyCollectionIfNull(sources);
- restrict = emptySetIfNull(restrict);
+ restrict = emptyCollectionIfNull(restrict);
if (sources.isEmpty()) {
if ( ! restrict.isEmpty()) {
- return Set.copyOf(restrict);
+ return new TreeSet<>(restrict);
} else {
return candidateDocumentTypes;
}
}
- Set<String> toSearch = new HashSet<>();
+ Set<String> toSearch = new TreeSet<>();
for (String source : sources) { // source: a document type or a cluster containing them
List<String> clusterDocTypes = clusters.get(source);
if (clusterDocTypes == null) { // source was a document type
@@ -222,8 +235,21 @@ public class IndexFacts {
private Collection<String> emptyCollectionIfNull(Collection<String> collection) {
return collection == null ? List.of() : collection;
}
- private Set<String> emptySetIfNull(Set<String> collection) {
- return collection == null ? Set.of() : collection;
+
+ /**
+ * Chooses the correct search definition, default if in doubt.
+ *
+ * @return the search definition to use
+ */
+ private DocumentTypeListOffset chooseSearchDefinition(List<String> documentTypes, int index) {
+ while (index < documentTypes.size()) {
+ String docName = documentTypes.get(index++);
+ SearchDefinition sd = searchDefinitions.get(docName);
+ if (sd != null) {
+ return new DocumentTypeListOffset(index, sd);
+ }
+ }
+ return null;
}
/**
@@ -253,6 +279,10 @@ public class IndexFacts {
return frozen;
}
+ private void ensureNotFrozen() {
+ if (frozen) throw new IllegalStateException("Tried to modify frozen IndexFacts instance.");
+ }
+
public String getDefaultPosition(String sdName) {
SearchDefinition sd;
if (sdName == null) {
@@ -270,16 +300,12 @@ public class IndexFacts {
return new Session(query);
}
- public Session newSession() {
- return new Session(Set.of(), Set.of());
- }
-
- public Session newSession(Collection<String> sources, Set<String> restrict) {
+ public Session newSession(Collection<String> sources, Collection<String> restrict) {
return new Session(sources, restrict);
}
public Session newSession(Collection<String> sources,
- Set<String> restrict,
+ Collection<String> restrict,
Set<String> candidateDocumentTypes) {
return new Session(sources, restrict, candidateDocumentTypes);
}
@@ -297,12 +323,12 @@ public class IndexFacts {
documentTypes = List.copyOf(resolveDocumentTypes(query));
}
- private Session(Collection<String> sources, Set<String> restrict) {
+ private Session(Collection<String> sources, Collection<String> restrict) {
// Assumption: Search definition name equals document name.
documentTypes = List.copyOf(resolveDocumentTypes(sources, restrict, searchDefinitions.keySet()));
}
- private Session(Collection<String> sources, Set<String> restrict, Set<String> candidateDocumentTypes) {
+ private Session(Collection<String> sources, Collection<String> restrict, Set<String> candidateDocumentTypes) {
documentTypes = List.copyOf(resolveDocumentTypes(sources, restrict, candidateDocumentTypes));
}
diff --git a/container-search/src/main/java/com/yahoo/prelude/cluster/ClusterSearcher.java b/container-search/src/main/java/com/yahoo/prelude/cluster/ClusterSearcher.java
index 0bda1ce75ad..46332d632fe 100644
--- a/container-search/src/main/java/com/yahoo/prelude/cluster/ClusterSearcher.java
+++ b/container-search/src/main/java/com/yahoo/prelude/cluster/ClusterSearcher.java
@@ -29,6 +29,7 @@ import com.yahoo.yolean.Exceptions;
import java.util.ArrayList;
import java.util.Collection;
+import java.util.Collections;
import java.util.HashSet;
import java.util.LinkedHashSet;
import java.util.List;
@@ -305,7 +306,7 @@ public class ClusterSearcher extends Searcher {
Set<String> sources = query.getModel().getSources();
return (sources == null || sources.isEmpty())
? schemas
- : new HashSet<>(indexFacts.newSession(sources, Set.of(), schemas).documentTypes());
+ : new HashSet<>(indexFacts.newSession(sources, Collections.emptyList(), schemas).documentTypes());
} else {
return filterValidDocumentTypes(restrict);
}
diff --git a/container-search/src/main/java/com/yahoo/prelude/query/parser/CustomParser.java b/container-search/src/main/java/com/yahoo/prelude/query/parser/CustomParser.java
index 2bd408220cd..e3b2278475b 100644
--- a/container-search/src/main/java/com/yahoo/prelude/query/parser/CustomParser.java
+++ b/container-search/src/main/java/com/yahoo/prelude/query/parser/CustomParser.java
@@ -6,6 +6,7 @@ import com.yahoo.prelude.IndexFacts;
import com.yahoo.prelude.query.Item;
import com.yahoo.search.query.parser.Parser;
+import java.util.Collections;
import java.util.Set;
/**
@@ -22,7 +23,7 @@ public interface CustomParser extends Parser {
Set<String> toSearch, IndexFacts indexFacts, String defaultIndexName) {
if (indexFacts == null)
indexFacts = new IndexFacts();
- return parse(queryToParse, filterToParse, parsingLanguage, indexFacts.newSession(toSearch, Set.of()), defaultIndexName);
+ return parse(queryToParse, filterToParse, parsingLanguage, indexFacts.newSession(toSearch, Collections.emptySet()), defaultIndexName);
}
Item parse(String queryToParse, String filterToParse, Language parsingLanguage,
diff --git a/container-search/src/main/java/com/yahoo/prelude/query/parser/Tokenizer.java b/container-search/src/main/java/com/yahoo/prelude/query/parser/Tokenizer.java
index 9952ec64d13..c1d415b8e27 100644
--- a/container-search/src/main/java/com/yahoo/prelude/query/parser/Tokenizer.java
+++ b/container-search/src/main/java/com/yahoo/prelude/query/parser/Tokenizer.java
@@ -8,6 +8,7 @@ import com.yahoo.prelude.Index;
import com.yahoo.prelude.IndexFacts;
import com.yahoo.prelude.query.Substring;
+import java.util.Collections;
import java.util.List;
import static com.yahoo.prelude.query.parser.Token.Kind.*;
@@ -62,7 +63,7 @@ public final class Tokenizer {
* @return a read-only list of tokens. This list can only be used by this thread
*/
public List<Token> tokenize(String string) {
- return tokenize(string, new IndexFacts().newSession());
+ return tokenize(string, new IndexFacts().newSession(Collections.emptySet(), Collections.emptySet()));
}
/**
@@ -170,10 +171,13 @@ public final class Tokenizer {
// this is a heuristic to check whether we probably have reached the end of an URL element
for (int i = tokens.size() - 1; i >= 0; --i) {
switch (tokens.get(i).kind) {
- case COLON -> { if (i == indexLastExplicitlyChangedAt) return false; }
- case SPACE -> { return true; }
- default -> { }
- // do nothing
+ case COLON:
+ if (i == indexLastExplicitlyChangedAt) return false;
+ break;
+ case SPACE:
+ return true;
+ default:
+ // do nothing
}
}
// really not sure whether we should choose false instead, on cause of the guard at
diff --git a/container-search/src/test/java/com/yahoo/prelude/query/parser/test/TokenizerTestCase.java b/container-search/src/test/java/com/yahoo/prelude/query/parser/test/TokenizerTestCase.java
index 3a6be1521e2..1ff5574ec03 100644
--- a/container-search/src/test/java/com/yahoo/prelude/query/parser/test/TokenizerTestCase.java
+++ b/container-search/src/test/java/com/yahoo/prelude/query/parser/test/TokenizerTestCase.java
@@ -13,6 +13,7 @@ import com.yahoo.prelude.query.parser.Tokenizer;
import org.junit.jupiter.api.Test;
import java.util.ArrayList;
+import java.util.Collections;
import java.util.List;
import static com.yahoo.prelude.query.parser.Token.Kind.COLON;
@@ -28,9 +29,7 @@ import static com.yahoo.prelude.query.parser.Token.Kind.SPACE;
import static com.yahoo.prelude.query.parser.Token.Kind.STAR;
import static com.yahoo.prelude.query.parser.Token.Kind.UNDERSCORE;
import static com.yahoo.prelude.query.parser.Token.Kind.WORD;
-import static org.junit.jupiter.api.Assertions.assertEquals;
-import static org.junit.jupiter.api.Assertions.assertFalse;
-import static org.junit.jupiter.api.Assertions.assertTrue;
+import static org.junit.jupiter.api.Assertions.*;
/**
* Tests the tokenizer
@@ -284,7 +283,7 @@ public class TokenizerTestCase {
sd.addIndex(index2);
IndexFacts facts = new IndexFacts(new IndexModel(sd));
- IndexFacts.Session session = facts.newSession();
+ IndexFacts.Session session = facts.newSession(Collections.emptySet(), Collections.emptySet());
Tokenizer tokenizer = new Tokenizer(new SimpleLinguistics());
List<?> tokens = tokenizer.tokenize("normal a:b (normal testexact1:/,%#%&+-+ ) testexact2:ho_/&%&/()/aa*::*& b:c", "default", session);
// tokenizer.print();
@@ -329,7 +328,7 @@ public class TokenizerTestCase {
IndexFacts facts = new IndexFacts(new IndexModel(sd));
Tokenizer tokenizer = new Tokenizer(new SimpleLinguistics());
- IndexFacts.Session session = facts.newSession();
+ IndexFacts.Session session = facts.newSession(Collections.emptySet(), Collections.emptySet());
List<?> tokens = tokenizer.tokenize("normal a:b (normal testexact1:/,%#%&+-+ ) testexact2:ho_/&%&/()/aa*::*&", session);
assertEquals(new Token(WORD, "normal"), tokens.get(0));
assertEquals(new Token(SPACE, " "), tokens.get(1));
@@ -366,7 +365,7 @@ public class TokenizerTestCase {
IndexFacts facts = new IndexFacts(new IndexModel(sd));
Tokenizer tokenizer = new Tokenizer(new SimpleLinguistics());
- IndexFacts.Session session = facts.newSession();
+ IndexFacts.Session session = facts.newSession(Collections.emptySet(), Collections.emptySet());
List<?> tokens = tokenizer.tokenize("normal a:b (normal testexact1:/,%#%&+-+ ) testexact2:ho_/&%&/()/aa*::*", session);
assertEquals(new Token(WORD, "normal"), tokens.get(0));
assertEquals(new Token(SPACE, " "), tokens.get(1));
@@ -403,7 +402,7 @@ public class TokenizerTestCase {
IndexFacts facts = new IndexFacts(new IndexModel(sd));
Tokenizer tokenizer = new Tokenizer(new SimpleLinguistics());
- IndexFacts.Session session = facts.newSession();
+ IndexFacts.Session session = facts.newSession(Collections.emptySet(), Collections.emptySet());
List<?> tokens = tokenizer.tokenize("normal a:b (normal testexact1:!/%#%&+-+ ) testexact2:ho_/&%&/()/aa*::*&b:", session);
assertEquals(new Token(WORD, "normal"), tokens.get(0));
assertEquals(new Token(SPACE, " "), tokens.get(1));
@@ -440,7 +439,7 @@ public class TokenizerTestCase {
sd.addIndex(index2);
IndexFacts indexFacts = new IndexFacts(new IndexModel(sd));
- IndexFacts.Session facts = indexFacts.newSession();
+ IndexFacts.Session facts = indexFacts.newSession(Collections.emptySet(), Collections.emptySet());
Tokenizer tokenizer = new Tokenizer(new SimpleLinguistics());
List<?> tokens = tokenizer.tokenize("normal a:b (normal testexact1:foo) testexact2:bar", facts);
diff --git a/container-search/src/test/java/com/yahoo/prelude/test/IndexFactsTestCase.java b/container-search/src/test/java/com/yahoo/prelude/test/IndexFactsTestCase.java
index dbcb393c922..e6c5a18c9da 100644
--- a/container-search/src/test/java/com/yahoo/prelude/test/IndexFactsTestCase.java
+++ b/container-search/src/test/java/com/yahoo/prelude/test/IndexFactsTestCase.java
@@ -15,12 +15,8 @@ import org.junit.jupiter.api.Test;
import java.util.Collection;
import java.util.List;
import java.util.Map;
-import java.util.Set;
-import static org.junit.jupiter.api.Assertions.assertEquals;
-import static org.junit.jupiter.api.Assertions.assertFalse;
-import static org.junit.jupiter.api.Assertions.assertNull;
-import static org.junit.jupiter.api.Assertions.assertTrue;
+import static org.junit.jupiter.api.Assertions.*;
/**
* Tests using synthetic index names for IndexFacts class.
@@ -184,7 +180,7 @@ public class IndexFactsTestCase {
query.getModel().getSources().add("one");
query.getModel().getRestrict().add("two");
- IndexFacts.Session indexFacts = createIndexFacts().newSession(List.of("clusterOne"), Set.of());
+ IndexFacts.Session indexFacts = createIndexFacts().newSession(List.of("clusterOne"), List.of());
assertTrue(indexFacts.isIndex("a"));
assertFalse(indexFacts.isIndex("b"));
assertTrue(indexFacts.isIndex("d"));