summary | refs | log | tree | commit | diff | stats
path: root/container-search/src/main/java/com/yahoo/prelude/IndexFacts.java
diff options
context:
space:
mode:
Diffstat (limited to 'container-search/src/main/java/com/yahoo/prelude/IndexFacts.java')
-rw-r--r-- container-search/src/main/java/com/yahoo/prelude/IndexFacts.java | 440
1 files changed, 440 insertions, 0 deletions
diff --git a/container-search/src/main/java/com/yahoo/prelude/IndexFacts.java b/container-search/src/main/java/com/yahoo/prelude/IndexFacts.java
new file mode 100644
index 00000000000..9a079c0d23b
--- /dev/null
+++ b/container-search/src/main/java/com/yahoo/prelude/IndexFacts.java
@@ -0,0 +1,440 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.prelude;
+
+
+import com.google.common.collect.ImmutableList;
+import com.yahoo.language.process.StemMode;
+import com.yahoo.search.Query;
+
+import java.util.*;
+
+import static com.yahoo.text.Lowercase.toLowerCase;
+
+/**
+ * A central repository for information about indices. Standard usage is
+ *
+ * <pre><code>
+ * IndexFacts.Session session = indexFacts.newSession(query); // once when starting to process a query
+ * session.getIndex(indexName).[get index info]
+ * </code></pre>
+ *
+ * @author <a href="mailto:steinar@yahoo-inc.com">Steinar Knutsen</a>
+ */
+// TODO: We should replace this with a better representation of search definitions
+// which is immutable, models clusters and search definitions inside clusters properly,
+// and uses better names.
+public class IndexFacts {
+
+ /**
+ * The inverse of the clusters map: cluster names indexed by the name of a document type they contain.
+ * This is null until clusters have been set.
+ */
+ private Map<String, List<String>> clusterByDocument;
+
+ /**
+  * The result of scanning a list of document type names for a known search
+  * definition: the definition found, and the list position to resume scanning from.
+  */
+ private static class DocumentTypeListOffset {
+
+     /** The search definition found by the scan */
+     public final SearchDefinition searchDefinition;
+
+     /** The position in the document type list just after the entry which was found */
+     public final int offset;
+
+     public DocumentTypeListOffset(int resumeAt, SearchDefinition found) {
+         this.searchDefinition = found;
+         this.offset = resumeAt;
+     }
+ }
+
+ /** A Map of all known search definitions indexed by name */
+ private Map<String, SearchDefinition> searchDefinitions = new LinkedHashMap<>();
+
+ /** A map of document types contained in each cluster indexed by cluster name */
+ private Map<String, List<String>> clusters = new LinkedHashMap<>();
+
+ /**
+ * The name of the default search definition, which is the union of all
+ * known document types.
+ */
+ public static final String unionName = "unionOfAllKnown";
+
+ /** A search definition which contains the union of all settings. */
+ private SearchDefinition unionSearchDefinition=new SearchDefinition(unionName);
+
+ /** Set to true by freeze(). When true, the methods which call ensureNotFrozen() throw IllegalStateException. */
+ private boolean frozen;
+
+ /** Whether this has (any) NGram indexes. Calculated at freeze time. */
+ private boolean hasNGramIndices;
+
+ /** Creates an instance with no search definitions and no clusters registered. */
+ public IndexFacts() {}
+
+ /**
+ * Creates an instance from the content of an index model.
+ * Search definitions are taken from the model only if both its search definitions
+ * and its union search definition are non-null. Master clusters are taken from
+ * the model only if they are non-null.
+ *
+ * @param indexModel the model to read search definitions and clusters from
+ */
+ @SuppressWarnings({"deprecation"})
+ public IndexFacts(IndexModel indexModel) {
+ if (indexModel.getSearchDefinitions() != null && indexModel.getUnionSearchDefinition() != null) {
+ setSearchDefinitions(indexModel.getSearchDefinitions(), indexModel.getUnionSearchDefinition());
+ }
+ if (indexModel.getMasterClusters() != null) {
+ setMasterClusters(indexModel.getMasterClusters());
+ }
+ }
+
+ /**
+  * Installs the given map of document types by cluster name, and rebuilds the
+  * inverse (clusters by document type) lookup from it.
+  * This does not check whether this instance is frozen.
+  */
+ private void setMasterClusters(Map<String, List<String>> clusters) {
+     // TODO: clusters should probably be a separate class
+     this.clusters = clusters;
+     this.clusterByDocument = invert(clusters);
+ }
+
+ /**
+  * Builds the inverse of a cluster map: each document type listed under a
+  * cluster becomes a key, mapped to the list of clusters it is listed under.
+  *
+  * @param clusters document type names indexed by cluster name
+  * @return a new map of cluster names indexed by document type name
+  */
+ private static Map<String, List<String>> invert(Map<String, List<String>> clusters) {
+     Map<String, List<String>> clustersByDocumentType = new HashMap<>();
+     for (Map.Entry<String, List<String>> cluster : clusters.entrySet()) {
+         String clusterName = cluster.getKey();
+         for (String documentType : cluster.getValue()) {
+             addEntry(clustersByDocumentType, documentType, clusterName);
+         }
+     }
+     return clustersByDocumentType;
+ }
+
+ /** Appends the value to the list stored under the key, creating that list if the key has none. */
+ private static void addEntry(Map<String, List<String>> result, String key, String value) {
+     List<String> existing = result.get(key);
+     if (existing != null) {
+         existing.add(value);
+         return;
+     }
+
+     List<String> created = new ArrayList<>();
+     created.add(value);
+     result.put(key, created);
+ }
+
+ /**
+  * Returns the names of the clusters which contain the given search definition.
+  * Assumes that document names are equal to the names of the search definitions that contain them.
+  *
+  * @param searchDefinitionName the name of the search definition (document type) to look up
+  * @return a read-only list of cluster names, which is empty if no clusters are set
+  *         or no cluster contains the given name
+  */
+ public List<String> clustersHavingSearchDefinition(String searchDefinitionName) {
+     if (clusterByDocument == null) return Collections.emptyList();
+
+     List<String> clusters = clusterByDocument.get(searchDefinitionName);
+     if (clusters == null) return Collections.emptyList();
+
+     // The list is internal state, so hand out a read-only view of it.
+     // This also makes the result consistently unmodifiable, as the empty results already are.
+     return Collections.unmodifiableList(clusters);
+ }
+
+ /**
+  * Replaces the map of document types by cluster name, and rebuilds the
+  * inverse lookup from it. Public only for testing.
+  *
+  * @throws IllegalStateException if this is frozen
+  */
+ public void setClusters(Map<String, List<String>> clusters) {
+     ensureNotFrozen();
+     setMasterClusters(clusters); // performs the assignment and the inversion
+ }
+
+ /**
+ * Replaces the search definitions and the union search definition of this.
+ * The given map is stored as-is, not copied.
+ *
+ * @param searchDefinitions the search definitions indexed by name
+ * @param unionSearchDefinition the search definition used when no document types are given
+ * @throws IllegalStateException if this is frozen
+ */
+ public void setSearchDefinitions(Map<String, SearchDefinition> searchDefinitions,
+ SearchDefinition unionSearchDefinition) {
+ ensureNotFrozen();
+ this.searchDefinitions = searchDefinitions;
+ this.unionSearchDefinition = unionSearchDefinition;
+ }
+
+ /** Returns whether at least one search definition is registered. */
+ private boolean isInitialized() {
+     return ! searchDefinitions.isEmpty();
+ }
+
+ /**
+  * Returns whether the given name is an index in the union search definition
+  * (if no document types are given), or in any known search definition among
+  * the given document types. Always returns true if no search definitions are registered.
+  */
+ private boolean isIndexFromDocumentTypes(String indexName, List<String> documentTypes) {
+     if ( ! isInitialized()) return true;
+
+     if (documentTypes.isEmpty())
+         return unionSearchDefinition.getIndex(indexName) != null;
+
+     // Visit each known search definition among the document types, in list order
+     for (DocumentTypeListOffset candidate = chooseSearchDefinition(documentTypes, 0);
+          candidate != null;
+          candidate = chooseSearchDefinition(documentTypes, candidate.offset)) {
+         if (candidate.searchDefinition.getIndex(indexName) != null) return true;
+     }
+     return false;
+ }
+
+ /**
+  * Returns the name of the first index found by looking up the lower cased form
+  * of the given name, in the union search definition (if no document types are
+  * given) or in the known search definitions among the given document types.
+  * Returns the given name unchanged if no search definitions are registered
+  * or no index is found.
+  */
+ private String getCanonicNameFromDocumentTypes(String indexName, List<String> documentTypes) {
+     if ( ! isInitialized()) return indexName;
+
+     if (documentTypes.isEmpty()) {
+         Index unionMatch = unionSearchDefinition.getIndexByLowerCase(toLowerCase(indexName));
+         if (unionMatch == null) return indexName;
+         return unionMatch.getName();
+     }
+
+     for (DocumentTypeListOffset candidate = chooseSearchDefinition(documentTypes, 0);
+          candidate != null;
+          candidate = chooseSearchDefinition(documentTypes, candidate.offset)) {
+         Index match = candidate.searchDefinition.getIndexByLowerCase(toLowerCase(indexName));
+         if (match != null) return match.getName();
+     }
+     return indexName;
+ }
+
+ /**
+  * Returns the index with the given name for the given document types,
+  * using the name "default" if the given name is null or empty.
+  */
+ private Index getIndexFromDocumentTypes(String indexName, List<String> documentTypes) {
+     boolean unnamed = indexName == null || indexName.isEmpty();
+     String effectiveName = unnamed ? "default" : indexName;
+     return getIndexByCanonicNameFromDocumentTypes(effectiveName, documentTypes);
+ }
+
+ /**
+  * Returns the index with the given name from the union search definition
+  * (if no document types are given), or from the first known search definition
+  * among the given document types which has it.
+  *
+  * @return the index found, or Index.nullIndex (never null) if no search
+  *         definitions are registered or no index is found
+  */
+ private Index getIndexByCanonicNameFromDocumentTypes(String canonicName, List<String> documentTypes) {
+     if ( ! isInitialized()) return Index.nullIndex;
+
+     if (documentTypes.isEmpty()) {
+         Index unionMatch = unionSearchDefinition.getIndex(canonicName);
+         return unionMatch != null ? unionMatch : Index.nullIndex;
+     }
+
+     for (DocumentTypeListOffset candidate = chooseSearchDefinition(documentTypes, 0);
+          candidate != null;
+          candidate = chooseSearchDefinition(documentTypes, candidate.offset)) {
+         Index match = candidate.searchDefinition.getIndex(canonicName);
+         if (match != null) return match;
+     }
+     return Index.nullIndex;
+ }
+
+ /**
+ * Resolves the document types of a query by calling
+ * resolveDocumentTypes(query.getModel().getSources(), query.getModel().getRestrict(), candidates)
+ * with the names of all known search definitions as the candidates.
+ */
+ private Set<String> resolveDocumentTypes(Query query) {
+ // Assumption: Search definition name equals document name.
+ return resolveDocumentTypes(query.getModel().getSources(), query.getModel().getRestrict(),
+ searchDefinitions.keySet());
+ }
+
+ /**
+  * Given a search list which is a mixture of document types and cluster
+  * names, and a restrict list which is a list of document types, returns the
+  * set of all valid document types for this combination. Most use-cases for
+  * fetching index settings will involve calling this method with the
+  * incoming query's {@link com.yahoo.search.query.Model#getSources()} and
+  * {@link com.yahoo.search.query.Model#getRestrict()} as input parameters
+  * before calling any other method of this class.
+  * <p>
+  * If there are no sources, this returns the restrict list as a sorted set if
+  * it is non-empty, and otherwise the candidate set itself (not a copy).
+  *
+  * @param sources the search list for a query, may be null
+  * @param restrict the restrict list for a query, may be null
+  * @param candidateDocumentTypes the document types which sources are allowed to resolve to
+  * @return a (possibly empty) set of valid document types
+  */
+ private Set<String> resolveDocumentTypes(Collection<String> sources, Collection<String> restrict,
+                                          Set<String> candidateDocumentTypes) {
+     sources = emptyCollectionIfNull(sources);
+     restrict = emptyCollectionIfNull(restrict);
+
+     if (sources.isEmpty()) {
+         if (restrict.isEmpty()) return candidateDocumentTypes;
+         return new TreeSet<>(restrict);
+     }
+
+     Set<String> resolved = new TreeSet<>();
+     for (String source : sources) {
+         // A source is the name of a cluster containing document types, or else a document type itself
+         List<String> clusterDocumentTypes = clusters.get(source);
+         List<String> named = clusterDocumentTypes != null ? clusterDocumentTypes
+                                                           : Collections.singletonList(source);
+         for (String documentType : named) {
+             if (candidateDocumentTypes.contains(documentType))
+                 resolved.add(documentType);
+         }
+     }
+
+     if ( ! restrict.isEmpty())
+         resolved.retainAll(restrict);
+     return resolved;
+ }
+
+ /** Returns the given collection, or an empty list if it is null. */
+ private Collection<String> emptyCollectionIfNull(Collection<String> collection) {
+     if (collection != null) return collection;
+     return Collections.<String>emptyList();
+ }
+
+ /**
+  * Finds the first document type at or after the given position which names
+  * a known search definition.
+  *
+  * @param documentTypes the document type names to scan
+  * @param index the position to start scanning from
+  * @return the search definition found, together with the position following
+  *         it, or null if no known search definition is found
+  */
+ private DocumentTypeListOffset chooseSearchDefinition(List<String> documentTypes, int index) {
+     for (int position = index; position < documentTypes.size(); position++) {
+         SearchDefinition known = searchDefinitions.get(documentTypes.get(position));
+         if (known != null)
+             return new DocumentTypeListOffset(position + 1, known);
+     }
+     return null;
+ }
+
+ /**
+ * Freeze this to prevent further changes: Caches whether this has any NGram index,
+ * and makes the methods which call ensureNotFrozen() throw from now on.
+ */
+ public void freeze() {
+ // This must be done before setting frozen, as hasNGramIndices() returns the cached value once frozen
+ hasNGramIndices = hasNGramIndices();
+ // TODO: Freeze content!
+ frozen = true;
+ }
+
+ /**
+  * Returns whether any search definition of this contains an index which has isNGram()==true.
+  * This is free to ask on a frozen instance, as the value computed at freeze time is returned.
+  */
+ public boolean hasNGramIndices() {
+     if (frozen) return hasNGramIndices;
+
+     for (SearchDefinition definition : searchDefinitions.values()) {
+         for (Index index : definition.indices().values()) {
+             if (index.isNGram()) return true;
+         }
+     }
+     return false;
+ }
+
+ /**
+ * @return true if this is frozen, in which case it is no longer permissible to update this object
+ */
+ public boolean isFrozen() {
+ return frozen;
+ }
+
+ /**
+  * Guards a modifying operation.
+  *
+  * @throws IllegalStateException if this is frozen
+  */
+ private void ensureNotFrozen() {
+     if ( ! frozen) return;
+     throw new IllegalStateException("Tried to modify frozen IndexFacts instance.");
+ }
+
+
+ /**
+  * Adds a string to be accepted as an index name when parsing a query.
+  * The index is created if missing, both in the named search definition
+  * (which is itself created if unknown) and in the union search definition.
+  *
+  * For testing only.
+  *
+  * @param sdName name of search definition containing index, if null, modify default set
+  * @param indexName name of index, actual or otherwise
+  * @throws IllegalStateException if this is frozen
+  */
+ public void addIndex(String sdName, String indexName) {
+     ensureNotFrozen();
+
+     SearchDefinition target = unionSearchDefinition;
+     if (sdName != null) {
+         if ( ! searchDefinitions.containsKey(sdName))
+             searchDefinitions.put(sdName, new SearchDefinition(sdName));
+         target = searchDefinitions.get(sdName);
+     }
+
+     target.getOrCreateIndex(indexName);
+     unionSearchDefinition.getOrCreateIndex(indexName);
+ }
+
+ /**
+  * Adds an index to the specified search definition (which is created if unknown),
+  * and to the union search definition holding the default index settings.
+  * This is intended to override any current settings for this index; whether it
+  * does depends on SearchDefinition.addIndex.
+  *
+  * @param sdName name of the search definition to add to, if null only the union is modified
+  * @param index the index to add
+  * @throws IllegalStateException if this is frozen
+  */
+ public void addIndex(String sdName, Index index) {
+     ensureNotFrozen();
+
+     SearchDefinition target = unionSearchDefinition;
+     if (sdName != null) {
+         if ( ! searchDefinitions.containsKey(sdName))
+             searchDefinitions.put(sdName, new SearchDefinition(sdName));
+         target = searchDefinitions.get(sdName);
+     }
+
+     target.addIndex(index);
+     unionSearchDefinition.addIndex(index);
+ }
+
+ /**
+  * Returns the default position of a search definition.
+  *
+  * @param sdName the name of the search definition, or null to use the union search definition
+  * @return the default position of the search definition, or null if the name is not
+  *         the name of a known search definition
+  */
+ public String getDefaultPosition(String sdName) {
+     if (sdName == null) return unionSearchDefinition.getDefaultPosition();
+     if ( ! searchDefinitions.containsKey(sdName)) return null;
+     return searchDefinitions.get(sdName).getDefaultPosition();
+ }
+
+ /**
+ * Creates a session whose document types are resolved from the sources and
+ * restrict lists of the model of the given query.
+ */
+ public Session newSession(Query query) {
+ return new Session(query);
+ }
+
+ /**
+ * Creates a session whose document types are resolved from the given sources and
+ * restrict lists, with the names of all known search definitions as candidates.
+ */
+ public Session newSession(Collection<String> sources, Collection<String> restrict) {
+ return new Session(sources, restrict);
+ }
+
+ /**
+ * Creates a session whose document types are resolved from the given sources and
+ * restrict lists, with the given document types as the candidates to resolve to.
+ */
+ public Session newSession(Collection<String> sources, Collection<String> restrict,
+ Set<String> candidateDocumentTypes) {
+ return new Session(sources, restrict, candidateDocumentTypes);
+ }
+
+ /**
+  * Create an instance of this to look up index facts with a given query.
+  * The document types are resolved once, at construction, so if the
+  * model.source or model.restrict parameters of the query are changed,
+  * another session should be created. This is immutable.
+  */
+ public class Session {
+
+     private final List<String> documentTypes;
+
+     private Session(Query query) {
+         Set<String> resolved = resolveDocumentTypes(query);
+         documentTypes = ImmutableList.copyOf(resolved);
+     }
+
+     private Session(Collection<String> sources, Collection<String> restrict) {
+         // Assumption: Search definition name equals document name.
+         Set<String> resolved = resolveDocumentTypes(sources, restrict, searchDefinitions.keySet());
+         documentTypes = ImmutableList.copyOf(resolved);
+     }
+
+     private Session(Collection<String> sources, Collection<String> restrict, Set<String> candidateDocumentTypes) {
+         Set<String> resolved = resolveDocumentTypes(sources, restrict, candidateDocumentTypes);
+         documentTypes = ImmutableList.copyOf(resolved);
+     }
+
+     /**
+      * Returns the index for this name, looked up in the document types of this session.
+      *
+      * @param indexName the name of the index. If this is null or empty the index
+      *                  named "default" is returned
+      * @return the index best matching the input parameters or the nullIndex
+      *         (never null) if none is found
+      */
+     public Index getIndex(String indexName) {
+         return getIndexFromDocumentTypes(indexName, documentTypes);
+     }
+
+     /** Returns the index with the given name from the given document type, ignoring the document types of this session */
+     // Note: This does not take the context into account currently.
+     // Ideally, we should be able to resolve the right search definition name
+     // in the context of the searched clusters, but this cannot be modelled
+     // currently by the flat structure in IndexFacts.
+     // That can be fixed without changing this API.
+     public Index getIndex(String indexName, String documentType) {
+         List<String> onlyThisType = Collections.singletonList(documentType);
+         return getIndexFromDocumentTypes(indexName, onlyThisType);
+     }
+
+     /**
+      * Returns the canonical form of the index name (which may be the same as
+      * the input).
+      *
+      * @param indexName index name or alias
+      */
+     public String getCanonicName(String indexName) {
+         return getCanonicNameFromDocumentTypes(indexName, documentTypes);
+     }
+
+     /**
+      * Returns whether the given name is an index.
+      *
+      * @param indexName index name candidate
+      */
+     public boolean isIndex(String indexName) {
+         return isIndexFromDocumentTypes(indexName, documentTypes);
+     }
+
+     /** Returns an immutable list of the document types this has resolved to */
+     public List<String> documentTypes() {
+         return documentTypes;
+     }
+
+     @Override
+     public String toString() {
+         return "index facts for search definitions " + documentTypes;
+     }
+
+ }
+
+}