// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. package com.yahoo.prelude; import com.yahoo.search.Query; import java.util.ArrayList; import java.util.Collection; import java.util.HashMap; import java.util.LinkedHashMap; import java.util.List; import java.util.Map; import java.util.Set; import java.util.TreeSet; import static com.yahoo.text.Lowercase.toLowerCase; /** * A central repository for information about indices. Standard usage is * *

 * IndexFacts.Session session = indexFacts.newSession(query); // once when starting to process a query
 * session.getIndex(indexName).[get index info]
 * 
* * @author Steinar Knutsen */ // TODO: We should replace this with a better representation of search definitions // which is immutable, models clusters and search definitions inside clusters properly, // and uses better names. -bratseth public class IndexFacts { private Map> clusterByDocument; private static class DocumentTypeListOffset { public final int offset; public final SearchDefinition searchDefinition; public DocumentTypeListOffset(int offset, SearchDefinition searchDefinition) { this.offset = offset; this.searchDefinition = searchDefinition; } } /** A Map of all known search definitions indexed by name */ private Map searchDefinitions = new LinkedHashMap<>(); /** A map of document types contained in each cluster indexed by cluster name */ private Map> clusters = new LinkedHashMap<>(); /** * The name of the default search definition, which is the union of all * known document types. */ static final String unionName = "unionOfAllKnown"; /** A search definition which contains the union of all settings. */ private SearchDefinition unionSearchDefinition = new SearchDefinition(unionName); private boolean frozen; /** Whether this has (any) NGram indexes. Calculated at freeze time. */ private boolean hasNGramIndices; public IndexFacts() {} public IndexFacts(IndexModel indexModel) { if (indexModel.getSearchDefinitions() != null) { this.searchDefinitions = indexModel.getSearchDefinitions(); this.unionSearchDefinition = indexModel.getUnionSearchDefinition(); } if (indexModel.getMasterClusters() != null) { setMasterClusters(indexModel.getMasterClusters()); } } private void setMasterClusters(Map> clusters) { // TODO: clusters should probably be a separate class this.clusters = clusters; clusterByDocument = invert(clusters); } private static Map> invert(Map> clusters) { Map> result = new HashMap<>(); for (Map.Entry> entry : clusters.entrySet()) { for (String value : entry.getValue()) { addEntry(result, value, entry.getKey()); } } return result; } private static void addEntry(Map> result, String key, String value) { List values = result.computeIfAbsent(key, k -> new ArrayList<>()); values.add(value); } // Assumes that document names are equal to the search definition that contain them. public List clustersHavingSearchDefinition(String searchDefinitionName) { if (clusterByDocument == null) return List.of(); List clusters = clusterByDocument.get(searchDefinitionName); return clusters != null ? clusters : List.of(); } private boolean isInitialized() { return searchDefinitions.size() > 0; } private boolean isIndexFromDocumentTypes(String indexName, List documentTypes) { if ( ! isInitialized()) return true; if (documentTypes.isEmpty()) { return unionSearchDefinition.getIndex(indexName) != null; } DocumentTypeListOffset sd = chooseSearchDefinition(documentTypes, 0); while (sd != null) { Index index = sd.searchDefinition.getIndex(indexName); if (index != null) { return true; } sd = chooseSearchDefinition(documentTypes, sd.offset); } return false; } private String getCanonicNameFromDocumentTypes(String indexName, List documentTypes) { if (!isInitialized()) return indexName; if (documentTypes.isEmpty()) { Index index = unionSearchDefinition.getIndexByLowerCase(toLowerCase(indexName)); return index == null ? indexName : index.getName(); } DocumentTypeListOffset sd = chooseSearchDefinition(documentTypes, 0); while (sd != null) { Index index = sd.searchDefinition.getIndexByLowerCase(toLowerCase(indexName)); if (index != null) return index.getName(); sd = chooseSearchDefinition(documentTypes, sd.offset); } return indexName; } private Index getIndexFromDocumentTypes(String indexName, List documentTypes) { if (indexName == null || indexName.isEmpty()) indexName = "default"; return getIndexByCanonicNameFromDocumentTypes(indexName, documentTypes); } private Index getIndexByCanonicNameFromDocumentTypes(String canonicName, List documentTypes) { if ( ! isInitialized()) return Index.nullIndex; if (documentTypes.isEmpty()) { Index index = unionSearchDefinition.getIndex(canonicName); if (index == null) return Index.nullIndex; return index; } DocumentTypeListOffset sd = chooseSearchDefinition(documentTypes, 0); while (sd != null) { Index index = sd.searchDefinition.getIndex(canonicName); if (index != null) return index; sd = chooseSearchDefinition(documentTypes, sd.offset); } return Index.nullIndex; } private Collection getIndexes(String documentType) { if ( ! isInitialized()) return List.of(); SearchDefinition sd = searchDefinitions.get(documentType); if (sd == null) return List.of(); return sd.indices().values(); } /** Calls resolveDocumentTypes(query.getModel().getSources(), query.getModel().getRestrict()) */ private Set resolveDocumentTypes(Query query) { // Assumption: Search definition name equals document name. return resolveDocumentTypes(query.getModel().getSources(), query.getModel().getRestrict(), searchDefinitions.keySet()); } /** * Given a search list which is a mixture of document types and cluster * names, and a restrict list which is a list of document types, return a * set of all valid document types for this combination. Most use-cases for * fetching index settings will involve calling this method with the the * incoming query's {@link com.yahoo.search.query.Model#getSources()} and * {@link com.yahoo.search.query.Model#getRestrict()} as input parameters * before calling any other method of this class. * * @param sources the search list for a query * @param restrict the restrict list for a query * @return a (possibly empty) set of valid document types */ private Set resolveDocumentTypes(Collection sources, Collection restrict, Set candidateDocumentTypes) { sources = emptyCollectionIfNull(sources); restrict = emptyCollectionIfNull(restrict); if (sources.isEmpty()) { if ( ! restrict.isEmpty()) { return new TreeSet<>(restrict); } else { return candidateDocumentTypes; } } Set toSearch = new TreeSet<>(); for (String source : sources) { // source: a document type or a cluster containing them List clusterDocTypes = clusters.get(source); if (clusterDocTypes == null) { // source was a document type if (candidateDocumentTypes.contains(source)) { toSearch.add(source); } } else { // source was a cluster, having document types for (String documentType : clusterDocTypes) { if (candidateDocumentTypes.contains(documentType)) { toSearch.add(documentType); } } } } if ( ! restrict.isEmpty()) { toSearch.retainAll(restrict); } return toSearch; } private Collection emptyCollectionIfNull(Collection collection) { return collection == null ? List.of() : collection; } /** * Chooses the correct search definition, default if in doubt. * * @return the search definition to use */ private DocumentTypeListOffset chooseSearchDefinition(List documentTypes, int index) { while (index < documentTypes.size()) { String docName = documentTypes.get(index++); SearchDefinition sd = searchDefinitions.get(docName); if (sd != null) { return new DocumentTypeListOffset(index, sd); } } return null; } /** * Freeze this to prevent further changes. * * @return this for chaining */ public IndexFacts freeze() { hasNGramIndices = hasNGramIndices(); // TODO: Freeze content! frozen = true; return this; } /** Whether this contains any index which has isNGram()==true. This is free to ask on a frozen instance. */ public boolean hasNGramIndices() { if (frozen) return hasNGramIndices; for (Map.Entry searchDefinition : searchDefinitions.entrySet()) { for (Index index : searchDefinition.getValue().indices().values()) if (index.isNGram()) return true; } return false; } /** Returns whether it is permissible to update this object */ public boolean isFrozen() { return frozen; } private void ensureNotFrozen() { if (frozen) throw new IllegalStateException("Tried to modify frozen IndexFacts instance."); } public String getDefaultPosition(String sdName) { SearchDefinition sd; if (sdName == null) { sd = unionSearchDefinition; } else if (searchDefinitions.containsKey(sdName)) { sd = searchDefinitions.get(sdName); } else { return null; } return sd.getDefaultPosition(); } public Session newSession(Query query) { return new Session(query); } public Session newSession(Collection sources, Collection restrict) { return new Session(sources, restrict); } public Session newSession(Collection sources, Collection restrict, Set candidateDocumentTypes) { return new Session(sources, restrict, candidateDocumentTypes); } /** * Create an instance of this to look up index facts with a given query. * Note that if the model.source or model.restrict parameters of the query * is changed another session should be created. This is immutable. */ public class Session { private final List documentTypes; private Session(Query query) { documentTypes = List.copyOf(resolveDocumentTypes(query)); } private Session(Collection sources, Collection restrict) { // Assumption: Search definition name equals document name. documentTypes = List.copyOf(resolveDocumentTypes(sources, restrict, searchDefinitions.keySet())); } private Session(Collection sources, Collection restrict, Set candidateDocumentTypes) { documentTypes = List.copyOf(resolveDocumentTypes(sources, restrict, candidateDocumentTypes)); } /** * Returns the index for this name. * * @param indexName the name of the index. If this is null or empty the index named "default" is returned * @return the index best matching the input parameters or the null Index (never null) if none is found */ public Index getIndex(String indexName) { return IndexFacts.this.getIndexFromDocumentTypes(indexName, documentTypes); } /** Returns an index given from a given search definition */ // Note: This does not take the context into account currently. // Ideally, we should be able to resolve the right search definition name // in the context of the searched clusters, but this cannot be modelled // currently by the flat structure in IndexFacts. // That can be fixed without changing this API. public Index getIndex(String indexName, String documentType) { return IndexFacts.this.getIndexFromDocumentTypes(indexName, List.of(documentType)); } /** Returns all the indexes of a given search definition */ public Collection getIndexes(String documentType) { return IndexFacts.this.getIndexes(documentType); } /** * Returns the canonical form of the index name (Which may be the same as * the input). * * @param indexName index name or alias */ public String getCanonicName(String indexName) { return IndexFacts.this.getCanonicNameFromDocumentTypes(indexName, documentTypes); } /** * Returns whether the given name is an index. * * @param indexName index name candidate */ public boolean isIndex(String indexName) { return IndexFacts.this.isIndexFromDocumentTypes(indexName, documentTypes); } /** Returns an immutable list of the document types this has resolved to */ public List documentTypes() { return documentTypes; } @Override public String toString() { return "index facts for search definitions " + documentTypes; } } }