1 files changed, 440 insertions, 0 deletions
diff --git a/container-search/src/main/java/com/yahoo/prelude/IndexFacts.java b/container-search/src/main/java/com/yahoo/prelude/IndexFacts.java
new file mode 100644
index 00000000000..9a079c0d23b
--- /dev/null
+++ b/container-search/src/main/java/com/yahoo/prelude/IndexFacts.java
@@ -0,0 +1,440 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.prelude;
+
+
+import com.google.common.collect.ImmutableList;
+import com.yahoo.language.process.StemMode;
+import com.yahoo.search.Query;
+
+import java.util.*;
+
+import static com.yahoo.text.Lowercase.toLowerCase;
+
+/**
+ * A central repository for information about indices. Standard usage is
+ *
+ * <pre><code>
+ * IndexFacts.Session session = indexFacts.newSession(query); // once when starting to process a query
+ * session.getIndex(indexName).[get index info]
+ * </code></pre>
+ *
+ * @author <a href="mailto:steinar@yahoo-inc.com">Steinar Knutsen</a>
+ */
+// TODO: We should replace this with a better representation of search definitions
+//       which is immutable, models clusters and search definitions inside clusters properly,
+//       and uses better names.
+public class IndexFacts {
+
+    private Map<String, List<String>> clusterByDocument; // cluster names by document type name; null until clusters are set
+
+    /** A search definition found in a document type list, and the list position to resume scanning from */
+    private static class DocumentTypeListOffset {
+        public final int offset;
+        public final SearchDefinition searchDefinition;
+        public DocumentTypeListOffset(int nextOffset, SearchDefinition found) {
+            offset = nextOffset;
+            searchDefinition = found;
+        }
+    }
+
+    /** A map of all known search definitions, indexed by name */
+    private Map<String, SearchDefinition> searchDefinitions = new LinkedHashMap<>();
+
+    /** A map of the document types contained in each cluster, indexed by cluster name */
+    private Map<String, List<String>> clusters = new LinkedHashMap<>();
+
+    /**
+     * The name of the default search definition, which is the union of all
+     * known document types.
+     */
+    public static final String unionName = "unionOfAllKnown";
+
+    /** A search definition which contains the union of all settings. */
+    private SearchDefinition unionSearchDefinition=new SearchDefinition(unionName);
+
+    private boolean frozen; // set by freeze(); the public mutators throw IllegalStateException once this is true
+
+    /** Whether this has (any) NGram indexes. Calculated at freeze time. */
+    private boolean hasNGramIndices;
+
+    public IndexFacts() {} // creates an empty instance which is not frozen
+
+    /** Creates an instance from the search definitions and master clusters of the given model, where set */
+    @SuppressWarnings({"deprecation"})
+    public IndexFacts(IndexModel indexModel) {
+        Map<String, SearchDefinition> definitions = indexModel.getSearchDefinitions();
+        SearchDefinition union = indexModel.getUnionSearchDefinition();
+        if (definitions != null && union != null) setSearchDefinitions(definitions, union);
+        Map<String, List<String>> masterClusters = indexModel.getMasterClusters();
+        if (masterClusters != null) setMasterClusters(masterClusters);
+    }
+
+    private void setMasterClusters(Map<String, List<String>> clusters) {
+        // TODO: clusters should probably be a separate class
+        this.clusters = clusters; // kept by reference, not copied
+        clusterByDocument = invert(clusters); // reverse lookup: document type name -> cluster names
+    }
+
+    /** Returns the reverse of the given map: each list element mapped to the keys whose lists contain it */
+    private static Map<String, List<String>> invert(Map<String, List<String>> clusters) {
+        Map<String, List<String>> inverted = new HashMap<>();
+        for (Map.Entry<String, List<String>> cluster : clusters.entrySet()) {
+            for (String documentType : cluster.getValue())
+                addEntry(inverted, documentType, cluster.getKey());
+        }
+        return inverted;
+    }
+
+    /** Appends value to the list stored under key in result, creating that list on first use */
+    private static void addEntry(Map<String, List<String>> result, String key, String value) {
+        List<String> bucket = result.get(key);
+        if (bucket == null) {
+            result.put(key, bucket = new ArrayList<>());
+        }
+        bucket.add(value);
+    }
+
+    /** Returns the names of the clusters having the given search definition, or an empty list if there are none */
+    // Assumes that document names are equal to the names of the search definitions that contain them.
+    public List<String> clustersHavingSearchDefinition(String searchDefinitionName) {
+        if (clusterByDocument == null) return Collections.emptyList();
+        List<String> found = clusterByDocument.get(searchDefinitionName);
+        if (found == null) return Collections.emptyList();
+        return found;
+    }
+
+    /**
+     * Replaces the cluster to document type mapping. Public only for testing.
+     * @throws IllegalStateException if this is frozen
+     */
+    public void setClusters(Map<String, List<String>> clusters) {
+        ensureNotFrozen();
+        setMasterClusters(clusters);
+    }
+
+    /** Replaces the search definitions and their union. Throws IllegalStateException if this is frozen. */
+    public void setSearchDefinitions(Map<String, SearchDefinition> searchDefinitions, SearchDefinition unionSearchDefinition) {
+        ensureNotFrozen();
+        this.unionSearchDefinition = unionSearchDefinition;
+        this.searchDefinitions = searchDefinitions;
+    }
+
+    private boolean isInitialized() { // true once any search definition is known
+        return ! searchDefinitions.isEmpty();
+    }
+
+    /**
+     * Returns whether the given name is an index of any of the given document types,
+     * or of the union search definition when no document types are given.
+     * Every name is accepted as long as no search definitions are known.
+     */
+    private boolean isIndexFromDocumentTypes(String indexName, List<String> documentTypes) {
+        if ( ! isInitialized()) return true;
+
+        if (documentTypes.isEmpty())
+            return unionSearchDefinition.getIndex(indexName) != null;
+
+        for (String documentType : documentTypes) {
+            SearchDefinition definition = searchDefinitions.get(documentType);
+            if (definition == null) continue; // not a known search definition: skip
+            if (definition.getIndex(indexName) != null) return true;
+        }
+        return false;
+    }
+
+    /** Returns the name of the index found by the lowercased form of indexName, or indexName itself if none */
+    private String getCanonicNameFromDocumentTypes(String indexName, List<String> documentTypes) {
+        if ( ! isInitialized()) return indexName;
+
+        if (documentTypes.isEmpty()) {
+            Index unionIndex = unionSearchDefinition.getIndexByLowerCase(toLowerCase(indexName));
+            return unionIndex != null ? unionIndex.getName() : indexName;
+        }
+        for (String documentType : documentTypes) {
+            SearchDefinition definition = searchDefinitions.get(documentType);
+            if (definition == null) continue; // not a known search definition: skip
+            Index match = definition.getIndexByLowerCase(toLowerCase(indexName));
+            if (match != null) return match.getName();
+        }
+        return indexName;
+    }
+
+    /** Looks up the index by name, substituting the name "default" for a null or empty name */
+    private Index getIndexFromDocumentTypes(String indexName, List<String> documentTypes) {
+        boolean unnamed = indexName == null || indexName.isEmpty();
+        String effectiveName = unnamed ? "default" : indexName;
+        return getIndexByCanonicNameFromDocumentTypes(effectiveName, documentTypes);
+    }
+
+    /**
+     * Returns the first index with this name among the given document types (the union if none given), or Index.nullIndex
+     */
+    private Index getIndexByCanonicNameFromDocumentTypes(String canonicName, List<String> documentTypes) {
+        if ( ! isInitialized()) return Index.nullIndex;
+
+        if (documentTypes.isEmpty()) {
+            Index unionIndex = unionSearchDefinition.getIndex(canonicName);
+            return unionIndex != null ? unionIndex : Index.nullIndex;
+        }
+        for (String documentType : documentTypes) {
+            SearchDefinition definition = searchDefinitions.get(documentType);
+            if (definition == null) continue; // not a known search definition: skip
+            Index index = definition.getIndex(canonicName);
+            if (index != null) return index;
+        }
+        return Index.nullIndex;
+    }
+
+    /** Resolves the document types selected by the query. Assumes search definition name equals document name. */
+    private Set<String> resolveDocumentTypes(Query query) {
+        Collection<String> sources = query.getModel().getSources();
+        Collection<String> restrict = query.getModel().getRestrict();
+        return resolveDocumentTypes(sources, restrict, searchDefinitions.keySet());
+    }
+
+    /**
+     * Given a search list which is a mixture of document types and cluster
+     * names, and a restrict list which is a list of document types, return a
+     * set of all valid document types for this combination. Sessions call this
+     * with the incoming query's {@link com.yahoo.search.query.Model#getSources()}
+     * and {@link com.yahoo.search.query.Model#getRestrict()} as input parameters.
+     *
+     * @param sources the search list for a query, null is treated as empty
+     * @param restrict the restrict list for a query, null is treated as empty
+     * @param candidateDocumentTypes the document types the sources may resolve to
+     * @return a (possibly empty) set of valid document types
+     */
+    private Set<String> resolveDocumentTypes(Collection<String> sources, Collection<String> restrict,
+                                             Set<String> candidateDocumentTypes) {
+        sources = emptyCollectionIfNull(sources);
+        restrict = emptyCollectionIfNull(restrict);
+
+        // No sources: the restrict list decides alone, or all candidates are used if it is empty too
+        if (sources.isEmpty()) {
+            return restrict.isEmpty() ? candidateDocumentTypes : new TreeSet<String>(restrict);
+        }
+
+        Set<String> resolved = new TreeSet<>();
+        for (String source : sources) {
+            // A source is either the name of a cluster, standing for the document types it contains,
+            // or - when no cluster has that name - a document type itself
+            List<String> clusterDocumentTypes = clusters.get(source);
+            Collection<String> named;
+            if (clusterDocumentTypes != null) {
+                named = clusterDocumentTypes;
+            } else {
+                named = Collections.singletonList(source);
+            }
+            // Only the candidates among the named document types are kept
+            for (String documentType : named) {
+                if (candidateDocumentTypes.contains(documentType)) {
+                    resolved.add(documentType);
+                }
+            }
+        }
+
+        // A non-empty restrict list narrows the result further
+        if ( ! restrict.isEmpty()) {
+            resolved.retainAll(restrict);
+        }
+
+        return resolved;
+    }
+
+    private Collection<String> emptyCollectionIfNull(Collection<String> collection) { // never returns null
+        return collection != null ? collection : Collections.<String>emptyList();
+    }
+
+    /**
+     * Finds the first document type at or after the given position which names a known
+     * search definition.
+     *
+     * @param documentTypes the document type names to scan
+     * @param index the position in documentTypes to start scanning from
+     * @return that search definition and the position following it, or null if there is none
+     */
+    private DocumentTypeListOffset chooseSearchDefinition(List<String> documentTypes, int index) {
+        for (int position = index; position < documentTypes.size(); position++) {
+            SearchDefinition known = searchDefinitions.get(documentTypes.get(position));
+            if (known != null) return new DocumentTypeListOffset(position + 1, known);
+        }
+        return null;
+    }
+
+    /**
+     * Freezes this to prevent further changes through the setters and addIndex, caching hasNGramIndices().
+     */
+    public void freeze() {
+        hasNGramIndices = hasNGramIndices(); // must precede frozen = true: once frozen, the cached value is returned
+        // TODO: Freeze content!
+        frozen = true;
+    }
+
+    /** Returns whether any index of any search definition has isNGram()==true. Free to ask on a frozen instance. */
+    public boolean hasNGramIndices() {
+        if (frozen) return hasNGramIndices; // the value cached by freeze()
+        for (SearchDefinition definition : searchDefinitions.values()) {
+            for (Index index : definition.indices().values())
+                if (index.isNGram()) return true;
+        }
+        return false;
+    }
+
+    /**
+     * @return whether this is frozen, that is, whether it is no longer permissible to update this object
+     */
+    public boolean isFrozen() {
+        return frozen;
+    }
+
+    /** Throws IllegalStateException if this is frozen. Called first by the public mutators. */
+    private void ensureNotFrozen() {
+        if ( ! frozen) return;
+        throw new IllegalStateException("Tried to modify frozen IndexFacts instance.");
+    }
+
+
+    /**
+     * Adds a name to be accepted as an index name when parsing a query.
+     * The name is also added to the union of all search definitions.
+     *
+     * For testing only.
+     *
+     * @param sdName name of search definition containing index, if null, modify default set
+     * @param indexName name of index, actual or otherwise
+     * @throws IllegalStateException if this is frozen
+     */
+    public void addIndex(String sdName, String indexName) {
+        ensureNotFrozen();
+
+        SearchDefinition target = unionSearchDefinition;
+        if (sdName != null) {
+            // Use the named search definition, registering a new one the first time a name is seen
+            if ( ! searchDefinitions.containsKey(sdName))
+                searchDefinitions.put(sdName, new SearchDefinition(sdName));
+            target = searchDefinitions.get(sdName);
+        }
+
+        target.getOrCreateIndex(indexName);
+        unionSearchDefinition.getOrCreateIndex(indexName);
+    }
+
+    /**
+     * Adds an index to the specified search definition, and to the union of all
+     * search definitions, overriding any current settings for this index.
+     *
+     * @throws IllegalStateException if this is frozen
+     */
+    public void addIndex(String sdName, Index index) {
+        ensureNotFrozen();
+
+        SearchDefinition target = unionSearchDefinition;
+        if (sdName != null) {
+            if ( ! searchDefinitions.containsKey(sdName))
+                searchDefinitions.put(sdName, new SearchDefinition(sdName));
+            target = searchDefinitions.get(sdName);
+        }
+        // NOTE(review): with a null sdName the union definition receives the index twice - confirm this is harmless
+        target.addIndex(index);
+        unionSearchDefinition.addIndex(index);
+    }
+
+    /**
+     * Returns the default position of a search definition.
+     *
+     * @param sdName the search definition name, or null to use the union of all search definitions
+     * @return the default position, or null if no search definition has the given name
+     */
+    public String getDefaultPosition(String sdName) {
+        if (sdName == null) return unionSearchDefinition.getDefaultPosition();
+
+        if ( ! searchDefinitions.containsKey(sdName)) return null;
+        return searchDefinitions.get(sdName).getDefaultPosition();
+    }
+
+    public Session newSession(Query query) { // resolves from the sources and restrict of the query's model
+        return new Session(query);
+    }
+
+    public Session newSession(Collection<String> sources, Collection<String> restrict) { // candidates: all known search definitions
+        return new Session(sources, restrict);
+    }
+
+    public Session newSession(Collection<String> sources, Collection<String> restrict,
+                              Set<String> candidateDocumentTypes) { // candidates: only the given document types
+        return new Session(sources, restrict, candidateDocumentTypes);
+    }
+
+    /**
+     * Create an instance of this to look up index facts for a given query.
+     * Note that if the sources or restrict of the query's model are changed,
+     * another session should be created. Sessions are immutable.
+     */
+    public class Session {
+
+        /** The document types resolved when this was created */
+        private final List<String> documentTypes;
+
+        private Session(Query query) {
+            documentTypes = ImmutableList.copyOf(resolveDocumentTypes(query));
+        }
+
+        private Session(Collection<String> sources, Collection<String> restrict) {
+            // Assumption: Search definition name equals document name.
+            Set<String> allKnown = searchDefinitions.keySet();
+            documentTypes = ImmutableList.copyOf(resolveDocumentTypes(sources, restrict, allKnown));
+        }
+
+        private Session(Collection<String> sources, Collection<String> restrict, Set<String> candidateDocumentTypes) {
+            documentTypes = ImmutableList.copyOf(resolveDocumentTypes(sources, restrict, candidateDocumentTypes));
+        }
+
+        /**
+         * Returns the index for this name.
+         *
+         * @param indexName the name of the index, where null or empty means the index named "default"
+         * @return the index best matching the input parameters, or the nullIndex (never null) if none is found
+         */
+        public Index getIndex(String indexName) {
+            return getIndexFromDocumentTypes(indexName, documentTypes);
+        }
+
+        /** Returns the index with the given name from the given document type */
+        // Note: This does not take the context into account currently.
+        // Ideally, we should be able to resolve the right search definition name
+        // in the context of the searched clusters, but this cannot be modelled
+        // currently by the flat structure in IndexFacts.
+        // That can be fixed without changing this API.
+        public Index getIndex(String indexName, String documentType) {
+            return getIndexFromDocumentTypes(indexName, Collections.singletonList(documentType));
+        }
+
+        /**
+         * Returns the canonical form of the index name (which may be the same as
+         * the input).
+         *
+         * @param indexName index name or alias
+         */
+        public String getCanonicName(String indexName) {
+            return getCanonicNameFromDocumentTypes(indexName, documentTypes);
+        }
+
+        /**
+         * Returns whether the given name is an index.
+         *
+         * @param indexName index name candidate
+         */
+        public boolean isIndex(String indexName) {
+            return isIndexFromDocumentTypes(indexName, documentTypes);
+        }
+
+        /** Returns an immutable list of the document types this has resolved to */
+        public List<String> documentTypes() { return documentTypes; }
+
+        @Override
+        public String toString() {
+            return "index facts for search definitions " + documentTypes;
+        }
+
+    }
+
+}