Merge pull request #22391 from vespa-engine/bratseth/schema-info-2

Use SchemaInfo
author: Jon Bratseth <bratseth@gmail.com> 2022-05-02 21:08:52 +0200
committer: GitHub <noreply@github.com> 2022-05-02 21:08:52 +0200
commit: d2066c0a0c04e2aa2ada12a5c85f5eae9ff65b02 (patch)
tree: 8a41d51e4bbc420bc1dbc65d76e79e3f8ab423d3
parent: 04898b34190a3e3bb9d3053e11eb892bc48ff842 (diff)
parent: 33989168358c94ea236e436db672d6be0119be70 (diff)
8 files changed, 74 insertions, 66 deletions
diff --git a/config-model/src/main/java/com/yahoo/vespa/model/filedistribution/FileDistributionConfigProducer.java b/config-model/src/main/java/com/yahoo/vespa/model/filedistribution/FileDistributionConfigProducer.java
index 157c23732c7..34041217b0a 100644
--- a/config-model/src/main/java/com/yahoo/vespa/model/filedistribution/FileDistributionConfigProducer.java
+++ b/config-model/src/main/java/com/yahoo/vespa/model/filedistribution/FileDistributionConfigProducer.java
@@ -8,9 +8,9 @@ import java.util.IdentityHashMap;
 import java.util.Map;
 
 /**
- * @author hmusum
- * <p>
  * File distribution config producer, delegates getting config to {@link FileDistributionConfigProvider} (one per host)
+ *
+ * @author hmusum
  */
 public class FileDistributionConfigProducer extends AbstractConfigProducer<AbstractConfigProducer<?>> {
 
diff --git a/container-search/abi-spec.json b/container-search/abi-spec.json
index 54a3b3a0f36..fb1740d0e5e 100644
--- a/container-search/abi-spec.json
+++ b/container-search/abi-spec.json
@@ -8060,6 +8060,7 @@
     "methods": [
       "public void <init>(com.yahoo.search.config.IndexInfoConfig, com.yahoo.search.config.SchemaInfoConfig, com.yahoo.container.QrSearchersConfig)",
       "public void <init>(java.util.List, java.util.Map)",
+      "public java.util.List schemas()",
       "public com.yahoo.search.schema.SchemaInfo$Session newSession(com.yahoo.search.Query)",
       "public static com.yahoo.search.schema.SchemaInfo empty()",
       "public boolean equals(java.lang.Object)",
@@ -8098,6 +8099,7 @@
       "public static com.yahoo.search.searchchain.Execution$Context createContextStub()",
       "public static com.yahoo.search.searchchain.Execution$Context createContextStub(com.yahoo.search.searchchain.SearchChainRegistry)",
       "public static com.yahoo.search.searchchain.Execution$Context createContextStub(com.yahoo.prelude.IndexFacts)",
+      "public static com.yahoo.search.searchchain.Execution$Context createContextStub(com.yahoo.search.schema.SchemaInfo)",
       "public static com.yahoo.search.searchchain.Execution$Context createContextStub(com.yahoo.search.searchchain.SearchChainRegistry, com.yahoo.prelude.IndexFacts)",
       "public static com.yahoo.search.searchchain.Execution$Context createContextStub(com.yahoo.prelude.IndexFacts, com.yahoo.language.Linguistics)",
       "public static com.yahoo.search.searchchain.Execution$Context createContextStub(com.yahoo.search.searchchain.SearchChainRegistry, com.yahoo.prelude.IndexFacts, com.yahoo.language.Linguistics)",
diff --git a/container-search/src/main/java/com/yahoo/prelude/cluster/ClusterSearcher.java b/container-search/src/main/java/com/yahoo/prelude/cluster/ClusterSearcher.java
index 5b2df3485c8..3953190eac4 100644
--- a/container-search/src/main/java/com/yahoo/prelude/cluster/ClusterSearcher.java
+++ b/container-search/src/main/java/com/yahoo/prelude/cluster/ClusterSearcher.java
@@ -39,6 +39,7 @@ import java.util.concurrent.ExecutionException;
 import java.util.concurrent.Executor;
 import java.util.concurrent.FutureTask;
 import java.util.concurrent.RejectedExecutionException;
+import java.util.stream.Collectors;
 
 import static com.yahoo.container.QrSearchersConfig.Searchcluster.Indexingmode.STREAMING;
 
@@ -58,10 +59,10 @@ public class ClusterSearcher extends Searcher {
     private final String searchClusterName;
 
     // The set of document types contained in this search cluster
-    private final Set<String> documentTypes;
+    private final Set<String> schemas;
 
-    // Mapping from rank profile names to document types containing them
-    private final Map<String, Set<String>> rankProfiles = new HashMap<>();
+    // Mapping from rank profile names to schemas containing them
+    private final Map<String, Set<String>> rankProfilesz = new HashMap<>();
 
     private final long maxQueryTimeout; // in milliseconds
     private final long maxQueryCacheTimeout; // in milliseconds
@@ -83,7 +84,7 @@ public class ClusterSearcher extends Searcher {
         int searchClusterIndex = clusterConfig.clusterId();
         searchClusterName = clusterConfig.clusterName();
         QrSearchersConfig.Searchcluster searchClusterConfig = getSearchClusterConfigFromClusterName(qrsConfig, searchClusterName);
-        documentTypes = new LinkedHashSet<>();
+        schemas = new LinkedHashSet<>();
 
         maxQueryTimeout = ParameterParser.asMilliSeconds(clusterConfig.maxQueryTimeout(), DEFAULT_MAX_QUERY_TIMEOUT);
         maxQueryCacheTimeout = ParameterParser.asMilliSeconds(clusterConfig.maxQueryCacheTimeout(), DEFAULT_MAX_QUERY_CACHE_TIMEOUT);
@@ -92,14 +93,8 @@ public class ClusterSearcher extends Searcher {
                 .com().yahoo().prelude().fastsearch().FastSearcher().docsum()
                 .defaultclass());
 
-        for (DocumentdbInfoConfig.Documentdb docDb : documentDbConfig.documentdb()) {
-            String docTypeName = docDb.name();
-            documentTypes.add(docTypeName);
-
-            for (DocumentdbInfoConfig.Documentdb.Rankprofile profile : docDb.rankprofile()) {
-                addValidRankProfile(profile.name(), docTypeName);
-            }
-        }
+        for (DocumentdbInfoConfig.Documentdb docDb : documentDbConfig.documentdb())
+            schemas.add(docDb.name());
 
         String uniqueServerId = UUID.randomUUID().toString();
         if (searchClusterConfig.indexingmode() == STREAMING) {
@@ -159,8 +154,8 @@ public class ClusterSearcher extends Searcher {
     }
 
     /** Do not use, for internal testing purposes only. **/
-    ClusterSearcher(Set<String> documentTypes, VespaBackEndSearcher searcher, Executor executor) {
-        this.documentTypes = documentTypes;
+    ClusterSearcher(Set<String> schemas, VespaBackEndSearcher searcher, Executor executor) {
+        this.schemas = schemas;
         searchClusterName = "testScenario";
         maxQueryTimeout = DEFAULT_MAX_QUERY_TIMEOUT;
         maxQueryCacheTimeout = DEFAULT_MAX_QUERY_CACHE_TIMEOUT;
@@ -168,19 +163,8 @@ public class ClusterSearcher extends Searcher {
         this.executor = executor;
     }
     /** Do not use, for internal testing purposes only. **/
-    ClusterSearcher(Set<String> documentTypes) {
-        this(documentTypes, null, null);
-    }
-
-    void addValidRankProfile(String profileName, String docTypeName) {
-        if (!rankProfiles.containsKey(profileName)) {
-            rankProfiles.put(profileName, new HashSet<>());
-        }
-        rankProfiles.get(profileName).add(docTypeName);
-    }
-
-    void setValidRankProfile(String profileName, Set<String> documentTypes) {
-        rankProfiles.put(profileName, documentTypes);
+    ClusterSearcher(Set<String> schemas) {
+        this(schemas, null, null);
     }
 
     /**
@@ -192,49 +176,54 @@ public class ClusterSearcher extends Searcher {
      * probably not reasonable.
      *
      * @param  query    query
-     * @param  docTypes set of requested doc types for this query
-     * @return          null if request rank profile is ok for the requested
-     *                  doc types, a result with error message if not.
+     * @param  schemas  set of requested schemas for this query
+     * @return          null if requested rank profile is ok for the requested
+     *                  schemas, a result with error message if not.
      */
     // TODO: This should be in a separate searcher
-    private Result checkValidRankProfiles(Query query, Set<String> docTypes) {
+    // TODO Vespa 8: This should simply fail if the specified profile isn't present in all schemas
+    private Result checkValidRankProfiles(Query query, Set<String> schemas, Execution.Context context) {
         String rankProfile = query.getRanking().getProfile();
-        Set<String> invalidInDocTypes = null;
-        Set<String> rankDocTypes = rankProfiles.get(rankProfile);
+        Set<String> invalidInSchemas = null;
+        Set<String> schemasHavingProfile = schemasHavingProfile(rankProfile, context);
 
-        if (rankDocTypes == null) {
-            // rank profile does not exist in any document type
-            invalidInDocTypes = docTypes;
+        if (schemasHavingProfile.isEmpty()) {
+            invalidInSchemas = schemas;
         }
-        else if (docTypes.size() == 1) {
-            // one document type, fails if invalid rank profile
-            if (!rankDocTypes.contains(docTypes.iterator().next())) {
-                invalidInDocTypes = docTypes;
-            }
+        else if (schemas.size() == 1) {
+            if ( ! schemasHavingProfile.containsAll(schemas))
+                invalidInSchemas = schemas;
         }
         else {
-            // multiple document types, only fail when restricting doc types
+            // multiple schemas: only fail when the query specifies restrict or sources
             Set<String> restrict = query.getModel().getRestrict();
             Set<String> sources = query.getModel().getSources();
             boolean validate = restrict != null && !restrict.isEmpty();
             validate = validate || sources != null && !sources.isEmpty();
-            if (validate && !rankDocTypes.containsAll(docTypes)) {
-                invalidInDocTypes = new HashSet<>(docTypes);
-                invalidInDocTypes.removeAll(rankDocTypes);
+            if (validate && !schemasHavingProfile.containsAll(schemas)) {
+                invalidInSchemas = new HashSet<>(schemas);
+                invalidInSchemas.removeAll(schemasHavingProfile);
             }
         }
 
-        if (invalidInDocTypes != null && !invalidInDocTypes.isEmpty()) {
-            String plural = invalidInDocTypes.size() > 1 ? "s" : "";
+        if (invalidInSchemas != null && !invalidInSchemas.isEmpty()) {
+            String plural = invalidInSchemas.size() > 1 ? "s" : "";
             return new Result(query,
                               ErrorMessage.createInvalidQueryParameter("Requested rank profile '" + rankProfile +
                                                                        "' is undefined for document type" + plural + " '" +
-                                                                       String.join(", ", invalidInDocTypes) + "'"));
+                                                                       String.join(", ", invalidInSchemas) + "'"));
         }
 
         return null;
     }
 
+    private Set<String> schemasHavingProfile(String profile, Execution.Context context) {
+        return context.schemaInfo().schemas().stream()
+                                             .filter(schema -> schema.rankProfiles().containsKey(profile))
+                                             .map(schema -> schema.name())
+                                             .collect(Collectors.toSet());
+    }
+
     @Override
     public void fill(com.yahoo.search.Result result, String summaryClass, Execution execution) {
         Query query = result.getQuery();
@@ -292,12 +281,12 @@ public class ClusterSearcher extends Searcher {
     }
 
     private Result doSearch(Searcher searcher, Query query, Execution execution) {
-        if (documentTypes.size() > 1) {
+        if (schemas.size() > 1) {
             return searchMultipleDocumentTypes(searcher, query, execution);
         } else {
-            String docType = documentTypes.iterator().next();
+            String docType = schemas.iterator().next();
 
-            Result invalidRankProfile = checkValidRankProfiles(query, documentTypes);
+            Result invalidRankProfile = checkValidRankProfiles(query, schemas, execution.context());
             if (invalidRankProfile != null) {
                 return invalidRankProfile;
             }
@@ -320,12 +309,12 @@ public class ClusterSearcher extends Searcher {
     }
 
     private Result searchMultipleDocumentTypes(Searcher searcher, Query query, Execution execution) {
-        Set<String> docTypes = resolveDocumentTypes(query, execution.context().getIndexFacts());
+        Set<String> schemas = resolveSchemas(query, execution.context().getIndexFacts());
 
-        Result invalidRankProfile = checkValidRankProfiles(query, docTypes);
+        Result invalidRankProfile = checkValidRankProfiles(query, schemas, execution.context());
         if (invalidRankProfile != null) return invalidRankProfile;
 
-        List<Query> queries = createQueries(query, docTypes);
+        List<Query> queries = createQueries(query, schemas);
         if (queries.size() == 1) {
             return searcher.search(queries.get(0), execution);
         } else {
@@ -357,13 +346,13 @@ public class ClusterSearcher extends Searcher {
         }
     }
 
-    Set<String> resolveDocumentTypes(Query query, IndexFacts indexFacts) {
+    Set<String> resolveSchemas(Query query, IndexFacts indexFacts) {
         Set<String> restrict = query.getModel().getRestrict();
         if (restrict == null || restrict.isEmpty()) {
             Set<String> sources = query.getModel().getSources();
             return (sources == null || sources.isEmpty())
-                    ? documentTypes
-                    : new HashSet<>(indexFacts.newSession(sources, Collections.emptyList(), documentTypes).documentTypes());
+                    ? schemas
+                    : new HashSet<>(indexFacts.newSession(sources, Collections.emptyList(), schemas).documentTypes());
         } else {
             return filterValidDocumentTypes(restrict);
         }
@@ -372,7 +361,7 @@ public class ClusterSearcher extends Searcher {
     private Set<String> filterValidDocumentTypes(Collection<String> restrict) {
         Set<String> retval = new LinkedHashSet<>();
         for (String docType : restrict) {
-            if (docType != null && documentTypes.contains(docType)) {
+            if (docType != null && schemas.contains(docType)) {
                 retval.add(docType);
             }
         }
diff --git a/container-search/src/main/java/com/yahoo/search/schema/SchemaInfo.java b/container-search/src/main/java/com/yahoo/search/schema/SchemaInfo.java
index 2d1d391640f..4daf110fc54 100644
--- a/container-search/src/main/java/com/yahoo/search/schema/SchemaInfo.java
+++ b/container-search/src/main/java/com/yahoo/search/schema/SchemaInfo.java
@@ -55,6 +55,9 @@ public class SchemaInfo {
         this.clusters = Map.copyOf(clusters);
     }
 
+    /** Returns all schemas configured in this application. */
+    public List<Schema> schemas() { return schemas; }
+
     public Session newSession(Query query) {
         return new Session(query.getModel().getSources(), query.getModel().getRestrict(), clusters, schemas);
     }
diff --git a/container-search/src/main/java/com/yahoo/search/searchchain/Execution.java b/container-search/src/main/java/com/yahoo/search/searchchain/Execution.java
index aba72cb3404..94ff9745ea6 100644
--- a/container-search/src/main/java/com/yahoo/search/searchchain/Execution.java
+++ b/container-search/src/main/java/com/yahoo/search/searchchain/Execution.java
@@ -165,6 +165,11 @@ public class Execution extends com.yahoo.processing.execution.Execution {
         }
 
         /** Creates a Context instance where everything except the given arguments is empty. This is for unit testing.*/
+        public static Context createContextStub(SchemaInfo schemaInfo) {
+            return createContextStub(null, null, schemaInfo, null);
+        }
+
+        /** Creates a Context instance where everything except the given arguments is empty. This is for unit testing.*/
         public static Context createContextStub(SearchChainRegistry searchChainRegistry, IndexFacts indexFacts) {
             return createContextStub(searchChainRegistry, indexFacts, SchemaInfo.empty(), null);
         }
diff --git a/container-search/src/main/java/com/yahoo/search/searchchain/ExecutionFactory.java b/container-search/src/main/java/com/yahoo/search/searchchain/ExecutionFactory.java
index 54874dbee3e..3e44b02618e 100644
--- a/container-search/src/main/java/com/yahoo/search/searchchain/ExecutionFactory.java
+++ b/container-search/src/main/java/com/yahoo/search/searchchain/ExecutionFactory.java
@@ -68,7 +68,7 @@ public class ExecutionFactory extends AbstractComponent {
         this.executor = executor != null ? executor : Executors.newSingleThreadExecutor();
     }
 
-    /** @deprecated pass documentDbInfo */
+    /** @deprecated pass SchemaInfoConfig */
     @Deprecated
     public ExecutionFactory(ChainsConfig chainsConfig,
                             IndexInfoConfig indexInfo,
diff --git a/container-search/src/main/resources/configdefinitions/prelude.fastsearch.documentdb-info.def b/container-search/src/main/resources/configdefinitions/prelude.fastsearch.documentdb-info.def
index 0fa448e1572..4528475697b 100644
--- a/container-search/src/main/resources/configdefinitions/prelude.fastsearch.documentdb-info.def
+++ b/container-search/src/main/resources/configdefinitions/prelude.fastsearch.documentdb-info.def
@@ -1,7 +1,7 @@
 # Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
 namespace=prelude.fastsearch
 
-## The name of the search definition that this document database info applies to
+## The name of the schema/document database
 documentdb[].name string
 
 ## The id of the summary class
diff --git a/container-search/src/test/java/com/yahoo/prelude/cluster/ClusterSearcherTestCase.java b/container-search/src/test/java/com/yahoo/prelude/cluster/ClusterSearcherTestCase.java
index 801f7da1939..2d62e4c0154 100644
--- a/container-search/src/test/java/com/yahoo/prelude/cluster/ClusterSearcherTestCase.java
+++ b/container-search/src/test/java/com/yahoo/prelude/cluster/ClusterSearcherTestCase.java
@@ -22,6 +22,9 @@ import com.yahoo.search.config.ClusterConfig;
 import com.yahoo.search.dispatch.Dispatcher;
 import com.yahoo.search.dispatch.rpc.RpcResourcePool;
 import com.yahoo.search.result.Hit;
+import com.yahoo.search.schema.RankProfile;
+import com.yahoo.search.schema.Schema;
+import com.yahoo.search.schema.SchemaInfo;
 import com.yahoo.search.searchchain.Execution;
 import com.yahoo.vespa.config.search.DispatchConfig;
 import org.junit.Test;
@@ -81,7 +84,7 @@ public class ClusterSearcherTestCase {
     }
 
     private Set<String> resolve(ClusterSearcher searcher, String query) {
-        return searcher.resolveDocumentTypes(new Query("?query=hello" + query), createIndexFacts());
+        return searcher.resolveSchemas(new Query("?query=hello" + query), createIndexFacts());
     }
 
     @Test
@@ -267,9 +270,15 @@ public class ClusterSearcherTestCase {
                                                       new MyMockSearcher(expectAttributePrefetch),
                                                       new InThreadExecutorService());
         try {
-            cluster.setValidRankProfile("default", documentTypes);
-            cluster.addValidRankProfile("testprofile", "type1");
-            return new Execution(cluster, Execution.Context.createContextStub());
+            List<Schema> schemas = new ArrayList<>();
+            for (String docType : docTypesList) {
+                var schemaBuilder = new Schema.Builder(docType);
+                schemaBuilder.add(new RankProfile.Builder("default").build());
+                if (docType.equals("type1"))
+                    schemaBuilder.add(new RankProfile.Builder("testprofile").build());
+                schemas.add(schemaBuilder.build());
+            }
+            return new Execution(cluster, Execution.Context.createContextStub(new SchemaInfo(schemas, Map.of())));
         } finally {
             cluster.deconstruct();
         }
author	Jon Bratseth <bratseth@gmail.com>	2022-05-02 21:08:52 +0200
committer	GitHub <noreply@github.com>	2022-05-02 21:08:52 +0200
commit	d2066c0a0c04e2aa2ada12a5c85f5eae9ff65b02 (patch)
tree	8a41d51e4bbc420bc1dbc65d76e79e3f8ab423d3
parent	04898b34190a3e3bb9d3053e11eb892bc48ff842 (diff)
parent	33989168358c94ea236e436db672d6be0119be70 (diff)