Merge pull request #30264 from vespa-engine/balder/less-use-of-indexfacts v8.304.20

- Use SchemaInfo instead of IndexFacts.
author: Henning Baldersheim <balder@yahoo-inc.com> 2024-02-13 18:33:59 +0100
committer: GitHub <noreply@github.com> 2024-02-13 18:33:59 +0100
commit: a2f05742236b0873bb6991b0134d6991d0b5d4b2 (patch)
tree: 4c3f264e87506910a72366d39834847da1c3b550
parent: df26263159179ffd110041076f25ab561115091c (diff)
parent: 35a86286c434861c66d53bdea93eac0e82b0734f (diff)
6 files changed, 76 insertions, 63 deletions
diff --git a/container-search/abi-spec.json b/container-search/abi-spec.json
index 2304743873f..35d097f23f1 100644
--- a/container-search/abi-spec.json
+++ b/container-search/abi-spec.json
@@ -8517,6 +8517,7 @@
     ],
     "methods" : [
       "public boolean isStreaming()",
+      "public java.util.Collection schemas()",
       "public java.util.Optional fieldInfo(java.lang.String)",
       "public com.yahoo.tensor.TensorType rankProfileInput(java.lang.String, java.lang.String)"
     ],
diff --git a/container-search/src/main/java/com/yahoo/prelude/cluster/ClusterSearcher.java b/container-search/src/main/java/com/yahoo/prelude/cluster/ClusterSearcher.java
index 88cc7ad7b2d..b0456b941f4 100644
--- a/container-search/src/main/java/com/yahoo/prelude/cluster/ClusterSearcher.java
+++ b/container-search/src/main/java/com/yahoo/prelude/cluster/ClusterSearcher.java
@@ -8,7 +8,6 @@ import com.yahoo.component.provider.ComponentRegistry;
 import com.yahoo.container.QrSearchersConfig;
 import com.yahoo.container.core.documentapi.VespaDocumentAccess;
 import com.yahoo.container.handler.VipStatus;
-import com.yahoo.prelude.IndexFacts;
 import com.yahoo.prelude.fastsearch.ClusterParams;
 import com.yahoo.prelude.fastsearch.DocumentdbInfoConfig;
 import com.yahoo.prelude.fastsearch.FastSearcher;
@@ -22,6 +21,7 @@ import com.yahoo.search.dispatch.Dispatcher;
 import com.yahoo.search.query.ParameterParser;
 import com.yahoo.search.ranking.GlobalPhaseRanker;
 import com.yahoo.search.result.ErrorMessage;
+import com.yahoo.search.schema.Cluster;
 import com.yahoo.search.schema.SchemaInfo;
 import com.yahoo.search.searchchain.Execution;
 import com.yahoo.vespa.streamingvisitors.StreamingSearcher;
@@ -29,7 +29,6 @@ import com.yahoo.yolean.Exceptions;
 
 import java.util.ArrayList;
 import java.util.Collection;
-import java.util.Collections;
 import java.util.HashSet;
 import java.util.LinkedHashSet;
 import java.util.List;
@@ -39,6 +38,7 @@ import java.util.concurrent.ExecutionException;
 import java.util.concurrent.Executor;
 import java.util.concurrent.FutureTask;
 import java.util.concurrent.RejectedExecutionException;
+import java.util.stream.Collectors;
 
 import static com.yahoo.container.QrSearchersConfig.Searchcluster.Indexingmode.STREAMING;
 
@@ -59,6 +59,7 @@ public class ClusterSearcher extends Searcher {
 
     // The set of document types contained in this search cluster
     private final Set<String> schemas;
+    private final SchemaInfo schemaInfo;
 
     private final long maxQueryTimeout; // in milliseconds
     private final long maxQueryCacheTimeout; // in milliseconds
@@ -80,6 +81,7 @@ public class ClusterSearcher extends Searcher {
                            VespaDocumentAccess access) {
         super(id);
         this.executor = executor;
+        this.schemaInfo = schemaInfo;
         int searchClusterIndex = clusterConfig.clusterId();
         searchClusterName = clusterConfig.clusterName();
         QrSearchersConfig.Searchcluster searchClusterConfig = getSearchClusterConfigFromClusterName(qrsConfig, searchClusterName);
@@ -156,19 +158,20 @@ public class ClusterSearcher extends Searcher {
     }
 
     /** Do not use, for internal testing purposes only. **/
-    ClusterSearcher(Set<String> schemas, VespaBackEndSearcher searcher, Executor executor) {
-        this.schemas = schemas;
+    ClusterSearcher(SchemaInfo schemaInfo, Set<String> schemas, VespaBackEndSearcher searcher, Executor executor) {
+        this.schemaInfo = schemaInfo;
         searchClusterName = "testScenario";
         maxQueryTimeout = DEFAULT_MAX_QUERY_TIMEOUT;
         maxQueryCacheTimeout = DEFAULT_MAX_QUERY_CACHE_TIMEOUT;
         server = searcher;
         this.executor = executor;
         this.globalPhaseRanker = null;
+        this.schemas = schemas;
     }
 
     /** Do not use, for internal testing purposes only. **/
-    ClusterSearcher(Set<String> schemas) {
-        this(schemas, null, null);
+    ClusterSearcher(SchemaInfo schemaInfo, Set<String> schemas) {
+        this(schemaInfo, schemas, null, null);
     }
 
     @Override
@@ -283,7 +286,7 @@ public class ClusterSearcher extends Searcher {
     }
 
     private Result searchMultipleDocumentTypes(Searcher searcher, Query query, Execution execution) {
-        Set<String> schemas = resolveSchemas(query, execution.context().getIndexFacts());
+        Set<String> schemas = resolveSchemas(query);
         List<Query> queries = createQueries(query, schemas);
         if (queries.size() == 1) {
             return perSchemaSearch(searcher, queries.get(0), execution);
@@ -316,13 +319,24 @@ public class ClusterSearcher extends Searcher {
         }
     }
 
-    Set<String> resolveSchemas(Query query, IndexFacts indexFacts) {
+    private Set<String> resolveSourceSubset(Set<String> sources) {
+        Set<String> candidates = new HashSet<>();
+        for (String source : sources) {
+            Cluster cluster = schemaInfo.clusters().get(source);
+            if (cluster != null)
+                candidates.addAll(cluster.schemas());
+        }
+        return (candidates.isEmpty() ? sources : candidates).stream()
+                .filter(schemas::contains).collect(Collectors.toUnmodifiableSet());
+    }
+
+    Set<String> resolveSchemas(Query query) {
         Set<String> restrict = query.getModel().getRestrict();
         if (restrict == null || restrict.isEmpty()) {
             Set<String> sources = query.getModel().getSources();
             return (sources == null || sources.isEmpty())
                     ? schemas
-                    : new HashSet<>(indexFacts.newSession(sources, Collections.emptyList(), schemas).documentTypes());
+                    : resolveSourceSubset(sources);
         } else {
             return filterValidDocumentTypes(restrict);
         }
diff --git a/container-search/src/main/java/com/yahoo/prelude/fastsearch/VespaBackEndSearcher.java b/container-search/src/main/java/com/yahoo/prelude/fastsearch/VespaBackEndSearcher.java
index 5dab9d2988f..2e635d21f01 100644
--- a/container-search/src/main/java/com/yahoo/prelude/fastsearch/VespaBackEndSearcher.java
+++ b/container-search/src/main/java/com/yahoo/prelude/fastsearch/VespaBackEndSearcher.java
@@ -9,7 +9,6 @@ import com.yahoo.prelude.query.Item;
 import com.yahoo.prelude.query.NullItem;
 import com.yahoo.prelude.query.textualrepresentation.TextualQueryRepresentation;
 import com.yahoo.prelude.querytransform.QueryRewrite;
-import com.yahoo.processing.request.CompoundName;
 import com.yahoo.protect.Validator;
 import com.yahoo.search.Query;
 import com.yahoo.search.Result;
@@ -223,7 +222,7 @@ public abstract class VespaBackEndSearcher extends PingableSearcher {
         if (result.isFilled(summaryClass)) return; // TODO: Checked in the superclass - remove
 
         List<Result> parts = partitionHits(result, summaryClass);
-        if (parts.size() > 0) { // anything to fill at all?
+        if (!parts.isEmpty()) { // anything to fill at all?
             for (Result r : parts) {
                 doPartialFill(r, summaryClass);
                 mergeErrorsInto(result, r);
@@ -379,11 +378,6 @@ public abstract class VespaBackEndSearcher extends PingableSearcher {
         return new FillHitsResult(skippedHits, lastError);
     }
 
-    protected DocsumDefinitionSet getDocsumDefinitionSet(Query query) {
-        DocumentDatabase db = getDocumentDatabase(query);
-        return db.getDocsumDefinitionSet();
-    }
-
     private String decodeSummary(String summaryClass, FastHit hit, byte[] docsumdata) {
         DocumentDatabase db = getDocumentDatabase(hit.getQuery());
         hit.setField(Hit.SDDOCNAME_FIELD, db.schema().name());
diff --git a/container-search/src/main/java/com/yahoo/search/schema/SchemaInfo.java b/container-search/src/main/java/com/yahoo/search/schema/SchemaInfo.java
index bbd303039cf..263fa4058c7 100644
--- a/container-search/src/main/java/com/yahoo/search/schema/SchemaInfo.java
+++ b/container-search/src/main/java/com/yahoo/search/schema/SchemaInfo.java
@@ -5,11 +5,9 @@ import com.yahoo.api.annotations.Beta;
 import com.yahoo.component.annotation.Inject;
 import com.yahoo.container.QrSearchersConfig;
 import com.yahoo.search.Query;
-import com.yahoo.search.config.IndexInfoConfig;
 import com.yahoo.search.config.SchemaInfoConfig;
 import com.yahoo.tensor.TensorType;
 
-import java.util.ArrayList;
 import java.util.Collection;
 import java.util.Collections;
 import java.util.HashSet;
@@ -65,7 +63,7 @@ public class SchemaInfo {
     /** Returns all schemas configured in this application, indexed by schema name. */
     public Map<String, Schema> schemas() { return schemas; }
 
-    /** Returns information about all clusters available for searching in this applications, indexed by cluyster name. */
+    /** Returns information about all clusters available for searching in this application, indexed by cluster name. */
     public Map<String, Cluster> clusters() { return clusters; }
 
     public Session newSession(Query query) {
@@ -103,6 +101,8 @@ public class SchemaInfo {
         /** Returns true if this only searches streaming clusters. */
         public boolean isStreaming() { return isStreaming; }
 
+        public Collection<Schema> schemas() { return schemas; }
+
         /**
          * Looks up a field or field set by the given name or alias
          * in the schemas resolved for this query.
diff --git a/container-search/src/main/java/com/yahoo/search/schema/SchemaInfoConfigurer.java b/container-search/src/main/java/com/yahoo/search/schema/SchemaInfoConfigurer.java
index 84cf1744e27..b70f5145e56 100644
--- a/container-search/src/main/java/com/yahoo/search/schema/SchemaInfoConfigurer.java
+++ b/container-search/src/main/java/com/yahoo/search/schema/SchemaInfoConfigurer.java
@@ -16,7 +16,7 @@ import java.util.List;
 class SchemaInfoConfigurer {
 
     static List<Schema> toSchemas(SchemaInfoConfig schemaInfoConfig) {
-        return schemaInfoConfig.schema().stream().map(config -> toSchema(config)).toList();
+        return schemaInfoConfig.schema().stream().map(SchemaInfoConfigurer::toSchema).toList();
     }
 
     static Schema toSchema(SchemaInfoConfig.Schema schemaInfoConfig) {
diff --git a/container-search/src/test/java/com/yahoo/prelude/cluster/ClusterSearcherTestCase.java b/container-search/src/test/java/com/yahoo/prelude/cluster/ClusterSearcherTestCase.java
index f2ce555a068..8d4e3364ce4 100644
--- a/container-search/src/test/java/com/yahoo/prelude/cluster/ClusterSearcherTestCase.java
+++ b/container-search/src/test/java/com/yahoo/prelude/cluster/ClusterSearcherTestCase.java
@@ -7,9 +7,6 @@ import com.yahoo.concurrent.InThreadExecutorService;
 import com.yahoo.container.QrSearchersConfig;
 import com.yahoo.container.handler.ClustersStatus;
 import com.yahoo.container.handler.VipStatus;
-import com.yahoo.prelude.IndexFacts;
-import com.yahoo.prelude.IndexModel;
-import com.yahoo.prelude.SearchDefinition;
 import com.yahoo.prelude.fastsearch.DocumentdbInfoConfig;
 import com.yahoo.prelude.fastsearch.FastHit;
 import com.yahoo.prelude.fastsearch.VespaBackEndSearcher;
@@ -18,6 +15,7 @@ import com.yahoo.search.Result;
 import com.yahoo.search.config.ClusterConfig;
 import com.yahoo.search.dispatch.Dispatcher;
 import com.yahoo.search.result.Hit;
+import com.yahoo.search.schema.Cluster;
 import com.yahoo.search.schema.RankProfile;
 import com.yahoo.search.schema.Schema;
 import com.yahoo.search.schema.SchemaInfo;
@@ -36,6 +34,7 @@ import java.util.LinkedHashSet;
 import java.util.List;
 import java.util.Map;
 import java.util.Set;
+import java.util.stream.Stream;
 
 import static org.junit.jupiter.api.Assertions.assertEquals;
 import static org.junit.jupiter.api.Assertions.assertFalse;
@@ -54,7 +53,7 @@ public class ClusterSearcherTestCase {
 
     @Test
     void testNoBackends() {
-        ClusterSearcher cluster = new ClusterSearcher(new LinkedHashSet<>(List.of("dummy")));
+        ClusterSearcher cluster = new ClusterSearcher(createSchemaInfo(), Set.of("dummy"));
         try {
             Execution execution = new Execution(cluster, Execution.Context.createContextStub());
             Query query = new Query("query=hello");
@@ -67,55 +66,58 @@ public class ClusterSearcherTestCase {
         }
     }
 
-    private IndexFacts createIndexFacts() {
-        Map<String, List<String>> clusters = new LinkedHashMap<>();
-        clusters.put("cluster1", List.of("type1", "type2", "type3"));
-        clusters.put("cluster2", List.of("type4", "type5"));
-        clusters.put("type1", List.of("type6"));
-        Collection<SearchDefinition> searchDefs = List.of(
-                new SearchDefinition("type1"),
-                new SearchDefinition("type2"),
-                new SearchDefinition("type3"),
-                new SearchDefinition("type4"),
-                new SearchDefinition("type5"),
-                new SearchDefinition("type6"));
-        return new IndexFacts(new IndexModel(clusters, searchDefs));
+    private static SchemaInfo createSchemaInfo() {
+        var schemas = Stream.of("type1", "type2", "type3", "type4", "type5", "type6")
+                .map(name -> new Schema.Builder(name).build()).toList();
+        var clusters = List.of(new Cluster.Builder("cluster1").addSchema("type1").addSchema("type2").addSchema("type3").build(),
+                new Cluster.Builder("cluster2").addSchema("type4").addSchema("type5").build(),
+                new Cluster.Builder("type1").addSchema("type6").build());
+        return new SchemaInfo(schemas, clusters);
     }
 
     private Set<String> resolve(ClusterSearcher searcher, String query) {
-        return searcher.resolveSchemas(new Query("?query=hello" + query), createIndexFacts());
+        return searcher.resolveSchemas(new Query("?query=hello" + query));
+    }
+
+    private static SchemaInfo toSchemaInfo(Collection<String> schemaNames, String clusterName) {
+        Cluster.Builder clusterBuilder = new Cluster.Builder(clusterName);
+
+        schemaNames.forEach(clusterBuilder::addSchema);
+        return new SchemaInfo(schemaNames.stream().map(name -> new Schema.Builder(name).build()).toList(),
+                              List.of(clusterBuilder.build()));
     }
 
     @Test
     void testThatDocumentTypesAreResolved() {
-        ClusterSearcher cluster1 = new ClusterSearcher(new LinkedHashSet<>(List.of("type1", "type2", "type3")));
+        SchemaInfo schemaInfo = createSchemaInfo();
+        ClusterSearcher cluster1 = new ClusterSearcher(schemaInfo, Set.of("type1", "type2", "type3"));
         try {
-            ClusterSearcher type1 = new ClusterSearcher(new LinkedHashSet<>(List.of("type6")));
+            ClusterSearcher type1 = new ClusterSearcher(schemaInfo, Set.of("type6"));
             try {
-                assertEquals(new LinkedHashSet<>(List.of("type1", "type2", "type3")), resolve(cluster1, ""));
-                assertEquals(new LinkedHashSet<>(List.of("type6")), resolve(type1, ""));
+                assertEquals(Set.of("type1", "type2", "type3"), resolve(cluster1, ""));
+                assertEquals(Set.of("type6"), resolve(type1, ""));
                 { // specify restrict
-                    assertEquals(new LinkedHashSet<>(List.of("type1")), resolve(cluster1, "&restrict=type1"));
-                    assertEquals(new LinkedHashSet<>(List.of("type2")), resolve(cluster1, "&restrict=type2"));
-                    assertEquals(new LinkedHashSet<>(List.of("type2", "type3")), resolve(cluster1, "&restrict=type2,type3"));
-                    assertEquals(new LinkedHashSet<>(List.of("type2")), resolve(cluster1, "&restrict=type2,type4"));
-                    assertEquals(new LinkedHashSet<>(List.of()), resolve(cluster1, "&restrict=type4"));
+                    assertEquals(Set.of("type1"), resolve(cluster1, "&restrict=type1"));
+                    assertEquals(Set.of("type2"), resolve(cluster1, "&restrict=type2"));
+                    assertEquals(Set.of("type2", "type3"), resolve(cluster1, "&restrict=type2,type3"));
+                    assertEquals(Set.of("type2"), resolve(cluster1, "&restrict=type2,type4"));
+                    assertEquals(Set.of(), resolve(cluster1, "&restrict=type4"));
                 }
                 { // specify sources
-                    assertEquals(new LinkedHashSet<>(List.of("type1", "type2", "type3")), resolve(cluster1, "&sources=cluster1"));
-                    assertEquals(new LinkedHashSet<>(List.of()), resolve(cluster1, "&sources=cluster2"));
-                    assertEquals(new LinkedHashSet<>(List.of()), resolve(cluster1, "&sources=type1"));
-                    assertEquals(new LinkedHashSet<>(List.of("type6")), resolve(type1, "&sources=type1"));
-                    assertEquals(new LinkedHashSet<>(List.of("type2")), resolve(cluster1, "&sources=type2"));
-                    assertEquals(new LinkedHashSet<>(List.of("type2", "type3")), resolve(cluster1, "&sources=type2,type3"));
-                    assertEquals(new LinkedHashSet<>(List.of("type2")), resolve(cluster1, "&sources=type2,type4"));
-                    assertEquals(new LinkedHashSet<>(List.of()), resolve(cluster1, "&sources=type4"));
+                    assertEquals(Set.of("type1", "type2", "type3"), resolve(cluster1, "&sources=cluster1"));
+                    assertEquals(Set.of(), resolve(cluster1, "&sources=cluster2"));
+                    assertEquals(Set.of(), resolve(cluster1, "&sources=type1"));
+                    assertEquals(Set.of("type6"), resolve(type1, "&sources=type1"));
+                    assertEquals(Set.of("type2"), resolve(cluster1, "&sources=type2"));
+                    assertEquals(Set.of("type2", "type3"), resolve(cluster1, "&sources=type2,type3"));
+                    assertEquals(Set.of("type2"), resolve(cluster1, "&sources=type2,type4"));
+                    assertEquals(Set.of(), resolve(cluster1, "&sources=type4"));
                 }
                 { // specify both
-                    assertEquals(new LinkedHashSet<>(List.of("type1")), resolve(cluster1, "&sources=cluster1&restrict=type1"));
-                    assertEquals(new LinkedHashSet<>(List.of("type2")), resolve(cluster1, "&sources=cluster1&restrict=type2"));
-                    assertEquals(new LinkedHashSet<>(List.of("type2", "type3")), resolve(cluster1, "&sources=cluster1&restrict=type2,type3"));
-                    assertEquals(new LinkedHashSet<>(List.of("type2")), resolve(cluster1, "&sources=cluster2&restrict=type2"));
+                    assertEquals(Set.of("type1"), resolve(cluster1, "&sources=cluster1&restrict=type1"));
+                    assertEquals(Set.of("type2"), resolve(cluster1, "&sources=cluster1&restrict=type2"));
+                    assertEquals(Set.of("type2", "type3"), resolve(cluster1, "&sources=cluster1&restrict=type2,type3"));
+                    assertEquals(Set.of("type2"), resolve(cluster1, "&sources=cluster2&restrict=type2"));
                 }
             } finally {
                 type1.deconstruct();
@@ -127,11 +129,12 @@ public class ClusterSearcherTestCase {
 
     @Test
     void testThatDocumentTypesAreResolvedTODO_REMOVE() {
-        ClusterSearcher cluster1 = new ClusterSearcher(new LinkedHashSet<>(List.of("type1", "type2", "type3")));
+        SchemaInfo schemaInfo = createSchemaInfo();
+        ClusterSearcher cluster1 = new ClusterSearcher(schemaInfo, Set.of("type1", "type2", "type3"));
         try {
-            ClusterSearcher type1 = new ClusterSearcher(new LinkedHashSet<>(List.of("type6")));
+            ClusterSearcher type1 = new ClusterSearcher(schemaInfo, Set.of("type6"));
             try {
-                assertEquals(new LinkedHashSet<>(List.of()), resolve(cluster1, "&sources=cluster2"));
+                assertEquals(Set.of(), resolve(cluster1, "&sources=cluster2"));
             } finally {
                 type1.deconstruct();
             }
@@ -265,7 +268,8 @@ public class ClusterSearcherTestCase {
 
     private Execution createExecution(List<String> docTypesList, boolean expectAttributePrefetch) {
         Set<String> documentTypes = new LinkedHashSet<>(docTypesList);
-        ClusterSearcher cluster = new ClusterSearcher(documentTypes,
+        ClusterSearcher cluster = new ClusterSearcher(toSchemaInfo(documentTypes, "mycluster"),
+                                                      documentTypes,
                                                       new MyMockSearcher(expectAttributePrefetch),
                                                       new InThreadExecutorService());
         try {
author	Henning Baldersheim <balder@yahoo-inc.com>	2024-02-13 18:33:59 +0100
committer	GitHub <noreply@github.com>	2024-02-13 18:33:59 +0100
commit	a2f05742236b0873bb6991b0134d6991d0b5d4b2 (patch)
tree	4c3f264e87506910a72366d39834847da1c3b550
parent	df26263159179ffd110041076f25ab561115091c (diff)
parent	35a86286c434861c66d53bdea93eac0e82b0734f (diff)