diff options
24 files changed, 374 insertions, 333 deletions
diff --git a/config-model/src/main/java/com/yahoo/schema/derived/AttributeFields.java b/config-model/src/main/java/com/yahoo/schema/derived/AttributeFields.java index 57e5097556a..e747235dc3c 100644 --- a/config-model/src/main/java/com/yahoo/schema/derived/AttributeFields.java +++ b/config-model/src/main/java/com/yahoo/schema/derived/AttributeFields.java @@ -16,7 +16,6 @@ import java.util.Collection; import java.util.Collections; import java.util.Iterator; import java.util.Map; -import java.util.stream.Collectors; import static com.yahoo.schema.document.ComplexAttributeFieldUtils.isArrayOfSimpleStruct; import static com.yahoo.schema.document.ComplexAttributeFieldUtils.isMapOfPrimitiveType; diff --git a/config-model/src/main/java/com/yahoo/vespa/model/admin/LogserverContainerCluster.java b/config-model/src/main/java/com/yahoo/vespa/model/admin/LogserverContainerCluster.java index 78dd6213e21..654c81f0519 100644 --- a/config-model/src/main/java/com/yahoo/vespa/model/admin/LogserverContainerCluster.java +++ b/config-model/src/main/java/com/yahoo/vespa/model/admin/LogserverContainerCluster.java @@ -6,10 +6,15 @@ import com.yahoo.config.model.producer.TreeConfigProducer; import com.yahoo.config.provision.ClusterSpec; import com.yahoo.search.config.QrStartConfig; import com.yahoo.vespa.model.container.ContainerCluster; +import com.yahoo.vespa.model.container.PlatformBundles; import com.yahoo.vespa.model.container.component.Handler; import com.yahoo.vespa.model.container.component.SystemBindingPattern; +import java.nio.file.Path; import java.util.Optional; +import java.util.Set; +import java.util.stream.Collectors; +import java.util.stream.Stream; /** * @author hmusum @@ -40,4 +45,11 @@ public class LogserverContainerCluster extends ContainerCluster<LogserverContain addComponent(logHandler); } + @Override + protected Set<Path> unnecessaryPlatformBundles() { + return Stream.concat(PlatformBundles.VESPA_SECURITY_BUNDLES.stream(), + PlatformBundles.VESPA_ZK_BUNDLES.stream()) + .collect(Collectors.toSet()); + } + } diff --git a/config-model/src/main/java/com/yahoo/vespa/model/container/PlatformBundles.java b/config-model/src/main/java/com/yahoo/vespa/model/container/PlatformBundles.java index 0f09036b544..13e6ee6684d 100644 --- a/config-model/src/main/java/com/yahoo/vespa/model/container/PlatformBundles.java +++ b/config-model/src/main/java/com/yahoo/vespa/model/container/PlatformBundles.java @@ -59,6 +59,7 @@ public class PlatformBundles { SEARCH_AND_DOCPROC_BUNDLE, "docprocs", LINGUISTICS_BUNDLE_NAME, + "lucene-linguistics", EVALUATION_BUNDLE_NAME, INTEGRATION_BUNDLE_NAME, ONNXRUNTIME_BUNDLE_NAME diff --git a/config-model/src/main/java/com/yahoo/vespa/model/container/search/searchchain/LocalProvider.java b/config-model/src/main/java/com/yahoo/vespa/model/container/search/searchchain/LocalProvider.java index 5cf3ce1d306..dfef6eac3b3 100644 --- a/config-model/src/main/java/com/yahoo/vespa/model/container/search/searchchain/LocalProvider.java +++ b/config-model/src/main/java/com/yahoo/vespa/model/container/search/searchchain/LocalProvider.java @@ -72,8 +72,7 @@ public class LocalProvider extends Provider implements @Override public ChainSpecification getChainSpecification() { - ChainSpecification spec = - super.getChainSpecification(); + ChainSpecification spec = super.getChainSpecification(); return new ChainSpecification(spec.componentId, spec.inheritance, spec.phases(), disableStemmingIfStreaming(spec.componentReferences)); } @@ -84,10 +83,9 @@ public class LocalProvider extends Provider implements return searcherReferences; } else { Set<ComponentSpecification> filteredSearcherReferences = new LinkedHashSet<>(searcherReferences); - filteredSearcherReferences.remove( - toGlobalComponentId( - new ComponentId("com.yahoo.prelude.querytransform.StemmingSearcher")). - toSpecification()); + filteredSearcherReferences + .remove(toGlobalComponentId(new ComponentId("com.yahoo.prelude.querytransform.StemmingSearcher")) + .toSpecification()); return filteredSearcherReferences; } } diff --git a/config-model/src/main/java/com/yahoo/vespa/model/content/ContentSearchCluster.java b/config-model/src/main/java/com/yahoo/vespa/model/content/ContentSearchCluster.java index fc5b5c25e6d..cd3103d20ab 100644 --- a/config-model/src/main/java/com/yahoo/vespa/model/content/ContentSearchCluster.java +++ b/config-model/src/main/java/com/yahoo/vespa/model/content/ContentSearchCluster.java @@ -398,9 +398,8 @@ public class ContentSearchCluster extends TreeConfigProducer<AnyConfigProducer> if (hasIndexingModeStreaming(type)) { hasAnyNonIndexedCluster = true; - ddbB.inputdoctypename(type.getFullName().getName()) - .configid(findStreamingCluster(docTypeName).get().getDocumentDBConfigId()) - .mode(ProtonConfig.Documentdb.Mode.Enum.STREAMING); + findStreamingCluster(docTypeName).get().fillDocumentDBConfig(type.getFullName().getName(), ddbB); + ddbB.mode(ProtonConfig.Documentdb.Mode.Enum.STREAMING); } else if (hasIndexingModeIndexed(type)) { getIndexed().fillDocumentDBConfig(type.getFullName().getName(), ddbB); } else { diff --git a/config-model/src/main/java/com/yahoo/vespa/model/search/DocumentDatabase.java b/config-model/src/main/java/com/yahoo/vespa/model/search/DocumentDatabase.java index 1494eae7426..32ac5d0f4ed 100644 --- a/config-model/src/main/java/com/yahoo/vespa/model/search/DocumentDatabase.java +++ b/config-model/src/main/java/com/yahoo/vespa/model/search/DocumentDatabase.java @@ -15,6 +15,8 @@ import com.yahoo.vespa.config.search.core.OnnxModelsConfig; import com.yahoo.vespa.config.search.core.RankingConstantsConfig; import com.yahoo.vespa.config.search.core.RankingExpressionsConfig; import com.yahoo.vespa.config.search.summary.JuniperrcConfig; +import com.yahoo.vespa.config.search.vsm.VsmfieldsConfig; +import com.yahoo.vespa.config.search.vsm.VsmsummaryConfig; import com.yahoo.vespa.configdefinition.IlscriptsConfig; /** @@ -34,7 +36,10 @@ public class DocumentDatabase extends AnyConfigProducer implements JuniperrcConfig.Producer, SummaryConfig.Producer, ImportedFieldsConfig.Producer, - SchemaInfoConfig.Producer { + SchemaInfoConfig.Producer, + VsmsummaryConfig.Producer, + VsmfieldsConfig.Producer +{ private final String schemaName; private final DerivedConfiguration derivedCfg; @@ -56,41 +61,19 @@ public class DocumentDatabase extends AnyConfigProducer implements public DerivedConfiguration getDerivedConfiguration() { return derivedCfg; } - - @Override - public void getConfig(IndexInfoConfig.Builder builder) { derivedCfg.getIndexInfo().getConfig(builder); } - - @Override - public void getConfig(IlscriptsConfig.Builder builder) { derivedCfg.getIndexingScript().getConfig(builder); } - - @Override - public void getConfig(AttributesConfig.Builder builder) { derivedCfg.getConfig(builder); } - - @Override - public void getConfig(RankProfilesConfig.Builder builder) { derivedCfg.getRankProfileList().getConfig(builder); } - - @Override - public void getConfig(RankingExpressionsConfig.Builder builder) { derivedCfg.getRankProfileList().getConfig(builder); } - - @Override - public void getConfig(RankingConstantsConfig.Builder builder) { derivedCfg.getRankProfileList().getConfig(builder); } - - @Override - public void getConfig(OnnxModelsConfig.Builder builder) { derivedCfg.getRankProfileList().getConfig(builder); } - - @Override - public void getConfig(IndexschemaConfig.Builder builder) { derivedCfg.getIndexSchema().getConfig(builder); } - - @Override - public void getConfig(JuniperrcConfig.Builder builder) { derivedCfg.getJuniperrc().getConfig(builder); } - - @Override - public void getConfig(SummaryConfig.Builder builder) { derivedCfg.getSummaries().getConfig(builder); } - - @Override - public void getConfig(ImportedFieldsConfig.Builder builder) { derivedCfg.getImportedFields().getConfig(builder); } - - @Override - public void getConfig(SchemaInfoConfig.Builder builder) { derivedCfg.getSchemaInfo().getConfig(builder); } + @Override public void getConfig(IndexInfoConfig.Builder builder) { derivedCfg.getIndexInfo().getConfig(builder); } + @Override public void getConfig(IlscriptsConfig.Builder builder) { derivedCfg.getIndexingScript().getConfig(builder); } + @Override public void getConfig(AttributesConfig.Builder builder) { derivedCfg.getConfig(builder); } + @Override public void getConfig(RankProfilesConfig.Builder builder) { derivedCfg.getRankProfileList().getConfig(builder); } + @Override public void getConfig(RankingExpressionsConfig.Builder builder) { derivedCfg.getRankProfileList().getConfig(builder); } + @Override public void getConfig(RankingConstantsConfig.Builder builder) { derivedCfg.getRankProfileList().getConfig(builder); } + @Override public void getConfig(OnnxModelsConfig.Builder builder) { derivedCfg.getRankProfileList().getConfig(builder); } + @Override public void getConfig(IndexschemaConfig.Builder builder) { derivedCfg.getIndexSchema().getConfig(builder); } + @Override public void getConfig(JuniperrcConfig.Builder builder) { derivedCfg.getJuniperrc().getConfig(builder); } + @Override public void getConfig(SummaryConfig.Builder builder) { derivedCfg.getSummaries().getConfig(builder); } + @Override public void getConfig(ImportedFieldsConfig.Builder builder) { derivedCfg.getImportedFields().getConfig(builder); } + @Override public void getConfig(SchemaInfoConfig.Builder builder) { derivedCfg.getSchemaInfo().getConfig(builder); } + @Override public void getConfig(VsmsummaryConfig.Builder builder) { derivedCfg.getVsmSummary().getConfig(builder); } + @Override public void getConfig(VsmfieldsConfig.Builder builder) { derivedCfg.getVsmFields().getConfig(builder); } } diff --git a/config-model/src/main/java/com/yahoo/vespa/model/search/IndexedSearchCluster.java b/config-model/src/main/java/com/yahoo/vespa/model/search/IndexedSearchCluster.java index e20c294d135..b51185ddac2 100644 --- a/config-model/src/main/java/com/yahoo/vespa/model/search/IndexedSearchCluster.java +++ b/config-model/src/main/java/com/yahoo/vespa/model/search/IndexedSearchCluster.java @@ -1,31 +1,22 @@ // Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. package com.yahoo.vespa.model.search; -import com.yahoo.config.ConfigInstance; import com.yahoo.config.model.api.ModelContext; import com.yahoo.config.model.deploy.DeployState; import com.yahoo.config.model.producer.AnyConfigProducer; import com.yahoo.config.model.producer.TreeConfigProducer; -import com.yahoo.prelude.fastsearch.DocumentdbInfoConfig; import com.yahoo.schema.DocumentOnlySchema; import com.yahoo.schema.derived.DerivedConfiguration; import com.yahoo.schema.derived.SchemaInfo; -import com.yahoo.search.config.IndexInfoConfig; -import com.yahoo.search.config.SchemaInfoConfig; -import com.yahoo.vespa.config.search.AttributesConfig; import com.yahoo.vespa.config.search.DispatchConfig; import com.yahoo.vespa.config.search.DispatchConfig.DistributionPolicy; import com.yahoo.vespa.config.search.DispatchNodesConfig; -import com.yahoo.vespa.config.search.RankProfilesConfig; -import com.yahoo.vespa.config.search.core.ProtonConfig; -import com.yahoo.vespa.configdefinition.IlscriptsConfig; import com.yahoo.vespa.model.content.DispatchTuning; import com.yahoo.vespa.model.content.Redundancy; import com.yahoo.vespa.model.content.SearchCoverage; import java.util.ArrayList; import java.util.Collections; -import java.util.LinkedList; import java.util.List; /** @@ -35,12 +26,9 @@ public class IndexedSearchCluster extends SearchCluster implements DispatchConfig.Producer, DispatchNodesConfig.Producer { - private Tuning tuning; private SearchCoverage searchCoverage; - private final List<DocumentDatabase> documentDbs = new LinkedList<>(); - private final Redundancy.Provider redundancyProvider; private final List<SearchNode> searchNodes = new ArrayList<>(); @@ -72,80 +60,20 @@ public class IndexedSearchCluster extends SearchCluster implements } public Tuning getTuning() { return tuning; } - public void fillDocumentDBConfig(String documentType, ProtonConfig.Documentdb.Builder builder) { - for (DocumentDatabase sdoc : documentDbs) { - if (sdoc.getName().equals(documentType)) { - fillDocumentDBConfig(sdoc, builder); - return; - } - } - } - - private void fillDocumentDBConfig(DocumentDatabase sdoc, ProtonConfig.Documentdb.Builder ddbB) { - ddbB.inputdoctypename(sdoc.getSchemaName()) - .configid(sdoc.getConfigId()); - } - @Override public void deriveFromSchemas(DeployState deployState) { for (SchemaInfo spec : schemas().values()) { if (spec.fullSchema() instanceof DocumentOnlySchema) continue; - DocumentDatabase db = new DocumentDatabase(this, spec.fullSchema().getName(), - new DerivedConfiguration(spec.fullSchema(), deployState, false)); - documentDbs.add(db); + var db = new DocumentDatabase(this, spec.fullSchema().getName(), + new DerivedConfiguration(spec.fullSchema(), deployState, false)); + add(db); } } - @Override - public List<DocumentDatabase> getDocumentDbs() { - return documentDbs; - } - - public boolean hasDocumentDB(String name) { - for (DocumentDatabase db : documentDbs) { - if (db.getName().equals(name)) { - return true; - } - } - return false; - } - public void setSearchCoverage(SearchCoverage searchCoverage) { this.searchCoverage = searchCoverage; } - @Override - public void getConfig(DocumentdbInfoConfig.Builder builder) { - for (DocumentDatabase db : documentDbs) { - DocumentdbInfoConfig.Documentdb.Builder docDb = new DocumentdbInfoConfig.Documentdb.Builder(); - docDb.name(db.getName()); - builder.documentdb(docDb); - } - } - - @Override - public void getConfig(IndexInfoConfig.Builder builder) { - new Join(documentDbs).getConfig(builder); - } - - @Override - public void getConfig(SchemaInfoConfig.Builder builder) { - new Join(documentDbs).getConfig(builder); - } - - @Override - public void getConfig(IlscriptsConfig.Builder builder) { - new Join(documentDbs).getConfig(builder); - } - - public void getConfig(AttributesConfig.Builder builder) { - new Join(documentDbs).getConfig(builder); - } - - public void getConfig(RankProfilesConfig.Builder builder) { - new Join(documentDbs).getConfig(builder); - } - private static DistributionPolicy.Enum toDistributionPolicy(DispatchTuning.DispatchPolicy tuning) { return switch (tuning) { case ADAPTIVE: yield DistributionPolicy.ADAPTIVE; @@ -207,44 +135,4 @@ public class IndexedSearchCluster extends SearchCluster implements return "Indexing cluster '" + getClusterName() + "'"; } - /** - * Class used to retrieve combined configuration from multiple document databases. - * It is not a direct {@link ConfigInstance.Producer} of those configs, - * that is handled (by delegating to this) by the {@link IndexedSearchCluster} - * which is the parent to this. This avoids building the config multiple times. - */ - private record Join(List<DocumentDatabase> docDbs) { - - public void getConfig(IndexInfoConfig.Builder builder) { - for (DocumentDatabase docDb : docDbs) { - docDb.getConfig(builder); - } - } - - public void getConfig(SchemaInfoConfig.Builder builder) { - for (DocumentDatabase docDb : docDbs) { - docDb.getConfig(builder); - } - } - - public void getConfig(IlscriptsConfig.Builder builder) { - for (DocumentDatabase docDb : docDbs) { - docDb.getConfig(builder); - } - } - - public void getConfig(AttributesConfig.Builder builder) { - for (DocumentDatabase docDb : docDbs) { - docDb.getConfig(builder); - } - } - - public void getConfig(RankProfilesConfig.Builder builder) { - for (DocumentDatabase docDb : docDbs) { - docDb.getConfig(builder); - } - } - - } - } diff --git a/config-model/src/main/java/com/yahoo/vespa/model/search/SearchCluster.java b/config-model/src/main/java/com/yahoo/vespa/model/search/SearchCluster.java index 2ab11e31f59..732b4ba0637 100644 --- a/config-model/src/main/java/com/yahoo/vespa/model/search/SearchCluster.java +++ b/config-model/src/main/java/com/yahoo/vespa/model/search/SearchCluster.java @@ -1,6 +1,7 @@ // Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. package com.yahoo.vespa.model.search; +import com.yahoo.config.ConfigInstance; import com.yahoo.config.model.deploy.DeployState; import com.yahoo.search.config.SchemaInfoConfig; import com.yahoo.schema.derived.SchemaInfo; @@ -8,12 +9,14 @@ import com.yahoo.vespa.config.search.AttributesConfig; import com.yahoo.vespa.config.search.RankProfilesConfig; import com.yahoo.prelude.fastsearch.DocumentdbInfoConfig; import com.yahoo.search.config.IndexInfoConfig; +import com.yahoo.vespa.config.search.core.ProtonConfig; import com.yahoo.vespa.configdefinition.IlscriptsConfig; import com.yahoo.config.model.producer.AnyConfigProducer; import com.yahoo.config.model.producer.TreeConfigProducer; import java.util.Collections; import java.util.LinkedHashMap; +import java.util.LinkedList; import java.util.List; import java.util.Map; @@ -22,8 +25,7 @@ import java.util.Map; * * @author arnej27959 */ -public abstract class SearchCluster extends TreeConfigProducer<AnyConfigProducer> - implements +public abstract class SearchCluster extends TreeConfigProducer<AnyConfigProducer> implements DocumentdbInfoConfig.Producer, IndexInfoConfig.Producer, IlscriptsConfig.Producer, @@ -34,6 +36,7 @@ public abstract class SearchCluster extends TreeConfigProducer<AnyConfigProducer private Double queryTimeout; private Double visibilityDelay = 0.0; private final Map<String, SchemaInfo> schemas = new LinkedHashMap<>(); + private final List<DocumentDatabase> documentDbs = new LinkedList<>(); public SearchCluster(TreeConfigProducer<?> parent, String clusterName, int index) { super(parent, "cluster." + clusterName); @@ -44,6 +47,18 @@ public abstract class SearchCluster extends TreeConfigProducer<AnyConfigProducer public void add(SchemaInfo schema) { schemas.put(schema.name(), schema); } + public void add(DocumentDatabase db) { + documentDbs.add(db); + } + + public boolean hasDocumentDB(String name) { + for (DocumentDatabase db : documentDbs) { + if (db.getName().equals(name)) { + return true; + } + } + return false; + } /** Returns the schemas that should be active in this cluster. Note: These are added during processing. */ public Map<String, SchemaInfo> schemas() { return Collections.unmodifiableMap(schemas); } @@ -56,7 +71,9 @@ public abstract class SearchCluster extends TreeConfigProducer<AnyConfigProducer public abstract void deriveFromSchemas(DeployState deployState); /** Returns the document databases contained in this cluster */ - public abstract List<DocumentDatabase> getDocumentDbs(); + public List<DocumentDatabase> getDocumentDbs() { + return Collections.unmodifiableList(documentDbs); + } public String getClusterName() { return clusterName; } public final String getIndexingModeName() { return getIndexingMode().getName(); } @@ -71,9 +88,50 @@ public abstract class SearchCluster extends TreeConfigProducer<AnyConfigProducer public final void setClusterIndex(int index) { this.index = index; } public final int getClusterIndex() { return index; } - public abstract void getConfig(AttributesConfig.Builder builder); + public void fillDocumentDBConfig(String documentType, ProtonConfig.Documentdb.Builder builder) { + for (DocumentDatabase sdoc : documentDbs) { + if (sdoc.getName().equals(documentType)) { + fillDocumentDBConfig(sdoc, builder); + return; + } + } + } + + protected void fillDocumentDBConfig(DocumentDatabase sdoc, ProtonConfig.Documentdb.Builder ddbB) { + ddbB.inputdoctypename(sdoc.getSchemaName()) + .configid(sdoc.getConfigId()); + } + + @Override + public void getConfig(DocumentdbInfoConfig.Builder builder) { + for (DocumentDatabase db : documentDbs) { + DocumentdbInfoConfig.Documentdb.Builder docDb = new DocumentdbInfoConfig.Documentdb.Builder(); + docDb.name(db.getName()); + builder.documentdb(docDb); + } + } + @Override + public void getConfig(IndexInfoConfig.Builder builder) { + new Join(documentDbs).getConfig(builder); + } - public abstract void getConfig(RankProfilesConfig.Builder builder); + @Override + public void getConfig(SchemaInfoConfig.Builder builder) { + new Join(documentDbs).getConfig(builder); + } + + @Override + public void getConfig(IlscriptsConfig.Builder builder) { + new Join(documentDbs).getConfig(builder); + } + + public void getConfig(AttributesConfig.Builder builder) { + new Join(documentDbs).getConfig(builder); + } + + public void getConfig(RankProfilesConfig.Builder builder) { + new Join(documentDbs).getConfig(builder); + } @Override public String toString() { return "search-capable cluster '" + clusterName + "'"; } @@ -96,4 +154,44 @@ public abstract class SearchCluster extends TreeConfigProducer<AnyConfigProducer } } + /** + * Class used to retrieve combined configuration from multiple document databases. + * It is not a direct {@link ConfigInstance.Producer} of those configs, + * that is handled (by delegating to this) by the {@link IndexedSearchCluster} + * which is the parent to this. This avoids building the config multiple times. + */ + private record Join(List<DocumentDatabase> docDbs) { + + public void getConfig(IndexInfoConfig.Builder builder) { + for (DocumentDatabase docDb : docDbs) { + docDb.getConfig(builder); + } + } + + public void getConfig(SchemaInfoConfig.Builder builder) { + for (DocumentDatabase docDb : docDbs) { + docDb.getConfig(builder); + } + } + + public void getConfig(IlscriptsConfig.Builder builder) { + for (DocumentDatabase docDb : docDbs) { + docDb.getConfig(builder); + } + } + + public void getConfig(AttributesConfig.Builder builder) { + for (DocumentDatabase docDb : docDbs) { + docDb.getConfig(builder); + } + } + + public void getConfig(RankProfilesConfig.Builder builder) { + for (DocumentDatabase docDb : docDbs) { + docDb.getConfig(builder); + } + } + + } + } diff --git a/config-model/src/main/java/com/yahoo/vespa/model/search/StreamingSearchCluster.java b/config-model/src/main/java/com/yahoo/vespa/model/search/StreamingSearchCluster.java index 90f74af868b..b4e09c59079 100644 --- a/config-model/src/main/java/com/yahoo/vespa/model/search/StreamingSearchCluster.java +++ b/config-model/src/main/java/com/yahoo/vespa/model/search/StreamingSearchCluster.java @@ -4,23 +4,19 @@ package com.yahoo.vespa.model.search; import com.yahoo.config.model.deploy.DeployState; import com.yahoo.config.model.producer.AnyConfigProducer; import com.yahoo.config.model.producer.TreeConfigProducer; -import com.yahoo.prelude.fastsearch.DocumentdbInfoConfig; import com.yahoo.schema.Schema; import com.yahoo.schema.derived.AttributeFields; import com.yahoo.schema.derived.DerivedConfiguration; -import com.yahoo.search.config.IndexInfoConfig; -import com.yahoo.search.config.SchemaInfoConfig; import com.yahoo.vespa.config.search.AttributesConfig; import com.yahoo.vespa.config.search.RankProfilesConfig; import com.yahoo.vespa.config.search.SummaryConfig; import com.yahoo.vespa.config.search.core.OnnxModelsConfig; +import com.yahoo.vespa.config.search.core.ProtonConfig; import com.yahoo.vespa.config.search.core.RankingConstantsConfig; import com.yahoo.vespa.config.search.core.RankingExpressionsConfig; +import com.yahoo.vespa.config.search.summary.JuniperrcConfig; import com.yahoo.vespa.config.search.vsm.VsmfieldsConfig; import com.yahoo.vespa.config.search.vsm.VsmsummaryConfig; -import com.yahoo.vespa.configdefinition.IlscriptsConfig; - -import java.util.List; /** * A search cluster of type streaming. @@ -29,49 +25,35 @@ import java.util.List; * @author vegardh */ public class StreamingSearchCluster extends SearchCluster implements + AttributesConfig.Producer, RankProfilesConfig.Producer, RankingConstantsConfig.Producer, RankingExpressionsConfig.Producer, OnnxModelsConfig.Producer, + JuniperrcConfig.Producer, + SummaryConfig.Producer, VsmsummaryConfig.Producer, - VsmfieldsConfig.Producer, - SummaryConfig.Producer + VsmfieldsConfig.Producer { - private final String storageRouteSpec; private final AttributesProducer attributesConfig; private final String docTypeName; - private DerivedConfiguration derivedConfig = null; - private DocumentDatabase derivedDb = null; - - public StreamingSearchCluster(TreeConfigProducer<AnyConfigProducer> parent, - String clusterName, - int index, - String docTypeName, - String storageRouteSpec) { + + public StreamingSearchCluster(TreeConfigProducer<AnyConfigProducer> parent, String clusterName, int index, + String docTypeName, String storageRouteSpec) { super(parent, clusterName, index); attributesConfig = new AttributesProducer(parent, docTypeName); this.docTypeName = docTypeName; this.storageRouteSpec = storageRouteSpec; } - public final String getDocumentDBConfigId() { - return attributesConfig.getConfigId(); - } @Override protected IndexingMode getIndexingMode() { return IndexingMode.STREAMING; } - public final String getStorageRouteSpec() { return storageRouteSpec; } + public final String getStorageRouteSpec() { return storageRouteSpec; } public String getDocTypeName() { return docTypeName; } - public DerivedConfiguration derived() { return derivedConfig; } - - @Override - public void getConfig(DocumentdbInfoConfig.Builder builder) { - DocumentdbInfoConfig.Documentdb.Builder docDb = new DocumentdbInfoConfig.Documentdb.Builder(); - docDb.name(derivedConfig.getSchema().getName()); - builder.documentdb(docDb); - } + public DerivedConfiguration derived() { return db().getDerivedConfiguration(); } @Override public void deriveFromSchemas(DeployState deployState) { @@ -82,64 +64,51 @@ public class StreamingSearchCluster extends SearchCluster implements if ( ! schema.getName().equals(docTypeName)) throw new IllegalArgumentException("Document type name '" + docTypeName + "' must be the same as the schema name '" + schema.getName() + "'"); - this.derivedConfig = new DerivedConfiguration(schema, deployState, true); - this.derivedDb = new DocumentDatabase(this, docTypeName, this.derivedConfig); + add(new DocumentDatabase(this, docTypeName, new DerivedConfiguration(schema, deployState, true))); } - @Override - public List<DocumentDatabase> getDocumentDbs() { - if (derivedDb == null) { - throw new IllegalArgumentException("missing derivedConfig"); - } - return List.of(derivedDb); + protected void fillDocumentDBConfig(DocumentDatabase sdoc, ProtonConfig.Documentdb.Builder ddbB) { + super.fillDocumentDBConfig(sdoc, ddbB); + ddbB.configid(attributesConfig.getConfigId()); // Temporary until fully cleaned up } - @Override - public void getConfig(IndexInfoConfig.Builder builder) { - derivedConfig.getIndexInfo().getConfig(builder); + private DocumentDatabase db() { + return getDocumentDbs().get(0); } @Override - public void getConfig(SchemaInfoConfig.Builder builder) { - derivedConfig.getSchemaInfo().getConfig(builder); + public void getConfig(SummaryConfig.Builder builder) { + db().getConfig(builder); } @Override - public void getConfig(IlscriptsConfig.Builder builder) { - derivedConfig.getIndexingScript().getConfig(builder); - } - - public void getConfig(AttributesConfig.Builder builder) { - derivedConfig.getConfig(builder); + public void getConfig(OnnxModelsConfig.Builder builder) { + db().getConfig(builder); } @Override - public void getConfig(RankProfilesConfig.Builder builder) { - derivedConfig.getRankProfileList().getConfig(builder); + public void getConfig(RankingConstantsConfig.Builder builder) { + db().getConfig(builder); } @Override - public void getConfig(RankingConstantsConfig.Builder builder) { derivedConfig.getRankProfileList().getConfig(builder); } - - @Override - public void getConfig(RankingExpressionsConfig.Builder builder) { derivedConfig.getRankProfileList().getConfig(builder); } - - @Override - public void getConfig(OnnxModelsConfig.Builder builder) { derivedConfig.getRankProfileList().getConfig(builder); } + public void getConfig(RankingExpressionsConfig.Builder builder) { + db().getConfig(builder); + } @Override - public void getConfig(VsmsummaryConfig.Builder builder) { - derivedConfig.getVsmSummary().getConfig(builder); + public void getConfig(JuniperrcConfig.Builder builder) { + db().getConfig(builder); } - + @Override public void getConfig(VsmfieldsConfig.Builder builder) { - derivedConfig.getVsmFields().getConfig(builder); + db().getConfig(builder); } - + @Override - public void getConfig(SummaryConfig.Builder builder) { - derivedConfig.getSummaries().getConfig(builder); + public void getConfig(VsmsummaryConfig.Builder builder) { + db().getConfig(builder); } private class AttributesProducer extends AnyConfigProducer implements AttributesConfig.Producer { @@ -150,7 +119,7 @@ public class StreamingSearchCluster extends SearchCluster implements @Override public void getConfig(AttributesConfig.Builder builder) { - derivedConfig.getConfig(builder, AttributeFields.FieldSet.FAST_ACCESS); + derived().getConfig(builder, AttributeFields.FieldSet.FAST_ACCESS); } } diff --git a/container-search/src/main/java/com/yahoo/vespa/streamingvisitors/StreamingVisitor.java b/container-search/src/main/java/com/yahoo/vespa/streamingvisitors/StreamingVisitor.java index 7210807adec..658eea0b526 100644 --- a/container-search/src/main/java/com/yahoo/vespa/streamingvisitors/StreamingVisitor.java +++ b/container-search/src/main/java/com/yahoo/vespa/streamingvisitors/StreamingVisitor.java @@ -178,7 +178,7 @@ class StreamingVisitor extends VisitorDataHandler implements Visitor { } List<Grouping> groupingList = GroupingExecutor.getGroupingList(query); - if (groupingList.size() > 0){ + if ( ! groupingList.isEmpty()){ BufferSerializer gbuf = new BufferSerializer(new GrowableByteBuffer()); gbuf.putInt(null, groupingList.size()); for(Grouping g: groupingList){ diff --git a/searchcore/src/vespa/searchcore/proton/matching/sessionmanager.cpp b/searchcore/src/vespa/searchcore/proton/matching/sessionmanager.cpp index b9820e52ef9..367e9601fe5 100644 --- a/searchcore/src/vespa/searchcore/proton/matching/sessionmanager.cpp +++ b/searchcore/src/vespa/searchcore/proton/matching/sessionmanager.cpp @@ -23,7 +23,7 @@ protected: mutable std::mutex _lock; void entryDropped(const SessionId &id); - ~SessionCacheBase() {} + ~SessionCacheBase() = default; }; template <typename T> @@ -31,7 +31,7 @@ struct SessionCache : SessionCacheBase { using EntryUP = typename T::UP; vespalib::lrucache_map<vespalib::LruParam<SessionId, EntryUP> > _cache; - SessionCache(uint32_t max_size) : _cache(max_size) {} + explicit SessionCache(uint32_t max_size) : _cache(max_size) {} void insert(EntryUP session) { std::lock_guard<std::mutex> guard(_lock); @@ -115,7 +115,7 @@ struct SessionMap : SessionCacheBase { toDestruct.back().swap(session); } } - for (auto key : keys) { + for (const auto & key : keys) { _map.erase(key); _stats.numTimedout++; } diff --git a/searchlib/src/tests/query/streaming_query_test.cpp b/searchlib/src/tests/query/streaming_query_test.cpp index 5559e194c5e..2129cb6805a 100644 --- a/searchlib/src/tests/query/streaming_query_test.cpp +++ b/searchlib/src/tests/query/streaming_query_test.cpp @@ -380,6 +380,31 @@ TEST(StreamingQueryTest, onedot0e_is_rewritten_if_allowed_too) } } +TEST(StreamingQueryTest, negative_integer_is_rewritten_if_allowed_for_string_field) +{ + const char term[7] = {TERM_UNIQ, 3, 1, 'c', 2, '-', '5'}; + vespalib::stringref stackDump(term, sizeof(term)); + EXPECT_EQ(7u, stackDump.size()); + AllowRewrite empty("c"); + const Query q(empty, stackDump); + EXPECT_TRUE(q.valid()); + auto& root = q.getRoot(); + auto& equiv = dynamic_cast<const EquivQueryNode &>(root); + EXPECT_EQ(2u, equiv.get_terms().size()); + { + auto& qt = *equiv.get_terms()[0]; + EXPECT_EQ("c", qt.index()); + EXPECT_EQ(vespalib::stringref("-5"), qt.getTerm()); + EXPECT_EQ(3u, qt.uniqueId()); + } + { + auto& qt = *equiv.get_terms()[1]; + EXPECT_EQ("c", qt.index()); + EXPECT_EQ(vespalib::stringref("5"), qt.getTerm()); + EXPECT_EQ(0u, qt.uniqueId()); + } +} + TEST(StreamingQueryTest, test_get_query_parts) { QueryBuilder<SimpleQueryNodeTypes> builder; diff --git a/searchlib/src/tests/queryeval/iterator_benchmark/iterator_benchmark_test.cpp b/searchlib/src/tests/queryeval/iterator_benchmark/iterator_benchmark_test.cpp index 65fa55174d4..5134ca575ca 100644 --- a/searchlib/src/tests/queryeval/iterator_benchmark/iterator_benchmark_test.cpp +++ b/searchlib/src/tests/queryeval/iterator_benchmark/iterator_benchmark_test.cpp @@ -578,7 +578,7 @@ struct BenchmarkCase { {} vespalib::string to_string() const { return "op=" + ::to_string(query_op) + ", cfg=" + field_cfg.to_string() + - ", strict_context=" + ::to_string(strict_context) + ", force_strict=" + ::to_string(force_strict); + ", strict_context=" + ::to_string(strict_context) + (force_strict ? (", force_strict=" + ::to_string(force_strict)) : ""); } }; @@ -622,6 +622,7 @@ public: } } const std::vector<BenchmarkCaseSummary>& cases() const { return _cases; } + bool empty() const { return _cases.empty(); } }; void @@ -632,7 +633,6 @@ print_summary(const BenchmarkSummary& summary) std::cout << std::fixed << std::setprecision(3) << "" << std::setw(50) << std::left << c.bcase.to_string() << ": " << "ms_per_act_cost=" << std::setw(7) << std::right << c.result.ms_per_actual_cost_stats().to_string() - << ", ms_per_alt_cost=" << std::setw(7) << std::right << c.result.ms_per_alt_cost_stats().to_string() << ", scaled_cost=" << std::setw(7) << c.scaled_cost << std::endl; } } @@ -754,9 +754,8 @@ run_benchmark_case(const BenchmarkCaseSetup& setup) } void -run_benchmarks(const BenchmarkSetup& setup) +run_benchmarks(const BenchmarkSetup& setup, BenchmarkSummary& summary) { - BenchmarkSummary summary; for (const auto& field_cfg : setup.field_cfgs) { for (auto query_op : setup.query_ops) { for (bool strict : setup.strictness) { @@ -767,6 +766,13 @@ run_benchmarks(const BenchmarkSetup& setup) } } } +} + +void +run_benchmarks(const BenchmarkSetup& setup) +{ + BenchmarkSummary summary; + run_benchmarks(setup, summary); summary.calc_scaled_costs(); print_summary(summary); } @@ -788,7 +794,8 @@ make_index_config() } constexpr uint32_t num_docs = 10'000'000; -const std::vector<double> base_hit_ratios = {0.001, 0.01, 0.1, 0.5}; +const std::vector<double> base_hit_ratios = {0.0001, 0.001, 0.01, 0.1, 0.5, 1.0}; +const std::vector<double> filter_hit_ratios = {0.00001, 0.00005, 0.0001, 0.0005, 0.001, 0.005, 0.01, 0.05, 0.1, 0.2, 0.5, 1.0}; const auto int32 = make_attr_config(BasicType::INT32, CollectionType::SINGLE, false); const auto int32_fs = make_attr_config(BasicType::INT32, CollectionType::SINGLE, true); const auto int32_array = make_attr_config(BasicType::INT32, CollectionType::ARRAY, false); @@ -802,43 +809,42 @@ const auto str_array_fs = make_attr_config(BasicType::STRING, CollectionType::AR const auto str_wset = make_attr_config(BasicType::STRING, CollectionType::WSET, false); const auto str_index = make_index_config(); +BenchmarkSummary global_summary; + TEST(IteratorBenchmark, analyze_term_search_in_disk_index) { - const std::vector<double> hit_ratios = {0.001, 0.01, 0.1, 0.5, 1.0}; - BenchmarkSetup setup(num_docs, {str_index}, {QueryOperator::Term}, {true, false}, hit_ratios); - setup.filter_hit_ratios = {0.00001, 0.00005, 0.0001, 0.0005, 0.001, 0.005, 0.01, 0.05, 0.1, 0.2, 0.5, 1.0}; - run_benchmarks(setup); + BenchmarkSetup setup(num_docs, {str_index}, {QueryOperator::Term}, {true, false}, base_hit_ratios); + setup.filter_hit_ratios = filter_hit_ratios; + setup.filter_crossover_factor = 1.0; + run_benchmarks(setup, global_summary); } -TEST(IteratorBenchmark, analyze_term_search_in_attributes_without_fast_search) +TEST(IteratorBenchmark, analyze_term_search_in_attributes_non_strict) { std::vector<FieldConfig> field_cfgs = {int32, int32_array, int32_wset, str, str_array, str_wset}; - const std::vector<double> hit_ratios = {0.001, 0.01, 0.1, 0.5, 1.0}; - BenchmarkSetup setup(num_docs, field_cfgs, {QueryOperator::Term}, {true, false}, hit_ratios); + BenchmarkSetup setup(num_docs, field_cfgs, {QueryOperator::Term}, {false}, base_hit_ratios); setup.default_values_per_document = 1; - setup.filter_hit_ratios = {0.00001, 0.00005, 0.0001, 0.0005, 0.001, 0.005, 0.01, 0.05, 0.1, 0.2, 0.5, 1.0}; - run_benchmarks(setup); + setup.filter_hit_ratios = filter_hit_ratios; + setup.filter_crossover_factor = 1.0; + run_benchmarks(setup, global_summary); } -TEST(IteratorBenchmark, analyze_term_search_in_attributes_with_fast_search) +TEST(IteratorBenchmark, analyze_term_search_in_attributes_strict) { - std::vector<FieldConfig> field_cfgs = {int32_fs, int32_array_fs, str_fs, str_array_fs}; - const std::vector<double> hit_ratios = {0.001, 0.01, 0.1, 0.5, 1.0}; - BenchmarkSetup setup(num_docs, field_cfgs, {QueryOperator::Term}, {true, false}, hit_ratios); - setup.filter_hit_ratios = {0.00001, 0.00005, 0.0001, 0.0005, 0.001, 0.005, 0.01, 0.05, 0.1, 0.2, 0.5, 1.0}; - run_benchmarks(setup); + std::vector<FieldConfig> field_cfgs = {int32, int32_array, int32_wset, str, str_array, str_wset}; + // Note: This hit ratio matches the estimate of such attributes (0.5). + BenchmarkSetup setup(num_docs, field_cfgs, {QueryOperator::Term}, {true}, {0.5}); + setup.default_values_per_document = 1; + run_benchmarks(setup, global_summary); } -TEST(IteratorBenchmark, analyze_term_search_in_attributes_combined) +TEST(IteratorBenchmark, analyze_term_search_in_fast_search_attributes) { - // Note: all fast-search attributes has similar performance, so only needed to include one. - std::vector<FieldConfig> field_cfgs = {int32_fs, int32, int32_array, int32_wset, str, str_array, str_wset}; - const std::vector<double> hit_ratios = {0.001, 0.01, 0.1, 0.5, 1.0}; - BenchmarkSetup setup(num_docs, field_cfgs, {QueryOperator::Term}, {true, false}, hit_ratios); - setup.default_values_per_document = 1; - setup.filter_hit_ratios = {0.00001, 0.00005, 0.0001, 0.0005, 0.001, 0.005, 0.01, 0.05, 0.1, 0.2, 0.5, 1.0}; + std::vector<FieldConfig> field_cfgs = {int32_fs, int32_array_fs, str_fs, str_array_fs}; + BenchmarkSetup setup(num_docs, field_cfgs, {QueryOperator::Term}, {true, false}, base_hit_ratios); + setup.filter_hit_ratios = filter_hit_ratios; setup.filter_crossover_factor = 1.0; - run_benchmarks(setup); + run_benchmarks(setup, global_summary); } TEST(IteratorBenchmark, analyze_complex_leaf_operators) @@ -868,4 +874,12 @@ TEST(IteratorBenchmark, or_benchmark) run_benchmarks(setup); } -GTEST_MAIN_RUN_ALL_TESTS() +int main(int argc, char **argv) { + ::testing::InitGoogleTest(&argc, argv); + int res = RUN_ALL_TESTS(); + if (!global_summary.empty()) { + global_summary.calc_scaled_costs(); + print_summary(global_summary); + } + return res; +} diff --git a/searchlib/src/vespa/searchlib/attribute/attribute_blueprint_factory.cpp b/searchlib/src/vespa/searchlib/attribute/attribute_blueprint_factory.cpp index a5ca37906ba..2129ac40724 100644 --- a/searchlib/src/vespa/searchlib/attribute/attribute_blueprint_factory.cpp +++ b/searchlib/src/vespa/searchlib/attribute/attribute_blueprint_factory.cpp @@ -11,6 +11,7 @@ #include "multi_term_or_filter_search.h" #include "predicate_attribute.h" #include <vespa/eval/eval/value.h> +#include <vespa/searchcommon/attribute/config.h> #include <vespa/searchlib/common/location.h> #include <vespa/searchlib/common/locationiterators.h> #include <vespa/searchlib/query/query_term_decoder.h> @@ -46,10 +47,11 @@ LOG_SETUP(".searchlib.attribute.attribute_blueprint_factory"); using search::attribute::BasicType; -using search::attribute::SearchContextParams; using search::attribute::CollectionType; +using search::attribute::Config; using search::attribute::IAttributeVector; using search::attribute::ISearchContext; +using search::attribute::SearchContextParams; using search::fef::TermFieldMatchData; using search::fef::TermFieldMatchDataArray; using search::fef::TermFieldMatchDataPosition; @@ -87,6 +89,10 @@ using search::queryeval::SearchIterator; using search::queryeval::Searchable; using search::queryeval::SimpleLeafBlueprint; using search::queryeval::WeightedSetTermBlueprint; +using search::queryeval::flow::btree_cost; +using search::queryeval::flow::btree_strict_cost; +using search::queryeval::flow::lookup_cost; +using search::queryeval::flow::lookup_strict_cost; using search::tensor::DenseTensorAttribute; using search::tensor::ITensorAttribute; using vespalib::Issue; @@ -115,6 +121,19 @@ private: }; //----------------------------------------------------------------------------- +size_t +get_num_indirections(const BasicType& basic_type, const CollectionType& col_type) +{ + size_t res = 0; + if (basic_type == BasicType::STRING) { + res += 1; + } + if (col_type != CollectionType::SINGLE) { + res += 1; + } + return res; +} + /** * Blueprint for creating regular, stack-based attribute iterators. **/ @@ -141,11 +160,12 @@ public: if (_hit_estimate.is_unknown()) { // E.g. attributes without fast-search are not able to provide a hit estimate. // In this case we just assume matching half of the document corpus. - // In addition, we are not able to skip documents efficiently when being strict. - return {0.5, 1.0, 1.0}; + // In addition, matching is lookup based, and we are not able to skip documents efficiently when being strict. + size_t indirections = get_num_indirections(_attr.getBasicType(), _attr.getCollectionType()); + return {0.5, lookup_cost(indirections), lookup_strict_cost(indirections)}; } else { double rel_est = abs_to_rel_est(_hit_estimate.est_hits(), docid_limit); - return {rel_est, 1.0, rel_est}; + return {rel_est, btree_cost(), btree_strict_cost(rel_est)}; } } @@ -480,9 +500,12 @@ public: double estimate(const IDirectPostingStore::LookupResult &term) const noexcept { return abs_to_rel_est(term.posting_size, docid_limit); } - double cost(const IDirectPostingStore::LookupResult &) const noexcept { return 1.0; } + double cost(const IDirectPostingStore::LookupResult &) const noexcept { + return btree_cost(); + } double strict_cost(const IDirectPostingStore::LookupResult &term) const noexcept { - return abs_to_rel_est(term.posting_size, docid_limit); + double rel_est = abs_to_rel_est(term.posting_size, docid_limit); + return btree_strict_cost(rel_est); } }; double child_est = OrFlow::estimate_of(MyAdapter(docid_limit), _terms); diff --git a/searchlib/src/vespa/searchlib/attribute/direct_multi_term_blueprint.h b/searchlib/src/vespa/searchlib/attribute/direct_multi_term_blueprint.h index 076c375091a..5aafe4af72b 100644 --- a/searchlib/src/vespa/searchlib/attribute/direct_multi_term_blueprint.h +++ b/searchlib/src/vespa/searchlib/attribute/direct_multi_term_blueprint.h @@ -80,9 +80,12 @@ public: double estimate(const IDirectPostingStore::LookupResult &term) const noexcept { return abs_to_rel_est(term.posting_size, docid_limit); } - double cost(const IDirectPostingStore::LookupResult &) const noexcept { return 1.0; } + double cost(const IDirectPostingStore::LookupResult &) const noexcept { + return search::queryeval::flow::btree_cost(); + } double strict_cost(const IDirectPostingStore::LookupResult &term) const noexcept { - return abs_to_rel_est(term.posting_size, docid_limit); + double rel_est = abs_to_rel_est(term.posting_size, docid_limit); + return search::queryeval::flow::btree_strict_cost(rel_est); } }; double est = OrFlow::estimate_of(MyAdapter(docid_limit), _terms); diff --git a/searchlib/src/vespa/searchlib/diskindex/disktermblueprint.cpp b/searchlib/src/vespa/searchlib/diskindex/disktermblueprint.cpp index 97c8dd391ba..99be653a398 100644 --- a/searchlib/src/vespa/searchlib/diskindex/disktermblueprint.cpp +++ b/searchlib/src/vespa/searchlib/diskindex/disktermblueprint.cpp @@ -4,6 +4,7 @@ #include <vespa/searchlib/common/bitvectoriterator.h> #include <vespa/searchlib/queryeval/booleanmatchiteratorwrapper.h> #include <vespa/searchlib/queryeval/filter_wrapper.h> +#include <vespa/searchlib/queryeval/flow_tuning.h> #include <vespa/searchlib/queryeval/intermediate_blueprints.h> #include <vespa/vespalib/objects/visit.h> #include <vespa/vespalib/util/stringfmt.h> @@ -14,12 +15,14 @@ LOG_SETUP(".diskindex.disktermblueprint"); using search::BitVectorIterator; using search::fef::TermFieldMatchDataArray; using search::index::Schema; +using search::queryeval::Blueprint; using search::queryeval::BooleanMatchIteratorWrapper; using search::queryeval::FieldSpec; using search::queryeval::FieldSpecBaseList; -using search::queryeval::SearchIterator; using search::queryeval::LeafBlueprint; -using search::queryeval::Blueprint; +using search::queryeval::SearchIterator; +using search::queryeval::flow::disk_index_cost; +using search::queryeval::flow::disk_index_strict_cost; namespace search::diskindex { @@ -68,7 +71,8 @@ DiskTermBlueprint::fetchPostings(const queryeval::ExecuteInfo &execInfo) queryeval::FlowStats DiskTermBlueprint::calculate_flow_stats(uint32_t docid_limit) const { - return default_flow_stats(docid_limit, _lookupRes->counts._numDocs, 0); + double rel_est = abs_to_rel_est(_lookupRes->counts._numDocs, docid_limit); + return {rel_est, disk_index_cost(), disk_index_strict_cost(rel_est)}; } SearchIterator::UP diff --git a/searchlib/src/vespa/searchlib/memoryindex/field_index.cpp b/searchlib/src/vespa/searchlib/memoryindex/field_index.cpp index db3a0019d94..e2bb5e76751 100644 --- a/searchlib/src/vespa/searchlib/memoryindex/field_index.cpp +++ b/searchlib/src/vespa/searchlib/memoryindex/field_index.cpp @@ -4,9 +4,10 @@ #include "ordered_field_index_inserter.h" #include "posting_iterator.h" #include <vespa/searchlib/bitcompression/posocccompression.h> -#include <vespa/searchlib/queryeval/booleanmatchiteratorwrapper.h> #include <vespa/searchlib/queryeval/blueprint.h> +#include <vespa/searchlib/queryeval/booleanmatchiteratorwrapper.h> #include <vespa/searchlib/queryeval/filter_wrapper.h> +#include <vespa/searchlib/queryeval/flow_tuning.h> #include <vespa/searchlib/queryeval/searchiterator.h> #include <vespa/vespalib/btree/btree.hpp> #include <vespa/vespalib/btree/btreeiterator.hpp> @@ -30,6 +31,8 @@ using search::queryeval::BooleanMatchIteratorWrapper; using search::queryeval::FieldSpecBase; using search::queryeval::SearchIterator; using search::queryeval::SimpleLeafBlueprint; +using search::queryeval::flow::btree_cost; +using search::queryeval::flow::btree_strict_cost; using vespalib::GenerationHandler; namespace search::memoryindex { @@ -257,7 +260,8 @@ public: } queryeval::FlowStats calculate_flow_stats(uint32_t docid_limit) const override { - return default_flow_stats(docid_limit, _posting_itr.size(), 0); + double rel_est = abs_to_rel_est(_posting_itr.size(), docid_limit); + return {rel_est, btree_cost(), btree_strict_cost(rel_est)}; } SearchIterator::UP createLeafSearch(const TermFieldMatchDataArray& tfmda, bool) const override { diff --git a/searchlib/src/vespa/searchlib/query/streaming/querynode.cpp b/searchlib/src/vespa/searchlib/query/streaming/querynode.cpp index 16406bffd3d..55301132a18 100644 --- a/searchlib/src/vespa/searchlib/query/streaming/querynode.cpp +++ b/searchlib/src/vespa/searchlib/query/streaming/querynode.cpp @@ -14,10 +14,13 @@ #include <vespa/searchlib/query/streaming/wand_term.h> #include <vespa/searchlib/query/streaming/weighted_set_term.h> #include <vespa/searchlib/query/tree/term_vector.h> +#include <vespa/searchlib/queryeval/split_float.h> #include <charconv> #include <vespa/log/log.h> LOG_SETUP(".vsm.querynode"); +using search::queryeval::SplitFloat; + namespace search::streaming { namespace { @@ -29,7 +32,7 @@ bool disableRewrite(const QueryNode * qn) { } bool possibleFloat(const QueryTerm & qt, const QueryTerm::string & term) { - return !qt.encoding().isBase10Integer() && qt.encoding().isFloat() && (term.find('.') != QueryTerm::string::npos); + return qt.encoding().isFloat() && ((term.find('.') != QueryTerm::string::npos) || (term.find('-') != QueryTerm::string::npos)); } } @@ -139,14 +142,32 @@ QueryNode::Build(const QueryNode * parent, const QueryNodeResultFactory & factor qt->setUniqueId(queryRep.getUniqueId()); qt->setRanked( ! queryRep.hasNoRankFlag()); if (allowRewrite && possibleFloat(*qt, ssTerm) && factory.allow_float_terms_rewrite(ssIndex)) { - auto phrase = std::make_unique<PhraseQueryNode>(factory.create(), ssIndex, arity); - auto dotPos = ssTerm.find('.'); - phrase->add_term(std::make_unique<QueryTerm>(factory.create(), ssTerm.substr(0, dotPos), ssIndex, TermType::WORD, normalize_mode)); - phrase->add_term(std::make_unique<QueryTerm>(factory.create(), ssTerm.substr(dotPos + 1), ssIndex, TermType::WORD, normalize_mode)); - auto eqn = std::make_unique<EquivQueryNode>(factory.create(), 2); - eqn->add_term(std::move(qt)); - eqn->add_term(std::move(phrase)); - qn = std::move(eqn); + /* + * Tokenize number term and make add alternative + * phrase or term when searching for numbers in string + * fields. See + * CreateBlueprintVisitorHelper::handleNumberTermAsText() + * for similar code used for indexed search. + */ + SplitFloat splitter(ssTerm); + std::unique_ptr<QueryTerm> alt_qt; + if (splitter.parts() > 1) { + auto phrase = std::make_unique<PhraseQueryNode>(factory.create(), ssIndex, splitter.parts()); + for (size_t i = 0; i < splitter.parts(); ++i) { + phrase->add_term(std::make_unique<QueryTerm>(factory.create(), splitter.getPart(i), ssIndex, TermType::WORD, normalize_mode)); + } + alt_qt = std::move(phrase); + } else if (splitter.parts() == 1 && ssTerm != splitter.getPart(0)) { + alt_qt = std::make_unique<QueryTerm>(factory.create(), splitter.getPart(0), ssIndex, TermType::WORD, normalize_mode); + } + if (alt_qt) { + auto eqn = std::make_unique<EquivQueryNode>(factory.create(), 2); + eqn->add_term(std::move(qt)); + eqn->add_term(std::move(alt_qt)); + qn = std::move(eqn); + } else { + qn = std::move(qt); + } } else { qn = std::move(qt); } diff --git a/streamingvisitors/src/vespa/searchvisitor/searchenvironment.cpp b/streamingvisitors/src/vespa/searchvisitor/searchenvironment.cpp index 933aa528163..fdaa05ab005 100644 --- a/streamingvisitors/src/vespa/searchvisitor/searchenvironment.cpp +++ b/streamingvisitors/src/vespa/searchvisitor/searchenvironment.cpp @@ -124,27 +124,27 @@ SearchEnvironment::~SearchEnvironment() } SearchEnvironment::Env & -SearchEnvironment::getEnv(const vespalib::string & searchCluster) +SearchEnvironment::getEnv(const vespalib::string & config_id) { - config::ConfigUri searchClusterUri(_configUri.createWithNewId(searchCluster)); + config::ConfigUri configUri(_configUri.createWithNewId(config_id)); if (_localEnvMap == nullptr) { EnvMapUP envMap = std::make_unique<EnvMap>(); _localEnvMap = envMap.get(); std::lock_guard guard(_lock); _threadLocals.emplace_back(std::move(envMap)); } - auto localFound = _localEnvMap->find(searchCluster); + auto localFound = _localEnvMap->find(config_id); if (localFound == _localEnvMap->end()) { std::lock_guard guard(_lock); - auto found = _envMap.find(searchCluster); + auto found = _envMap.find(config_id); if (found == _envMap.end()) { - LOG(debug, "Init VSMAdapter with config id = '%s'", searchCluster.c_str()); - Env::SP env = std::make_shared<Env>(searchClusterUri, *_wordFolder, _transport, _file_distributor_connection_spec); - _envMap[searchCluster] = std::move(env); - found = _envMap.find(searchCluster); + LOG(debug, "Init VSMAdapter with config id = '%s'", config_id.c_str()); + Env::SP env = std::make_shared<Env>(configUri, *_wordFolder, _transport, _file_distributor_connection_spec); + _envMap[config_id] = std::move(env); + found = _envMap.find(config_id); } _localEnvMap->insert(*found); - localFound = _localEnvMap->find(searchCluster); + localFound = _localEnvMap->find(config_id); } return *localFound->second; } @@ -156,9 +156,9 @@ SearchEnvironment::clear_thread_local_env_map() } std::shared_ptr<const SearchEnvironmentSnapshot> -SearchEnvironment::get_snapshot(const vespalib::string& search_cluster) +SearchEnvironment::get_snapshot(const vespalib::string& config_id) { - return getEnv(search_cluster).get_snapshot(); + return getEnv(config_id).get_snapshot(); } std::optional<int64_t> diff --git a/streamingvisitors/src/vespa/searchvisitor/searchenvironment.h b/streamingvisitors/src/vespa/searchvisitor/searchenvironment.h index 9ea4867272e..4cdc38a286b 100644 --- a/streamingvisitors/src/vespa/searchvisitor/searchenvironment.h +++ b/streamingvisitors/src/vespa/searchvisitor/searchenvironment.h @@ -75,12 +75,12 @@ private: FNET_Transport* const _transport; vespalib::string _file_distributor_connection_spec; - Env & getEnv(const vespalib::string & searchcluster); + Env & getEnv(const vespalib::string & config_id); public: SearchEnvironment(const config::ConfigUri & configUri, FNET_Transport* transport, const vespalib::string& file_distributor_connection_spec); ~SearchEnvironment(); - std::shared_ptr<const SearchEnvironmentSnapshot> get_snapshot(const vespalib::string& search_cluster); + std::shared_ptr<const SearchEnvironmentSnapshot> get_snapshot(const vespalib::string& config_id); std::optional<int64_t> get_oldest_config_generation(); // Should only be used by unit tests to simulate that the calling thread is finished. void clear_thread_local_env_map(); diff --git a/vespalib/src/tests/testapp-state/expect.txt b/vespalib/src/tests/testapp-state/expect.txt index 0cd267e8245..936ab27edf8 100644 --- a/vespalib/src/tests/testapp-state/expect.txt +++ b/vespalib/src/tests/testapp-state/expect.txt @@ -1,22 +1,22 @@ - STATE[0]: 'foo' (statetest.cpp:25) - STATE[1]: 'bar' (statetest.cpp:28) - STATE[0]: 'foo' (statetest.cpp:25) - STATE[2]: 'baz' (statetest.cpp:31) - STATE[1]: 'bar' (statetest.cpp:28) - STATE[0]: 'foo' (statetest.cpp:25) - STATE[1]: 'bar' (statetest.cpp:28) - STATE[0]: 'foo' (statetest.cpp:25) - STATE[0]: 'foo' (statetest.cpp:25) - STATE[0]: 'testSomething()' (statetest.cpp:41) - STATE[1]: 'testInner()' (statetest.cpp:12) - STATE[0]: 'testSomething()' (statetest.cpp:41) - STATE[0]: 'testSomething()' (statetest.cpp:41) - STATE[0]: 'something else' (statetest.cpp:44) - STATE[1]: 'testSomethingElse()' (statetest.cpp:46) - STATE[0]: 'something else' (statetest.cpp:44) - STATE[2]: 'testInner()' (statetest.cpp:18) - STATE[1]: 'testSomethingElse()' (statetest.cpp:46) - STATE[0]: 'something else' (statetest.cpp:44) - STATE[1]: 'testSomethingElse()' (statetest.cpp:46) - STATE[0]: 'something else' (statetest.cpp:44) - STATE[0]: 'something else' (statetest.cpp:44) + STATE[0]: 'foo' statetest.cpp:25 + STATE[1]: 'bar' statetest.cpp:28 + STATE[0]: 'foo' statetest.cpp:25 + STATE[2]: 'baz' statetest.cpp:31 + STATE[1]: 'bar' statetest.cpp:28 + STATE[0]: 'foo' statetest.cpp:25 + STATE[1]: 'bar' statetest.cpp:28 + STATE[0]: 'foo' statetest.cpp:25 + STATE[0]: 'foo' statetest.cpp:25 + STATE[0]: 'testSomething()' statetest.cpp:41 + STATE[1]: 'testInner()' statetest.cpp:12 + STATE[0]: 'testSomething()' statetest.cpp:41 + STATE[0]: 'testSomething()' statetest.cpp:41 + STATE[0]: 'something else' statetest.cpp:44 + STATE[1]: 'testSomethingElse()' statetest.cpp:46 + STATE[0]: 'something else' statetest.cpp:44 + STATE[2]: 'testInner()' statetest.cpp:18 + STATE[1]: 'testSomethingElse()' statetest.cpp:46 + STATE[0]: 'something else' statetest.cpp:44 + STATE[1]: 'testSomethingElse()' statetest.cpp:46 + STATE[0]: 'something else' statetest.cpp:44 + STATE[0]: 'something else' statetest.cpp:44 diff --git a/vespalib/src/tests/testkit-subset/out.ref.2.txt b/vespalib/src/tests/testkit-subset/out.ref.2.txt index b08880669f7..44187a4b36c 100644 --- a/vespalib/src/tests/testkit-subset/out.ref.2.txt +++ b/vespalib/src/tests/testkit-subset/out.ref.2.txt @@ -1,8 +1,8 @@ testkit-subset_test.cpp: info: running test suite 'testkit-subset_test.cpp' testkit-subset_test.cpp: info: only running tests matching 'pass' -testkit-subset_test.cpp: info: trace: thread '0(1)' (testkit-subset_test.cpp:5) +testkit-subset_test.cpp: info: trace: thread '0(1)' testkit-subset_test.cpp:5 testkit-subset_test.cpp: info: status_for_test 'will pass main': PASS -testkit-subset_test.cpp: info: trace: thread '0(1)' (testkit-subset_extra.cpp:5) +testkit-subset_test.cpp: info: trace: thread '0(1)' testkit-subset_extra.cpp:5 testkit-subset_test.cpp: info: status_for_test 'will pass extra': PASS testkit-subset_test.cpp: info: test summary --- 2 test(s) passed --- 0 test(s) failed testkit-subset_test.cpp: info: test summary --- 2 test(s) skipped @@ -11,7 +11,7 @@ testkit-subset_test.cpp: info: summary --- 2 check(s) passed --- 0 check(s) fai testkit-subset_test.cpp: info: CONCLUSION: PASS testkit-subset_test.cpp: info: running test suite 'testkit-subset_test.cpp' testkit-subset_test.cpp: info: only running tests matching 'extra\.cpp:.*pass.*' -testkit-subset_test.cpp: info: trace: thread '0(1)' (testkit-subset_extra.cpp:5) +testkit-subset_test.cpp: info: trace: thread '0(1)' testkit-subset_extra.cpp:5 testkit-subset_test.cpp: info: status_for_test 'will pass extra': PASS testkit-subset_test.cpp: info: test summary --- 1 test(s) passed --- 0 test(s) failed testkit-subset_test.cpp: info: test summary --- 3 test(s) skipped diff --git a/vespalib/src/tests/testkit-subset/out.ref.txt b/vespalib/src/tests/testkit-subset/out.ref.txt index 18dd7ea756d..7f3aa10730b 100644 --- a/vespalib/src/tests/testkit-subset/out.ref.txt +++ b/vespalib/src/tests/testkit-subset/out.ref.txt @@ -1,8 +1,8 @@ testkit-subset_test.cpp: info: running test suite 'testkit-subset_test.cpp' testkit-subset_test.cpp: info: only running tests matching 'pass' -testkit-subset_test.cpp: info: trace: thread '0(1)' (testkit-subset_extra.cpp:5) +testkit-subset_test.cpp: info: trace: thread '0(1)' testkit-subset_extra.cpp:5 testkit-subset_test.cpp: info: status_for_test 'will pass extra': PASS -testkit-subset_test.cpp: info: trace: thread '0(1)' (testkit-subset_test.cpp:5) +testkit-subset_test.cpp: info: trace: thread '0(1)' testkit-subset_test.cpp:5 testkit-subset_test.cpp: info: status_for_test 'will pass main': PASS testkit-subset_test.cpp: info: test summary --- 2 test(s) passed --- 0 test(s) failed testkit-subset_test.cpp: info: test summary --- 2 test(s) skipped @@ -11,7 +11,7 @@ testkit-subset_test.cpp: info: summary --- 2 check(s) passed --- 0 check(s) fai testkit-subset_test.cpp: info: CONCLUSION: PASS testkit-subset_test.cpp: info: running test suite 'testkit-subset_test.cpp' testkit-subset_test.cpp: info: only running tests matching 'extra\.cpp:.*pass.*' -testkit-subset_test.cpp: info: trace: thread '0(1)' (testkit-subset_extra.cpp:5) +testkit-subset_test.cpp: info: trace: thread '0(1)' testkit-subset_extra.cpp:5 testkit-subset_test.cpp: info: status_for_test 'will pass extra': PASS testkit-subset_test.cpp: info: test summary --- 1 test(s) passed --- 0 test(s) failed testkit-subset_test.cpp: info: test summary --- 3 test(s) skipped diff --git a/vespalib/src/vespa/vespalib/testkit/test_master.cpp b/vespalib/src/vespa/vespalib/testkit/test_master.cpp index 264d4f527a5..20fa5a7e860 100644 --- a/vespalib/src/vespa/vespalib/testkit/test_master.cpp +++ b/vespalib/src/vespa/vespalib/testkit/test_master.cpp @@ -76,7 +76,7 @@ TestMaster::checkFailed(const lock_guard &guard, if (!thread.traceStack.empty()) { for (size_t i = thread.traceStack.size(); i-- > 0; ) { const TraceItem &item = thread.traceStack[i]; - fprintf(stderr, " STATE[%zu]: '%s' (%s:%d)\n", + fprintf(stderr, " STATE[%zu]: '%s' %s:%d\n", i, item.msg.c_str(), item.file.c_str(), item.line); } } @@ -95,12 +95,12 @@ TestMaster::printDiff(const lock_guard &guard, lhs.c_str(), rhs.c_str()); } else { fprintf(_state.lhsFile, - "[check failure #%zu] '%s' in thread '%s' (%s:%d)\n" + "[check failure #%zu] '%s' in thread '%s' %s:%d\n" "%s\n", _state.failCnt, text.c_str(), thread.name.c_str(), file.c_str(), line, lhs.c_str()); fprintf(_state.rhsFile, - "[check failure #%zu] '%s' in thread '%s' (%s:%d)\n" + "[check failure #%zu] '%s' in thread '%s' %s:%d\n" "%s\n", _state.failCnt, text.c_str(), thread.name.c_str(), file.c_str(), line, rhs.c_str()); @@ -328,7 +328,7 @@ TestMaster::flush(const char *file, uint32_t line) if (thread.passCnt > 0) { lock_guard guard(_lock); _state.passCnt += thread.passCnt; - fprintf(stderr, "%s: info: flushed %zu passed check(s) from thread '%s' (%s:%d)\n", + fprintf(stderr, "%s: info: flushed %zu passed check(s) from thread '%s' %s:%d\n", _name.c_str(), thread.passCnt, thread.name.c_str(), skip_path(file), line); thread.passCnt = 0; } @@ -338,7 +338,7 @@ void TestMaster::trace(const char *file, uint32_t line) { ThreadState &thread = threadState(); - fprintf(stderr, "%s: info: trace: thread '%s' (%s:%d)\n", + fprintf(stderr, "%s: info: trace: thread '%s' %s:%d\n", _name.c_str(), thread.name.c_str(), skip_path(file), line); } |