diff options
125 files changed, 1058 insertions, 508 deletions
diff --git a/config-model-api/abi-spec.json b/config-model-api/abi-spec.json index ccd256d0c88..21374061bfa 100644 --- a/config-model-api/abi-spec.json +++ b/config-model-api/abi-spec.json @@ -1290,9 +1290,6 @@ "public long mergingMaxMemoryUsagePerNode()", "public boolean usePerDocumentThrottledDeleteBucket()", "public boolean alwaysMarkPhraseExpensive()", - "public boolean createPostinglistWhenNonStrict()", - "public boolean useEstimateForFetchPostings()", - "public boolean useThreadBundleForFetchPostings()", "public boolean restartOnDeployWhenOnnxModelChanges()", "public boolean sortBlueprintsByCost()" ], @@ -1374,6 +1371,7 @@ "public abstract com.yahoo.config.application.api.FileRegistry getFileRegistry()", "public abstract java.util.concurrent.ExecutorService getExecutor()", "public java.util.Optional reindexing()", + "public java.util.Set restartingClusters()", "public abstract com.yahoo.config.model.api.ModelContext$Properties properties()", "public java.util.Optional appDir()", "public abstract com.yahoo.config.model.api.OnnxModelCost onnxModelCost()", diff --git a/config-model-api/src/main/java/com/yahoo/config/model/api/ModelContext.java b/config-model-api/src/main/java/com/yahoo/config/model/api/ModelContext.java index 9b75f4bcdda..eb5942bd49c 100644 --- a/config-model-api/src/main/java/com/yahoo/config/model/api/ModelContext.java +++ b/config-model-api/src/main/java/com/yahoo/config/model/api/ModelContext.java @@ -43,6 +43,7 @@ public interface ModelContext { FileRegistry getFileRegistry(); ExecutorService getExecutor(); default Optional<? extends Reindexing> reindexing() { return Optional.empty(); } + default Set<ClusterSpec.Id> restartingClusters() { return Set.of(); } // TODO: Remove after 8.290 is gone. 
Properties properties(); default Optional<File> appDir() { return Optional.empty(); } OnnxModelCost onnxModelCost(); @@ -108,15 +109,12 @@ public interface ModelContext { @ModelFeatureFlag(owners = {"arnej"}) default String logFileCompressionAlgorithm(String defVal) { return defVal; } @ModelFeatureFlag(owners = {"baldersheim"}, comment = "Select summary decode type") default String summaryDecodePolicy() { return "eager"; } @ModelFeatureFlag(owners = {"vekterli"}) default int contentLayerMetadataFeatureLevel() { return 0; } - @ModelFeatureFlag(owners = {"bjorncs"}) default boolean dynamicHeapSize() { return false; } + @ModelFeatureFlag(owners = {"bjorncs"}, removeAfter = "8.289") default boolean dynamicHeapSize() { return true; } @ModelFeatureFlag(owners = {"hmusum"}) default String unknownConfigDefinition() { return "warn"; } @ModelFeatureFlag(owners = {"hmusum"}) default int searchHandlerThreadpool() { return 2; } @ModelFeatureFlag(owners = {"vekterli"}) default long mergingMaxMemoryUsagePerNode() { return -1; } @ModelFeatureFlag(owners = {"vekterli"}) default boolean usePerDocumentThrottledDeleteBucket() { return false; } @ModelFeatureFlag(owners = {"baldersheim"}) default boolean alwaysMarkPhraseExpensive() { return false; } - @ModelFeatureFlag(owners = {"baldersheim"}, removeAfter = "8.278") default boolean createPostinglistWhenNonStrict() { return true; } - @ModelFeatureFlag(owners = {"baldersheim"}, removeAfter = "8.278") default boolean useEstimateForFetchPostings() { return true; } - @ModelFeatureFlag(owners = {"baldersheim"}, removeAfter = "8.278") default boolean useThreadBundleForFetchPostings() { return true; } @ModelFeatureFlag(owners = {"hmusum"}) default boolean restartOnDeployWhenOnnxModelChanges() { return false; } @ModelFeatureFlag(owners = {"baldersheim"}) default boolean sortBlueprintsByCost() { return false; } } diff --git a/config-model/src/main/java/com/yahoo/config/model/provision/InMemoryProvisioner.java 
b/config-model/src/main/java/com/yahoo/config/model/provision/InMemoryProvisioner.java index c8f088509c5..befe57a97e4 100644 --- a/config-model/src/main/java/com/yahoo/config/model/provision/InMemoryProvisioner.java +++ b/config-model/src/main/java/com/yahoo/config/model/provision/InMemoryProvisioner.java @@ -13,7 +13,6 @@ import com.yahoo.config.provision.ClusterSpec; import com.yahoo.config.provision.Environment; import com.yahoo.config.provision.HostSpec; import com.yahoo.config.provision.NodeResources; -import com.yahoo.config.provision.NodeResources.DiskSpeed; import com.yahoo.config.provision.ProvisionLogger; import java.util.ArrayList; @@ -310,9 +309,7 @@ public class InMemoryProvisioner implements HostProvisioner { if (a.memoryGb() < b.memoryGb()) return -1; if (a.diskGb() > b.diskGb()) return 1; if (a.diskGb() < b.diskGb()) return -1; - if (a.vcpu() > b.vcpu()) return 1; - if (a.vcpu() < b.vcpu()) return -1; - return 0; + return Double.compare(a.vcpu(), b.vcpu()); } } diff --git a/config-model/src/main/java/com/yahoo/vespa/model/application/validation/JvmHeapSizeValidator.java b/config-model/src/main/java/com/yahoo/vespa/model/application/validation/JvmHeapSizeValidator.java index 482c4477cdc..9cf5fe84c21 100644 --- a/config-model/src/main/java/com/yahoo/vespa/model/application/validation/JvmHeapSizeValidator.java +++ b/config-model/src/main/java/com/yahoo/vespa/model/application/validation/JvmHeapSizeValidator.java @@ -18,7 +18,6 @@ public class JvmHeapSizeValidator implements Validator { @Override public void validate(Context context) { - if (!context.deployState().featureFlags().dynamicHeapSize()) return; if (!context.deployState().isHostedTenantApplication(context.model().getAdmin().getApplicationType())) return; context.model().getContainerClusters().forEach((clusterId, appCluster) -> { diff --git a/config-model/src/main/java/com/yahoo/vespa/model/container/ApplicationContainerCluster.java 
b/config-model/src/main/java/com/yahoo/vespa/model/container/ApplicationContainerCluster.java index 9896ca95e97..ed7646b3066 100644 --- a/config-model/src/main/java/com/yahoo/vespa/model/container/ApplicationContainerCluster.java +++ b/config-model/src/main/java/com/yahoo/vespa/model/container/ApplicationContainerCluster.java @@ -101,7 +101,6 @@ public final class ApplicationContainerCluster extends ContainerCluster<Applicat private int zookeeperSessionTimeoutSeconds = 30; private final int transport_events_before_wakeup; private final int transport_connections_per_target; - private final boolean dynamicHeapSize; /** The heap size % of total memory available to the JVM process. */ private final int heapSizePercentageOfAvailableMemory; @@ -115,7 +114,6 @@ public final class ApplicationContainerCluster extends ContainerCluster<Applicat public ApplicationContainerCluster(TreeConfigProducer<?> parent, String configSubId, String clusterId, DeployState deployState) { super(parent, configSubId, clusterId, deployState, true, 10); this.tlsClientAuthority = deployState.tlsClientAuthority(); - dynamicHeapSize = deployState.featureFlags().dynamicHeapSize(); previousHosts = Collections.unmodifiableSet(deployState.getPreviousModel().stream() .map(Model::allocatedHosts) .map(AllocatedHosts::getHosts) @@ -215,10 +213,8 @@ public final class ApplicationContainerCluster extends ContainerCluster<Applicat if (getContainers().isEmpty()) return Optional.of(JvmMemoryPercentage.of(availableMemoryPercentage)); // Node memory is not known // Node memory is known so convert available memory percentage to node memory percentage - double totalMemory = dynamicHeapSize - ? getContainers().stream().mapToDouble(c -> c.getHostResource().realResources().memoryGb()).min().orElseThrow() - : getContainers().get(0).getHostResource().realResources().memoryGb(); - double jvmHeapDeductionGb = dynamicHeapSize ? 
onnxModelCostCalculator.aggregatedModelCostInBytes() / (1024D * 1024 * 1024) : 0; + double totalMemory = getContainers().stream().mapToDouble(c -> c.getHostResource().realResources().memoryGb()).min().orElseThrow(); + double jvmHeapDeductionGb = onnxModelCostCalculator.aggregatedModelCostInBytes() / (1024D * 1024 * 1024); double availableMemory = Math.max(0, totalMemory - Host.memoryOverheadGb - jvmHeapDeductionGb); int memoryPercentage = (int) (availableMemory / totalMemory * availableMemoryPercentage); logger.log(FINE, () -> "cluster id '%s': memoryPercentage=%d, availableMemory=%f, totalMemory=%f, availableMemoryPercentage=%d, jvmHeapDeductionGb=%f" diff --git a/config-model/src/main/java/com/yahoo/vespa/model/content/Content.java b/config-model/src/main/java/com/yahoo/vespa/model/content/Content.java index 4e56d1d1d5a..1254f8e110a 100644 --- a/config-model/src/main/java/com/yahoo/vespa/model/content/Content.java +++ b/config-model/src/main/java/com/yahoo/vespa/model/content/Content.java @@ -27,7 +27,6 @@ import com.yahoo.vespa.model.container.docproc.ContainerDocproc; import com.yahoo.vespa.model.container.docproc.DocprocChain; import com.yahoo.vespa.model.container.docproc.DocprocChains; import com.yahoo.vespa.model.content.cluster.ContentCluster; -import com.yahoo.vespa.model.search.IndexedSearchCluster; import com.yahoo.vespa.model.search.IndexingDocproc; import com.yahoo.vespa.model.search.IndexingDocprocChain; import com.yahoo.vespa.model.search.SearchCluster; @@ -215,25 +214,21 @@ public class Content extends ConfigModel { private void buildIndexingClusters(Content content, ConfigModelContext modelContext, ApplicationConfigProducerRoot root) { var search = content.getCluster().getSearch(); - if (!search.getIndexingDocproc().isPresent()) { - return; - } - var indexingDocproc = search.getIndexingDocproc().get(); + var indexingDocproc = search.getIndexingDocproc(); if (indexingDocproc.hasExplicitCluster()) { setExistingIndexingCluster(content, 
indexingDocproc, content.containers); } else { - if (search.hasIndexedCluster()) { - setContainerAsIndexingCluster(search.getIndexed(), content, modelContext, root); - } + setContainerAsIndexingCluster(search.getSearchNodes(), indexingDocproc, content, modelContext, root); } } - private void setContainerAsIndexingCluster(IndexedSearchCluster indexedSearchCluster, + private void setContainerAsIndexingCluster(List<SearchNode> cluster, + IndexingDocproc indexingDocproc, Content content, ConfigModelContext modelContext, ApplicationConfigProducerRoot root) { if (content.containers.isEmpty()) { - createImplicitIndexingCluster(indexedSearchCluster, content, modelContext, root); + createImplicitIndexingCluster(cluster, indexingDocproc, content, modelContext, root); } else { ContainerCluster<?> targetCluster = getContainerWithDocproc(content.containers); if (targetCluster == null) @@ -242,7 +237,6 @@ public class Content extends ConfigModel { targetCluster = content.containers.iterator().next().getCluster(); addDocproc(targetCluster); - var indexingDocproc = indexedSearchCluster.getIndexingDocproc(); indexingDocproc.setClusterName(targetCluster.getName()); addIndexingChainsTo(targetCluster, content, indexingDocproc); } @@ -303,11 +297,12 @@ public class Content extends ConfigModel { } /** Create a new container cluster for indexing and add it to the Vespa model */ - private void createImplicitIndexingCluster(IndexedSearchCluster cluster, + private void createImplicitIndexingCluster(List<SearchNode> cluster, + IndexingDocproc indexingDocproc, Content content, ConfigModelContext modelContext, ApplicationConfigProducerRoot root) { - String indexerName = cluster.getIndexingDocproc().getClusterName(content.getCluster().getName()); + String indexerName = indexingDocproc.getClusterName(content.getCluster().getName()); TreeConfigProducer<AnyConfigProducer> parent = getDocProc(root); ApplicationContainerCluster indexingCluster = new ApplicationContainerCluster(parent, "cluster." 
+ indexerName, indexerName, modelContext.getDeployState()); ContainerModel indexingClusterModel = new ContainerModel(modelContext.withParent(parent).withId(indexingCluster.getSubId())); @@ -323,7 +318,7 @@ public class Content extends ConfigModel { List<ApplicationContainer> nodes = new ArrayList<>(); int index = 0; Set<HostResource> processedHosts = new LinkedHashSet<>(); - for (SearchNode searchNode : cluster.getSearchNodes()) { + for (SearchNode searchNode : cluster) { HostResource host = searchNode.getHostResource(); if (!processedHosts.contains(host)) { String containerName = String.valueOf(searchNode.getDistributionKey()); @@ -340,7 +335,7 @@ public class Content extends ConfigModel { indexingCluster.addContainers(nodes); addIndexingChain(indexingCluster); - cluster.getIndexingDocproc().setChain(indexingCluster.getDocprocChains().allChains().getComponent(IndexingDocprocChain.NAME)); + indexingDocproc.setChain(indexingCluster.getDocprocChains().allChains().getComponent(IndexingDocprocChain.NAME)); } private ContainerCluster<?> getContainerWithDocproc(Collection<ContainerModel> containers) { diff --git a/config-model/src/main/java/com/yahoo/vespa/model/content/ContentSearchCluster.java b/config-model/src/main/java/com/yahoo/vespa/model/content/ContentSearchCluster.java index 1420cd82247..0027a9ca45c 100644 --- a/config-model/src/main/java/com/yahoo/vespa/model/content/ContentSearchCluster.java +++ b/config-model/src/main/java/com/yahoo/vespa/model/content/ContentSearchCluster.java @@ -58,7 +58,7 @@ public class ContentSearchCluster extends TreeConfigProducer<AnyConfigProducer> /** The single, indexed search cluster this sets up (supporting multiple document types), or null if none */ private IndexedSearchCluster indexedCluster; - private Optional<IndexingDocproc> indexingDocproc; + private final IndexingDocproc indexingDocproc; private Redundancy redundancy; private final String clusterName; @@ -208,7 +208,7 @@ public class ContentSearchCluster extends 
TreeConfigProducer<AnyConfigProducer> double fractionOfMemoryReserved) { super(parent, "search"); - this.indexingDocproc = Optional.empty(); + this.indexingDocproc = new IndexingDocproc(); this.clusterName = clusterName; this.documentDefinitions = documentDefinitions; this.globallyDistributedDocuments = globallyDistributedDocuments; @@ -262,10 +262,6 @@ public class ContentSearchCluster extends TreeConfigProducer<AnyConfigProducer> throw new IllegalArgumentException("Duplicate indexed cluster '" + indexedCluster.getClusterName() + "'"); } indexedCluster = (IndexedSearchCluster)sc; - if (indexingDocproc.isPresent()) { - throw new IllegalArgumentException("Indexing docproc has previously been setup for streaming search"); - } - indexingDocproc = Optional.of(indexedCluster.getIndexingDocproc()); } clusters.put(sc.getClusterName(), sc); } @@ -483,12 +479,7 @@ public class ContentSearchCluster extends TreeConfigProducer<AnyConfigProducer> public Map<String, SearchCluster> getClusters() { return clusters; } public IndexedSearchCluster getIndexed() { return indexedCluster; } public boolean hasIndexedCluster() { return indexedCluster != null; } - public Optional<IndexingDocproc> getIndexingDocproc() { return indexingDocproc; } - public void setupStreamingSearchIndexingDocProc() { - if (indexingDocproc.isEmpty()) { - indexingDocproc = Optional.of(new IndexingDocproc()); - } - } + public IndexingDocproc getIndexingDocproc() { return indexingDocproc; } public String getClusterName() { return clusterName; } @Override diff --git a/config-model/src/main/java/com/yahoo/vespa/model/content/cluster/ContentCluster.java b/config-model/src/main/java/com/yahoo/vespa/model/content/cluster/ContentCluster.java index 7c48ec11729..791faa401ed 100644 --- a/config-model/src/main/java/com/yahoo/vespa/model/content/cluster/ContentCluster.java +++ b/config-model/src/main/java/com/yahoo/vespa/model/content/cluster/ContentCluster.java @@ -209,20 +209,10 @@ public class ContentCluster extends 
TreeConfigProducer<AnyConfigProducer> implem docprocChain = docprocChain.trim(); } if (docprocCluster != null && !docprocCluster.isEmpty()) { - if (!c.getSearch().hasIndexedCluster() && c.getSearch().getIndexingDocproc().isEmpty() && - docprocChain != null && !docprocChain.isEmpty()) { - c.getSearch().setupStreamingSearchIndexingDocProc(); - } - var indexingDocproc = c.getSearch().getIndexingDocproc(); - if (indexingDocproc.isPresent()) { - indexingDocproc.get().setClusterName(docprocCluster); - } + c.getSearch().getIndexingDocproc().setClusterName(docprocCluster); } if (docprocChain != null && !docprocChain.isEmpty()) { - var indexingDocproc = c.getSearch().getIndexingDocproc(); - if (indexingDocproc.isPresent()) { - indexingDocproc.get().setChainName(docprocChain); - } + c.getSearch().getIndexingDocproc().setChainName(docprocChain); } } @@ -301,10 +291,7 @@ public class ContentCluster extends TreeConfigProducer<AnyConfigProducer> implem Objects.requireNonNull(admin.getLogserver(), "logserver cannot be null"); List<HostResource> host = List.of(admin.getLogserver().getHostResource()); admin.setClusterControllers(createClusterControllers(new ClusterControllerCluster(admin, "standalone", deployState), - host, - clusterName, - true, - deployState), + host, clusterName, true, deployState), deployState); } clusterControllers = admin.getClusterControllers(); @@ -457,7 +444,6 @@ public class ContentCluster extends TreeConfigProducer<AnyConfigProducer> implem @Override public void getConfig(MessagetyperouteselectorpolicyConfig.Builder builder) { - if (getSearch().getIndexingDocproc().isEmpty()) return; DocumentProtocol.getConfig(builder, getConfigId()); } diff --git a/config-model/src/main/java/com/yahoo/vespa/model/routing/DocumentProtocol.java b/config-model/src/main/java/com/yahoo/vespa/model/routing/DocumentProtocol.java index d555d13c09c..1c8567b4079 100644 --- a/config-model/src/main/java/com/yahoo/vespa/model/routing/DocumentProtocol.java +++ 
b/config-model/src/main/java/com/yahoo/vespa/model/routing/DocumentProtocol.java @@ -110,11 +110,7 @@ public final class DocumentProtocol implements Protocol, for (ContentCluster cluster : Content.getContentClusters(repo)) { DocumentProtocolPoliciesConfig.Cluster.Builder clusterBuilder = new DocumentProtocolPoliciesConfig.Cluster.Builder(); addSelector(cluster.getConfigId(), cluster.getRoutingSelector(), clusterBuilder); - if (cluster.getSearch().getIndexingDocproc().isPresent()) - addRoutes(getDirectRouteName(cluster.getConfigId()), getIndexedRouteName(cluster.getConfigId()), clusterBuilder); - else - clusterBuilder.defaultRoute(cluster.getConfigId()); - + addRoutes(getDirectRouteName(cluster.getConfigId()), getIndexedRouteName(cluster.getConfigId()), clusterBuilder); builder.cluster(cluster.getConfigId(), clusterBuilder); } } @@ -226,18 +222,13 @@ public final class DocumentProtocol implements Protocol, private static void addContentRouting(List<ContentCluster> content, RoutingTableSpec table) { for (ContentCluster cluster : content) { RouteSpec spec = new RouteSpec(cluster.getConfigId()); - - if (cluster.getSearch().getIndexingDocproc().isPresent()) { - var indexingDocproc = cluster.getSearch().getIndexingDocproc().get(); - table.addRoute(spec.addHop("[MessageType:" + cluster.getConfigId() + "]")); - table.addRoute(new RouteSpec(getIndexedRouteName(cluster.getConfigId())) - .addHop(indexingDocproc.getServiceName()) - .addHop("[Content:cluster=" + cluster.getName() + "]")); - table.addRoute(new RouteSpec(getDirectRouteName(cluster.getConfigId())) - .addHop("[Content:cluster=" + cluster.getName() + "]")); - } else { - table.addRoute(spec.addHop("[Content:cluster=" + cluster.getName() + "]")); - } + var indexingDocproc = cluster.getSearch().getIndexingDocproc(); + table.addRoute(spec.addHop("[MessageType:" + cluster.getConfigId() + "]")); + table.addRoute(new RouteSpec(getIndexedRouteName(cluster.getConfigId())) + .addHop(indexingDocproc.getServiceName()) + 
.addHop("[Content:cluster=" + cluster.getName() + "]")); + table.addRoute(new RouteSpec(getDirectRouteName(cluster.getConfigId())) + .addHop("[Content:cluster=" + cluster.getName() + "]")); table.addRoute(new RouteSpec("storage/cluster." + cluster.getName()) .addHop("route:" + cluster.getConfigId())); } diff --git a/config-model/src/main/java/com/yahoo/vespa/model/search/IndexedSearchCluster.java b/config-model/src/main/java/com/yahoo/vespa/model/search/IndexedSearchCluster.java index a79bc14db52..77c6f8f99c4 100644 --- a/config-model/src/main/java/com/yahoo/vespa/model/search/IndexedSearchCluster.java +++ b/config-model/src/main/java/com/yahoo/vespa/model/search/IndexedSearchCluster.java @@ -41,7 +41,6 @@ public class IndexedSearchCluster extends SearchCluster DispatchNodesConfig.Producer, ConfigInstance.Producer { - private final IndexingDocproc indexingDocproc; private Tuning tuning; private SearchCoverage searchCoverage; @@ -68,7 +67,6 @@ public class IndexedSearchCluster extends SearchCluster public IndexedSearchCluster(TreeConfigProducer<AnyConfigProducer> parent, String clusterName, int index, ModelContext.FeatureFlags featureFlags) { super(parent, clusterName, index); - indexingDocproc = new IndexingDocproc(); documentDbsConfigProducer = new MultipleDocumentDatabasesConfigProducer(this, documentDbs); defaultDispatchPolicy = DispatchTuning.Builder.toDispatchPolicy(featureFlags.queryDispatchPolicy()); dispatchWarmup = featureFlags.queryDispatchWarmup(); @@ -78,9 +76,6 @@ public class IndexedSearchCluster extends SearchCluster @Override protected IndexingMode getIndexingMode() { return IndexingMode.REALTIME; } - public IndexingDocproc getIndexingDocproc() { return indexingDocproc; } - - public void addSearcher(SearchNode searcher) { searchNodes.add(searcher); } diff --git a/config-model/src/main/java/com/yahoo/vespa/model/search/IndexingDocproc.java b/config-model/src/main/java/com/yahoo/vespa/model/search/IndexingDocproc.java index 4f301f6df9a..2032720db96 
100644 --- a/config-model/src/main/java/com/yahoo/vespa/model/search/IndexingDocproc.java +++ b/config-model/src/main/java/com/yahoo/vespa/model/search/IndexingDocproc.java @@ -3,6 +3,8 @@ package com.yahoo.vespa.model.search; import com.yahoo.vespa.model.container.docproc.DocprocChain; +import java.util.Objects; + /** * Utility class to track configuration for which indexing docproc to use by a search cluster. */ @@ -51,7 +53,7 @@ public class IndexingDocproc { } public String getServiceName() { - return chain.getServiceName(); + return Objects.requireNonNull(chain).getServiceName(); } /** diff --git a/config-model/src/test/derived/advanced/ilscripts.cfg b/config-model/src/test/derived/advanced/ilscripts.cfg index b78cb892501..c3cfb3774cd 100644 --- a/config-model/src/test/derived/advanced/ilscripts.cfg +++ b/config-model/src/test/derived/advanced/ilscripts.cfg @@ -1,4 +1,4 @@ -maxtermoccurrences 100 +maxtermoccurrences 1000 fieldmatchmaxlength 1000000 ilscript[].doctype "advanced" ilscript[].docfield[] "debug_src" diff --git a/config-model/src/test/derived/annotationsimplicitstruct/ilscripts.cfg b/config-model/src/test/derived/annotationsimplicitstruct/ilscripts.cfg index e7fefe0035a..bd8a0278d55 100644 --- a/config-model/src/test/derived/annotationsimplicitstruct/ilscripts.cfg +++ b/config-model/src/test/derived/annotationsimplicitstruct/ilscripts.cfg @@ -1,3 +1,3 @@ -maxtermoccurrences 100 +maxtermoccurrences 1000 fieldmatchmaxlength 1000000 ilscript[].doctype "annotationsimplicitstruct" diff --git a/config-model/src/test/derived/annotationsinheritance/ilscripts.cfg b/config-model/src/test/derived/annotationsinheritance/ilscripts.cfg index 22c4259ddac..7622f455903 100644 --- a/config-model/src/test/derived/annotationsinheritance/ilscripts.cfg +++ b/config-model/src/test/derived/annotationsinheritance/ilscripts.cfg @@ -1,3 +1,3 @@ -maxtermoccurrences 100 +maxtermoccurrences 1000 fieldmatchmaxlength 1000000 ilscript[].doctype "annotationsinheritance" diff --git 
a/config-model/src/test/derived/annotationsinheritance2/ilscripts.cfg b/config-model/src/test/derived/annotationsinheritance2/ilscripts.cfg index bca5a004d1a..e9aa34ad573 100644 --- a/config-model/src/test/derived/annotationsinheritance2/ilscripts.cfg +++ b/config-model/src/test/derived/annotationsinheritance2/ilscripts.cfg @@ -1,3 +1,3 @@ -maxtermoccurrences 100 +maxtermoccurrences 1000 fieldmatchmaxlength 1000000 ilscript[].doctype "annotationsinheritance2" diff --git a/config-model/src/test/derived/annotationsreference/ilscripts.cfg b/config-model/src/test/derived/annotationsreference/ilscripts.cfg index db575631ba3..8c338f00ae1 100644 --- a/config-model/src/test/derived/annotationsreference/ilscripts.cfg +++ b/config-model/src/test/derived/annotationsreference/ilscripts.cfg @@ -1,3 +1,3 @@ -maxtermoccurrences 100 +maxtermoccurrences 1000 fieldmatchmaxlength 1000000 ilscript[].doctype "annotationsreference" diff --git a/config-model/src/test/derived/annotationssimple/ilscripts.cfg b/config-model/src/test/derived/annotationssimple/ilscripts.cfg index 744ba043c63..36842dd955f 100644 --- a/config-model/src/test/derived/annotationssimple/ilscripts.cfg +++ b/config-model/src/test/derived/annotationssimple/ilscripts.cfg @@ -1,3 +1,3 @@ -maxtermoccurrences 100 +maxtermoccurrences 1000 fieldmatchmaxlength 1000000 ilscript[].doctype "annotationssimple" diff --git a/config-model/src/test/derived/arrays/ilscripts.cfg b/config-model/src/test/derived/arrays/ilscripts.cfg index 0490835db33..cef6de758d0 100644 --- a/config-model/src/test/derived/arrays/ilscripts.cfg +++ b/config-model/src/test/derived/arrays/ilscripts.cfg @@ -1,4 +1,4 @@ -maxtermoccurrences 100 +maxtermoccurrences 1000 fieldmatchmaxlength 1000000 ilscript[].doctype "arrays" ilscript[].docfield[] "tags" diff --git a/config-model/src/test/derived/attributeprefetch/ilscripts.cfg b/config-model/src/test/derived/attributeprefetch/ilscripts.cfg index 771a54359b9..23f2fa610f1 100644 --- 
a/config-model/src/test/derived/attributeprefetch/ilscripts.cfg +++ b/config-model/src/test/derived/attributeprefetch/ilscripts.cfg @@ -1,4 +1,4 @@ -maxtermoccurrences 100 +maxtermoccurrences 1000 fieldmatchmaxlength 1000000 ilscript[].doctype "prefetch" ilscript[].docfield[] "singlebyte" diff --git a/config-model/src/test/derived/attributes/ilscripts.cfg b/config-model/src/test/derived/attributes/ilscripts.cfg index 42fda653618..3cd40dd94e2 100644 --- a/config-model/src/test/derived/attributes/ilscripts.cfg +++ b/config-model/src/test/derived/attributes/ilscripts.cfg @@ -1,4 +1,4 @@ -maxtermoccurrences 100 +maxtermoccurrences 1000 fieldmatchmaxlength 1000000 ilscript[].doctype "attributes" ilscript[].docfield[] "a1" diff --git a/config-model/src/test/derived/bolding_dynamic_summary/ilscripts.cfg b/config-model/src/test/derived/bolding_dynamic_summary/ilscripts.cfg index 04251c1a270..36f5b3d1505 100644 --- a/config-model/src/test/derived/bolding_dynamic_summary/ilscripts.cfg +++ b/config-model/src/test/derived/bolding_dynamic_summary/ilscripts.cfg @@ -1,4 +1,4 @@ -maxtermoccurrences 100 +maxtermoccurrences 1000 fieldmatchmaxlength 1000000 ilscript[].doctype "test" ilscript[].docfield[] "str_1" diff --git a/config-model/src/test/derived/complex/ilscripts.cfg b/config-model/src/test/derived/complex/ilscripts.cfg index 6074333bd24..f5f884bb7f4 100644 --- a/config-model/src/test/derived/complex/ilscripts.cfg +++ b/config-model/src/test/derived/complex/ilscripts.cfg @@ -1,4 +1,4 @@ -maxtermoccurrences 100 +maxtermoccurrences 1000 fieldmatchmaxlength 1000000 ilscript[].doctype "complex" ilscript[].docfield[] "title" diff --git a/config-model/src/test/derived/emptydefault/ilscripts.cfg b/config-model/src/test/derived/emptydefault/ilscripts.cfg index b17920fcaab..fba6d2a3c25 100644 --- a/config-model/src/test/derived/emptydefault/ilscripts.cfg +++ b/config-model/src/test/derived/emptydefault/ilscripts.cfg @@ -1,4 +1,4 @@ -maxtermoccurrences 100 +maxtermoccurrences 1000 
fieldmatchmaxlength 1000000 ilscript[].doctype "emptydefault" ilscript[].docfield[] "one" diff --git a/config-model/src/test/derived/exactmatch/ilscripts.cfg b/config-model/src/test/derived/exactmatch/ilscripts.cfg index c24b656c4e7..8cec774181a 100644 --- a/config-model/src/test/derived/exactmatch/ilscripts.cfg +++ b/config-model/src/test/derived/exactmatch/ilscripts.cfg @@ -1,4 +1,4 @@ -maxtermoccurrences 100 +maxtermoccurrences 1000 fieldmatchmaxlength 1000000 ilscript[].doctype "exactmatch" ilscript[].docfield[] "tag" diff --git a/config-model/src/test/derived/hnsw_index/ilscripts.cfg b/config-model/src/test/derived/hnsw_index/ilscripts.cfg index 0c8266336b1..ad4f856dfc9 100644 --- a/config-model/src/test/derived/hnsw_index/ilscripts.cfg +++ b/config-model/src/test/derived/hnsw_index/ilscripts.cfg @@ -1,4 +1,4 @@ -maxtermoccurrences 100 +maxtermoccurrences 1000 fieldmatchmaxlength 1000000 ilscript[].doctype "test" ilscript[].docfield[] "t1" diff --git a/config-model/src/test/derived/id/ilscripts.cfg b/config-model/src/test/derived/id/ilscripts.cfg index 7543c76a12a..9c952fe4acc 100644 --- a/config-model/src/test/derived/id/ilscripts.cfg +++ b/config-model/src/test/derived/id/ilscripts.cfg @@ -1,4 +1,4 @@ -maxtermoccurrences 100 +maxtermoccurrences 1000 fieldmatchmaxlength 1000000 ilscript[].doctype "id" ilscript[].docfield[] "uri" diff --git a/config-model/src/test/derived/indexswitches/ilscripts.cfg b/config-model/src/test/derived/indexswitches/ilscripts.cfg index 5cda0a9fdc7..6c95fa24767 100644 --- a/config-model/src/test/derived/indexswitches/ilscripts.cfg +++ b/config-model/src/test/derived/indexswitches/ilscripts.cfg @@ -1,4 +1,4 @@ -maxtermoccurrences 100 +maxtermoccurrences 1000 fieldmatchmaxlength 1000000 ilscript[].doctype "indexswitches" ilscript[].docfield[] "title" diff --git a/config-model/src/test/derived/inheritance/ilscripts.cfg b/config-model/src/test/derived/inheritance/ilscripts.cfg index 91410489de8..f9ada266814 100644 --- 
a/config-model/src/test/derived/inheritance/ilscripts.cfg +++ b/config-model/src/test/derived/inheritance/ilscripts.cfg @@ -1,4 +1,4 @@ -maxtermoccurrences 100 +maxtermoccurrences 1000 fieldmatchmaxlength 1000000 ilscript[].doctype "child" ilscript[].docfield[] "onlygrandparent" diff --git a/config-model/src/test/derived/language/ilscripts.cfg b/config-model/src/test/derived/language/ilscripts.cfg index 4858788095c..acc715a0a01 100644 --- a/config-model/src/test/derived/language/ilscripts.cfg +++ b/config-model/src/test/derived/language/ilscripts.cfg @@ -1,4 +1,4 @@ -maxtermoccurrences 100 +maxtermoccurrences 1000 fieldmatchmaxlength 1000000 ilscript[].doctype "language" ilscript[].docfield[] "language" diff --git a/config-model/src/test/derived/lowercase/ilscripts.cfg b/config-model/src/test/derived/lowercase/ilscripts.cfg index fe36dc5daef..b0463f86755 100644 --- a/config-model/src/test/derived/lowercase/ilscripts.cfg +++ b/config-model/src/test/derived/lowercase/ilscripts.cfg @@ -1,4 +1,4 @@ -maxtermoccurrences 100 +maxtermoccurrences 1000 fieldmatchmaxlength 1000000 ilscript[].doctype "lowercase" ilscript[].docfield[] "single_field_source" diff --git a/config-model/src/test/derived/multiplesummaries/ilscripts.cfg b/config-model/src/test/derived/multiplesummaries/ilscripts.cfg index 2f4ecdb1712..87b5880c0db 100644 --- a/config-model/src/test/derived/multiplesummaries/ilscripts.cfg +++ b/config-model/src/test/derived/multiplesummaries/ilscripts.cfg @@ -1,4 +1,4 @@ -maxtermoccurrences 100 +maxtermoccurrences 1000 fieldmatchmaxlength 1000000 ilscript[].doctype "multiplesummaries" ilscript[].docfield[] "a" diff --git a/config-model/src/test/derived/music/ilscripts.cfg b/config-model/src/test/derived/music/ilscripts.cfg index ba292c4013a..7ce7ea18396 100644 --- a/config-model/src/test/derived/music/ilscripts.cfg +++ b/config-model/src/test/derived/music/ilscripts.cfg @@ -1,4 +1,4 @@ -maxtermoccurrences 100 +maxtermoccurrences 1000 fieldmatchmaxlength 1000000 
ilscript[].doctype "music" ilscript[].docfield[] "bgndata" diff --git a/config-model/src/test/derived/newrank/ilscripts.cfg b/config-model/src/test/derived/newrank/ilscripts.cfg index ec46d9acc68..e16de1ec47e 100644 --- a/config-model/src/test/derived/newrank/ilscripts.cfg +++ b/config-model/src/test/derived/newrank/ilscripts.cfg @@ -1,4 +1,4 @@ -maxtermoccurrences 100 +maxtermoccurrences 1000 fieldmatchmaxlength 1000000 ilscript[].doctype "newrank" ilscript[].docfield[] "bgndata" diff --git a/config-model/src/test/derived/orderilscripts/ilscripts.cfg b/config-model/src/test/derived/orderilscripts/ilscripts.cfg index c41939b34d1..32b9f4d7b05 100644 --- a/config-model/src/test/derived/orderilscripts/ilscripts.cfg +++ b/config-model/src/test/derived/orderilscripts/ilscripts.cfg @@ -1,4 +1,4 @@ -maxtermoccurrences 100 +maxtermoccurrences 1000 fieldmatchmaxlength 1000000 ilscript[].doctype "orderilscripts" ilscript[].docfield[] "foo" diff --git a/config-model/src/test/derived/position_array/ilscripts.cfg b/config-model/src/test/derived/position_array/ilscripts.cfg index f96542147b4..662b8f9380b 100644 --- a/config-model/src/test/derived/position_array/ilscripts.cfg +++ b/config-model/src/test/derived/position_array/ilscripts.cfg @@ -1,4 +1,4 @@ -maxtermoccurrences 100 +maxtermoccurrences 1000 fieldmatchmaxlength 1000000 ilscript[].doctype "position_array" ilscript[].docfield[] "pos" diff --git a/config-model/src/test/derived/position_attribute/ilscripts.cfg b/config-model/src/test/derived/position_attribute/ilscripts.cfg index c2c66db4f77..1060e9b842d 100644 --- a/config-model/src/test/derived/position_attribute/ilscripts.cfg +++ b/config-model/src/test/derived/position_attribute/ilscripts.cfg @@ -1,4 +1,4 @@ -maxtermoccurrences 100 +maxtermoccurrences 1000 fieldmatchmaxlength 1000000 ilscript[].doctype "position_attribute" ilscript[].docfield[] "pos" diff --git a/config-model/src/test/derived/position_extra/ilscripts.cfg 
b/config-model/src/test/derived/position_extra/ilscripts.cfg index 110b5e40644..95681924fac 100644 --- a/config-model/src/test/derived/position_extra/ilscripts.cfg +++ b/config-model/src/test/derived/position_extra/ilscripts.cfg @@ -1,4 +1,4 @@ -maxtermoccurrences 100 +maxtermoccurrences 1000 fieldmatchmaxlength 1000000 ilscript[].doctype "position_extra" ilscript[].docfield[] "pos_str" diff --git a/config-model/src/test/derived/prefixexactattribute/ilscripts.cfg b/config-model/src/test/derived/prefixexactattribute/ilscripts.cfg index a2ada9792a0..5af86ee0152 100644 --- a/config-model/src/test/derived/prefixexactattribute/ilscripts.cfg +++ b/config-model/src/test/derived/prefixexactattribute/ilscripts.cfg @@ -1,4 +1,4 @@ -maxtermoccurrences 100 +maxtermoccurrences 1000 fieldmatchmaxlength 1000000 ilscript[].doctype "prefixexactattribute" ilscript[].docfield[] "indexfield0" diff --git a/config-model/src/test/derived/ranktypes/ilscripts.cfg b/config-model/src/test/derived/ranktypes/ilscripts.cfg index 9f82e75986c..f8015a84f29 100644 --- a/config-model/src/test/derived/ranktypes/ilscripts.cfg +++ b/config-model/src/test/derived/ranktypes/ilscripts.cfg @@ -1,4 +1,4 @@ -maxtermoccurrences 100 +maxtermoccurrences 1000 fieldmatchmaxlength 1000000 ilscript[].doctype "ranktypes" ilscript[].docfield[] "title" diff --git a/config-model/src/test/derived/schemainheritance/ilscripts.cfg b/config-model/src/test/derived/schemainheritance/ilscripts.cfg index 0a8e10859a0..e20fac6a5c0 100644 --- a/config-model/src/test/derived/schemainheritance/ilscripts.cfg +++ b/config-model/src/test/derived/schemainheritance/ilscripts.cfg @@ -1,4 +1,4 @@ -maxtermoccurrences 100 +maxtermoccurrences 1000 fieldmatchmaxlength 1000000 ilscript[].doctype "child" ilscript[].docfield[] "pf1" diff --git a/config-model/src/test/derived/structanyorder/ilscripts.cfg b/config-model/src/test/derived/structanyorder/ilscripts.cfg index d89bb92df53..9db111fc20e 100644 --- 
a/config-model/src/test/derived/structanyorder/ilscripts.cfg +++ b/config-model/src/test/derived/structanyorder/ilscripts.cfg @@ -1,4 +1,4 @@ -maxtermoccurrences 100 +maxtermoccurrences 1000 fieldmatchmaxlength 1000000 ilscript[].doctype "annotationsimplicitstruct" ilscript[].docfield[] "structfield" diff --git a/config-model/src/test/derived/tokenization/ilscripts.cfg b/config-model/src/test/derived/tokenization/ilscripts.cfg index 4414ad0f7cb..6d62a8a941d 100644 --- a/config-model/src/test/derived/tokenization/ilscripts.cfg +++ b/config-model/src/test/derived/tokenization/ilscripts.cfg @@ -1,4 +1,4 @@ -maxtermoccurrences 100 +maxtermoccurrences 1000 fieldmatchmaxlength 1000000 ilscript[].doctype "tokenization" ilscript[].docfield[] "text" diff --git a/config-model/src/test/derived/types/ilscripts.cfg b/config-model/src/test/derived/types/ilscripts.cfg index b3da5f8e727..31628ac5573 100644 --- a/config-model/src/test/derived/types/ilscripts.cfg +++ b/config-model/src/test/derived/types/ilscripts.cfg @@ -1,4 +1,4 @@ -maxtermoccurrences 100 +maxtermoccurrences 1000 fieldmatchmaxlength 1000000 ilscript[].doctype "types" ilscript[].docfield[] "abyte" diff --git a/config-model/src/test/derived/uri_array/ilscripts.cfg b/config-model/src/test/derived/uri_array/ilscripts.cfg index 90664bba50d..a2985d48743 100644 --- a/config-model/src/test/derived/uri_array/ilscripts.cfg +++ b/config-model/src/test/derived/uri_array/ilscripts.cfg @@ -1,4 +1,4 @@ -maxtermoccurrences 100 +maxtermoccurrences 1000 fieldmatchmaxlength 1000000 ilscript[].doctype "uri_array" ilscript[].docfield[] "my_uri" diff --git a/config-model/src/test/derived/uri_wset/ilscripts.cfg b/config-model/src/test/derived/uri_wset/ilscripts.cfg index 1ada759a711..be1332d44b8 100644 --- a/config-model/src/test/derived/uri_wset/ilscripts.cfg +++ b/config-model/src/test/derived/uri_wset/ilscripts.cfg @@ -1,4 +1,4 @@ -maxtermoccurrences 100 +maxtermoccurrences 1000 fieldmatchmaxlength 1000000 ilscript[].doctype 
"uri_wset" ilscript[].docfield[] "my_uri" diff --git a/config-model/src/test/java/com/yahoo/schema/SchemaTestCase.java b/config-model/src/test/java/com/yahoo/schema/SchemaTestCase.java index e920672646f..c959634019d 100644 --- a/config-model/src/test/java/com/yahoo/schema/SchemaTestCase.java +++ b/config-model/src/test/java/com/yahoo/schema/SchemaTestCase.java @@ -1,17 +1,11 @@ // Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. package com.yahoo.schema; -import com.yahoo.document.Document; import com.yahoo.schema.document.Stemming; import com.yahoo.schema.parser.ParseException; import com.yahoo.schema.processing.ImportedFieldsResolver; import com.yahoo.schema.processing.OnnxModelTypeResolver; import com.yahoo.vespa.documentmodel.DocumentSummary; -import com.yahoo.vespa.indexinglanguage.expressions.AttributeExpression; -import com.yahoo.vespa.indexinglanguage.expressions.Expression; -import com.yahoo.vespa.indexinglanguage.expressions.InputExpression; -import com.yahoo.vespa.indexinglanguage.expressions.ScriptExpression; -import com.yahoo.vespa.indexinglanguage.expressions.StatementExpression; import com.yahoo.vespa.model.test.utils.DeployLoggerStub; import org.junit.jupiter.api.Test; diff --git a/config-model/src/test/java/com/yahoo/schema/derived/VsmFieldsTestCase.java b/config-model/src/test/java/com/yahoo/schema/derived/VsmFieldsTestCase.java index 423bc0b1798..61d636d911f 100644 --- a/config-model/src/test/java/com/yahoo/schema/derived/VsmFieldsTestCase.java +++ b/config-model/src/test/java/com/yahoo/schema/derived/VsmFieldsTestCase.java @@ -32,7 +32,8 @@ public class VsmFieldsTestCase { private static VsmfieldsConfig vsmfieldsConfig(Schema schema) { VsmFields vsmFields = new VsmFields(schema); VsmfieldsConfig.Builder cfgBuilder = new VsmfieldsConfig.Builder(); - vsmFields.getConfig(cfgBuilder);return cfgBuilder.build(); + vsmFields.getConfig(cfgBuilder); + return cfgBuilder.build(); } @Test diff --git 
a/config-model/src/test/java/com/yahoo/vespa/model/admin/metricsproxy/MetricsProxyContainerTest.java b/config-model/src/test/java/com/yahoo/vespa/model/admin/metricsproxy/MetricsProxyContainerTest.java index af825ca544a..ac431f081ed 100644 --- a/config-model/src/test/java/com/yahoo/vespa/model/admin/metricsproxy/MetricsProxyContainerTest.java +++ b/config-model/src/test/java/com/yahoo/vespa/model/admin/metricsproxy/MetricsProxyContainerTest.java @@ -6,15 +6,18 @@ import ai.vespa.metricsproxy.metric.dimensions.NodeDimensionsConfig; import ai.vespa.metricsproxy.metric.dimensions.PublicDimensions; import ai.vespa.metricsproxy.rpc.RpcConnectorConfig; import ai.vespa.metricsproxy.service.VespaServicesConfig; +import com.yahoo.config.model.api.HostInfo; import com.yahoo.config.model.deploy.DeployState; import com.yahoo.vespa.model.VespaModel; import org.junit.jupiter.api.Test; +import java.util.Iterator; + import static com.yahoo.config.model.api.container.ContainerServiceType.METRICS_PROXY_CONTAINER; +import static com.yahoo.vespa.model.admin.metricsproxy.MetricsProxyModelTester.CLUSTER_CONFIG_ID; import static com.yahoo.vespa.model.admin.metricsproxy.MetricsProxyModelTester.CONTAINER_CONFIG_ID; import static com.yahoo.vespa.model.admin.metricsproxy.MetricsProxyModelTester.TestMode.hosted; import static com.yahoo.vespa.model.admin.metricsproxy.MetricsProxyModelTester.TestMode.self_hosted; -import static com.yahoo.vespa.model.admin.metricsproxy.MetricsProxyModelTester.containerConfigId; import static com.yahoo.vespa.model.admin.metricsproxy.MetricsProxyModelTester.getModel; import static com.yahoo.vespa.model.admin.metricsproxy.MetricsProxyModelTester.getNodeDimensionsConfig; import static com.yahoo.vespa.model.admin.metricsproxy.MetricsProxyModelTester.getRpcConnectorConfig; @@ -104,12 +107,22 @@ public class MetricsProxyContainerTest { assertEquals("", container.getPreLoad()); } + String hostedConfigIdForHost(VespaModel model, int index) { + HostInfo hostInfo = null; + 
for (Iterator<HostInfo> iter = model.getHosts().iterator(); iter.hasNext(); index--) { + hostInfo = iter.next(); + if (index == 0) break; + } + return CLUSTER_CONFIG_ID + "/" + hostInfo.getHostname(); + } + @Test void hosted_application_propagates_node_dimensions() { String services = hostedServicesWithContent(); - VespaModel hostedModel = getModel(services, hosted); - assertEquals(4, hostedModel.getHosts().size()); - String configId = containerConfigId(hostedModel, hosted); + VespaModel hostedModel = getModel(services, hosted, new DeployState.Builder(), 5); + assertEquals(5, hostedModel.getHosts().size()); + String configId = hostedConfigIdForHost(hostedModel, 1); + NodeDimensionsConfig config = getNodeDimensionsConfig(hostedModel, configId); assertEquals("content", config.dimensions(PublicDimensions.INTERNAL_CLUSTER_TYPE)); @@ -120,9 +133,10 @@ public class MetricsProxyContainerTest { @Test void metrics_v2_handler_is_set_up_with_node_info_config() { String services = hostedServicesWithContent(); - VespaModel hostedModel = getModel(services, hosted); + VespaModel hostedModel = getModel(services, hosted, new DeployState.Builder(), 5); - var container = (MetricsProxyContainer) hostedModel.id2producer().get(containerConfigId(hostedModel, hosted)); + String configId = hostedConfigIdForHost(hostedModel, 1); + var container = (MetricsProxyContainer) hostedModel.id2producer().get(configId); var handlers = container.getHandlers().getComponents(); assertEquals(1, handlers.size()); @@ -136,7 +150,7 @@ public class MetricsProxyContainerTest { @Test void vespa_services_config_has_all_services() { VespaServicesConfig vespaServicesConfig = getVespaServicesConfig(hostedServicesWithContent()); - assertEquals(9, vespaServicesConfig.service().size()); + assertEquals(10, vespaServicesConfig.service().size()); for (var service : vespaServicesConfig.service()) { if (service.configId().equals("admin/cluster-controllers/0")) { @@ -178,6 +192,9 @@ public class MetricsProxyContainerTest { 
private static String hostedServicesWithContent() { return String.join("\n", "<services>", + " <container version='1.0' id='foo'>", + " <nodes count='1'/>", + " </container>", " <content version='1.0' id='my-content'>", " <redundancy>1</redundancy>" + " <documents />", diff --git a/config-model/src/test/java/com/yahoo/vespa/model/admin/metricsproxy/MetricsProxyModelTester.java b/config-model/src/test/java/com/yahoo/vespa/model/admin/metricsproxy/MetricsProxyModelTester.java index 332426ff9a8..c356db31d15 100644 --- a/config-model/src/test/java/com/yahoo/vespa/model/admin/metricsproxy/MetricsProxyModelTester.java +++ b/config-model/src/test/java/com/yahoo/vespa/model/admin/metricsproxy/MetricsProxyModelTester.java @@ -12,7 +12,6 @@ import ai.vespa.metricsproxy.service.VespaServicesConfig; import com.yahoo.config.model.api.ApplicationClusterEndpoint; import com.yahoo.config.model.api.ContainerEndpoint; import com.yahoo.config.model.deploy.DeployState; -import com.yahoo.search.config.QrStartConfig; import com.yahoo.vespa.model.VespaModel; import com.yahoo.vespa.model.admin.monitoring.MetricsConsumer; import com.yahoo.vespa.model.test.VespaModelTester; @@ -47,7 +46,7 @@ class MetricsProxyModelTester { } static VespaModel getModel(String servicesXml, TestMode testMode, DeployState.Builder builder) { - return getModel(servicesXml, testMode, new DeployState.Builder(), 4); + return getModel(servicesXml, testMode, builder, 4); } static VespaModel getModel(String servicesXml, TestMode testMode, DeployState.Builder builder, int hostCount) { @@ -62,12 +61,6 @@ class MetricsProxyModelTester { return tester.createModel(servicesXml, true, builder); } - static String containerConfigId(VespaModel model, MetricsProxyModelTester.TestMode mode) { - return (mode == hosted) - ? 
CLUSTER_CONFIG_ID + "/" + model.getHosts().iterator().next().getHostname() - : CONTAINER_CONFIG_ID; - } - static String servicesWithAdminOnly() { return String.join("\n", "<services>", @@ -112,10 +105,6 @@ class MetricsProxyModelTester { return model.getConfig(ApplicationDimensionsConfig.class, CLUSTER_CONFIG_ID); } - static QrStartConfig getQrStartConfig(VespaModel model, String hostname) { - return model.getConfig(QrStartConfig.class, CLUSTER_CONFIG_ID + "/" + hostname); - } - static NodeDimensionsConfig getNodeDimensionsConfig(VespaModel model, String configId) { return model.getConfig(NodeDimensionsConfig.class, configId); } diff --git a/config-model/src/test/java/com/yahoo/vespa/model/application/validation/ContainerInCloudValidatorTest.java b/config-model/src/test/java/com/yahoo/vespa/model/application/validation/ContainerInCloudValidatorTest.java index b6484049eaf..61cde1e1c13 100644 --- a/config-model/src/test/java/com/yahoo/vespa/model/application/validation/ContainerInCloudValidatorTest.java +++ b/config-model/src/test/java/com/yahoo/vespa/model/application/validation/ContainerInCloudValidatorTest.java @@ -42,12 +42,6 @@ public class ContainerInCloudValidatorTest { String servicesXml = """ <services version='1.0'> %s - <content id='foo' version='1.0'> - <redundancy>2</redundancy> - <documents> - </documents> - <nodes count='2' /> - </content> </services> """.formatted(container); ApplicationPackage app = new MockApplicationPackage.Builder() diff --git a/config-model/src/test/java/com/yahoo/vespa/model/application/validation/ValidationTester.java b/config-model/src/test/java/com/yahoo/vespa/model/application/validation/ValidationTester.java index 29279635918..42ca2a8001b 100644 --- a/config-model/src/test/java/com/yahoo/vespa/model/application/validation/ValidationTester.java +++ b/config-model/src/test/java/com/yahoo/vespa/model/application/validation/ValidationTester.java @@ -24,7 +24,6 @@ import java.time.Instant; import java.time.LocalDate; import 
java.time.ZoneOffset; import java.time.format.DateTimeFormatter; -import java.util.ArrayList; import java.util.Arrays; import java.util.List; import java.util.Set; @@ -33,7 +32,6 @@ import java.util.stream.Stream; import static com.yahoo.config.model.test.MockApplicationPackage.BOOK_SCHEMA; import static com.yahoo.config.model.test.MockApplicationPackage.MUSIC_SCHEMA; -import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertTrue; /** diff --git a/config-model/src/test/java/com/yahoo/vespa/model/builder/xml/dom/ContentBuilderTest.java b/config-model/src/test/java/com/yahoo/vespa/model/builder/xml/dom/ContentBuilderTest.java index 43ea0191ca5..f8adb18a2c3 100644 --- a/config-model/src/test/java/com/yahoo/vespa/model/builder/xml/dom/ContentBuilderTest.java +++ b/config-model/src/test/java/com/yahoo/vespa/model/builder/xml/dom/ContentBuilderTest.java @@ -213,7 +213,7 @@ public class ContentBuilderTest extends DomBuilderTest { assertEquals(1, cluster.getRoot().hostSystem().getHosts().size()); HostResource h = cluster.getRoot().hostSystem().getHost("mockhost"); String [] expectedServices = { - "logd", "configproxy", "config-sentinel", "configserver", "logserver", + "logd", "configproxy", "config-sentinel", "configserver", "container", "logserver", "slobrok", "storagenode", "distributor", "searchnode", "transactionlogserver", CLUSTERCONTROLLER_CONTAINER.serviceName, METRICS_PROXY_CONTAINER.serviceName }; diff --git a/config-model/src/test/java/com/yahoo/vespa/model/content/ContentClusterTest.java b/config-model/src/test/java/com/yahoo/vespa/model/content/ContentClusterTest.java index d4087c0acf9..a2f68ec8f18 100644 --- a/config-model/src/test/java/com/yahoo/vespa/model/content/ContentClusterTest.java +++ b/config-model/src/test/java/com/yahoo/vespa/model/content/ContentClusterTest.java @@ -1440,41 +1440,34 @@ public class ContentClusterTest extends ContentBaseTest { assertGroupsAllowedDown(2, 1, 2); } - private 
void assertIndexingDocprocEnabled(boolean indexed, boolean force, boolean expEnabled) { + private void assertIndexingDocprocEnabled(boolean indexed) { String services = "<?xml version='1.0' encoding='UTF-8' ?>" + "<services version='1.0'>" + " <container id='default' version='1.0'>" + - " <document-processing/>" + + " <search/>" + " </container>" + " <content id='search' version='1.0'>" + " <redundancy>1</redundancy>" + " <documents>" + - " <document-processing cluster='default'" + (force ? " chain='indexing'" : "") + "/>" + " <document type='type1' mode='" + (indexed ? "index" : "streaming") + "'/>" + " </documents>" + " </content>" + "</services>"; VespaModel model = createEnd2EndOneNode(new TestProperties(), services); var searchCluster = model.getContentClusters().get("search").getSearch(); - assertEquals(expEnabled, searchCluster.getIndexingDocproc().isPresent()); + assertEquals("default", searchCluster.getIndexingDocproc().getClusterName("search")); } @Test void testIndexingDocprocEnabledWhenIndexMode() { - assertIndexingDocprocEnabled(true, false, true); + assertIndexingDocprocEnabled(true); } @Test void testIndexingDocprocNotEnabledWhenStreamingMode() { - assertIndexingDocprocEnabled(false, false, false); - } - - @Test - void testIndexingDocprocEnabledWhenStreamingModeAndForced() - { - assertIndexingDocprocEnabled(false, true, true); + assertIndexingDocprocEnabled(false); } private void assertGroupsAllowedDown(int groupCount, double groupsAllowedDown, int expectedGroupsAllowedDown) { diff --git a/config-model/src/test/java/com/yahoo/vespa/model/content/StorageContentTest.java b/config-model/src/test/java/com/yahoo/vespa/model/content/StorageContentTest.java index e37999ded12..3defaad549c 100644 --- a/config-model/src/test/java/com/yahoo/vespa/model/content/StorageContentTest.java +++ b/config-model/src/test/java/com/yahoo/vespa/model/content/StorageContentTest.java @@ -65,9 +65,13 @@ public class StorageContentTest extends ContentBaseTest { DocumentProtocol 
protocol = (DocumentProtocol) routing.getProtocols().get(0); RoutingTableSpec spec = protocol.getRoutingTableSpec(); - assertEquals(1, spec.getNumHops()); - assertEquals("indexing", spec.getHop(0).getName()); - assertEquals("[DocumentRouteSelector]", spec.getHop(0).getSelector()); + assertEquals(3, spec.getNumHops()); + assertEquals("docproc/cluster.bar.indexing/chain.indexing", spec.getHop(0).getName()); + assertEquals("[LoadBalancer:cluster=docproc/cluster.bar.indexing;session=chain.indexing]", spec.getHop(0).getSelector()); + assertEquals("docproc/cluster.zoo.indexing/chain.indexing", spec.getHop(1).getName()); + assertEquals("[LoadBalancer:cluster=docproc/cluster.zoo.indexing;session=chain.indexing]", spec.getHop(1).getSelector()); + assertEquals("indexing", spec.getHop(2).getName()); + assertEquals("[DocumentRouteSelector]", spec.getHop(2).getSelector()); Map<String, RouteSpec> routes = new TreeMap<>(); diff --git a/configdefinitions/src/vespa/ilscripts.def b/configdefinitions/src/vespa/ilscripts.def index 16671806603..2e86ae1a02a 100644 --- a/configdefinitions/src/vespa/ilscripts.def +++ b/configdefinitions/src/vespa/ilscripts.def @@ -2,7 +2,7 @@ namespace=vespa.configdefinition ## The maximum number of occurrences of a given term to index per field -maxtermoccurrences int default=100 +maxtermoccurrences int default=1000 fieldmatchmaxlength int default=1000000 ilscript[].doctype string diff --git a/configserver/src/main/java/com/yahoo/vespa/config/server/deploy/ModelContextImpl.java b/configserver/src/main/java/com/yahoo/vespa/config/server/deploy/ModelContextImpl.java index 5dd04c102c4..22b2b581b44 100644 --- a/configserver/src/main/java/com/yahoo/vespa/config/server/deploy/ModelContextImpl.java +++ b/configserver/src/main/java/com/yahoo/vespa/config/server/deploy/ModelContextImpl.java @@ -204,7 +204,6 @@ public class ModelContextImpl implements ModelContext { private boolean sortBlueprintsByCost; private final boolean alwaysMarkPhraseExpensive; private 
final int contentLayerMetadataFeatureLevel; - private final boolean dynamicHeapSize; private final String unknownConfigDefinition; private final int searchHandlerThreadpool; private final long mergingMaxMemoryUsagePerNode; @@ -248,7 +247,6 @@ public class ModelContextImpl implements ModelContext { this.heapPercentage = flagValue(source, appId, version, PermanentFlags.HEAP_SIZE_PERCENTAGE); this.summaryDecodePolicy = flagValue(source, appId, version, Flags.SUMMARY_DECODE_POLICY); this.contentLayerMetadataFeatureLevel = flagValue(source, appId, version, Flags.CONTENT_LAYER_METADATA_FEATURE_LEVEL); - this.dynamicHeapSize = flagValue(source, appId, version, Flags.DYNAMIC_HEAP_SIZE); this.unknownConfigDefinition = flagValue(source, appId, version, Flags.UNKNOWN_CONFIG_DEFINITION); this.searchHandlerThreadpool = flagValue(source, appId, version, Flags.SEARCH_HANDLER_THREADPOOL); this.mergingMaxMemoryUsagePerNode = flagValue(source, appId, version, Flags.MERGING_MAX_MEMORY_USAGE_PER_NODE); @@ -303,7 +301,6 @@ public class ModelContextImpl implements ModelContext { } @Override public boolean alwaysMarkPhraseExpensive() { return alwaysMarkPhraseExpensive; } @Override public int contentLayerMetadataFeatureLevel() { return contentLayerMetadataFeatureLevel; } - @Override public boolean dynamicHeapSize() { return dynamicHeapSize; } @Override public String unknownConfigDefinition() { return unknownConfigDefinition; } @Override public int searchHandlerThreadpool() { return searchHandlerThreadpool; } @Override public long mergingMaxMemoryUsagePerNode() { return mergingMaxMemoryUsagePerNode; } diff --git a/configserver/src/main/java/com/yahoo/vespa/config/server/modelfactory/ActivatedModelsBuilder.java b/configserver/src/main/java/com/yahoo/vespa/config/server/modelfactory/ActivatedModelsBuilder.java index 64ccd910120..ad785a33d5b 100644 --- a/configserver/src/main/java/com/yahoo/vespa/config/server/modelfactory/ActivatedModelsBuilder.java +++ 
b/configserver/src/main/java/com/yahoo/vespa/config/server/modelfactory/ActivatedModelsBuilder.java @@ -123,7 +123,7 @@ public class ActivatedModelsBuilder extends ModelsBuilder<Application> { wantedNodeVespaVersion); MetricUpdater applicationMetricUpdater = metrics.getOrCreateMetricUpdater(Metrics.createDimensions(applicationId)); ServerCache serverCache = new ServerCache(configDefinitionRepo, zkClient.getUserConfigDefinitions()); - return new Application(withDeferredConfigForRestartingClusters(modelFactory.createModel(modelContext)), + return new Application(modelFactory.createModel(modelContext), serverCache, applicationGeneration, modelFactory.version(), @@ -170,15 +170,4 @@ public class ActivatedModelsBuilder extends ModelsBuilder<Application> { zkClient.readDataplaneTokens()); } - private Model withDeferredConfigForRestartingClusters(Model model) { - if ( ! (model instanceof VespaModel vespaModel)) return model; - for (ClusterSpec.Id cluster : zkClient.readActivationTriggers().restartingClusters()) { - ApplicationContainerCluster containerCluster = vespaModel.getContainerClusters().get(cluster.value()); - if (containerCluster != null) containerCluster.setDeferChangesUntilRestart(true); - ContentCluster contentCluster = vespaModel.getContentClusters().get(cluster.value()); - if (contentCluster != null) contentCluster.setDeferChangesUntilRestart(true); - } - return model; - } - } diff --git a/configserver/src/main/java/com/yahoo/vespa/config/server/session/ActivationTriggers.java b/configserver/src/main/java/com/yahoo/vespa/config/server/session/ActivationTriggers.java index 04db4b1b806..e704a36d21e 100644 --- a/configserver/src/main/java/com/yahoo/vespa/config/server/session/ActivationTriggers.java +++ b/configserver/src/main/java/com/yahoo/vespa/config/server/session/ActivationTriggers.java @@ -15,9 +15,9 @@ import java.util.List; * * @author jonmv */ -public record ActivationTriggers(List<NodeRestart> nodeRestarts, List<ClusterSpec.Id> restartingClusters, 
List<Reindexing> reindexings) { +public record ActivationTriggers(List<NodeRestart> nodeRestarts, List<Reindexing> reindexings) { - private static final ActivationTriggers empty = new ActivationTriggers(List.of(), List.of(), List.of()); + private static final ActivationTriggers empty = new ActivationTriggers(List.of(), List.of()); public record NodeRestart(String hostname) { } public record Reindexing(String clusterId, String documentType) { } @@ -30,11 +30,6 @@ public record ActivationTriggers(List<NodeRestart> nodeRestarts, List<ClusterSpe .hostnames().stream() .map(NodeRestart::new) .toList(), - configChangeActions.getRestartActions() - .useForInternalRestart(isInternalRedeployment) - .getEntries().stream() - .map(entry -> ClusterSpec.Id.from(entry.getClusterName())) - .toList(), configChangeActions.getReindexActions().getEntries().stream() .map(entry -> new Reindexing(entry.getClusterName(), entry.getDocumentType())) .toList()); diff --git a/configserver/src/main/java/com/yahoo/vespa/config/server/session/ActivationTriggersSerializer.java b/configserver/src/main/java/com/yahoo/vespa/config/server/session/ActivationTriggersSerializer.java index 11a0f3cb935..4bad32ffee4 100644 --- a/configserver/src/main/java/com/yahoo/vespa/config/server/session/ActivationTriggersSerializer.java +++ b/configserver/src/main/java/com/yahoo/vespa/config/server/session/ActivationTriggersSerializer.java @@ -17,7 +17,6 @@ import static com.yahoo.yolean.Exceptions.uncheck; public class ActivationTriggersSerializer { static final String NODE_RESTARTS = "nodeRestarts"; - static final String RESTARTING_CLUSTERS = "restartingClusters"; static final String REINDEXINGS = "reindexings"; static final String CLUSTER_NAME = "clusterName"; static final String DOCUMENT_TYPE = "documentType"; @@ -37,10 +36,6 @@ public class ActivationTriggersSerializer { for (NodeRestart nodeRestart : triggers.nodeRestarts()) nodeRestarts.addString(nodeRestart.hostname()); - Cursor restartingClusters = 
object.setArray(RESTARTING_CLUSTERS); - for (ClusterSpec.Id clusterId : triggers.restartingClusters()) - restartingClusters.addString(clusterId.value()); - Cursor reindexings = object.setArray(REINDEXINGS); for (Reindexing reindexing : triggers.reindexings()) { Cursor entry = reindexings.addObject(); @@ -56,14 +51,11 @@ public class ActivationTriggersSerializer { List<NodeRestart> nodeRestarts = SlimeUtils.entriesStream(object.field(NODE_RESTARTS)) .map(entry -> new NodeRestart(entry.asString())) .toList(); - List<ClusterSpec.Id> restartingClusters = SlimeUtils.entriesStream(object.field(RESTARTING_CLUSTERS)) - .map(entry -> ClusterSpec.Id.from(entry.asString())) - .toList(); List<Reindexing> reindexings = SlimeUtils.entriesStream(object.field(REINDEXINGS)) .map(entry -> new Reindexing(entry.field(CLUSTER_NAME).asString(), entry.field(DOCUMENT_TYPE).asString())) .toList(); - return new ActivationTriggers(nodeRestarts, restartingClusters, reindexings); + return new ActivationTriggers(nodeRestarts, reindexings); } } diff --git a/configserver/src/main/java/com/yahoo/vespa/config/server/session/SessionRepository.java b/configserver/src/main/java/com/yahoo/vespa/config/server/session/SessionRepository.java index 8da58de26b8..52c11ed0e93 100644 --- a/configserver/src/main/java/com/yahoo/vespa/config/server/session/SessionRepository.java +++ b/configserver/src/main/java/com/yahoo/vespa/config/server/session/SessionRepository.java @@ -606,7 +606,7 @@ public class SessionRepository { existingSession.getOperatorCertificates(), existingSession.getCloudAccount(), existingSession.getDataplaneTokens(), - existingSession.getActivationTriggers(), + ActivationTriggers.empty(), writeSessionData); } diff --git a/configserver/src/test/java/com/yahoo/vespa/config/server/deploy/HostedDeployTest.java b/configserver/src/test/java/com/yahoo/vespa/config/server/deploy/HostedDeployTest.java index a6f2eb38cc3..838b1b6b209 100644 --- 
a/configserver/src/test/java/com/yahoo/vespa/config/server/deploy/HostedDeployTest.java +++ b/configserver/src/test/java/com/yahoo/vespa/config/server/deploy/HostedDeployTest.java @@ -537,20 +537,6 @@ public class HostedDeployTest { assertEquals(Optional.of(ApplicationReindexing.empty() .withPending("music", "music", prepareResult.sessionId())), tester.tenant().getApplicationRepo().database().readReindexingStatus(tester.applicationId())); - - VespaModel model = ((VespaModel) tester.tenant().getSessionRepository() - .activeApplicationVersions(tester.applicationId()).get().get(Version.fromString("6.1.0")).get() - .getModel()); - - // Config for the container cluster to be restarted has been deferred until after restart. - ComponentsConfig.Builder builder1 = new ComponentsConfig.Builder(); - model.getContainerClusters().get("container").getContainers().get(0).getConfig(builder1); - assertTrue(builder1.getApplyOnRestart()); - - // Config for the metricsproxy cluster, which is not restarted, has not been deferred until after restart. 
- ComponentsConfig.Builder builder2 = new ComponentsConfig.Builder(); - model.getAdmin().getMetricsProxyCluster().getContainers().get(0).getConfig(builder2); - assertFalse(builder2.getApplyOnRestart()); } @Test diff --git a/configserver/src/test/java/com/yahoo/vespa/config/server/session/ActivationTriggersSerializerTest.java b/configserver/src/test/java/com/yahoo/vespa/config/server/session/ActivationTriggersSerializerTest.java index 085fecaea16..5ef143198e8 100644 --- a/configserver/src/test/java/com/yahoo/vespa/config/server/session/ActivationTriggersSerializerTest.java +++ b/configserver/src/test/java/com/yahoo/vespa/config/server/session/ActivationTriggersSerializerTest.java @@ -1,6 +1,5 @@ package com.yahoo.vespa.config.server.session; -import com.yahoo.config.provision.ClusterSpec.Id; import com.yahoo.vespa.config.server.session.ActivationTriggers.NodeRestart; import com.yahoo.vespa.config.server.session.ActivationTriggers.Reindexing; import org.junit.jupiter.api.Test; @@ -18,7 +17,6 @@ class ActivationTriggersSerializerTest { void testSerialization() { ActivationTriggers triggers = new ActivationTriggers(List.of(new NodeRestart("node1"), new NodeRestart("node2")), - List.of(Id.from("cluster1")), List.of(new Reindexing("cluster1", "type1"), new Reindexing("cluster1", "type2"), new Reindexing("cluster2", "type1"))); diff --git a/configserver/src/test/java/com/yahoo/vespa/config/server/session/SessionZooKeeperClientTest.java b/configserver/src/test/java/com/yahoo/vespa/config/server/session/SessionZooKeeperClientTest.java index e182c3f557b..2fe46868562 100644 --- a/configserver/src/test/java/com/yahoo/vespa/config/server/session/SessionZooKeeperClientTest.java +++ b/configserver/src/test/java/com/yahoo/vespa/config/server/session/SessionZooKeeperClientTest.java @@ -178,7 +178,7 @@ public class SessionZooKeeperClientTest { String data = Utf8.toString(curator.getData(path).get()); 
assertTrue(data.contains("{\"applicationId\":\"default:default:default\",\"applicationPackageReference\":\"foo\",\"version\":\"8.195.1\",\"createTime\":")); assertTrue(data.contains(",\"tenantSecretStores\":[],\"operatorCertificates\":[],\"dataplaneTokens\":[]," + - "\"activationTriggers\":{\"nodeRestarts\":[],\"restartingClusters\":[],\"reindexings\":[]}")); + "\"activationTriggers\":{\"nodeRestarts\":[],\"reindexings\":[]}")); } private void assertApplicationIdParse(long sessionId, String idString, String expectedIdString) { diff --git a/container-core/src/main/resources/configdefinitions/container.qr.def b/container-core/src/main/resources/configdefinitions/container.qr.def index e49e334a299..51ec6fc324a 100644 --- a/container-core/src/main/resources/configdefinitions/container.qr.def +++ b/container-core/src/main/resources/configdefinitions/container.qr.def @@ -9,16 +9,16 @@ namespace=container filedistributor.configid reference default="" ## Is RPC server enabled? -rpc.enabled bool default=false restart +rpc.enabled bool default=false ## RPC server listen port -rpc.port int default=8086 restart +rpc.port int default=8086 ## Which interface to bind to. -rpc.host string default="" restart +rpc.host string default="" ## The id this service should register itself with in slobrok -rpc.slobrokId string default="" restart +rpc.slobrokId string default="" ## A unique identifier string for this QRS. The only guarantee given is ## this string will be unique for every QRS in a Vespa application. 
diff --git a/container-disc/src/main/java/com/yahoo/container/jdisc/ConfiguredApplication.java b/container-disc/src/main/java/com/yahoo/container/jdisc/ConfiguredApplication.java index 6cc922682cb..0c93ccc43b3 100644 --- a/container-disc/src/main/java/com/yahoo/container/jdisc/ConfiguredApplication.java +++ b/container-disc/src/main/java/com/yahoo/container/jdisc/ConfiguredApplication.java @@ -62,6 +62,7 @@ import java.util.Comparator; import java.util.IdentityHashMap; import java.util.List; import java.util.Map; +import java.util.Objects; import java.util.Set; import java.util.concurrent.Phaser; import java.util.logging.Level; @@ -272,8 +273,25 @@ public final class ConfiguredApplication implements Application { if (first(subscriber.config().values()) instanceof QrConfig newConfig) { reconfigure(newConfig.shutdown()); synchronized (this) { - if (qrConfig.rpc().port() != newConfig.rpc().port()) { - log.log(Level.INFO, "Rpc port changed from " + qrConfig.rpc().port() + " to " + newConfig.rpc().port()); + var currRpc = qrConfig.rpc(); + var newRpc = newConfig.rpc(); + boolean reListen = (currRpc.port() != newRpc.port()) || + (currRpc.enabled() != newRpc.enabled()) || + ! Objects.equals(currRpc.host(), newRpc.host()) || + ! Objects.equals(currRpc.slobrokId(), newRpc.slobrokId()); + if (reListen) { + if (currRpc.port() != newRpc.port()) { + log.log(Level.INFO, "Rpc port changed from " + currRpc.port() + " to " + newRpc.port()); + } + if (currRpc.enabled() != newRpc.enabled()) { + log.log(Level.INFO, "Rpc server " + (newRpc.enabled() ? "enabled" : "disabled")); + } + if ( ! Objects.equals(currRpc.host(), newRpc.host())) { + log.log(Level.INFO, "Rpc host changed from " + currRpc.host() + " to " + newRpc.host()); + } + if ( ! 
Objects.equals(currRpc.slobrokId(), newRpc.slobrokId())) { + log.log(Level.INFO, "Rpc slobrokid changed from " + currRpc.slobrokId() + " to " + newRpc.slobrokId()); + } try { reListenRpc(newConfig); } catch (Throwable e) { diff --git a/container-search/src/main/java/com/yahoo/search/query/profile/types/ConversionContext.java b/container-search/src/main/java/com/yahoo/search/query/profile/types/ConversionContext.java index 70f6e405a92..bef766e7ef9 100644 --- a/container-search/src/main/java/com/yahoo/search/query/profile/types/ConversionContext.java +++ b/container-search/src/main/java/com/yahoo/search/query/profile/types/ConversionContext.java @@ -15,7 +15,6 @@ public class ConversionContext { private final String destination; private final CompiledQueryProfileRegistry registry; private final Map<String, Embedder> embedders; - private final Map<String, String> contextValues; private final Language language; public ConversionContext(String destination, CompiledQueryProfileRegistry registry, Embedder embedder, @@ -31,7 +30,6 @@ public class ConversionContext { this.embedders = embedders; this.language = context.containsKey("language") ? Language.fromLanguageTag(context.get("language")) : Language.UNKNOWN; - this.contextValues = context; } /** Returns the local name of the field which will receive the converted value (or null when this is empty) */ @@ -46,9 +44,6 @@ public class ConversionContext { /** Returns the language, which is never null but may be UNKNOWN */ Language language() { return language; } - /** Returns a read-only map of context key-values which can be looked up during conversion. 
*/ - Map<String,String> contextValues() { return contextValues; } - /** Returns an empty context */ public static ConversionContext empty() { return new ConversionContext(null, null, Embedder.throwsOnUse.asMap(), Map.of()); diff --git a/container-search/src/main/java/com/yahoo/search/query/profile/types/TensorFieldType.java b/container-search/src/main/java/com/yahoo/search/query/profile/types/TensorFieldType.java index e16f8e7b0cd..cfadd79de8f 100644 --- a/container-search/src/main/java/com/yahoo/search/query/profile/types/TensorFieldType.java +++ b/container-search/src/main/java/com/yahoo/search/query/profile/types/TensorFieldType.java @@ -48,8 +48,7 @@ public class TensorFieldType extends FieldType { @Override public Object convertFrom(Object o, ConversionContext context) { if (o instanceof SubstituteString) return new SubstituteStringTensor((SubstituteString) o, type); - return new TensorConverter(context.embedders()).convertTo(type, context.destination(), o, - context.language(), context.contextValues()); + return new TensorConverter(context.embedders()).convertTo(type, context.destination(), o, context.language()); } public static TensorFieldType fromTypeString(String s) { diff --git a/container-search/src/main/java/com/yahoo/search/query/properties/RankProfileInputProperties.java b/container-search/src/main/java/com/yahoo/search/query/properties/RankProfileInputProperties.java index 25a5c277dce..c9f935e5f52 100644 --- a/container-search/src/main/java/com/yahoo/search/query/properties/RankProfileInputProperties.java +++ b/container-search/src/main/java/com/yahoo/search/query/properties/RankProfileInputProperties.java @@ -44,8 +44,7 @@ public class RankProfileInputProperties extends Properties { value = tensorConverter.convertTo(expectedType, name.last(), value, - query.getModel().getLanguage(), - context); + query.getModel().getLanguage()); } } catch (IllegalArgumentException e) { diff --git 
a/container-search/src/main/java/com/yahoo/search/ranking/Normalizer.java b/container-search/src/main/java/com/yahoo/search/ranking/Normalizer.java index eb81d0555b3..0d86e1409c3 100644 --- a/container-search/src/main/java/com/yahoo/search/ranking/Normalizer.java +++ b/container-search/src/main/java/com/yahoo/search/ranking/Normalizer.java @@ -3,14 +3,29 @@ package com.yahoo.search.ranking; abstract class Normalizer { - protected final double[] data; + protected double[] data; protected int size = 0; - Normalizer(int maxSize) { - this.data = new double[maxSize]; + private static int initialCapacity(int hint) { + for (int capacity = 64; capacity < 4096; capacity *= 2) { + if (hint <= capacity) { + return capacity; + } + } + return 4096; + } + + Normalizer(int sizeHint) { + this.data = new double[initialCapacity(sizeHint)]; } int addInput(double value) { + if (size == data.length) { + int newSize = size * 2; + var tmp = new double[newSize]; + System.arraycopy(data, 0, tmp, 0, size); + this.data = tmp; + } data[size] = value; return size++; } diff --git a/container-search/src/main/java/com/yahoo/search/schema/internal/TensorConverter.java b/container-search/src/main/java/com/yahoo/search/schema/internal/TensorConverter.java index 94f92c7fd48..6da53ae699c 100644 --- a/container-search/src/main/java/com/yahoo/search/schema/internal/TensorConverter.java +++ b/container-search/src/main/java/com/yahoo/search/schema/internal/TensorConverter.java @@ -19,8 +19,7 @@ import java.util.regex.Pattern; */ public class TensorConverter { - private static final Pattern embedderArgumentAndQuotedTextRegexp = Pattern.compile("^([A-Za-z0-9_@\\-.]+),\\s*([\"'].*[\"'])"); - private static final Pattern embedderArgumentAndReferenceRegexp = Pattern.compile("^([A-Za-z0-9_@\\-.]+),\\s*(@.*)"); + private static final Pattern embedderArgumentRegexp = Pattern.compile("^([A-Za-z0-9_\\-.]+),\\s*([\"'].*[\"'])"); private final Map<String, Embedder> embedders; @@ -28,9 +27,8 @@ public class 
TensorConverter { this.embedders = embedders; } - public Tensor convertTo(TensorType type, String key, Object value, Language language, - Map<String, String> contextValues) { - var context = new Embedder.Context(key).setLanguage(language).setContextValues(contextValues); + public Tensor convertTo(TensorType type, String key, Object value, Language language) { + var context = new Embedder.Context(key).setLanguage(language); Tensor tensor = toTensor(type, value, context); if (tensor == null) return null; if (! tensor.type().isAssignableTo(type)) @@ -57,16 +55,16 @@ public class TensorConverter { String embedderId; // Check if arguments specifies an embedder with the format embed(embedder, "text to encode") - Matcher matcher; - if (( matcher = embedderArgumentAndQuotedTextRegexp.matcher(argument)).matches()) { + Matcher matcher = embedderArgumentRegexp.matcher(argument); + if (matcher.matches()) { embedderId = matcher.group(1); - embedder = requireEmbedder(embedderId); argument = matcher.group(2); - } else if (( matcher = embedderArgumentAndReferenceRegexp.matcher(argument)).matches()) { - embedderId = matcher.group(1); - embedder = requireEmbedder(embedderId); - argument = matcher.group(2); - } else if (embedders.isEmpty()) { + if ( ! embedders.containsKey(embedderId)) { + throw new IllegalArgumentException("Can't find embedder '" + embedderId + "'. " + + "Valid embedders are " + validEmbedders(embedders)); + } + embedder = embedders.get(embedderId); + } else if (embedders.size() == 0) { throw new IllegalStateException("No embedders provided"); // should never happen } else if (embedders.size() > 1) { throw new IllegalArgumentException("Multiple embedders are provided but no embedder id is given. 
" + @@ -76,35 +74,19 @@ public class TensorConverter { embedderId = entry.getKey(); embedder = entry.getValue(); } - return embedder.embed(resolve(argument, embedderContext), embedderContext.copy().setEmbedderId(embedderId), type); + return embedder.embed(removeQuotes(argument), embedderContext.copy().setEmbedderId(embedderId), type); } - private Embedder requireEmbedder(String embedderId) { - if ( ! embedders.containsKey(embedderId)) - throw new IllegalArgumentException("Can't find embedder '" + embedderId + "'. " + - "Valid embedders are " + validEmbedders(embedders)); - return embedders.get(embedderId); - } - - private static String resolve(String s, Embedder.Context embedderContext) { - if (s.startsWith("'") && s.endsWith("'")) + private static String removeQuotes(String s) { + if (s.startsWith("'") && s.endsWith("'")) { return s.substring(1, s.length() - 1); - if (s.startsWith("\"") && s.endsWith("\"")) + } + if (s.startsWith("\"") && s.endsWith("\"")) { return s.substring(1, s.length() - 1); - if (s.startsWith("@")) - return resolveReference(s, embedderContext); + } return s; } - private static String resolveReference(String s, Embedder.Context embedderContext) { - String referenceKey = s.substring(1); - String referencedValue = embedderContext.getContextValues().get(referenceKey); - if (referencedValue == null) - throw new IllegalArgumentException("Could not resolve query parameter reference '" + referenceKey + - "' used in an embed() argument"); - return referencedValue; - } - private static String validEmbedders(Map<String, Embedder> embedders) { List<String> embedderIds = new ArrayList<>(); embedders.forEach((key, value) -> embedderIds.add(key)); diff --git a/container-search/src/test/java/com/yahoo/search/query/RankProfileInputTest.java b/container-search/src/test/java/com/yahoo/search/query/RankProfileInputTest.java index 429b8d1c6cb..90e21e5f3b0 100644 --- a/container-search/src/test/java/com/yahoo/search/query/RankProfileInputTest.java +++ 
b/container-search/src/test/java/com/yahoo/search/query/RankProfileInputTest.java @@ -185,21 +185,6 @@ public class RankProfileInputTest { assertEmbedQuery("embed(emb2, '" + text + "')", embedding2, embedders, Language.UNKNOWN.languageCode()); } - @Test - void testUnembeddedTensorRankFeatureInRequestReferencedFromAParameter() { - String text = "text to embed into a tensor"; - Tensor embedding1 = Tensor.from("tensor<float>(x[5]):[3,7,4,0,0]]"); - - Map<String, Embedder> embedders = Map.of( - "emb1", new MockEmbedder(text, Language.UNKNOWN, embedding1) - ); - assertEmbedQuery("embed(@param1)", embedding1, embedders, null, text); - assertEmbedQuery("embed(emb1, @param1)", embedding1, embedders, null, text); - assertEmbedQueryFails("embed(emb1, @noSuchParam)", embedding1, embedders, - "Could not resolve query parameter reference 'noSuchParam' " + - "used in an embed() argument"); - } - private Query createTensor1Query(String tensorString, String profile, String additionalParams) { return new Query.Builder() .setSchemaInfo(createSchemaInfo()) @@ -217,24 +202,18 @@ public class RankProfileInputTest { } private void assertEmbedQuery(String embed, Tensor expected, Map<String, Embedder> embedders) { - assertEmbedQuery(embed, expected, embedders, null, null); + assertEmbedQuery(embed, expected, embedders, null); } private void assertEmbedQuery(String embed, Tensor expected, Map<String, Embedder> embedders, String language) { - assertEmbedQuery(embed, expected, embedders, language, null); - } - private void assertEmbedQuery(String embed, Tensor expected, Map<String, Embedder> embedders, String language, String param1Value) { String languageParam = language == null ? "" : "&language=" + language; - String param1 = param1Value == null ? "" : "&param1=" + urlEncode(param1Value); - String destination = "query(myTensor4)"; Query query = new Query.Builder().setRequest(HttpRequest.createTestRequest( "?" + urlEncode("ranking.features." 
+ destination) + "=" + urlEncode(embed) + "&ranking=commonProfile" + - languageParam + - param1, + languageParam, com.yahoo.jdisc.http.HttpRequest.Method.GET)) .setSchemaInfo(createSchemaInfo()) .setQueryProfile(createQueryProfile()) @@ -251,7 +230,7 @@ public class RankProfileInputTest { if (t.getMessage().equals(errMsg)) return; t = t.getCause(); } - fail("Exception with message '" + errMsg + "' not thrown"); + fail("Error '" + errMsg + "' not thrown"); } private CompiledQueryProfile createQueryProfile() { diff --git a/default_build_settings.cmake b/default_build_settings.cmake index 580124993e6..41ab347f8a0 100644 --- a/default_build_settings.cmake +++ b/default_build_settings.cmake @@ -131,8 +131,13 @@ function(vespa_use_default_build_settings) if(CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64") if(APPLE AND (("${CMAKE_CXX_COMPILER_ID}" STREQUAL "Clang") OR ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "AppleClang"))) elseif(CMAKE_CXX_COMPILER_ID STREQUAL "GNU") - # Default to haswell cpu or newer - set(DEFAULT_VESPA_CPU_ARCH_FLAGS "-march=haswell -mtune=skylake-avx512") + if(CMAKE_CXX_COMPILER_VERSION VERSION_GREATER_EQUAL 12.0 AND NOT CMAKE_CXX_COMPILER_VERSION VERSION_GREATER_EQUAL 12.3) + # Temporary workaround for https://gcc.gnu.org/bugzilla/show_bug.cgi?id=108599 + set(DEFAULT_VESPA_CPU_ARCH_FLAGS "-march=haswell -mtune=skylake") + else() + # Default to haswell cpu or newer + set(DEFAULT_VESPA_CPU_ARCH_FLAGS "-march=haswell -mtune=skylake-avx512") + endif() endif() elseif(CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64") set(DEFAULT_VESPA_CPU_ARCH_FLAGS "-march=armv8.2-a+fp16+dotprod+crypto -mtune=neoverse-n1") diff --git a/dependency-versions/pom.xml b/dependency-versions/pom.xml index af9d51509cb..76e537601d5 100644 --- a/dependency-versions/pom.xml +++ b/dependency-versions/pom.xml @@ -145,6 +145,8 @@ <!-- Versions used by tenant parent pom and testing framework --> <!-- CAUTION: upgrading junit for tenants poms may break testing frameworks --> + <!-- CAUTION 2: this 
version must match the exported packages from the tenant-cd-api module --> + <!-- CAUTION 3: this is probably not a good idea to change too often; consider a major version next time --> <junit.vespa.tenant.version>5.10.1</junit.vespa.tenant.version> <junit.platform.vespa.tenant.version>1.10.1</junit.platform.vespa.tenant.version> <surefire.vespa.tenant.version>${surefire.vespa.version}</surefire.vespa.tenant.version> diff --git a/flags/src/main/java/com/yahoo/vespa/flags/Flags.java b/flags/src/main/java/com/yahoo/vespa/flags/Flags.java index 12130005bdb..2b8bff8e6b4 100644 --- a/flags/src/main/java/com/yahoo/vespa/flags/Flags.java +++ b/flags/src/main/java/com/yahoo/vespa/flags/Flags.java @@ -306,6 +306,14 @@ public class Flags { "Takes effect at next tick", INSTANCE_ID); + public static final UnboundListFlag<String> OTELCOL_LOGS = defineListFlag( + "otelcol-logs", List.of(), String.class, + List.of("olaa"), "2024-01-15", "2024-03-01", + "Determines log files handled by the OpenTelemetry collector", + "Takes effect at next tick", + INSTANCE_ID, HOSTNAME + ); + public static final UnboundStringFlag CORE_ENCRYPTION_PUBLIC_KEY_ID = defineStringFlag( "core-encryption-public-key-id", "", List.of("vekterli"), "2022-11-03", "2024-02-01", @@ -313,11 +321,6 @@ public class Flags { "Takes effect on the next tick.", NODE_TYPE, HOSTNAME); - public static final UnboundBooleanFlag ENABLE_THE_ONE_THAT_SHOULD_NOT_BE_NAMED = defineFeatureFlag( - "enable-the-one-that-should-not-be-named", false, List.of("hmusum"), "2023-05-08", "2024-01-15", - "Whether to enable the one program that should not be named", - "Takes effect at next host-admin tick"); - public static final UnboundListFlag<String> ZONAL_WEIGHTED_ENDPOINT_RECORDS = defineListFlag( "zonal-weighted-endpoint-records", List.of(), String.class, List.of("jonmv"), "2023-12-15", "2024-06-01", "A list of weighted (application) endpoint fqdns for which we should use zonal endpoints as targets, not LBs.", @@ -385,8 +388,8 @@ public 
class Flags { INSTANCE_ID); public static final UnboundBooleanFlag DYNAMIC_HEAP_SIZE = defineFeatureFlag( - "dynamic-heap-size", false, - List.of("bjorncs"), "2023-09-21", "2024-01-15", + "dynamic-heap-size", true, + List.of("bjorncs"), "2023-09-21", "2024-02-15", "Whether to calculate JVM heap size based on predicted Onnx model memory requirements", "Takes effect at redeployment", INSTANCE_ID); diff --git a/linguistics/abi-spec.json b/linguistics/abi-spec.json index dc6a62cc463..1ffb879e57e 100644 --- a/linguistics/abi-spec.json +++ b/linguistics/abi-spec.json @@ -344,9 +344,7 @@ "public java.lang.String getDestination()", "public com.yahoo.language.process.Embedder$Context setDestination(java.lang.String)", "public java.lang.String getEmbedderId()", - "public com.yahoo.language.process.Embedder$Context setEmbedderId(java.lang.String)", - "public java.util.Map getContextValues()", - "public com.yahoo.language.process.Embedder$Context setContextValues(java.util.Map)" + "public com.yahoo.language.process.Embedder$Context setEmbedderId(java.lang.String)" ], "fields" : [ ] }, diff --git a/linguistics/src/main/java/com/yahoo/language/process/Embedder.java b/linguistics/src/main/java/com/yahoo/language/process/Embedder.java index d9d2256d0c1..fa141977d5d 100644 --- a/linguistics/src/main/java/com/yahoo/language/process/Embedder.java +++ b/linguistics/src/main/java/com/yahoo/language/process/Embedder.java @@ -88,7 +88,6 @@ public interface Embedder { private Language language = Language.UNKNOWN; private String destination; private String embedderId = "unknown"; - private Map<String, String> contextValues; public Context(String destination) { this.destination = destination; @@ -139,15 +138,6 @@ public interface Embedder { this.embedderId = embedderId; return this; } - - /** Returns a read-only map of context key-values which can be looked up during conversion. 
*/ - public Map<String, String> getContextValues() { return contextValues; } - - public Context setContextValues(Map<String, String> contextValues) { - this.contextValues = Map.copyOf(contextValues); - return this; - } - } class FailingEmbedder implements Embedder { diff --git a/model-integration/src/main/java/ai/vespa/embedding/ColBertEmbedder.java b/model-integration/src/main/java/ai/vespa/embedding/ColBertEmbedder.java index 0bee03a65af..8c39cc8c813 100644 --- a/model-integration/src/main/java/ai/vespa/embedding/ColBertEmbedder.java +++ b/model-integration/src/main/java/ai/vespa/embedding/ColBertEmbedder.java @@ -191,10 +191,10 @@ public class ColBertEmbedder extends AbstractComponent implements Embedder { attentionMaskName, attentionMaskTensor.expand("d0")); Map<String, Tensor> outputs = evaluator.evaluate(inputs); Tensor tokenEmbeddings = outputs.get(outputName); - IndexedTensor result = (IndexedTensor) tokenEmbeddings.reduce(Reduce.Aggregator.min, "d0"); + IndexedTensor result = (IndexedTensor) tokenEmbeddings; int dims = tensorType.indexedSubtype().dimensions().get(0).size().get().intValue(); - if (dims != result.shape()[1]) { + if (dims != result.shape()[2]) { throw new IllegalArgumentException("Token vector dimensionality does not" + " match indexed dimensionality of " + dims); } @@ -217,9 +217,9 @@ public class ColBertEmbedder extends AbstractComponent implements Embedder { Map<String, Tensor> outputs = evaluator.evaluate(inputs); Tensor tokenEmbeddings = outputs.get(outputName); - IndexedTensor result = (IndexedTensor) tokenEmbeddings.reduce(Reduce.Aggregator.min, "d0"); + IndexedTensor result = (IndexedTensor) tokenEmbeddings; Tensor contextualEmbeddings; - int maxTokens = input.inputIds.size() -1; //Do not retain last PAD + int maxTokens = input.inputIds.size(); //Retain all token vectors, including PAD tokens. 
if (tensorType.valueType() == TensorType.Value.INT8) { contextualEmbeddings = toBitTensor(result, tensorType, maxTokens); } else { @@ -230,11 +230,13 @@ public class ColBertEmbedder extends AbstractComponent implements Embedder { } public static Tensor toFloatTensor(IndexedTensor result, TensorType type, int nTokens) { + if(result.shape().length != 3) + throw new IllegalArgumentException("Expected onnx result to have 3-dimensions [batch, sequence, dim]"); int size = type.indexedSubtype().dimensions().size(); if (size != 1) - throw new IllegalArgumentException("Indexed tensor must have one dimension"); + throw new IllegalArgumentException("Target indexed sub-type must have one dimension"); int wantedDimensionality = type.indexedSubtype().dimensions().get(0).size().get().intValue(); - int resultDimensionality = (int)result.shape()[1]; + int resultDimensionality = (int)result.shape()[2]; if (resultDimensionality != wantedDimensionality) { throw new IllegalArgumentException("Not possible to map token vector embedding with " + resultDimensionality + " + dimensions into tensor with " + wantedDimensionality); @@ -242,7 +244,7 @@ public class ColBertEmbedder extends AbstractComponent implements Embedder { Tensor.Builder builder = Tensor.Builder.of(type); for (int token = 0; token < nTokens; token++) { for (int d = 0; d < resultDimensionality; d++) { - var value = result.get(TensorAddress.of(token, d)); + var value = result.get(0,token,d); // batch, sequence token, dimension builder.cell(TensorAddress.of(token,d),value); } } @@ -253,11 +255,14 @@ public class ColBertEmbedder extends AbstractComponent implements Embedder { if (type.valueType() != TensorType.Value.INT8) throw new IllegalArgumentException("Only a int8 tensor type can be" + " the destination of bit packing"); + if(result.shape().length != 3) + throw new IllegalArgumentException("Expected onnx result to have 3-dimensions [batch, sequence, dim]"); + int size = type.indexedSubtype().dimensions().size(); if (size 
!= 1) - throw new IllegalArgumentException("Indexed tensor must have one dimension"); + throw new IllegalArgumentException("Target indexed sub-type must have one dimension"); int wantedDimensionality = type.indexedSubtype().dimensions().get(0).size().get().intValue(); - int resultDimensionality = (int)result.shape()[1]; + int resultDimensionality = (int)result.shape()[2]; if (resultDimensionality != 8 * wantedDimensionality) { throw new IllegalArgumentException("Not possible to pack " + resultDimensionality + " + dimensions into " + wantedDimensionality + " dimensions"); @@ -266,8 +271,8 @@ public class ColBertEmbedder extends AbstractComponent implements Embedder { for (int token = 0; token < nTokens; token++) { BitSet bitSet = new BitSet(8); int key = 0; - for (int d = 0; d < result.shape()[1]; d++) { - var value = result.get(TensorAddress.of(token, d)); + for (int d = 0; d < result.shape()[2]; d++) { + var value = result.get(0, token, d); // batch, sequence token, dimension int bitIndex = 7 - (d % 8); if (value > 0.0) { bitSet.set(bitIndex); diff --git a/model-integration/src/test/java/ai/vespa/embedding/ColBertEmbedderTest.java b/model-integration/src/test/java/ai/vespa/embedding/ColBertEmbedderTest.java index f3682e45efc..0cae94c372a 100644 --- a/model-integration/src/test/java/ai/vespa/embedding/ColBertEmbedderTest.java +++ b/model-integration/src/test/java/ai/vespa/embedding/ColBertEmbedderTest.java @@ -10,6 +10,7 @@ import com.yahoo.tensor.MixedTensor; import com.yahoo.tensor.Tensor; import com.yahoo.tensor.TensorAddress; import com.yahoo.tensor.TensorType; +import org.junit.Ignore; import org.junit.Test; import java.util.List; @@ -35,25 +36,25 @@ public class ColBertEmbedderTest { public void testPacking() { assertPackedRight( "" + - "tensor<float>(d1[6],d2[8]):" + - "[" + + "tensor<float>(d0[1],d1[6],d2[8]):" + + "[[" + "[0, 0, 0, 0, 0, 0, 0, 1]," + "[0, 0, 0, 0, 0, 1, 0, 1]," + "[0, 0, 0, 0, 0, 0, 1, 1]," + "[0, 1, 1, 1, 1, 1, 1, 1]," + "[1, 0, 0, 0, 0, 
0, 0, 0]," + "[1, 1, 1, 1, 1, 1, 1, 1]" + - "]", + "]]", TensorType.fromSpec("tensor<int8>(dt{},x[1])"), "tensor<int8>(dt{},x[1]):{0:1.0, 1:5.0, 2:3.0, 3:127.0, 4:-128.0, 5:-1.0}", 6 ); assertPackedRight( "" + - "tensor<float>(d1[2],d2[16]):" + - "[" + + "tensor<float>(d0[1],d1[2],d2[16]):" + + "[[" + "[0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0]," + "[0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1]" + - "]", + "]]", TensorType.fromSpec("tensor<int8>(dt{},x[2])"), "tensor<int8>(dt{},x[2]):{0:[1.0, -128.0], 1:[5.0, 1.0]}",2 ); @@ -133,18 +134,35 @@ public class ColBertEmbedderTest { } String text = sb.toString(); Tensor fullFloat = assertEmbed("tensor<float>(dt{},x[128])", text, indexingContext); - assertEquals(511*128,fullFloat.size()); + assertEquals(512*128,fullFloat.size()); Tensor query = assertEmbed("tensor<float>(dt{},x[128])", text, queryContext); assertEquals(32*128,query.size()); Tensor binaryRep = assertEmbed("tensor<int8>(dt{},x[16])", text, indexingContext); - assertEquals(511*16,binaryRep.size()); + assertEquals(512*16,binaryRep.size()); Tensor shortDoc = assertEmbed("tensor<int8>(dt{},x[16])", "annoyance", indexingContext); - // 3 tokens, 16 bytes each = 48 bytes + // 4 tokens, 16 bytes each = 64 bytes //CLS [unused1] sequence - assertEquals(3*16,shortDoc.size());; + assertEquals(4*16,shortDoc.size());; + } + + @Ignore + public void testPerf() { + StringBuilder sb = new StringBuilder(); + for(int i = 0; i < 256; i++) { + sb.append("annoyance"); + sb.append(" "); + } + String text = sb.toString(); + Long now = System.currentTimeMillis(); + int n = 1000; + for (int i = 0; i < n; i++) { + assertEmbed("tensor<float>(dt{},x[128])", text, indexingContext); + } + Long elapsed = (System.currentTimeMillis() - now); + System.out.println("Elapsed time: " + elapsed + " ms"); } static Tensor assertEmbed(String tensorSpec, String text, Embedder.Context context) { @@ -163,11 +181,11 @@ public class ColBertEmbedderTest { Tensor packed = 
ColBertEmbedder.toBitTensor(in, destination, size); assertEquals(expected, packed.toString()); Tensor unpacked = ColBertEmbedder.expandBitTensor(packed); - assertEquals(in.shape()[1], unpacked.type().indexedSubtype().dimensions().get(0).size().get().longValue()); + assertEquals(in.shape()[2], unpacked.type().indexedSubtype().dimensions().get(0).size().get().longValue()); for (int dOuter = 0; dOuter < size; dOuter++) { - for (int dInner = 0; dInner < in.shape()[1]; dInner++) { + for (int dInner = 0; dInner < in.shape()[2]; dInner++) { var addr = TensorAddress.of(dOuter, dInner); - double oldVal = in.get(addr); + double oldVal = in.get(TensorAddress.of(0,dOuter, dInner)); if (oldVal > 0) { assertEquals(unpacked.get(addr), 1.0, 0.0); } else { diff --git a/searchlib/src/tests/query/streaming_query_test.cpp b/searchlib/src/tests/query/streaming_query_test.cpp index 7c4b7555158..fe6149e6fba 100644 --- a/searchlib/src/tests/query/streaming_query_test.cpp +++ b/searchlib/src/tests/query/streaming_query_test.cpp @@ -7,6 +7,7 @@ #include <vespa/searchlib/query/streaming/query.h> #include <vespa/searchlib/query/streaming/nearest_neighbor_query_node.h> #include <vespa/searchlib/query/streaming/wand_term.h> +#include <vespa/searchlib/query/streaming/weighted_set_term.h> #include <vespa/searchlib/query/tree/querybuilder.h> #include <vespa/searchlib/query/tree/simplequery.h> #include <vespa/searchlib/query/tree/stackdumpcreator.h> @@ -1020,6 +1021,48 @@ TEST(StreamingQueryTest, wand_term) check_wand_term(exp_wand_score_field_11 + 1, "hidden score below limit"); } +TEST(StreamingQueryTest, weighted_set_term) +{ + search::streaming::WeightedSetTerm term({}, "index", 2); + term.add_term(std::make_unique<QueryTerm>(std::unique_ptr<QueryNodeResultBase>(), "7", "", QueryTermSimple::Type::WORD)); + term.get_terms().back()->setWeight(Weight(4)); + term.add_term(std::make_unique<QueryTerm>(std::unique_ptr<QueryNodeResultBase>(), "9", "", QueryTermSimple::Type::WORD)); + 
term.get_terms().back()->setWeight(Weight(13)); + EXPECT_EQ(2, term.get_terms().size()); + SimpleTermData td; + /* + * Search in fields 10, 11 and 12 (cf. fieldset in schema). + * Fields 11 and 12 have content for doc containing the keys. + * Fields 10 and 12 have valid handles and can be used for ranking. + * Field 11 does not have a valid handle, thus no associated match data. + */ + td.addField(10); + td.addField(11); + td.addField(12); + td.lookupField(10)->setHandle(0); + td.lookupField(12)->setHandle(1); + EXPECT_FALSE(term.evaluate()); + auto& q0 = *term.get_terms()[0]; + q0.add(0, 11, 0, 10); + q0.add(0, 12, 0, 10); + auto& q1 = *term.get_terms()[1]; + q1.add(0, 11, 0, 10); + q1.add(0, 12, 0, 10); + EXPECT_TRUE(term.evaluate()); + MatchData md(MatchData::params().numTermFields(2)); + term.unpack_match_data(23, td, md); + auto tmd0 = md.resolveTermField(0); + EXPECT_NE(23, tmd0->getDocId()); + auto tmd1 = md.resolveTermField(1); + EXPECT_EQ(23, tmd1->getDocId()); + using Weights = std::vector<int32_t>; + Weights weights; + for (auto& pos : *tmd1) { + weights.emplace_back(pos.getElementWeight()); + } + EXPECT_EQ((Weights{13, 4}), weights); +} + TEST(StreamingQueryTest, control_the_size_of_query_terms) { EXPECT_EQ(112u, sizeof(QueryTermSimple)); diff --git a/searchlib/src/tests/queryeval/blueprint/blueprint_test.cpp b/searchlib/src/tests/queryeval/blueprint/blueprint_test.cpp index f800e124bdc..bbd2744119a 100644 --- a/searchlib/src/tests/queryeval/blueprint/blueprint_test.cpp +++ b/searchlib/src/tests/queryeval/blueprint/blueprint_test.cpp @@ -24,10 +24,10 @@ class MyOr : public IntermediateBlueprint private: public: double calculate_cost() const final { - return cost_of(get_children(), OrFlow()); + return OrFlow::cost_of(get_children()); } double calculate_relative_estimate() const final { - return estimate_of(get_children(), OrFlow()); + return OrFlow::estimate_of(get_children()); } HitEstimate combine(const std::vector<HitEstimate> &data) const override { 
return max(data); diff --git a/searchlib/src/tests/queryeval/blueprint/intermediate_blueprints_test.cpp b/searchlib/src/tests/queryeval/blueprint/intermediate_blueprints_test.cpp index ab1c004c721..856ac2391f8 100644 --- a/searchlib/src/tests/queryeval/blueprint/intermediate_blueprints_test.cpp +++ b/searchlib/src/tests/queryeval/blueprint/intermediate_blueprints_test.cpp @@ -1380,7 +1380,7 @@ TEST("cost for ONEAR") { } TEST("cost for WEAKAND") { - verify_cost(make::WEAKAND(1000), calc_cost({{1.1, 0.8},{1.2, 0.7},{1.3, 0.5}})); + verify_cost(make::WEAKAND(1000), calc_cost({{1.3, 0.5},{1.2, 0.7},{1.1, 0.8}})); } TEST_MAIN() { TEST_DEBUG("lhs.out", "rhs.out"); TEST_RUN_ALL(); } diff --git a/searchlib/src/tests/queryeval/flow/queryeval_flow_test.cpp b/searchlib/src/tests/queryeval/flow/queryeval_flow_test.cpp index ceda30f169a..9a9adeac2bc 100644 --- a/searchlib/src/tests/queryeval/flow/queryeval_flow_test.cpp +++ b/searchlib/src/tests/queryeval/flow/queryeval_flow_test.cpp @@ -5,44 +5,46 @@ #include <vector> #include <random> -using search::queryeval::AndFlow; -using search::queryeval::OrFlow; +constexpr size_t loop_cnt = 64; + +using namespace search::queryeval; + +struct ItemAdapter { + double estimate(const auto &child) const noexcept { return child.rel_est; } + double cost(const auto &child) const noexcept { return child.cost; } + double strict_cost(const auto &child) const noexcept { return child.strict_cost; } +}; struct Item { double rel_est; double cost; - Item(double rel_est_in, double cost_in) noexcept - : rel_est(rel_est_in), cost(cost_in) {} - static void sort_for_and(std::vector<Item> &data) { - std::sort(data.begin(), data.end(), [](const Item &a, const Item &b) noexcept { - return (1.0 - a.rel_est) / a.cost > (1.0 - b.rel_est) / b.cost; - }); + double strict_cost; + Item(double rel_est_in, double cost_in, double strict_cost_in) noexcept + : rel_est(rel_est_in), cost(cost_in), strict_cost(strict_cost_in) {} + template <typename FLOW> static double 
estimate_of(std::vector<Item> &data) { + return FLOW::estimate_of(ItemAdapter(), data); } - static void sort_for_or(std::vector<Item> &data) { - std::sort(data.begin(), data.end(), [](const Item &a, const Item &b) noexcept { - return a.rel_est / a.cost > b.rel_est / b.cost; - }); + template <typename FLOW> static void sort(std::vector<Item> &data, bool strict) { + FLOW::sort(ItemAdapter(), data, strict); } - static double cost_of(const std::vector<Item> &data, auto flow) { - double cost = 0.0; - for (const Item &item: data) { - cost += flow.flow() * item.cost; - flow.add(item.rel_est); - } - return cost; + template <typename FLOW> static double cost_of(const std::vector<Item> &data, bool strict) { + return FLOW::cost_of(ItemAdapter(), data, strict); + } + template <typename FLOW> static double ordered_cost_of(const std::vector<Item> &data, bool strict) { + return flow::ordered_cost_of(ItemAdapter(), data, FLOW(1.0, strict)); } - static double cost_of_and(const std::vector<Item> &data) { return cost_of(data, AndFlow()); } - static double cost_of_or(const std::vector<Item> &data) { return cost_of(data, OrFlow()); } + auto operator <=>(const Item &rhs) const noexcept = default; }; std::vector<Item> gen_data(size_t size) { static std::mt19937 gen; - static std::uniform_real_distribution<double> rel_est(0.1, 0.9); - static std::uniform_real_distribution<double> cost(1.0, 10.0); + static std::uniform_real_distribution<double> rel_est(0.1, 0.9); + static std::uniform_real_distribution<double> cost(1.0, 10.0); + static std::uniform_real_distribution<double> strict_cost(0.1, 5.0); std::vector<Item> result; result.reserve(size); for (size_t i = 0; i < size; ++i) { - result.emplace_back(rel_est(gen), cost(gen)); + result.emplace_back(rel_est(gen), cost(gen), strict_cost(gen)); } return result; } @@ -80,37 +82,191 @@ TEST(FlowTest, perm_test) { EXPECT_EQ(seen.size(), 120); } +template <template <typename> typename ORDER> +void verify_ordering_is_strict_weak() { + auto cmp = 
ORDER(ItemAdapter()); + auto input = gen_data(7); + input.emplace_back(0.5, 1.5, 0.5); + input.emplace_back(0.5, 1.5, 0.5); + input.emplace_back(0.5, 1.5, 0.5); + input.emplace_back(0.0, 1.5, 0.5); + input.emplace_back(0.0, 1.5, 0.5); + input.emplace_back(0.5, 0.0, 0.5); + input.emplace_back(0.5, 0.0, 0.5); + input.emplace_back(0.5, 1.5, 0.0); + input.emplace_back(0.5, 1.5, 0.0); + input.emplace_back(0.0, 0.0, 0.0); + input.emplace_back(0.0, 0.0, 0.0); + std::vector<Item> output; + for (const Item &in: input) { + EXPECT_FALSE(cmp(in, in)); // Irreflexivity + size_t out_idx = 0; + bool lower = false; + bool upper = false; + for (const Item &out: output) { + if (cmp(out, in)) { + EXPECT_FALSE(cmp(in, out)); // Antisymmetry + EXPECT_FALSE(lower); // Transitivity + EXPECT_FALSE(upper); // Transitivity + ++out_idx; + } else { + lower = true; + if (cmp(in, out)) { + upper = true; + } else { + EXPECT_FALSE(upper); // Transitivity + } + } + } + output.insert(output.begin() + out_idx, in); + } +} + +TEST(FlowTest, and_ordering_is_strict_weak) { + verify_ordering_is_strict_weak<flow::MinAndCost>(); +} + +TEST(FlowTest, or_ordering_is_strict_weak) { + verify_ordering_is_strict_weak<flow::MinOrCost>(); +} + +TEST(FlowTest, strict_or_ordering_is_strict_weak) { + verify_ordering_is_strict_weak<flow::MinOrStrictCost>(); +} + +struct ExpectFlow { + double flow; + double est; + bool strict; +}; + +void verify_flow(auto flow, const std::vector<double> &est_list, const std::vector<ExpectFlow> &expect) { + ASSERT_EQ(est_list.size() + 1, expect.size()); + for (size_t i = 0; i < expect.size(); ++i) { + EXPECT_DOUBLE_EQ(flow.flow(), expect[i].flow); + EXPECT_DOUBLE_EQ(flow.estimate(), expect[i].est); + EXPECT_EQ(flow.strict(), expect[i].strict); + if (i < est_list.size()) { + flow.add(est_list[i]); + } + } +} + +TEST(FlowTest, basic_and_flow) { + for (double in: {1.0, 0.5, 0.25}) { + for (bool strict: {false, true}) { + verify_flow(AndFlow(in, strict), {0.4, 0.7, 0.2}, + {{in, 0.0, 
strict}, + {in*0.4, in*0.4, false}, + {in*0.4*0.7, in*0.4*0.7, false}, + {in*0.4*0.7*0.2, in*0.4*0.7*0.2, false}}); + } + } +} + +TEST(FlowTest, basic_or_flow) { + for (double in: {1.0, 0.5, 0.25}) { + for (bool strict: {false, true}) { + verify_flow(OrFlow(in, strict), {0.4, 0.7, 0.2}, + {{in, 0.0, strict}, + {in*0.6, 1.0-in*0.6, strict}, + {in*0.6*0.3, 1.0-in*0.6*0.3, strict}, + {in*0.6*0.3*0.8, 1.0-in*0.6*0.3*0.8, strict}}); + } + } +} + +TEST(FlowTest, basic_and_not_flow) { + for (double in: {1.0, 0.5, 0.25}) { + for (bool strict: {false, true}) { + verify_flow(AndNotFlow(in, strict), {0.4, 0.7, 0.2}, + {{in, 0.0, strict}, + {in*0.4, in*0.4, false}, + {in*0.4*0.3, in*0.4*0.3, false}, + {in*0.4*0.3*0.8, in*0.4*0.3*0.8, false}}); + } + } +} + +TEST(FlowTest, flow_cost) { + std::vector<Item> data = {{0.4, 1.1, 0.6}, {0.7, 1.2, 0.5}, {0.2, 1.3, 0.4}}; + EXPECT_DOUBLE_EQ(Item::ordered_cost_of<AndFlow>(data, false), 1.1 + 0.4*1.2 + 0.4*0.7*1.3); + EXPECT_DOUBLE_EQ(Item::ordered_cost_of<AndFlow>(data, true), 0.6 + 0.4*1.2 + 0.4*0.7*1.3); + EXPECT_DOUBLE_EQ(Item::ordered_cost_of<OrFlow>(data, false), 1.1 + 0.6*1.2 + 0.6*0.3*1.3); + EXPECT_DOUBLE_EQ(Item::ordered_cost_of<OrFlow>(data, true), 0.6 + 0.6*0.5 + 0.6*0.3*0.4); + EXPECT_DOUBLE_EQ(Item::ordered_cost_of<AndNotFlow>(data, false), 1.1 + 0.4*1.2 + 0.4*0.3*1.3); + EXPECT_DOUBLE_EQ(Item::ordered_cost_of<AndNotFlow>(data, true), 0.6 + 0.4*1.2 + 0.4*0.3*1.3); +} + TEST(FlowTest, optimal_and_flow) { - for (size_t i = 0; i < 256; ++i) { - auto data = gen_data(7); - Item::sort_for_and(data); - double min_cost = Item::cost_of_and(data); - double max_cost = 0.0; - auto check = [min_cost,&max_cost](const std::vector<Item> &my_data) noexcept { - double my_cost = Item::cost_of_and(my_data); - EXPECT_LE(min_cost, my_cost); - max_cost = std::max(max_cost, my_cost); - }; - each_perm(data, check); - fprintf(stderr, " and cost(%zu): min: %g, max: %g, factor: %g\n", - i, min_cost, max_cost, max_cost / min_cost); + for (size_t i = 0; 
i < loop_cnt; ++i) { + for (bool strict: {false, true}) { + auto data = gen_data(7); + double ref_est = Item::estimate_of<AndFlow>(data); + double min_cost = Item::cost_of<AndFlow>(data, strict); + double max_cost = 0.0; + Item::sort<AndFlow>(data, strict); + EXPECT_EQ(Item::ordered_cost_of<AndFlow>(data, strict), min_cost); + auto check = [&](const std::vector<Item> &my_data) noexcept { + double my_cost = Item::ordered_cost_of<AndFlow>(my_data, strict); + EXPECT_LE(min_cost, my_cost); + max_cost = std::max(max_cost, my_cost); + }; + each_perm(data, check); + if (loop_cnt < 1024 || i % 1024 == 0) { + fprintf(stderr, " AND cost(%zu,%s): min: %g, max: %g, factor: %g\n", + i, strict ? "strict" : "non-strict", min_cost, max_cost, max_cost / min_cost); + } + EXPECT_NEAR(ref_est, Item::estimate_of<AndFlow>(data), 1e-9); + } } } TEST(FlowTest, optimal_or_flow) { - for (size_t i = 0; i < 256; ++i) { - auto data = gen_data(7); - Item::sort_for_or(data); - double min_cost = Item::cost_of_or(data); - double max_cost = 0.0; - auto check = [min_cost,&max_cost](const std::vector<Item> &my_data) noexcept { - double my_cost = Item::cost_of_or(my_data); - EXPECT_LE(min_cost, my_cost); - max_cost = std::max(max_cost, my_cost); - }; - each_perm(data, check); - fprintf(stderr, " or cost(%zu): min: %g, max: %g, factor: %g\n", - i, min_cost, max_cost, max_cost / min_cost); + for (size_t i = 0; i < loop_cnt; ++i) { + for (bool strict: {false, true}) { + auto data = gen_data(7); + double min_cost = Item::cost_of<OrFlow>(data, strict); + double max_cost = 0.0; + Item::sort<OrFlow>(data, strict); + EXPECT_EQ(Item::ordered_cost_of<OrFlow>(data, strict), min_cost); + auto check = [&](const std::vector<Item> &my_data) noexcept { + double my_cost = Item::ordered_cost_of<OrFlow>(my_data, strict); + EXPECT_LE(min_cost, my_cost); + max_cost = std::max(max_cost, my_cost); + }; + each_perm(data, check); + if (loop_cnt < 1024 || i % 1024 == 0) { + fprintf(stderr, " OR cost(%zu,%s): min: %g, max: %g, 
factor: %g\n", + i, strict ? "strict" : "non-strict", min_cost, max_cost, max_cost / min_cost); + } + } + } +} + +TEST(FlowTest, optimal_and_not_flow) { + for (size_t i = 0; i < loop_cnt; ++i) { + for (bool strict: {false, true}) { + auto data = gen_data(7); + Item first = data[0]; + double min_cost = Item::cost_of<AndNotFlow>(data, strict); + double max_cost = 0.0; + Item::sort<AndNotFlow>(data, strict); + EXPECT_EQ(data[0], first); + EXPECT_EQ(Item::ordered_cost_of<AndNotFlow>(data, strict), min_cost); + auto check = [&](const std::vector<Item> &my_data) noexcept { + if (my_data[0] == first) { + double my_cost = Item::ordered_cost_of<AndNotFlow>(my_data, strict); + EXPECT_LE(min_cost, my_cost); + max_cost = std::max(max_cost, my_cost); + } + }; + each_perm(data, check); + if (loop_cnt < 1024 || i % 1024 == 0) { + fprintf(stderr, " ANDNOT cost(%zu,%s): min: %g, max: %g, factor: %g\n", + i, strict ? "strict" : "non-strict", min_cost, max_cost, max_cost / min_cost); + } + } } } diff --git a/searchlib/src/tests/queryeval/parallel_weak_and/parallel_weak_and_test.cpp b/searchlib/src/tests/queryeval/parallel_weak_and/parallel_weak_and_test.cpp index aa6d922f23f..a9f549a0bd9 100644 --- a/searchlib/src/tests/queryeval/parallel_weak_and/parallel_weak_and_test.cpp +++ b/searchlib/src/tests/queryeval/parallel_weak_and/parallel_weak_and_test.cpp @@ -391,8 +391,11 @@ struct HeapFixture SearchIterator::UP sb(spec.create()); result.search(*sb); } + ~HeapFixture(); }; +HeapFixture::~HeapFixture() = default; + TEST(ParallelWeakAndTest, require_that_scores_are_collected_in_batches_before_adjusting_heap) { HeapFixture f; diff --git a/searchlib/src/tests/queryeval/weak_and/weak_and_test.cpp b/searchlib/src/tests/queryeval/weak_and/weak_and_test.cpp index 9409b2b26c4..689f9f085d0 100644 --- a/searchlib/src/tests/queryeval/weak_and/weak_and_test.cpp +++ b/searchlib/src/tests/queryeval/weak_and/weak_and_test.cpp @@ -37,8 +37,11 @@ struct SimpleWandFixture { SearchIterator::UP 
search(spec.create()); hits.search(*search); } + ~SimpleWandFixture(); }; +SimpleWandFixture::~SimpleWandFixture() = default; + struct AdvancedWandFixture { MyWandSpec spec; SimpleResult hits; @@ -51,8 +54,11 @@ struct AdvancedWandFixture { SearchIterator::UP search(spec.create()); hits.search(*search); } + ~AdvancedWandFixture(); }; +AdvancedWandFixture::~AdvancedWandFixture() = default; + struct WeightOrder { bool operator()(const wand::Term &t1, const wand::Term &t2) const { return (t1.weight < t2.weight); diff --git a/searchlib/src/vespa/searchlib/attribute/attribute_weighted_set_blueprint.cpp b/searchlib/src/vespa/searchlib/attribute/attribute_weighted_set_blueprint.cpp index 99d3ba3f7aa..01148c11c9c 100644 --- a/searchlib/src/vespa/searchlib/attribute/attribute_weighted_set_blueprint.cpp +++ b/searchlib/src/vespa/searchlib/attribute/attribute_weighted_set_blueprint.cpp @@ -1,7 +1,7 @@ // Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
#include "attribute_weighted_set_blueprint.h" -#include "multi_term_filter.hpp" +#include "multi_term_hash_filter.hpp" #include <vespa/searchcommon/attribute/i_search_context.h> #include <vespa/searchlib/common/bitvector.h> #include <vespa/searchlib/fef/matchdatalayout.h> @@ -73,7 +73,7 @@ make_multi_term_filter(fef::TermFieldMatchData& tfmd, const std::vector<int32_t>& weights, const std::vector<ISearchContext*>& contexts) { - using FilterType = attribute::MultiTermFilter<WrapperType>; + using FilterType = attribute::MultiTermHashFilter<WrapperType>; typename FilterType::TokenMap tokens; WrapperType wrapper(attr); for (size_t i = 0; i < contexts.size(); ++i) { diff --git a/searchlib/src/vespa/searchlib/attribute/multi_term_filter.h b/searchlib/src/vespa/searchlib/attribute/multi_term_hash_filter.h index adbf37d2dcd..9c3ea258fdc 100644 --- a/searchlib/src/vespa/searchlib/attribute/multi_term_filter.h +++ b/searchlib/src/vespa/searchlib/attribute/multi_term_hash_filter.h @@ -18,7 +18,7 @@ namespace search::attribute { * @tparam WrapperType Type that wraps an attribute vector and provides access to the attribute value for a given docid. 
*/ template <typename WrapperType> -class MultiTermFilter final : public queryeval::SearchIterator +class MultiTermHashFilter final : public queryeval::SearchIterator { public: using Key = typename WrapperType::TokenT; @@ -31,9 +31,9 @@ private: int32_t _weight; public: - MultiTermFilter(fef::TermFieldMatchData& tfmd, - WrapperType attr, - TokenMap&& map); + MultiTermHashFilter(fef::TermFieldMatchData& tfmd, + WrapperType attr, + TokenMap&& map); void and_hits_into(BitVector& result, uint32_t begin_id) override; void doSeek(uint32_t docId) override; diff --git a/searchlib/src/vespa/searchlib/attribute/multi_term_filter.hpp b/searchlib/src/vespa/searchlib/attribute/multi_term_hash_filter.hpp index dc572aedbff..96d5b3ac1f3 100644 --- a/searchlib/src/vespa/searchlib/attribute/multi_term_filter.hpp +++ b/searchlib/src/vespa/searchlib/attribute/multi_term_hash_filter.hpp @@ -2,16 +2,16 @@ #pragma once -#include "multi_term_filter.h" +#include "multi_term_hash_filter.h" #include <vespa/searchlib/common/bitvector.h> #include <vespa/searchlib/fef/termfieldmatchdata.h> namespace search::attribute { template <typename WrapperType> -MultiTermFilter<WrapperType>::MultiTermFilter(fef::TermFieldMatchData& tfmd, - WrapperType attr, - TokenMap&& map) +MultiTermHashFilter<WrapperType>::MultiTermHashFilter(fef::TermFieldMatchData& tfmd, + WrapperType attr, + TokenMap&& map) : _tfmd(tfmd), _attr(attr), _map(std::move(map)), @@ -21,7 +21,7 @@ MultiTermFilter<WrapperType>::MultiTermFilter(fef::TermFieldMatchData& tfmd, template <typename WrapperType> void -MultiTermFilter<WrapperType>::and_hits_into(BitVector& result, uint32_t begin_id) +MultiTermHashFilter<WrapperType>::and_hits_into(BitVector& result, uint32_t begin_id) { auto end = _map.end(); result.foreach_truebit([&, end](uint32_t key) { if ( _map.find(_attr.getToken(key)) == end) { result.clearBit(key); }}, begin_id); @@ -29,7 +29,7 @@ MultiTermFilter<WrapperType>::and_hits_into(BitVector& result, uint32_t begin_id template 
<typename WrapperType> void -MultiTermFilter<WrapperType>::doSeek(uint32_t docId) +MultiTermHashFilter<WrapperType>::doSeek(uint32_t docId) { auto pos = _map.find(_attr.getToken(docId)); if (pos != _map.end()) { @@ -40,7 +40,7 @@ MultiTermFilter<WrapperType>::doSeek(uint32_t docId) template <typename WrapperType> void -MultiTermFilter<WrapperType>::doUnpack(uint32_t docId) +MultiTermHashFilter<WrapperType>::doUnpack(uint32_t docId) { _tfmd.reset(docId); fef::TermFieldMatchDataPosition pos; diff --git a/searchlib/src/vespa/searchlib/query/streaming/CMakeLists.txt b/searchlib/src/vespa/searchlib/query/streaming/CMakeLists.txt index 9b53407aff5..05a75f4662e 100644 --- a/searchlib/src/vespa/searchlib/query/streaming/CMakeLists.txt +++ b/searchlib/src/vespa/searchlib/query/streaming/CMakeLists.txt @@ -10,5 +10,7 @@ vespa_add_library(searchlib_query_streaming OBJECT querynoderesultbase.cpp queryterm.cpp wand_term.cpp + weighted_set_term.cpp + regexp_term.cpp DEPENDS ) diff --git a/searchlib/src/vespa/searchlib/query/streaming/query.cpp b/searchlib/src/vespa/searchlib/query/streaming/query.cpp index 3079ec31e8f..ca742aabe26 100644 --- a/searchlib/src/vespa/searchlib/query/streaming/query.cpp +++ b/searchlib/src/vespa/searchlib/query/streaming/query.cpp @@ -107,9 +107,7 @@ QueryConnector::create(ParseItem::ItemType type) case search::ParseItem::ITEM_AND: return std::make_unique<AndQueryNode>(); case search::ParseItem::ITEM_OR: case search::ParseItem::ITEM_WEAK_AND: return std::make_unique<OrQueryNode>(); - case search::ParseItem::ITEM_WEIGHTED_SET: case search::ParseItem::ITEM_EQUIV: return std::make_unique<EquivQueryNode>(); - case search::ParseItem::ITEM_WAND: return std::make_unique<OrQueryNode>(); case search::ParseItem::ITEM_NOT: return std::make_unique<AndNotQueryNode>(); case search::ParseItem::ITEM_PHRASE: return std::make_unique<PhraseQueryNode>(); case search::ParseItem::ITEM_SAME_ELEMENT: return std::make_unique<SameElementQueryNode>(); diff --git 
a/searchlib/src/vespa/searchlib/query/streaming/query.h b/searchlib/src/vespa/searchlib/query/streaming/query.h index 8befa2fe7fa..84c693b86d0 100644 --- a/searchlib/src/vespa/searchlib/query/streaming/query.h +++ b/searchlib/src/vespa/searchlib/query/streaming/query.h @@ -103,8 +103,7 @@ public: EquivQueryNode() noexcept : OrQueryNode("EQUIV") { } bool evaluate() const override; bool isFlattenable(ParseItem::ItemType type) const override { - return (type == ParseItem::ITEM_EQUIV) || - (type == ParseItem::ITEM_WEIGHTED_SET); + return (type == ParseItem::ITEM_EQUIV); } }; diff --git a/searchlib/src/vespa/searchlib/query/streaming/querynode.cpp b/searchlib/src/vespa/searchlib/query/streaming/querynode.cpp index c24f41d16cf..2ee515f062a 100644 --- a/searchlib/src/vespa/searchlib/query/streaming/querynode.cpp +++ b/searchlib/src/vespa/searchlib/query/streaming/querynode.cpp @@ -2,10 +2,12 @@ #include "query.h" #include "nearest_neighbor_query_node.h" +#include "regexp_term.h" #include <vespa/searchlib/parsequery/stackdumpiterator.h> #include <vespa/searchlib/query/streaming/dot_product_term.h> #include <vespa/searchlib/query/streaming/in_term.h> #include <vespa/searchlib/query/streaming/wand_term.h> +#include <vespa/searchlib/query/streaming/weighted_set_term.h> #include <vespa/searchlib/query/tree/term_vector.h> #include <charconv> #include <vespa/log/log.h> @@ -40,7 +42,6 @@ QueryNode::Build(const QueryNode * parent, const QueryNodeResultFactory & factor case ParseItem::ITEM_OR: case ParseItem::ITEM_WEAK_AND: case ParseItem::ITEM_EQUIV: - case ParseItem::ITEM_WEIGHTED_SET: case ParseItem::ITEM_NOT: case ParseItem::ITEM_PHRASE: case ParseItem::ITEM_SAME_ELEMENT: @@ -55,7 +56,6 @@ QueryNode::Build(const QueryNode * parent, const QueryNodeResultFactory & factor nqn->distance(queryRep.getNearDistance()); } if ((type == ParseItem::ITEM_WEAK_AND) || - (type == ParseItem::ITEM_WEIGHTED_SET) || (type == ParseItem::ITEM_SAME_ELEMENT)) { qn->setIndex(queryRep.getIndexName()); 
@@ -146,7 +146,12 @@ QueryNode::Build(const QueryNode * parent, const QueryNodeResultFactory & factor qn = std::make_unique<TrueNode>(); } else { Normalizing normalize_mode = factory.normalizing_mode(ssIndex); - auto qt = std::make_unique<QueryTerm>(factory.create(), ssTerm, ssIndex, sTerm, normalize_mode); + std::unique_ptr<QueryTerm> qt; + if (sTerm != TermType::REGEXP) { + qt = std::make_unique<QueryTerm>(factory.create(), ssTerm, ssIndex, sTerm, normalize_mode); + } else { + qt = std::make_unique<RegexpTerm>(factory.create(), ssTerm, ssIndex, TermType::REGEXP, normalize_mode); + } qt->setWeight(queryRep.GetWeight()); qt->setUniqueId(queryRep.getUniqueId()); if (qt->isFuzzy()) { @@ -192,6 +197,9 @@ QueryNode::Build(const QueryNode * parent, const QueryNodeResultFactory & factor case ParseItem::ITEM_WAND: qn = build_wand_term(factory, queryRep); break; + case ParseItem::ITEM_WEIGHTED_SET: + qn = build_weighted_set_term(factory, queryRep); + break; default: skip_unknown(queryRep); break; @@ -270,6 +278,16 @@ QueryNode::build_wand_term(const QueryNodeResultFactory& factory, SimpleQuerySta return wand; } +std::unique_ptr<QueryNode> +QueryNode::build_weighted_set_term(const QueryNodeResultFactory& factory, SimpleQueryStackDumpIterator& queryRep) +{ + auto ws = std::make_unique<WeightedSetTerm>(factory.create(), queryRep.getIndexName(), queryRep.getArity()); + ws->setWeight(queryRep.GetWeight()); + ws->setUniqueId(queryRep.getUniqueId()); + populate_multi_term(factory.normalizing_mode(ws->index()), *ws, queryRep); + return ws; +} + void QueryNode::skip_unknown(SimpleQueryStackDumpIterator& queryRep) { diff --git a/searchlib/src/vespa/searchlib/query/streaming/querynode.h b/searchlib/src/vespa/searchlib/query/streaming/querynode.h index a0561b2e52e..454932c0a68 100644 --- a/searchlib/src/vespa/searchlib/query/streaming/querynode.h +++ b/searchlib/src/vespa/searchlib/query/streaming/querynode.h @@ -32,6 +32,7 @@ class QueryNode static void 
populate_multi_term(Normalizing string_normalize_mode, MultiTerm& mt, SimpleQueryStackDumpIterator& queryRep); static std::unique_ptr<QueryNode> build_dot_product_term(const QueryNodeResultFactory& factory, SimpleQueryStackDumpIterator& queryRep); static std::unique_ptr<QueryNode> build_wand_term(const QueryNodeResultFactory& factory, SimpleQueryStackDumpIterator& queryRep); + static std::unique_ptr<QueryNode> build_weighted_set_term(const QueryNodeResultFactory& factory, SimpleQueryStackDumpIterator& queryRep); static void skip_unknown(SimpleQueryStackDumpIterator& queryRep); public: using UP = std::unique_ptr<QueryNode>; diff --git a/searchlib/src/vespa/searchlib/query/streaming/querynoderesultbase.cpp b/searchlib/src/vespa/searchlib/query/streaming/querynoderesultbase.cpp index c58ec55de9f..d72a3371846 100644 --- a/searchlib/src/vespa/searchlib/query/streaming/querynoderesultbase.cpp +++ b/searchlib/src/vespa/searchlib/query/streaming/querynoderesultbase.cpp @@ -3,4 +3,22 @@ namespace search::streaming { +namespace { + +const char* to_str(Normalizing norm) noexcept { + switch (norm) { + case Normalizing::NONE: return "NONE"; + case Normalizing::LOWERCASE: return "LOWERCASE"; + case Normalizing::LOWERCASE_AND_FOLD: return "LOWERCASE_AND_FOLD"; + } + abort(); +} + +} + +std::ostream& operator<<(std::ostream& os, Normalizing n) { + os << to_str(n); + return os; +} + } diff --git a/searchlib/src/vespa/searchlib/query/streaming/querynoderesultbase.h b/searchlib/src/vespa/searchlib/query/streaming/querynoderesultbase.h index 74f872ad187..83fb27794a3 100644 --- a/searchlib/src/vespa/searchlib/query/streaming/querynoderesultbase.h +++ b/searchlib/src/vespa/searchlib/query/streaming/querynoderesultbase.h @@ -2,6 +2,7 @@ #pragma once #include <vespa/vespalib/stllike/string.h> +#include <iosfwd> #include <memory> namespace search::streaming { @@ -24,6 +25,8 @@ enum class Normalizing { LOWERCASE_AND_FOLD }; +std::ostream& operator<<(std::ostream&, Normalizing); + class 
QueryNodeResultFactory { public: virtual ~QueryNodeResultFactory() = default; diff --git a/searchlib/src/vespa/searchlib/query/streaming/queryterm.cpp b/searchlib/src/vespa/searchlib/query/streaming/queryterm.cpp index 3950a179d67..3e05d381ee2 100644 --- a/searchlib/src/vespa/searchlib/query/streaming/queryterm.cpp +++ b/searchlib/src/vespa/searchlib/query/streaming/queryterm.cpp @@ -179,4 +179,10 @@ QueryTerm::as_multi_term() noexcept return nullptr; } +RegexpTerm* +QueryTerm::as_regexp_term() noexcept +{ + return nullptr; +} + } diff --git a/searchlib/src/vespa/searchlib/query/streaming/queryterm.h b/searchlib/src/vespa/searchlib/query/streaming/queryterm.h index 743998a630e..cd2bdd7eaec 100644 --- a/searchlib/src/vespa/searchlib/query/streaming/queryterm.h +++ b/searchlib/src/vespa/searchlib/query/streaming/queryterm.h @@ -13,6 +13,7 @@ namespace search::streaming { class NearestNeighborQueryNode; class MultiTerm; +class RegexpTerm; /** This is a leaf in the Query tree. All terms are leafs. @@ -93,6 +94,7 @@ public: void setFuzzyPrefixLength(uint32_t fuzzyPrefixLength) { _fuzzyPrefixLength = fuzzyPrefixLength; } virtual NearestNeighborQueryNode* as_nearest_neighbor_query_node() noexcept; virtual MultiTerm* as_multi_term() noexcept; + virtual RegexpTerm* as_regexp_term() noexcept; protected: using QueryNodeResultBaseContainer = std::unique_ptr<QueryNodeResultBase>; string _index; diff --git a/searchlib/src/vespa/searchlib/query/streaming/regexp_term.cpp b/searchlib/src/vespa/searchlib/query/streaming/regexp_term.cpp new file mode 100644 index 00000000000..4508caa7072 --- /dev/null +++ b/searchlib/src/vespa/searchlib/query/streaming/regexp_term.cpp @@ -0,0 +1,27 @@ +// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+#include "regexp_term.h" + +namespace search::streaming { + +using vespalib::Regex; + +namespace { + +constexpr Regex::Options normalize_mode_to_regex_opts(Normalizing norm) noexcept { + return ((norm == Normalizing::NONE) + ? Regex::Options::None + : Regex::Options::IgnoreCase); +} + +} + +RegexpTerm::RegexpTerm(std::unique_ptr<QueryNodeResultBase> result_base, stringref term, + const string& index, Type type, Normalizing normalizing) + : QueryTerm(std::move(result_base), term, index, type, normalizing), + _regexp(Regex::from_pattern({term.data(), term.size()}, normalize_mode_to_regex_opts(normalizing))) +{ +} + +RegexpTerm::~RegexpTerm() = default; + +} diff --git a/searchlib/src/vespa/searchlib/query/streaming/regexp_term.h b/searchlib/src/vespa/searchlib/query/streaming/regexp_term.h new file mode 100644 index 00000000000..96d14eeb0bd --- /dev/null +++ b/searchlib/src/vespa/searchlib/query/streaming/regexp_term.h @@ -0,0 +1,25 @@ +// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#pragma once + +#include "queryterm.h" +#include <vespa/vespalib/regex/regex.h> + +namespace search::streaming { + +/** + * Query term that matches fields using a regular expression, with case sensitivity + * controlled by the provided Normalizing mode. 
+ */ +class RegexpTerm : public QueryTerm { + vespalib::Regex _regexp; +public: + RegexpTerm(std::unique_ptr<QueryNodeResultBase> result_base, stringref term, + const string& index, Type type, Normalizing normalizing); + ~RegexpTerm() override; + + RegexpTerm* as_regexp_term() noexcept override { return this; } + + [[nodiscard]] const vespalib::Regex& regexp() const noexcept { return _regexp; } +}; + +} diff --git a/searchlib/src/vespa/searchlib/query/streaming/weighted_set_term.cpp b/searchlib/src/vespa/searchlib/query/streaming/weighted_set_term.cpp new file mode 100644 index 00000000000..90d0be5d43c --- /dev/null +++ b/searchlib/src/vespa/searchlib/query/streaming/weighted_set_term.cpp @@ -0,0 +1,53 @@ +// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include "weighted_set_term.h" +#include <vespa/searchlib/fef/itermdata.h> +#include <vespa/searchlib/fef/matchdata.h> +#include <vespa/vespalib/stllike/hash_map.hpp> + +using search::fef::ITermData; +using search::fef::MatchData; + +namespace search::streaming { + +WeightedSetTerm::WeightedSetTerm(std::unique_ptr<QueryNodeResultBase> result_base, const string & index, uint32_t num_terms) + : MultiTerm(std::move(result_base), index, num_terms) +{ +} + +WeightedSetTerm::~WeightedSetTerm() = default; + +void +WeightedSetTerm::unpack_match_data(uint32_t docid, const ITermData& td, MatchData& match_data) +{ + vespalib::hash_map<uint32_t,std::vector<double>> scores; + HitList hl_store; + for (const auto& term : _terms) { + auto& hl = term->evaluateHits(hl_store); + for (auto& hit : hl) { + scores[hit.context()].emplace_back(term->weight().percent()); + } + } + auto num_fields = td.numFields(); + for (uint32_t field_idx = 0; field_idx < num_fields; ++field_idx) { + auto& tfd = td.field(field_idx); + auto field_id = tfd.getFieldId(); + if (scores.contains(field_id)) { + auto handle = tfd.getHandle(); + if (handle != fef::IllegalHandle) { + auto &field_scores = 
scores[field_id]; + std::sort(field_scores.begin(), field_scores.end(), std::greater()); + auto tmd = match_data.resolveTermField(tfd.getHandle()); + tmd->setFieldId(field_id); + tmd->reset(docid); + for (auto& field_score : field_scores) { + fef::TermFieldMatchDataPosition pos; + pos.setElementWeight(field_score); + tmd->appendPosition(pos); + } + } + } + } +} + +} diff --git a/searchlib/src/vespa/searchlib/query/streaming/weighted_set_term.h b/searchlib/src/vespa/searchlib/query/streaming/weighted_set_term.h new file mode 100644 index 00000000000..4473e0fa45b --- /dev/null +++ b/searchlib/src/vespa/searchlib/query/streaming/weighted_set_term.h @@ -0,0 +1,20 @@ +// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include "multi_term.h" + +namespace search::streaming { + +/* + * A weighted set query term for streaming search. + */ +class WeightedSetTerm : public MultiTerm { + double _score_threshold; +public: + WeightedSetTerm(std::unique_ptr<QueryNodeResultBase> result_base, const string& index, uint32_t num_terms); + ~WeightedSetTerm() override; + void unpack_match_data(uint32_t docid, const fef::ITermData& td, fef::MatchData& match_data) override; +}; + +} diff --git a/searchlib/src/vespa/searchlib/queryeval/blueprint.h b/searchlib/src/vespa/searchlib/queryeval/blueprint.h index a78dd092f5a..d998c2e343e 100644 --- a/searchlib/src/vespa/searchlib/queryeval/blueprint.h +++ b/searchlib/src/vespa/searchlib/queryeval/blueprint.h @@ -144,22 +144,6 @@ public: return (total_docs == 0) ? 
0.0 : double(est) / double(total_docs); } - static double cost_of(const Children &children, auto flow) { - double cost = 0.0; - for (const auto &child: children) { - cost += flow.flow() * child->cost(); - flow.add(child->estimate()); - } - return cost; - } - - static double estimate_of(const Children &children, auto flow) { - for (const auto &child: children) { - flow.add(child->estimate()); - } - return flow.estimate(); - } - // utility that just takes maximum estimate static HitEstimate max(const std::vector<HitEstimate> &data); @@ -172,20 +156,6 @@ public: // lower limit for docid_limit: max child estimate static HitEstimate sat_sum(const std::vector<HitEstimate> &data, uint32_t docid_limit); - // sort children to minimize total cost of OR flow - struct MinimalOrCost { - bool operator () (const auto &a, const auto &b) const noexcept { - return a->estimate() / a->cost() > b->estimate() / b->cost(); - } - }; - - // sort children to minimize total cost of AND flow - struct MinimalAndCost { - bool operator () (const auto &a, const auto &b) const noexcept { - return (1.0 - a->estimate()) / a->cost() > (1.0 - b->estimate()) / b->cost(); - } - }; - // utility to get the greater estimate to sort first, higher tiers last struct TieredGreaterEstimate { bool operator () (const auto &a, const auto &b) const noexcept { diff --git a/searchlib/src/vespa/searchlib/queryeval/flow.h b/searchlib/src/vespa/searchlib/queryeval/flow.h index 36c0a259feb..86ce6f8b93b 100644 --- a/searchlib/src/vespa/searchlib/queryeval/flow.h +++ b/searchlib/src/vespa/searchlib/queryeval/flow.h @@ -2,60 +2,261 @@ #pragma once #include <cstddef> - -namespace search::queryeval { +#include <algorithm> +#include <vespa/vespalib/util/small_vector.h> // Model how boolean result decisions flow through intermediate nodes // of different types based on relative estimates for sub-expressions -class AndFlow { +namespace search::queryeval { + +namespace flow { + +// the default adapter expects the shape of 
std::unique_ptr<Blueprint> +// with respect to estimate, cost and (coming soon) strict_cost. +struct DefaultAdapter { + double estimate(const auto &child) const noexcept { return child->estimate(); } + double cost(const auto &child) const noexcept { return child->cost(); } + // Estimate the per-document cost of strict evaluation of this + // child. This will typically be something like (estimate() * + // cost()) for leafs with posting lists. OR will aggregate strict + // cost by calculating the minimal OR flow of strict child + // costs. AND will aggregate strict cost by calculating the + // minimal AND flow where the cost of the first child is + // substituted by its strict cost. This value is currently not + // available in Blueprints. + double strict_cost(const auto &child) const noexcept { return child->cost(); } +}; + +template <typename ADAPTER, typename T> +struct IndirectAdapter { + const T &data; + [[no_unique_address]] ADAPTER adapter; + IndirectAdapter(ADAPTER adapter_in, const T &data_in) noexcept + : data(data_in), adapter(adapter_in) {} + double estimate(size_t child) const noexcept { return adapter.estimate(data[child]); } + double cost(size_t child) const noexcept { return adapter.cost(data[child]); } + double strict_cost(size_t child) const noexcept { return adapter.strict_cost(data[child]); } +}; + +auto make_index(const auto &children) { + vespalib::SmallVector<uint32_t> index(children.size()); + for (size_t i = 0; i < index.size(); ++i) { + index[i] = i; + } + return index; +} + +template <typename ADAPTER> +struct MinAndCost { + // sort children to minimize total cost of AND flow + [[no_unique_address]] ADAPTER adapter; + MinAndCost(ADAPTER adapter_in) noexcept : adapter(adapter_in) {} + bool operator () (const auto &a, const auto &b) const noexcept { + return (1.0 - adapter.estimate(a)) * adapter.cost(b) > (1.0 - adapter.estimate(b)) * adapter.cost(a); + } +}; + +template <typename ADAPTER> +struct MinOrCost { + // sort children to minimize 
total cost of OR flow + [[no_unique_address]] ADAPTER adapter; + MinOrCost(ADAPTER adapter_in) noexcept : adapter(adapter_in) {} + bool operator () (const auto &a, const auto &b) const noexcept { + return adapter.estimate(a) * adapter.cost(b) > adapter.estimate(b) * adapter.cost(a); + } +}; + +template <typename ADAPTER> +struct MinOrStrictCost { + // sort children to minimize total cost of strict OR flow + [[no_unique_address]] ADAPTER adapter; + MinOrStrictCost(ADAPTER adapter_in) noexcept : adapter(adapter_in) {} + bool operator () (const auto &a, const auto &b) const noexcept { + return adapter.estimate(a) * adapter.strict_cost(b) > adapter.estimate(b) * adapter.strict_cost(a); + } +}; + +template <typename ADAPTER, typename T, typename F> +double estimate_of(ADAPTER adapter, const T &children, F flow) { + for (const auto &child: children) { + flow.add(adapter.estimate(child)); + } + return flow.estimate(); +} + +template <template <typename> typename ORDER, typename ADAPTER, typename T> +void sort(ADAPTER adapter, T &children) { + std::sort(children.begin(), children.end(), ORDER(adapter)); +} + +template <template <typename> typename ORDER, typename ADAPTER, typename T> +void sort_partial(ADAPTER adapter, T &children, size_t offset) { + if (children.size() > offset) { + std::sort(children.begin() + offset, children.end(), ORDER(adapter)); + } +} + +template <typename ADAPTER, typename T, typename F> +double ordered_cost_of(ADAPTER adapter, const T &children, F flow) { + double cost = 0.0; + for (const auto &child: children) { + double child_cost = flow.strict() ? 
adapter.strict_cost(child) : adapter.cost(child); + cost += flow.flow() * child_cost; + flow.add(adapter.estimate(child)); + } + return cost; +} + +template <typename ADAPTER, typename T> +size_t select_strict_and_child(ADAPTER adapter, const T &children) { + size_t idx = 0; + double cost = 0.0; + size_t best_idx = 0; + double best_diff = 0.0; + double est = 1.0; + for (const auto &child: children) { + double child_cost = est * adapter.cost(child); + double child_strict_cost = adapter.strict_cost(child); + double child_est = adapter.estimate(child); + if (idx == 0) { + best_diff = child_strict_cost - child_cost; + } else { + double my_diff = (child_strict_cost + child_est * cost) - (cost + child_cost); + if (my_diff < best_diff) { + best_diff = my_diff; + best_idx = idx; + } + } + cost += child_cost; + est *= child_est; + ++idx; + } + return best_idx; +} + +} // flow + +template <typename FLOW> +struct FlowMixin { + static double estimate_of(auto adapter, const auto &children) { + return flow::estimate_of(adapter, children, FLOW(1.0, false)); + } + static double estimate_of(const auto &children) { + return estimate_of(flow::DefaultAdapter(), children); + } + static double cost_of(auto adapter, const auto &children, bool strict) { + auto my_adapter = flow::IndirectAdapter(adapter, children); + auto order = flow::make_index(children); + FLOW::sort(my_adapter, order, strict); + return flow::ordered_cost_of(my_adapter, order, FLOW(1.0, strict)); + } + static double cost_of(const auto &children, bool strict) { + return cost_of(flow::DefaultAdapter(), children, strict); + } + // TODO: remove + static double cost_of(const auto &children) { return cost_of(children, false); } +}; + +class AndFlow : public FlowMixin<AndFlow> { private: double _flow; - size_t _cnt; + bool _strict; + bool _first; public: - AndFlow(double in = 1.0) noexcept : _flow(in), _cnt(0) {} + AndFlow(double in, bool strict) noexcept + : _flow(in), _strict(strict), _first(true) {} void add(double est) 
noexcept { _flow *= est; - ++_cnt; + _first = false; } double flow() const noexcept { return _flow; } + bool strict() const noexcept { + return _strict && _first; + } double estimate() const noexcept { - return (_cnt > 0) ? _flow : 0.0; + return _first ? 0.0 : _flow; + } + static void sort(auto adapter, auto &children, bool strict) { + flow::sort<flow::MinAndCost>(adapter, children); + if (strict && children.size() > 1) { + size_t idx = flow::select_strict_and_child(adapter, children); + auto the_one = std::move(children[idx]); + for (; idx > 0; --idx) { + children[idx] = std::move(children[idx-1]); + } + children[0] = std::move(the_one); + } + } + // TODO: add strict + static void sort(auto &children) { + sort(flow::DefaultAdapter(), children, false); } }; -class OrFlow { +class OrFlow : public FlowMixin<OrFlow>{ private: double _flow; + bool _strict; + bool _first; public: - OrFlow(double in = 1.0) noexcept : _flow(in) {} + OrFlow(double in, bool strict) noexcept + : _flow(in), _strict(strict), _first(true) {} void add(double est) noexcept { _flow *= (1.0 - est); + _first = false; } double flow() const noexcept { return _flow; } + bool strict() const noexcept { + return _strict; + } double estimate() const noexcept { - return (1.0 - _flow); + return _first ? 0.0 : (1.0 - _flow); + } + static void sort(auto adapter, auto &children, bool strict) { + if (strict) { + flow::sort<flow::MinOrStrictCost>(adapter, children); + } else { + flow::sort<flow::MinOrCost>(adapter, children); + } + } + // TODO: add strict + static void sort(auto &children) { + sort(flow::DefaultAdapter(), children, false); } }; -class AndNotFlow { +class AndNotFlow : public FlowMixin<AndNotFlow> { private: double _flow; - size_t _cnt; + bool _strict; + bool _first; public: - AndNotFlow(double in = 1.0) noexcept : _flow(in), _cnt(0) {} + AndNotFlow(double in, bool strict) noexcept + : _flow(in), _strict(strict), _first(true) {} void add(double est) noexcept { - _flow *= (_cnt++ == 0) ? 
est : (1.0 - est); + _flow *= _first ? est : (1.0 - est); + _first = false; } double flow() const noexcept { return _flow; } + bool strict() const noexcept { + return _strict && _first; + } double estimate() const noexcept { - return (_cnt > 0) ? _flow : 0.0; + return _first ? 0.0 : _flow; + } + static void sort(auto adapter, auto &children, bool) { + flow::sort_partial<flow::MinOrCost>(adapter, children, 1); + } + // TODO: add strict + static void sort(auto &children) { + sort(flow::DefaultAdapter(), children, false); } }; diff --git a/searchlib/src/vespa/searchlib/queryeval/intermediate_blueprints.cpp b/searchlib/src/vespa/searchlib/queryeval/intermediate_blueprints.cpp index bebc1f433f7..e60fe3d3f85 100644 --- a/searchlib/src/vespa/searchlib/queryeval/intermediate_blueprints.cpp +++ b/searchlib/src/vespa/searchlib/queryeval/intermediate_blueprints.cpp @@ -89,13 +89,13 @@ need_normal_features_for_children(const IntermediateBlueprint &blueprint, fef::M double AndNotBlueprint::calculate_cost() const { - return cost_of(get_children(), AndNotFlow()); + return AndNotFlow::cost_of(get_children()); } double AndNotBlueprint::calculate_relative_estimate() const { - return estimate_of(get_children(), AndNotFlow()); + return AndNotFlow::estimate_of(get_children()); } Blueprint::HitEstimate @@ -168,10 +168,10 @@ AndNotBlueprint::get_replacement() void AndNotBlueprint::sort(Children &children, bool sort_by_cost) const { - if (children.size() > 2) { - if (sort_by_cost) { - std::sort(children.begin() + 1, children.end(), MinimalOrCost()); - } else { + if (sort_by_cost) { + AndNotFlow::sort(children); + } else { + if (children.size() > 2) { std::sort(children.begin() + 1, children.end(), TieredGreaterEstimate()); } } @@ -214,12 +214,12 @@ AndNotBlueprint::createFilterSearch(bool strict, FilterConstraint constraint) co double AndBlueprint::calculate_cost() const { - return cost_of(get_children(), AndFlow()); + return AndFlow::cost_of(get_children()); } double 
AndBlueprint::calculate_relative_estimate() const { - return estimate_of(get_children(), AndFlow()); + return AndFlow::estimate_of(get_children()); } Blueprint::HitEstimate @@ -265,7 +265,7 @@ void AndBlueprint::sort(Children &children, bool sort_by_cost) const { if (sort_by_cost) { - std::sort(children.begin(), children.end(), MinimalAndCost()); + AndFlow::sort(children); } else { std::sort(children.begin(), children.end(), TieredLessEstimate()); } @@ -323,12 +323,12 @@ OrBlueprint::~OrBlueprint() = default; double OrBlueprint::calculate_cost() const { - return cost_of(get_children(), OrFlow()); + return OrFlow::cost_of(get_children()); } double OrBlueprint::calculate_relative_estimate() const { - return estimate_of(get_children(), OrFlow()); + return OrFlow::estimate_of(get_children()); } Blueprint::HitEstimate @@ -376,7 +376,7 @@ void OrBlueprint::sort(Children &children, bool sort_by_cost) const { if (sort_by_cost) { - std::sort(children.begin(), children.end(), MinimalOrCost()); + OrFlow::sort(children); } else { std::sort(children.begin(), children.end(), TieredGreaterEstimate()); } @@ -428,12 +428,12 @@ WeakAndBlueprint::~WeakAndBlueprint() = default; double WeakAndBlueprint::calculate_cost() const { - return cost_of(get_children(), OrFlow()); + return OrFlow::cost_of(get_children()); } double WeakAndBlueprint::calculate_relative_estimate() const { - double child_est = estimate_of(get_children(), OrFlow()); + double child_est = OrFlow::estimate_of(get_children()); double my_est = abs_to_rel_est(_n, get_docid_limit()); return std::min(my_est, child_est); } @@ -499,12 +499,12 @@ WeakAndBlueprint::createFilterSearch(bool strict, FilterConstraint constraint) c double NearBlueprint::calculate_cost() const { - return cost_of(get_children(), AndFlow()) + childCnt() * 1.0; + return AndFlow::cost_of(get_children()) + childCnt() * 1.0; } double NearBlueprint::calculate_relative_estimate() const { - return estimate_of(get_children(), AndFlow()); + return 
AndFlow::estimate_of(get_children()); } Blueprint::HitEstimate @@ -523,7 +523,7 @@ void NearBlueprint::sort(Children &children, bool sort_by_cost) const { if (sort_by_cost) { - std::sort(children.begin(), children.end(), MinimalAndCost()); + AndFlow::sort(children); } else { std::sort(children.begin(), children.end(), TieredLessEstimate()); } @@ -566,12 +566,12 @@ NearBlueprint::createFilterSearch(bool strict, FilterConstraint constraint) cons double ONearBlueprint::calculate_cost() const { - return cost_of(get_children(), AndFlow()) + (childCnt() * 1.0); + return AndFlow::cost_of(get_children()) + (childCnt() * 1.0); } double ONearBlueprint::calculate_relative_estimate() const { - return estimate_of(get_children(), AndFlow()); + return AndFlow::estimate_of(get_children()); } Blueprint::HitEstimate @@ -741,7 +741,7 @@ SourceBlenderBlueprint::calculate_cost() const { double SourceBlenderBlueprint::calculate_relative_estimate() const { - return estimate_of(get_children(), OrFlow()); + return OrFlow::estimate_of(get_children()); } Blueprint::HitEstimate diff --git a/streamingvisitors/src/tests/searcher/searcher_test.cpp b/streamingvisitors/src/tests/searcher/searcher_test.cpp index 7f89071868a..24877866c1b 100644 --- a/streamingvisitors/src/tests/searcher/searcher_test.cpp +++ b/streamingvisitors/src/tests/searcher/searcher_test.cpp @@ -3,6 +3,7 @@ #include <vespa/vespalib/testkit/testapp.h> #include <vespa/document/fieldvalue/fieldvalues.h> +#include <vespa/searchlib/query/streaming/regexp_term.h> #include <vespa/searchlib/query/streaming/queryterm.h> #include <vespa/vsm/searcher/boolfieldsearcher.h> #include <vespa/vsm/searcher/fieldsearcher.h> @@ -21,6 +22,7 @@ using namespace document; using search::streaming::HitList; using search::streaming::QueryNodeResultFactory; +using search::streaming::RegexpTerm; using search::streaming::QueryTerm; using search::streaming::Normalizing; using Searchmethod = VsmfieldsConfig::Fieldspec::Searchmethod; @@ -63,7 +65,12 @@ 
private: for (const auto & term : terms) { ParsedQueryTerm pqt = parseQueryTerm(term); ParsedTerm pt = parseTerm(pqt.second); - qtv.push_back(std::make_unique<QueryTerm>(eqnr.create(), pt.first, pqt.first.empty() ? "index" : pqt.first, pt.second, normalizing)); + std::string effective_index = pqt.first.empty() ? "index" : pqt.first; + if (pt.second != TermType::REGEXP) { + qtv.push_back(std::make_unique<QueryTerm>(eqnr.create(), pt.first, effective_index, pt.second, normalizing)); + } else { + qtv.push_back(std::make_unique<RegexpTerm>(eqnr.create(), pt.first, effective_index, pt.second, normalizing)); + } } for (const auto & i : qtv) { qtl.push_back(i.get()); @@ -91,6 +98,8 @@ public: return std::make_pair(term.substr(1, term.size() - 2), TermType::SUBSTRINGTERM); } else if (term[0] == '*') { return std::make_pair(term.substr(1, term.size() - 1), TermType::SUFFIXTERM); + } else if (term[0] == '#') { // magic regex enabler + return std::make_pair(term.substr(1), TermType::REGEXP); } else if (term[term.size() - 1] == '*') { return std::make_pair(term.substr(0, term.size() - 1), TermType::PREFIXTERM); } else { @@ -479,6 +488,8 @@ testStrChrFieldSearcher(StrChrFieldSearcher & fs) ASSERT_TRUE(Query::parseTerm("*suffix").second == TermType::SUFFIXTERM); ASSERT_TRUE(Query::parseTerm("prefix*").first == "prefix"); ASSERT_TRUE(Query::parseTerm("prefix*").second == TermType::PREFIXTERM); + ASSERT_TRUE(Query::parseTerm("#regex").first == "regex"); + ASSERT_TRUE(Query::parseTerm("#regex").second == TermType::REGEXP); ASSERT_TRUE(Query::parseTerm("term").first == "term"); ASSERT_TRUE(Query::parseTerm("term").second == TermType::WORD); } @@ -582,7 +593,7 @@ TEST("utf8 exact match") { TEST_DO(assertString(fs, "hütte", "hütter", Hits())); } -TEST("utf8 flexible searcher"){ +TEST("utf8 flexible searcher (except regex)"){ UTF8FlexibleStringFieldSearcher fs(0); // regular assertString(fs, "vespa", "vespa", Hits().add(0)); @@ -611,6 +622,38 @@ TEST("utf8 flexible searcher"){ 
EXPECT_TRUE(testStringFieldInfo(fs)); } +TEST("utf8 flexible searcher handles regex and by default has case-insensitive partial match semantics") { + UTF8FlexibleStringFieldSearcher fs(0); + // Note: the # term prefix is a magic term-as-regex symbol used only for tests in this file + TEST_DO(assertString(fs, "#abc", "ABC", Hits().add(0))); + TEST_DO(assertString(fs, "#bc", "ABC", Hits().add(0))); + TEST_DO(assertString(fs, "#ab", "ABC", Hits().add(0))); + TEST_DO(assertString(fs, "#[a-z]", "ABC", Hits().add(0))); + TEST_DO(assertString(fs, "#(zoid)(berg)", "why not zoidberg?", Hits().add(0))); + TEST_DO(assertString(fs, "#[a-z]", "123", Hits())); +} + +TEST("utf8 flexible searcher handles case-sensitive regex matching") { + UTF8FlexibleStringFieldSearcher fs(0); + fs.normalize_mode(Normalizing::NONE); + TEST_DO(assertString(fs, "#abc", "ABC", Hits())); + TEST_DO(assertString(fs, "#abc", "abc", Hits().add(0))); + TEST_DO(assertString(fs, "#[A-Z]", "A", Hits().add(0))); + TEST_DO(assertString(fs, "#[A-Z]", "ABC", Hits().add(0))); + TEST_DO(assertString(fs, "#[A-Z]", "abc", Hits())); +} + +TEST("utf8 flexible searcher handles regexes with explicit anchoring") { + UTF8FlexibleStringFieldSearcher fs(0); + TEST_DO(assertString(fs, "#^foo", "food", Hits().add(0))); + TEST_DO(assertString(fs, "#^foo", "afoo", Hits())); + TEST_DO(assertString(fs, "#foo$", "afoo", Hits().add(0))); + TEST_DO(assertString(fs, "#foo$", "food", Hits())); + TEST_DO(assertString(fs, "#^foo$", "foo", Hits().add(0))); + TEST_DO(assertString(fs, "#^foo$", "food", Hits())); + TEST_DO(assertString(fs, "#^foo$", "oo", Hits())); +} + TEST("bool search") { BoolFieldSearcher fs(0); TEST_DO(assertBool(fs, "true", true, true)); @@ -635,6 +678,8 @@ TEST("integer search") TEST_DO(assertInt(fs, "<11", 10, true)); TEST_DO(assertInt(fs, "<11", 11, false)); TEST_DO(assertInt(fs, "-10", -10, true)); + TEST_DO(assertInt(fs, "10", -10, false)); + TEST_DO(assertInt(fs, "-10", 10, false)); TEST_DO(assertInt(fs, "-9", 
-10, false)); TEST_DO(assertInt(fs, "a", 10, false)); TEST_DO(assertInt(fs, "[-5;5]", -5, true)); @@ -787,6 +832,18 @@ TEST("FieldSearchSpec construction") { } } +TEST("FieldSearchSpec reconfiguration preserves match/normalization properties for new searcher") { + FieldSearchSpec f(7, "f0", Searchmethod::AUTOUTF8, Normalizing::NONE, "substring", 789); + QueryNodeResultFactory qnrf; + QueryTerm qt(qnrf.create(), "foo", "index", TermType::EXACTSTRINGTERM, Normalizing::LOWERCASE_AND_FOLD); + // Match type, normalization mode and max length are all properties of the original spec + // and should be propagated to the new searcher. + f.reconfig(qt); + EXPECT_EQUAL(f.searcher().match_type(), FieldSearcher::MatchType::SUBSTRING); + EXPECT_EQUAL(f.searcher().normalize_mode(), Normalizing::NONE); + EXPECT_EQUAL(f.searcher().maxFieldLength(), 789u); +} + TEST("snippet modifier manager") { FieldSearchSpecMapT specMap; specMap[0] = FieldSearchSpec(0, "f0", Searchmethod::AUTOUTF8, Normalizing::LOWERCASE, "substring", 1000); diff --git a/streamingvisitors/src/vespa/searchvisitor/matching_elements_filler.cpp b/streamingvisitors/src/vespa/searchvisitor/matching_elements_filler.cpp index 6474a449272..095141c0359 100644 --- a/streamingvisitors/src/vespa/searchvisitor/matching_elements_filler.cpp +++ b/streamingvisitors/src/vespa/searchvisitor/matching_elements_filler.cpp @@ -3,6 +3,7 @@ #include "matching_elements_filler.h" #include <vespa/searchlib/common/matching_elements.h> #include <vespa/searchlib/common/matching_elements_fields.h> +#include <vespa/searchlib/query/streaming/weighted_set_term.h> #include <vespa/vsm/searcher/fieldsearcher.h> #include <vespa/vdslib/container/searchresult.h> #include "hitcollector.h" @@ -17,6 +18,7 @@ using search::streaming::QueryConnector; using search::streaming::QueryNode; using search::streaming::QueryTerm; using search::streaming::SameElementQueryNode; +using search::streaming::WeightedSetTerm; using vdslib::SearchResult; using 
vsm::FieldIdTSearcherMap; using vsm::StorageDocument; @@ -79,6 +81,13 @@ Matcher::select_query_nodes(const MatchingElementsFields& fields, const QueryNod if (fields.has_field(same_element->getIndex())) { _same_element_nodes.emplace_back(same_element); } + } else if (auto weighted_set_term = as<WeightedSetTerm>(query_node)) { + if (fields.has_field(weighted_set_term->getIndex())) { + auto &terms = weighted_set_term->get_terms(); + for (auto& term : terms) { + _sub_field_terms.emplace_back(weighted_set_term->getIndex(), term.get()); + } + } } else if (auto query_term = as<QueryTerm>(query_node)) { if (fields.has_struct_field(query_term->getIndex())) { _sub_field_terms.emplace_back(fields.get_enclosing_field(query_term->getIndex()), query_term); diff --git a/streamingvisitors/src/vespa/vsm/searcher/strchrfieldsearcher.cpp b/streamingvisitors/src/vespa/vsm/searcher/strchrfieldsearcher.cpp index c1fa6090021..c0a0249125f 100644 --- a/streamingvisitors/src/vespa/vsm/searcher/strchrfieldsearcher.cpp +++ b/streamingvisitors/src/vespa/vsm/searcher/strchrfieldsearcher.cpp @@ -17,7 +17,7 @@ void StrChrFieldSearcher::prepare(search::streaming::QueryTermList& qtl, void StrChrFieldSearcher::onValue(const document::FieldValue & fv) { - const document::LiteralFieldValueB & sfv = static_cast<const document::LiteralFieldValueB &>(fv); + const auto & sfv = static_cast<const document::LiteralFieldValueB &>(fv); vespalib::stringref val = sfv.getValueRef(); FieldRef fr(val.data(), std::min(maxFieldLength(), val.size())); matchDoc(fr); @@ -25,7 +25,6 @@ void StrChrFieldSearcher::onValue(const document::FieldValue & fv) bool StrChrFieldSearcher::matchDoc(const FieldRef & fieldRef) { - bool retval(true); if (_qtl.size() > 1) { size_t mintsz = shortestTerm(); if (fieldRef.size() >= mintsz) { @@ -35,14 +34,14 @@ bool StrChrFieldSearcher::matchDoc(const FieldRef & fieldRef) } } else { for (auto qt : _qtl) { - if (fieldRef.size() >= qt->termLen()) { + if (fieldRef.size() >= qt->termLen() || 
qt->isRegex()) { _words += matchTerm(fieldRef, *qt); } else { _words += countWords(fieldRef); } } } - return retval; + return true; } size_t StrChrFieldSearcher::shortestTerm() const @@ -50,6 +49,9 @@ size_t StrChrFieldSearcher::shortestTerm() const size_t mintsz(_qtl.front()->termLen()); for (auto it=_qtl.begin()+1, mt=_qtl.end(); it != mt; it++) { const QueryTerm & qt = **it; + if (qt.isRegex()) { + return 0; // Must avoid "too short query term" optimization when using regex + } mintsz = std::min(mintsz, qt.termLen()); } return mintsz; diff --git a/streamingvisitors/src/vespa/vsm/searcher/utf8flexiblestringfieldsearcher.cpp b/streamingvisitors/src/vespa/vsm/searcher/utf8flexiblestringfieldsearcher.cpp index 78f491198ad..c6deb6eacd1 100644 --- a/streamingvisitors/src/vespa/vsm/searcher/utf8flexiblestringfieldsearcher.cpp +++ b/streamingvisitors/src/vespa/vsm/searcher/utf8flexiblestringfieldsearcher.cpp @@ -1,5 +1,7 @@ // Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
#include "utf8flexiblestringfieldsearcher.h" +#include <vespa/searchlib/query/streaming/regexp_term.h> +#include <cassert> #include <vespa/log/log.h> LOG_SETUP(".vsm.searcher.utf8flexiblestringfieldsearcher"); @@ -27,6 +29,17 @@ UTF8FlexibleStringFieldSearcher::matchTerms(const FieldRef & f, const size_t min } size_t +UTF8FlexibleStringFieldSearcher::match_regexp(const FieldRef & f, search::streaming::QueryTerm & qt) +{ + auto* regexp_term = qt.as_regexp_term(); + assert(regexp_term != nullptr); + if (regexp_term->regexp().partial_match({f.data(), f.size()})) { + addHit(qt, 0); + } + return countWords(f); +} + +size_t UTF8FlexibleStringFieldSearcher::matchTerm(const FieldRef & f, QueryTerm & qt) { if (qt.isPrefix()) { @@ -41,6 +54,9 @@ UTF8FlexibleStringFieldSearcher::matchTerm(const FieldRef & f, QueryTerm & qt) } else if (qt.isExactstring()) { LOG(debug, "Use exact match for exact term '%s:%s'", qt.index().c_str(), qt.getTerm()); return matchTermExact(f, qt); + } else if (qt.isRegex()) { + LOG(debug, "Use regexp match for term '%s:%s'", qt.index().c_str(), qt.getTerm()); + return match_regexp(f, qt); } else { if (substring()) { LOG(debug, "Use substring match for term '%s:%s'", qt.index().c_str(), qt.getTerm()); diff --git a/streamingvisitors/src/vespa/vsm/searcher/utf8flexiblestringfieldsearcher.h b/streamingvisitors/src/vespa/vsm/searcher/utf8flexiblestringfieldsearcher.h index bb1b55dffe4..cd1715ad158 100644 --- a/streamingvisitors/src/vespa/vsm/searcher/utf8flexiblestringfieldsearcher.h +++ b/streamingvisitors/src/vespa/vsm/searcher/utf8flexiblestringfieldsearcher.h @@ -14,16 +14,18 @@ class UTF8FlexibleStringFieldSearcher : public UTF8StringFieldSearcherBase private: /** * Tries to match the given query term against the content of the given field reference. - * Search strategy is choosen based on the query term type. + * Search strategy is chosen based on the query term type. 
**/ size_t matchTerm(const FieldRef & f, search::streaming::QueryTerm & qt) override; /** * Tries to match each query term in the underlying query against the content of the given field reference. - * Search strategy is choosen based on the query term type. + * Search strategy is chosen based on the query term type. **/ size_t matchTerms(const FieldRef & f, size_t shortestTerm) override; + size_t match_regexp(const FieldRef & f, search::streaming::QueryTerm & qt); + public: std::unique_ptr<FieldSearcher> duplicate() const override; explicit UTF8FlexibleStringFieldSearcher(FieldIdT fId); diff --git a/streamingvisitors/src/vespa/vsm/vsm/fieldsearchspec.cpp b/streamingvisitors/src/vespa/vsm/vsm/fieldsearchspec.cpp index 715c19a0bb7..9c8bb2f185a 100644 --- a/streamingvisitors/src/vespa/vsm/vsm/fieldsearchspec.cpp +++ b/streamingvisitors/src/vespa/vsm/vsm/fieldsearchspec.cpp @@ -114,9 +114,7 @@ FieldSearchSpec::FieldSearchSpec(const FieldIdT & fid, const vespalib::string & break; } if (_searcher) { - setMatchType(_searcher, _arg1); - _searcher->maxFieldLength(maxLength()); - _searcher->normalize_mode(_normalize_mode); + propagate_settings_to_searcher(); } } @@ -134,11 +132,11 @@ FieldSearchSpec::reconfig(const QueryTerm & term) if ((term.isSubstring() && _arg1 != "substring") || (term.isSuffix() && _arg1 != "suffix") || (term.isExactstring() && _arg1 != "exact") || - (term.isPrefix() && _arg1 == "suffix")) + (term.isPrefix() && _arg1 == "suffix") || + term.isRegex()) { _searcher = std::make_unique<UTF8FlexibleStringFieldSearcher>(id()); - // preserve the basic match property of the searcher - setMatchType(_searcher, _arg1); + propagate_settings_to_searcher(); LOG(debug, "Reconfigured to use UTF8FlexibleStringFieldSearcher (%s) for field '%s' with id '%d'", _searcher->prefix() ? 
"prefix" : "regular", name().c_str(), id()); _reconfigured = true; @@ -149,6 +147,15 @@ FieldSearchSpec::reconfig(const QueryTerm & term) } } +void +FieldSearchSpec::propagate_settings_to_searcher() +{ + // preserve the basic match property and normalization mode of the searcher + setMatchType(_searcher, _arg1); + _searcher->maxFieldLength(maxLength()); + _searcher->normalize_mode(_normalize_mode); +} + vespalib::asciistream & operator <<(vespalib::asciistream & os, const FieldSearchSpec & f) { diff --git a/streamingvisitors/src/vespa/vsm/vsm/fieldsearchspec.h b/streamingvisitors/src/vespa/vsm/vsm/fieldsearchspec.h index 7ba9799991e..c862753a41c 100644 --- a/streamingvisitors/src/vespa/vsm/vsm/fieldsearchspec.h +++ b/streamingvisitors/src/vespa/vsm/vsm/fieldsearchspec.h @@ -42,6 +42,8 @@ public: friend vespalib::asciistream & operator <<(vespalib::asciistream & os, const FieldSearchSpec & f); private: + void propagate_settings_to_searcher(); + FieldIdT _id; vespalib::string _name; size_t _maxLength; diff --git a/tenant-cd-api/src/main/java/org/junit/jupiter/api/condition/package-info.java b/tenant-cd-api/src/main/java/org/junit/jupiter/api/condition/package-info.java index ed1a075f9d2..7920928017b 100644 --- a/tenant-cd-api/src/main/java/org/junit/jupiter/api/condition/package-info.java +++ b/tenant-cd-api/src/main/java/org/junit/jupiter/api/condition/package-info.java @@ -2,7 +2,7 @@ /** * @author bjorncs */ -@ExportPackage(version = @Version(major = 5, minor = 8, micro = 1)) +@ExportPackage(version = @Version(major = 5, minor = 10, micro = 1)) package org.junit.jupiter.api.condition; import com.yahoo.osgi.annotation.ExportPackage; diff --git a/tenant-cd-api/src/main/java/org/junit/jupiter/api/extension/package-info.java b/tenant-cd-api/src/main/java/org/junit/jupiter/api/extension/package-info.java index a2a9eab70e0..e3cff292714 100644 --- a/tenant-cd-api/src/main/java/org/junit/jupiter/api/extension/package-info.java +++ 
b/tenant-cd-api/src/main/java/org/junit/jupiter/api/extension/package-info.java @@ -2,7 +2,7 @@ /** * @author bjorncs */ -@ExportPackage(version = @Version(major = 5, minor = 8, micro = 1)) +@ExportPackage(version = @Version(major = 5, minor = 10, micro = 1)) package org.junit.jupiter.api.extension; import com.yahoo.osgi.annotation.ExportPackage; diff --git a/tenant-cd-api/src/main/java/org/junit/jupiter/api/extension/support/package-info.java b/tenant-cd-api/src/main/java/org/junit/jupiter/api/extension/support/package-info.java index f3271fd9c22..57724e66600 100644 --- a/tenant-cd-api/src/main/java/org/junit/jupiter/api/extension/support/package-info.java +++ b/tenant-cd-api/src/main/java/org/junit/jupiter/api/extension/support/package-info.java @@ -2,7 +2,7 @@ /** * @author jonmv */ -@ExportPackage(version = @Version(major = 5, minor = 8, micro = 1)) +@ExportPackage(version = @Version(major = 5, minor = 10, micro = 1)) package org.junit.jupiter.api.extension.support; import com.yahoo.osgi.annotation.ExportPackage; diff --git a/tenant-cd-api/src/main/java/org/junit/jupiter/api/function/package-info.java b/tenant-cd-api/src/main/java/org/junit/jupiter/api/function/package-info.java index 9e43f885f47..ec9b4a8a195 100644 --- a/tenant-cd-api/src/main/java/org/junit/jupiter/api/function/package-info.java +++ b/tenant-cd-api/src/main/java/org/junit/jupiter/api/function/package-info.java @@ -2,7 +2,7 @@ /** * @author bjorncs */ -@ExportPackage(version = @Version(major = 5, minor = 8, micro = 1)) +@ExportPackage(version = @Version(major = 5, minor = 10, micro = 1)) package org.junit.jupiter.api.function; import com.yahoo.osgi.annotation.ExportPackage; diff --git a/tenant-cd-api/src/main/java/org/junit/jupiter/api/io/package-info.java b/tenant-cd-api/src/main/java/org/junit/jupiter/api/io/package-info.java index 685044e2ba5..daeba215701 100644 --- a/tenant-cd-api/src/main/java/org/junit/jupiter/api/io/package-info.java +++ 
b/tenant-cd-api/src/main/java/org/junit/jupiter/api/io/package-info.java @@ -2,7 +2,7 @@ /** * @author bjorncs */ -@ExportPackage(version = @Version(major = 5, minor = 8, micro = 1)) +@ExportPackage(version = @Version(major = 5, minor = 10, micro = 1)) package org.junit.jupiter.api.io; import com.yahoo.osgi.annotation.ExportPackage; diff --git a/tenant-cd-api/src/main/java/org/junit/jupiter/api/package-info.java b/tenant-cd-api/src/main/java/org/junit/jupiter/api/package-info.java index 1bf0613132d..d9bf4f0316e 100644 --- a/tenant-cd-api/src/main/java/org/junit/jupiter/api/package-info.java +++ b/tenant-cd-api/src/main/java/org/junit/jupiter/api/package-info.java @@ -2,7 +2,7 @@ /** * @author bjorncs */ -@ExportPackage(version = @Version(major = 5, minor = 8, micro = 1)) +@ExportPackage(version = @Version(major = 5, minor = 10, micro = 1)) package org.junit.jupiter.api; import com.yahoo.osgi.annotation.ExportPackage; diff --git a/tenant-cd-api/src/main/java/org/junit/jupiter/api/parallel/package-info.java b/tenant-cd-api/src/main/java/org/junit/jupiter/api/parallel/package-info.java index 1f67cbf96a5..c31977ac22f 100644 --- a/tenant-cd-api/src/main/java/org/junit/jupiter/api/parallel/package-info.java +++ b/tenant-cd-api/src/main/java/org/junit/jupiter/api/parallel/package-info.java @@ -2,7 +2,7 @@ /** * @author bjorncs */ -@ExportPackage(version = @Version(major = 5, minor = 8, micro = 1)) +@ExportPackage(version = @Version(major = 5, minor = 10, micro = 1)) package org.junit.jupiter.api.parallel; import com.yahoo.osgi.annotation.ExportPackage; diff --git a/tenant-cd-api/src/main/java/org/junit/platform/commons/annotation/package-info.java b/tenant-cd-api/src/main/java/org/junit/platform/commons/annotation/package-info.java index bf69bc87903..87ae7c25bb1 100644 --- a/tenant-cd-api/src/main/java/org/junit/platform/commons/annotation/package-info.java +++ b/tenant-cd-api/src/main/java/org/junit/platform/commons/annotation/package-info.java @@ -2,7 +2,7 @@ /** * 
@author jonmv */ -@ExportPackage(version = @Version(major = 1, minor = 8, micro = 1)) +@ExportPackage(version = @Version(major = 1, minor = 10, micro = 1)) package org.junit.platform.commons.annotation; import com.yahoo.osgi.annotation.ExportPackage; diff --git a/tenant-cd-api/src/main/java/org/junit/platform/commons/function/package-info.java b/tenant-cd-api/src/main/java/org/junit/platform/commons/function/package-info.java index c1020a3a364..7ca49947df6 100644 --- a/tenant-cd-api/src/main/java/org/junit/platform/commons/function/package-info.java +++ b/tenant-cd-api/src/main/java/org/junit/platform/commons/function/package-info.java @@ -2,7 +2,7 @@ /** * @author jonmv */ -@ExportPackage(version = @Version(major = 1, minor = 8, micro = 1)) +@ExportPackage(version = @Version(major = 1, minor = 10, micro = 1)) package org.junit.platform.commons.function; import com.yahoo.osgi.annotation.ExportPackage; diff --git a/tenant-cd-api/src/main/java/org/junit/platform/commons/logging/package-info.java b/tenant-cd-api/src/main/java/org/junit/platform/commons/logging/package-info.java index f56cc14314e..0c8e124bb60 100644 --- a/tenant-cd-api/src/main/java/org/junit/platform/commons/logging/package-info.java +++ b/tenant-cd-api/src/main/java/org/junit/platform/commons/logging/package-info.java @@ -2,7 +2,7 @@ /** * @author jonmv */ -@ExportPackage(version = @Version(major = 1, minor = 8, micro = 1)) +@ExportPackage(version = @Version(major = 1, minor = 10, micro = 1)) package org.junit.platform.commons.logging; import com.yahoo.osgi.annotation.ExportPackage; diff --git a/tenant-cd-api/src/main/java/org/junit/platform/commons/package-info.java b/tenant-cd-api/src/main/java/org/junit/platform/commons/package-info.java index 60ba2abbc54..a2ec3831534 100644 --- a/tenant-cd-api/src/main/java/org/junit/platform/commons/package-info.java +++ b/tenant-cd-api/src/main/java/org/junit/platform/commons/package-info.java @@ -2,7 +2,7 @@ /** * @author jonmv */ -@ExportPackage(version = 
@Version(major = 1, minor = 8, micro = 1)) +@ExportPackage(version = @Version(major = 1, minor = 10, micro = 1)) package org.junit.platform.commons; import com.yahoo.osgi.annotation.ExportPackage; diff --git a/tenant-cd-api/src/main/java/org/junit/platform/commons/support/package-info.java b/tenant-cd-api/src/main/java/org/junit/platform/commons/support/package-info.java index 772a79e40f2..51b1df85173 100644 --- a/tenant-cd-api/src/main/java/org/junit/platform/commons/support/package-info.java +++ b/tenant-cd-api/src/main/java/org/junit/platform/commons/support/package-info.java @@ -2,7 +2,7 @@ /** * @author jonmv */ -@ExportPackage(version = @Version(major = 1, minor = 8, micro = 1)) +@ExportPackage(version = @Version(major = 1, minor = 10, micro = 1)) package org.junit.platform.commons.support; import com.yahoo.osgi.annotation.ExportPackage; diff --git a/tenant-cd-api/src/main/java/org/junit/platform/commons/util/package-info.java b/tenant-cd-api/src/main/java/org/junit/platform/commons/util/package-info.java index 5c11fbbb5e4..c30a31377c4 100644 --- a/tenant-cd-api/src/main/java/org/junit/platform/commons/util/package-info.java +++ b/tenant-cd-api/src/main/java/org/junit/platform/commons/util/package-info.java @@ -2,7 +2,7 @@ /** * @author jonmv */ -@ExportPackage(version = @Version(major = 1, minor = 8, micro = 1)) +@ExportPackage(version = @Version(major = 1, minor = 10, micro = 1)) package org.junit.platform.commons.util; import com.yahoo.osgi.annotation.ExportPackage; diff --git a/tenant-cd-api/src/main/java/org/opentest4j/package-info.java b/tenant-cd-api/src/main/java/org/opentest4j/package-info.java index b5a3619017d..f0327bcef6f 100644 --- a/tenant-cd-api/src/main/java/org/opentest4j/package-info.java +++ b/tenant-cd-api/src/main/java/org/opentest4j/package-info.java @@ -2,7 +2,7 @@ /** * @author jonmv */ -@ExportPackage(version = @Version(major = 1, minor = 2, micro = 0)) +@ExportPackage(version = @Version(major = 1, minor = 3, micro = 0)) package 
org.opentest4j; import com.yahoo.osgi.annotation.ExportPackage; diff --git a/vespalib/src/vespa/vespalib/util/testclock.h b/vespalib/src/vespa/vespalib/util/testclock.h index 117e19bedb8..a7c15c774df 100644 --- a/vespalib/src/vespa/vespalib/util/testclock.h +++ b/vespalib/src/vespa/vespalib/util/testclock.h @@ -3,6 +3,7 @@ #include "time.h" #include <atomic> +#include <memory> namespace vespalib { |