diff options
34 files changed, 288 insertions, 189 deletions
diff --git a/client/js/app/yarn.lock b/client/js/app/yarn.lock index 248e1e2ac4a..1f6ebac1617 100644 --- a/client/js/app/yarn.lock +++ b/client/js/app/yarn.lock @@ -4656,9 +4656,9 @@ prettier-linter-helpers@^1.0.0: fast-diff "^1.1.2" prettier@3: - version "3.0.3" - resolved "https://registry.yarnpkg.com/prettier/-/prettier-3.0.3.tgz#432a51f7ba422d1469096c0fdc28e235db8f9643" - integrity sha512-L/4pUDMxcNa8R/EthV08Zt42WBO4h1rarVtK0K+QJG0X187OLo7l699jWw0GKuwzkPQ//jMFA/8Xm6Fh3J/DAg== + version "3.1.0" + resolved "https://registry.yarnpkg.com/prettier/-/prettier-3.1.0.tgz#c6d16474a5f764ea1a4a373c593b779697744d5e" + integrity sha512-TQLvXjq5IAibjh8EpBIkNKxO749UEWABoiIZehEPiY4GNpVdhaFKqSTu+QrlU6D2dPAfubRmtJTi4K4YkQ5eXw== pretty-format@^29.7.0: version "29.7.0" diff --git a/config-model-api/abi-spec.json b/config-model-api/abi-spec.json index fab30efd00d..919bce460f2 100644 --- a/config-model-api/abi-spec.json +++ b/config-model-api/abi-spec.json @@ -1287,7 +1287,8 @@ "public boolean dynamicHeapSize()", "public java.lang.String unknownConfigDefinition()", "public int searchHandlerThreadpool()", - "public long mergingMaxMemoryUsagePerNode()" + "public long mergingMaxMemoryUsagePerNode()", + "public boolean usePerDocumentThrottledDeleteBucket()" ], "fields" : [ ] }, diff --git a/config-model-api/src/main/java/com/yahoo/config/model/api/ModelContext.java b/config-model-api/src/main/java/com/yahoo/config/model/api/ModelContext.java index 833e2f020bc..188c4a32978 100644 --- a/config-model-api/src/main/java/com/yahoo/config/model/api/ModelContext.java +++ b/config-model-api/src/main/java/com/yahoo/config/model/api/ModelContext.java @@ -118,6 +118,7 @@ public interface ModelContext { @ModelFeatureFlag(owners = {"hmusum"}) default String unknownConfigDefinition() { return "warn"; } @ModelFeatureFlag(owners = {"hmusum"}) default int searchHandlerThreadpool() { return 2; } @ModelFeatureFlag(owners = {"vekterli"}) default long mergingMaxMemoryUsagePerNode() { return -1; } + @ModelFeatureFlag(owners = {"vekterli"}) default boolean usePerDocumentThrottledDeleteBucket() { return false; } } /** Warning: As elsewhere in this package, do not make backwards incompatible changes that will break old config models! */ diff --git a/config-model/src/main/java/com/yahoo/config/model/deploy/TestProperties.java b/config-model/src/main/java/com/yahoo/config/model/deploy/TestProperties.java index 1bda8a509f1..f2444e1d222 100644 --- a/config-model/src/main/java/com/yahoo/config/model/deploy/TestProperties.java +++ b/config-model/src/main/java/com/yahoo/config/model/deploy/TestProperties.java @@ -88,6 +88,7 @@ public class TestProperties implements ModelContext.Properties, ModelContext.Fea private int contentLayerMetadataFeatureLevel = 0; private boolean dynamicHeapSize = false; private long mergingMaxMemoryUsagePerNode = -1; + private boolean usePerDocumentThrottledDeleteBucket = false; @Override public ModelContext.FeatureFlags featureFlags() { return this; } @Override public boolean multitenant() { return multitenant; } @@ -148,6 +149,7 @@ public class TestProperties implements ModelContext.Properties, ModelContext.Fea @Override public int contentLayerMetadataFeatureLevel() { return contentLayerMetadataFeatureLevel; } @Override public boolean dynamicHeapSize() { return dynamicHeapSize; } @Override public long mergingMaxMemoryUsagePerNode() { return mergingMaxMemoryUsagePerNode; } + @Override public boolean usePerDocumentThrottledDeleteBucket() { return usePerDocumentThrottledDeleteBucket; } public TestProperties sharedStringRepoNoReclaim(boolean sharedStringRepoNoReclaim) { this.sharedStringRepoNoReclaim = sharedStringRepoNoReclaim; @@ -390,6 +392,11 @@ public class TestProperties implements ModelContext.Properties, ModelContext.Fea return this; } + public TestProperties setUsePerDocumentThrottledDeleteBucket(boolean enableThrottling) { + this.usePerDocumentThrottledDeleteBucket = enableThrottling; + return this; + } + public static class Spec implements ConfigServerSpec { private final String hostName; diff --git a/config-model/src/main/java/com/yahoo/vespa/model/content/storagecluster/FileStorProducer.java b/config-model/src/main/java/com/yahoo/vespa/model/content/storagecluster/FileStorProducer.java index c8f5be71f3c..18b9129cead 100644 --- a/config-model/src/main/java/com/yahoo/vespa/model/content/storagecluster/FileStorProducer.java +++ b/config-model/src/main/java/com/yahoo/vespa/model/content/storagecluster/FileStorProducer.java @@ -47,6 +47,7 @@ public class FileStorProducer implements StorFilestorConfig.Producer { private final int responseNumThreads; private final StorFilestorConfig.Response_sequencer_type.Enum responseSequencerType; private final boolean useAsyncMessageHandlingOnSchedule; + private final boolean usePerDocumentThrottledDeleteBucket; private static StorFilestorConfig.Response_sequencer_type.Enum convertResponseSequencerType(String sequencerType) { try { @@ -62,6 +63,7 @@ public class FileStorProducer implements StorFilestorConfig.Producer { this.responseNumThreads = featureFlags.defaultNumResponseThreads(); this.responseSequencerType = convertResponseSequencerType(featureFlags.responseSequencerType()); this.useAsyncMessageHandlingOnSchedule = featureFlags.useAsyncMessageHandlingOnSchedule(); + this.usePerDocumentThrottledDeleteBucket = featureFlags.usePerDocumentThrottledDeleteBucket(); } @Override @@ -73,6 +75,7 @@ public class FileStorProducer implements StorFilestorConfig.Producer { builder.num_response_threads(responseNumThreads); builder.response_sequencer_type(responseSequencerType); builder.use_async_message_handling_on_schedule(useAsyncMessageHandlingOnSchedule); + builder.use_per_document_throttled_delete_bucket(usePerDocumentThrottledDeleteBucket); var throttleBuilder = new StorFilestorConfig.Async_operation_throttler.Builder(); builder.async_operation_throttler(throttleBuilder); } diff --git a/config-model/src/test/java/com/yahoo/vespa/model/content/StorageClusterTest.java b/config-model/src/test/java/com/yahoo/vespa/model/content/StorageClusterTest.java index e7b2c549fa5..bdd61d93136 100644 --- a/config-model/src/test/java/com/yahoo/vespa/model/content/StorageClusterTest.java +++ b/config-model/src/test/java/com/yahoo/vespa/model/content/StorageClusterTest.java @@ -355,6 +355,24 @@ public class StorageClusterTest { assertTrue(config.async_operation_throttler().throttle_individual_merge_feed_ops()); } + private void verifyUsePerDocumentThrottledDeleteBucket(boolean expected, Boolean enabled) { + var props = new TestProperties(); + if (enabled != null) { + props.setUsePerDocumentThrottledDeleteBucket(enabled); + } + var config = filestorConfigFromProducer(simpleCluster(props)); + assertEquals(expected, config.use_per_document_throttled_delete_bucket()); + } + + @Test + void delete_bucket_throttling_is_controlled_by_feature_flag() { + // TODO update default once rolled out and tested + verifyUsePerDocumentThrottledDeleteBucket(false, null); + + verifyUsePerDocumentThrottledDeleteBucket(false, false); + verifyUsePerDocumentThrottledDeleteBucket(true, true); + } + @Test void testCapacity() { String xml = joinLines( diff --git a/configserver/src/main/java/com/yahoo/vespa/config/server/ConfigServerBootstrap.java b/configserver/src/main/java/com/yahoo/vespa/config/server/ConfigServerBootstrap.java index c4fe8b42c25..07fee3c45d2 100644 --- a/configserver/src/main/java/com/yahoo/vespa/config/server/ConfigServerBootstrap.java +++ b/configserver/src/main/java/com/yahoo/vespa/config/server/ConfigServerBootstrap.java @@ -103,7 +103,7 @@ public class ConfigServerBootstrap extends AbstractComponent implements Runnable @Override public void deconstruct() { - log.log(Level.INFO, "Stopping config server"); + log.log(Level.FINE, "Stopping config server"); down(); server.stop(); log.log(Level.FINE, "RPC server stopped"); @@ -220,13 +220,13 @@ public class ConfigServerBootstrap extends AbstractComponent implements Runnable // Keep track of deployment status per application Map<ApplicationId, Future<?>> deployments = new HashMap<>(); if (applicationIds.size() > 0) { - log.log(Level.INFO, () -> "Redeploying " + applicationIds.size() + " apps " + applicationIds + " with " + + log.log(Level.FINE, () -> "Redeploying " + applicationIds.size() + " apps " + applicationIds + " with " + configserverConfig.numRedeploymentThreads() + " threads"); applicationIds.forEach(appId -> deployments.put(appId, executor.submit(() -> { - log.log(Level.INFO, () -> "Starting redeployment of " + appId); + log.log(Level.FINE, () -> "Starting redeployment of " + appId); applicationRepository.deployFromLocalActive(appId, true /* bootstrap */) .ifPresent(Deployment::activate); - log.log(Level.INFO, () -> appId + " redeployed"); + log.log(Level.FINE, () -> appId + " redeployed"); }))); } diff --git a/configserver/src/main/java/com/yahoo/vespa/config/server/deploy/ModelContextImpl.java b/configserver/src/main/java/com/yahoo/vespa/config/server/deploy/ModelContextImpl.java index 2564584a7df..28d7c1eaef6 100644 --- a/configserver/src/main/java/com/yahoo/vespa/config/server/deploy/ModelContextImpl.java +++ b/configserver/src/main/java/com/yahoo/vespa/config/server/deploy/ModelContextImpl.java @@ -210,6 +210,7 @@ public class ModelContextImpl implements ModelContext { private final String unknownConfigDefinition; private final int searchHandlerThreadpool; private final long mergingMaxMemoryUsagePerNode; + private final boolean usePerDocumentThrottledDeleteBucket; public FeatureFlags(FlagSource source, ApplicationId appId, Version version) { this.defaultTermwiseLimit = flagValue(source, appId, version, Flags.DEFAULT_TERM_WISE_LIMIT); @@ -255,6 +256,7 @@ public class ModelContextImpl implements ModelContext { this.unknownConfigDefinition = flagValue(source, appId, version, Flags.UNKNOWN_CONFIG_DEFINITION); this.searchHandlerThreadpool = flagValue(source, appId, version, Flags.SEARCH_HANDLER_THREADPOOL); this.mergingMaxMemoryUsagePerNode = flagValue(source, appId, version, Flags.MERGING_MAX_MEMORY_USAGE_PER_NODE); + this.usePerDocumentThrottledDeleteBucket = flagValue(source, appId, version, Flags.USE_PER_DOCUMENT_THROTTLED_DELETE_BUCKET); } @Override public int heapSizePercentage() { return heapPercentage; } @@ -308,6 +310,7 @@ public class ModelContextImpl implements ModelContext { @Override public String unknownConfigDefinition() { return unknownConfigDefinition; } @Override public int searchHandlerThreadpool() { return searchHandlerThreadpool; } @Override public long mergingMaxMemoryUsagePerNode() { return mergingMaxMemoryUsagePerNode; } + @Override public boolean usePerDocumentThrottledDeleteBucket() { return usePerDocumentThrottledDeleteBucket; } private static <V> V flagValue(FlagSource source, ApplicationId appId, Version vespaVersion, UnboundFlag<? extends V, ?, ?> flag) { return flag.bindTo(source) diff --git a/document/src/main/java/com/yahoo/document/annotation/Annotation.java b/document/src/main/java/com/yahoo/document/annotation/Annotation.java index 3d9300550ff..237ca6db58b 100644 --- a/document/src/main/java/com/yahoo/document/annotation/Annotation.java +++ b/document/src/main/java/com/yahoo/document/annotation/Annotation.java @@ -129,7 +129,7 @@ public class Annotation implements Comparable<Annotation> { } /** - * WARNING! Should only be used by deserializers! Sets the span node that this annotation points to. + * WARNING! Should only be used by deserializers! Sets the span node that this annotation points to. * * @param spanNode the span node that this annotation shall point to. */ @@ -221,10 +221,9 @@ public class Annotation implements Comparable<Annotation> { @Override public String toString() { - String retval = "annotation of type " + type; - retval += ((value == null) ? " (no value)" : " (with value)"); - retval += ((spanNode == null) ? " (no span)" : (" with span "+spanNode)); - return retval; + return type + " annotation " + + ((value == null) ? " (no value)" : " (with value)") + + ((spanNode == null) ? " (no span)" : (" with span "+spanNode)); } diff --git a/document/src/main/java/com/yahoo/document/annotation/AnnotationContainer.java b/document/src/main/java/com/yahoo/document/annotation/AnnotationContainer.java index 6e2b986a478..ac2e6aefa1b 100644 --- a/document/src/main/java/com/yahoo/document/annotation/AnnotationContainer.java +++ b/document/src/main/java/com/yahoo/document/annotation/AnnotationContainer.java @@ -23,11 +23,7 @@ abstract class AnnotationContainer { */ abstract void annotate(Annotation annotation); - /** - * Returns a mutable collection of annotations. - * - * @return a mutable collection of annotations. - */ + /** Returns a mutable collection of the annotations in this. */ abstract Collection<Annotation> annotations(); /** diff --git a/document/src/main/java/com/yahoo/document/annotation/AnnotationType2AnnotationContainer.java b/document/src/main/java/com/yahoo/document/annotation/AnnotationType2AnnotationContainer.java index d8709baa3a1..01b8f990bb4 100644 --- a/document/src/main/java/com/yahoo/document/annotation/AnnotationType2AnnotationContainer.java +++ b/document/src/main/java/com/yahoo/document/annotation/AnnotationType2AnnotationContainer.java @@ -16,6 +16,7 @@ import java.util.NoSuchElementException; */ // TODO: Should this be removed? public class AnnotationType2AnnotationContainer extends IteratingAnnotationContainer { + private final Multimap<AnnotationType, Annotation> annotationType2Annotation = Multimaps.newMultimap(new IdentityHashMap<>(), ArrayList::new); @Override @@ -31,7 +32,6 @@ public class AnnotationType2AnnotationContainer extends IteratingAnnotationConta } @Override - @SuppressWarnings("unchecked") Collection<Annotation> annotations() { return annotationType2Annotation.values(); } @@ -56,12 +56,12 @@ public class AnnotationType2AnnotationContainer extends IteratingAnnotationConta } private class NonRecursiveIterator implements Iterator<Annotation> { + private final IdentityHashMap<SpanNode, SpanNode> nodes; private final Iterator<Annotation> annotationIt; private Annotation next = null; private boolean nextCalled; - @SuppressWarnings("unchecked") public NonRecursiveIterator(IdentityHashMap<SpanNode, SpanNode> nodes) { this.nodes = nodes; this.annotationIt = annotationType2Annotation.values().iterator(); @@ -106,4 +106,5 @@ public class AnnotationType2AnnotationContainer extends IteratingAnnotationConta nextCalled = false; } } + } diff --git a/document/src/main/java/com/yahoo/document/annotation/IteratingAnnotationContainer.java b/document/src/main/java/com/yahoo/document/annotation/IteratingAnnotationContainer.java index 54af80e6e43..c7c3545fc09 100644 --- a/document/src/main/java/com/yahoo/document/annotation/IteratingAnnotationContainer.java +++ b/document/src/main/java/com/yahoo/document/annotation/IteratingAnnotationContainer.java @@ -5,30 +5,29 @@ import java.util.IdentityHashMap; import java.util.Iterator; /** - * @author <a href="mailto:einarmr@yahoo-inc.com">Einar M R Rosenvinge</a> + * @author Einar M R Rosenvinge */ abstract class IteratingAnnotationContainer extends AnnotationContainer { @Override Iterator<Annotation> iterator(SpanNode node) { - IdentityHashMap<SpanNode, SpanNode> nodes = new IdentityHashMap<SpanNode, SpanNode>(); + IdentityHashMap<SpanNode, SpanNode> nodes = new IdentityHashMap<>(); nodes.put(node, node); return iterator(nodes); } @Override Iterator<Annotation> iteratorRecursive(SpanNode node) { - IdentityHashMap<SpanNode, SpanNode> nodes = new IdentityHashMap<SpanNode, SpanNode>(); + IdentityHashMap<SpanNode, SpanNode> nodes = new IdentityHashMap<>(); nodes.put(node, node); - { - Iterator<SpanNode> childrenIt = node.childIteratorRecursive(); - while (childrenIt.hasNext()) { - SpanNode child = childrenIt.next(); - nodes.put(child, child); - } + Iterator<SpanNode> childrenIt = node.childIteratorRecursive(); + while (childrenIt.hasNext()) { + SpanNode child = childrenIt.next(); + nodes.put(child, child); } return iterator(nodes); } abstract Iterator<Annotation> iterator(IdentityHashMap<SpanNode, SpanNode> nodes); + } diff --git a/document/src/main/java/com/yahoo/document/annotation/ListAnnotationContainer.java b/document/src/main/java/com/yahoo/document/annotation/ListAnnotationContainer.java index c2c22558a32..b8d8cd692d8 100644 --- a/document/src/main/java/com/yahoo/document/annotation/ListAnnotationContainer.java +++ b/document/src/main/java/com/yahoo/document/annotation/ListAnnotationContainer.java @@ -13,6 +13,7 @@ import java.util.NoSuchElementException; * @author Einar M R Rosenvinge */ public class ListAnnotationContainer extends IteratingAnnotationContainer { + private final List<Annotation> annotations = new LinkedList<>(); @Override @@ -38,9 +39,8 @@ public class ListAnnotationContainer extends IteratingAnnotationContainer { @Override public boolean equals(Object o) { if (this == o) return true; - if (!(o instanceof ListAnnotationContainer)) return false; - ListAnnotationContainer that = (ListAnnotationContainer) o; - if (!annotations.equals(that.annotations)) return false; + if (!(o instanceof ListAnnotationContainer other)) return false; + if (!annotations.equals(other.annotations)) return false; return true; } @@ -50,8 +50,9 @@ public class ListAnnotationContainer extends IteratingAnnotationContainer { } private class AnnotationIterator implements Iterator<Annotation> { - private IdentityHashMap<SpanNode, SpanNode> nodes; - private PeekableListIterator<Annotation> base; + + private final IdentityHashMap<SpanNode, SpanNode> nodes; + private final PeekableListIterator<Annotation> base; private boolean nextCalled = false; AnnotationIterator(ListIterator<Annotation> baseIt, IdentityHashMap<SpanNode, SpanNode> nodes) { @@ -91,4 +92,5 @@ public class ListAnnotationContainer extends IteratingAnnotationContainer { nextCalled = false; } } + } diff --git a/document/src/main/java/com/yahoo/document/annotation/SpanList.java b/document/src/main/java/com/yahoo/document/annotation/SpanList.java index 9530689c3f1..04bdf5c892e 100644 --- a/document/src/main/java/com/yahoo/document/annotation/SpanList.java +++ b/document/src/main/java/com/yahoo/document/annotation/SpanList.java @@ -11,13 +11,14 @@ import java.util.ListIterator; /** * A node in a Span tree that can have child nodes. * - * @author <a href="mailto:einarmr@yahoo-inc.com">Einar M R Rosenvinge</a> + * @author Einar M R Rosenvinge */ public class SpanList extends SpanNode { + public static final byte ID = 2; private final List<SpanNode> children; - private int cachedFrom = Integer.MIN_VALUE; //triggers calculateFrom() - private int cachedTo = Integer.MIN_VALUE; //triggers calculateTo() + private int cachedFrom = Integer.MIN_VALUE; // triggers calculateFrom() + private int cachedTo = Integer.MIN_VALUE; // triggers calculateTo() /** Creates a new SpanList. */ public SpanList() { @@ -39,7 +40,7 @@ public class SpanList extends SpanNode { * @param other the SpanList to copy. */ public SpanList(SpanList other) { - this.children = new LinkedList<SpanNode>(); + this.children = new LinkedList<>(); for (SpanNode otherNode : other.children) { if (otherNode instanceof Span) { children.add(new Span((Span) otherNode)); @@ -86,15 +87,15 @@ public class SpanList extends SpanNode { /** Create a span, add it to this list and return it */ public Span span(int from, int length) { - Span span=new Span(from,length); + Span span = new Span(from, length); add(span); return span; } void setInvalid() { - //invalidate ourselves: + // invalidate ourselves: super.setInvalid(); - //invalidate all our children: + // invalidate all our children: for (SpanNode node : children()) { node.setInvalid(); } @@ -213,20 +214,12 @@ public class SpanList extends SpanNode { return this; } - /** - * Returns a <strong>modifiable</strong> list of the immediate children of this SpanList. - * - * @return a <strong>modifiable</strong> list of the immediate children of this SpanList. - */ + /** Returns a modifiable list of the immediate children of this SpanList. */ protected List<SpanNode> children() { return children; } - /** - * Returns the number of children this SpanList holds. - * - * @return the number of children this SpanList holds. - */ + /** Returns the number of children this SpanList holds. */ public int numChildren() { return children().size(); } @@ -394,11 +387,9 @@ public class SpanList extends SpanNode { @Override public boolean equals(Object o) { if (this == o) return true; - if (!(o instanceof SpanList)) return false; + if (!(o instanceof SpanList spanList)) return false; if (!super.equals(o)) return false; - SpanList spanList = (SpanList) o; - if (children() != null ? !children().equals(spanList.children()) : spanList.children() != null) return false; return true; @@ -415,4 +406,5 @@ public class SpanList extends SpanNode { public String toString() { return "SpanList with " + children().size() + " children"; } + } diff --git a/document/src/main/java/com/yahoo/document/annotation/SpanNode2AnnotationContainer.java b/document/src/main/java/com/yahoo/document/annotation/SpanNode2AnnotationContainer.java index a66639b3bfd..f563fd0d6cd 100644 --- a/document/src/main/java/com/yahoo/document/annotation/SpanNode2AnnotationContainer.java +++ b/document/src/main/java/com/yahoo/document/annotation/SpanNode2AnnotationContainer.java @@ -18,6 +18,7 @@ import java.util.List; * @author Einar M R Rosenvinge */ class SpanNode2AnnotationContainer extends AnnotationContainer { + private final Multimap<SpanNode, Annotation> spanNode2Annotation = Multimaps.newMultimap(new IdentityHashMap<>(), ArrayList::new); @Override diff --git a/document/src/main/java/com/yahoo/document/annotation/SpanTree.java b/document/src/main/java/com/yahoo/document/annotation/SpanTree.java index f785cf3b3ec..2faf17ce428 100644 --- a/document/src/main/java/com/yahoo/document/annotation/SpanTree.java +++ b/document/src/main/java/com/yahoo/document/annotation/SpanTree.java @@ -19,6 +19,7 @@ import java.util.Iterator; import java.util.LinkedList; import java.util.List; import java.util.Map; +import java.util.stream.Collectors; /** * A SpanTree holds a root node of a tree of SpanNodes, and a List of Annotations pointing to these nodes @@ -431,7 +432,7 @@ public class SpanTree implements Iterable<Annotation>, SpanNodeParent, Comparabl } /** - * Adds an Annotation to the internal list of annotations for this SpanTree. Use this when + * Adds an Annotation to the internal list of annotations for this SpanTree. Use this when * adding an Annotation that shall annotate a SpanNode. Upon return, Annotation.getSpanNode() * returns the given node. * @@ -446,7 +447,7 @@ public class SpanTree implements Iterable<Annotation>, SpanNodeParent, Comparabl } /** - * Adds an Annotation to the internal list of annotations for this SpanTree. Use this when + * Adds an Annotation to the internal list of annotations for this SpanTree. Use this when * adding an Annotation that shall annotate a SpanNode. Upon return, Annotation.getSpanNode() * returns the given node. This one is unchecked and assumes that the SpanNode is valid and has * already been attached to the Annotation. @@ -538,10 +539,7 @@ public class SpanTree implements Iterable<Annotation>, SpanNodeParent, Comparabl } } - /** - * Returns an Iterator over all annotations in this tree. Note that the iteration order is non-deterministic. - * @return an Iterator over all annotations in this tree. - */ + /** Returns an Iterator over all annotations in this tree. Note that the iteration order is non-deterministic. */ public Iterator<Annotation> iterator() { return annotations.annotations().iterator(); } @@ -641,7 +639,9 @@ public class SpanTree implements Iterable<Annotation>, SpanNodeParent, Comparabl @Override public String toString() { - return "SpanTree '" + name + "'"; + return "SpanTree '" + name + "' with root: " + root + + ( annotations.annotations().size() > 5 ? "" : + ", annotations: " + annotations.annotations().stream().map(Annotation::toString).collect(Collectors.joining(", "))); } @Override diff --git a/document/src/main/java/com/yahoo/document/datatypes/Struct.java b/document/src/main/java/com/yahoo/document/datatypes/Struct.java index 0b1bbf5d3ca..bb54b41069b 100644 --- a/document/src/main/java/com/yahoo/document/datatypes/Struct.java +++ b/document/src/main/java/com/yahoo/document/datatypes/Struct.java @@ -218,8 +218,7 @@ public class Struct extends StructuredFieldValue { StringBuilder retVal = new StringBuilder(); retVal.append("Struct (").append(getDataType()).append("): "); int [] increasing = getInOrder(); - for (int i = 0; i < increasing.length; i++) { - int id = increasing[i]; + for (int id : increasing) { retVal.append(getDataType().getField(id)).append("=").append(values.get(id)).append(", "); } return retVal.toString(); diff --git a/flags/src/main/java/com/yahoo/vespa/flags/Flags.java b/flags/src/main/java/com/yahoo/vespa/flags/Flags.java index 491b7db3c13..704155b3569 100644 --- a/flags/src/main/java/com/yahoo/vespa/flags/Flags.java +++ b/flags/src/main/java/com/yahoo/vespa/flags/Flags.java @@ -421,6 +421,16 @@ public class Flags { "Takes effect at redeployment", INSTANCE_ID); + public static final UnboundBooleanFlag USE_PER_DOCUMENT_THROTTLED_DELETE_BUCKET = defineFeatureFlag( + "use-per-document-throttled-delete-bucket", false, + List.of("vekterli"), "2023-11-13", "2024-03-01", + "If set, DeleteBucket operations are internally expanded to an individually persistence-" + + "throttled remove per document stored in the bucket. This makes the cost model of " + + "executing a DeleteBucket symmetrical with feeding the documents to the bucket in the " + + "first place.", + "Takes effect at redeployment", + INSTANCE_ID); + /** WARNING: public for testing: All flags should be defined in {@link Flags}. */ public static UnboundBooleanFlag defineFeatureFlag(String flagId, boolean defaultValue, List<String> owners, String createdAt, String expiresAt, String description, diff --git a/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/expressions/NGramExpression.java b/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/expressions/NGramExpression.java index 26058eeb8f3..fdfadf65400 100644 --- a/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/expressions/NGramExpression.java +++ b/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/expressions/NGramExpression.java @@ -15,6 +15,8 @@ import com.yahoo.vespa.indexinglanguage.linguistics.LinguisticsAnnotator; import java.util.Iterator; +import static com.yahoo.language.LinguisticsCase.toLowerCase; + /** * A filter which splits incoming text into n-grams. * @@ -68,8 +70,9 @@ public final class NGramExpression extends Expression { // annotate gram as a word term String gramString = gram.extractFrom(output.getString()); - typedSpan(gram.getStart(), gram.getCodePointCount(), TokenType.ALPHABETIC, spanList). - annotate(LinguisticsAnnotator.lowerCaseTermAnnotation(gramString, gramString)); + typedSpan(gram.getStart(), + gram.getCodePointCount(), + TokenType.ALPHABETIC, spanList).annotate(LinguisticsAnnotator.termAnnotation(toLowerCase(gramString), gramString)); lastPosition = gram.getStart() + gram.getCodePointCount(); } diff --git a/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/linguistics/AnnotatorConfig.java b/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/linguistics/AnnotatorConfig.java index 684bae3bf97..5c1bf0813c4 100644 --- a/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/linguistics/AnnotatorConfig.java +++ b/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/linguistics/AnnotatorConfig.java @@ -13,7 +13,7 @@ public class AnnotatorConfig implements Cloneable { private Language language; private StemMode stemMode; private boolean removeAccents; - private int maxTermOccurences; + private int maxTermOccurrences; private int maxTokenizeLength; public static final int DEFAULT_MAX_TERM_OCCURRENCES; @@ -29,7 +29,7 @@ public class AnnotatorConfig implements Cloneable { language = Language.ENGLISH; stemMode = StemMode.NONE; removeAccents = false; - maxTermOccurences = DEFAULT_MAX_TERM_OCCURRENCES; + maxTermOccurrences = DEFAULT_MAX_TERM_OCCURRENCES; maxTokenizeLength = DEFAULT_MAX_TOKENIZE_LENGTH; } @@ -37,7 +37,7 @@ public class AnnotatorConfig implements Cloneable { language = rhs.language; stemMode = rhs.stemMode; removeAccents = rhs.removeAccents; - maxTermOccurences = rhs.maxTermOccurences; + maxTermOccurrences = rhs.maxTermOccurrences; maxTokenizeLength = rhs.maxTokenizeLength; } @@ -74,11 +74,11 @@ public class AnnotatorConfig implements Cloneable { } public int getMaxTermOccurrences() { - return maxTermOccurences; + return maxTermOccurrences; } public AnnotatorConfig setMaxTermOccurrences(int maxTermCount) { - this.maxTermOccurences = maxTermCount; + this.maxTermOccurrences = maxTermCount; return this; } @@ -109,7 +109,7 @@ public class AnnotatorConfig implements Cloneable { if (removeAccents != rhs.removeAccents) { return false; } - if (maxTermOccurences != rhs.maxTermOccurences) { + if (maxTermOccurrences != rhs.maxTermOccurrences) { return false; } if (maxTokenizeLength != rhs.maxTokenizeLength) { @@ -121,6 +121,7 @@ public class AnnotatorConfig implements Cloneable { @Override public int hashCode() { return getClass().hashCode() + language.hashCode() + stemMode.hashCode() + - Boolean.valueOf(removeAccents).hashCode() + maxTermOccurences + maxTokenizeLength; + Boolean.valueOf(removeAccents).hashCode() + maxTermOccurrences + maxTokenizeLength; } + } diff --git a/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/linguistics/LinguisticsAnnotator.java b/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/linguistics/LinguisticsAnnotator.java index 191d067effe..52cd8a8ff54 100644 --- a/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/linguistics/LinguisticsAnnotator.java +++ b/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/linguistics/LinguisticsAnnotator.java @@ -34,8 +34,8 @@ public class LinguisticsAnnotator { final Map<String, Integer> termOccurrences = new HashMap<>(); final int maxOccurrences; - public TermOccurrences(int maxOccurences) { - this.maxOccurrences = maxOccurences; + public TermOccurrences(int maxOccurrences) { + this.maxOccurrences = maxOccurrences; } boolean termCountBelowLimit(String term) { @@ -86,24 +86,23 @@ public class LinguisticsAnnotator { } /** - * Creates a TERM annotation which has the lowercase value as annotation (only) if it is different from the + * Creates a TERM annotation which has the term as annotation (only) if it is different from the * original. * - * @param termToLowerCase the term to lower case - * @param origTerm the original term + * @param term the term + * @param origTerm the original term * @return the created TERM annotation */ - public static Annotation lowerCaseTermAnnotation(String termToLowerCase, String origTerm) { - String annotationValue = toLowerCase(termToLowerCase); - if (annotationValue.equals(origTerm)) { + public static Annotation termAnnotation(String term, String origTerm) { + if (term.equals(origTerm)) return new Annotation(AnnotationTypes.TERM); - } - return new Annotation(AnnotationTypes.TERM, new StringFieldValue(annotationValue)); + else + return new Annotation(AnnotationTypes.TERM, new StringFieldValue(term)); } private static void addAnnotation(Span here, String term, String orig, TermOccurrences termOccurrences) { if (termOccurrences.termCountBelowLimit(term)) { - here.annotate(lowerCaseTermAnnotation(term, orig)); + here.annotate(termAnnotation(term, orig)); } } @@ -127,29 +126,24 @@ public class LinguisticsAnnotator { } if (mode == StemMode.ALL) { Span where = parent.span((int)token.getOffset(), token.getOrig().length()); - String lowercasedOrig = toLowerCase(token.getOrig()); - addAnnotation(where, token.getOrig(), token.getOrig(), termOccurrences); - String lowercasedTerm = lowercasedOrig; + String lowercasedOrig = toLowerCase(token.getOrig()); String term = token.getTokenString(); if (term != null) { - lowercasedTerm = toLowerCase(term); - } - if (! lowercasedOrig.equals(lowercasedTerm)) { addAnnotation(where, term, token.getOrig(), termOccurrences); + if ( ! term.equals(lowercasedOrig)) + addAnnotation(where, token.getOrig(), token.getOrig(), termOccurrences); } for (int i = 0; i < token.getNumStems(); i++) { String stem = token.getStem(i); - String lowercasedStem = toLowerCase(stem); - if (! (lowercasedOrig.equals(lowercasedStem) || lowercasedTerm.equals(lowercasedStem))) { + if (! (stem.equals(lowercasedOrig) || stem.equals(term))) addAnnotation(where, stem, token.getOrig(), termOccurrences); - } } } else { String term = token.getTokenString(); if (term == null || term.trim().isEmpty()) return; if (termOccurrences.termCountBelowLimit(term)) { - parent.span((int)token.getOffset(), token.getOrig().length()).annotate(lowerCaseTermAnnotation(term, token.getOrig())); + parent.span((int)token.getOffset(), token.getOrig().length()).annotate(termAnnotation(term, token.getOrig())); } } } diff --git a/indexinglanguage/src/test/java/com/yahoo/vespa/indexinglanguage/expressions/NGramTestCase.java b/indexinglanguage/src/test/java/com/yahoo/vespa/indexinglanguage/expressions/NGramTestCase.java index bcde8751de8..b4e266ab3eb 100644 --- a/indexinglanguage/src/test/java/com/yahoo/vespa/indexinglanguage/expressions/NGramTestCase.java +++ b/indexinglanguage/src/test/java/com/yahoo/vespa/indexinglanguage/expressions/NGramTestCase.java @@ -57,8 +57,8 @@ public class NGramTestCase { new NGramExpression(new SimpleLinguistics(), 3).execute(context); StringFieldValue value = (StringFieldValue)context.getValue(); - assertEquals("Grams are pure annotations - field value is unchanged", "en gul Bille sang... ", - value.getString()); + assertEquals("Grams are pure annotations - field value is unchanged", + "en gul Bille sang... ", value.getString()); SpanTree gramTree = value.getSpanTree(SpanTrees.LINGUISTICS); assertNotNull(gramTree); SpanList grams = (SpanList)gramTree.getRoot(); diff --git a/indexinglanguage/src/test/java/com/yahoo/vespa/indexinglanguage/linguistics/LinguisticsAnnotatorTestCase.java b/indexinglanguage/src/test/java/com/yahoo/vespa/indexinglanguage/linguistics/LinguisticsAnnotatorTestCase.java index 67bff3843ee..a4dbe1fe826 100644 --- a/indexinglanguage/src/test/java/com/yahoo/vespa/indexinglanguage/linguistics/LinguisticsAnnotatorTestCase.java +++ b/indexinglanguage/src/test/java/com/yahoo/vespa/indexinglanguage/linguistics/LinguisticsAnnotatorTestCase.java @@ -19,7 +19,6 @@ import org.junit.Test; import org.mockito.Mockito; import java.util.*; -import java.util.stream.Collectors; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertTrue; @@ -29,8 +28,6 @@ import static org.junit.Assert.assertTrue; */ public class LinguisticsAnnotatorTestCase { - private static final AnnotatorConfig CONFIG = new AnnotatorConfig(); - @Test public void requireThatAnnotateFailsWithZeroTokens() { assertAnnotations(null, "foo"); @@ -42,7 +39,7 @@ public class LinguisticsAnnotatorTestCase { if (type.isIndexable()) { continue; } - assertAnnotations(null, "foo", newToken("foo", "bar", type)); + assertAnnotations(null, "foo", token("foo", "bar", type)); } } @@ -54,7 +51,27 @@ public class LinguisticsAnnotatorTestCase { if (!type.isIndexable()) { continue; } - assertAnnotations(expected, "foo", newToken("foo", "bar", type)); + assertAnnotations(expected, "foo", token("foo", "bar", type)); + } + } + + @Test + public void requireThatIndexableTokenStringsAreAnnotatedWithModeALL() { + SpanTree expected = new SpanTree(SpanTrees.LINGUISTICS); + expected.spanList().span(0, 5).annotate(new Annotation(AnnotationTypes.TERM, new StringFieldValue("tesla"))); + var span2 = expected.spanList().span(0, 4); + span2.annotate(new Annotation(AnnotationTypes.TERM)); + span2.annotate(new Annotation(AnnotationTypes.TERM, new StringFieldValue("car"))); + var span3 = expected.spanList().span(0, 8); + span3.annotate(new Annotation(AnnotationTypes.TERM, new StringFieldValue("modelxes"))); + span3.annotate(new Annotation(AnnotationTypes.TERM, new StringFieldValue("modelx"))); + span3.annotate(new Annotation(AnnotationTypes.TERM, new StringFieldValue("mex"))); + for (TokenType type : TokenType.values()) { + if (!type.isIndexable()) continue; + assertAnnotations(expected, "Tesla cars", new AnnotatorConfig().setStemMode("ALL"), + token("Tesla", "tesla", type), + token("cars", "car", type), + SimpleToken.fromStems("ModelXes", List.of("modelxes", "modelx", "mex"))); } } @@ -63,7 +80,7 @@ public class LinguisticsAnnotatorTestCase { SpanTree expected = new SpanTree(SpanTrees.LINGUISTICS); expected.spanList().span(0, 3).annotate(new Annotation(AnnotationTypes.TERM, new StringFieldValue("bar"))); for (TokenType type : TokenType.values()) { - assertAnnotations(expected, "foo", newToken("foo", "bar", type, true)); + assertAnnotations(expected, "foo", token("foo", "bar", type, true)); } } @@ -76,21 +93,21 @@ public class LinguisticsAnnotatorTestCase { if (!specialToken && !type.isIndexable()) { continue; } - assertAnnotations(expected, "foo", newToken("foo", "foo", type, specialToken)); + assertAnnotations(expected, "foo", token("foo", "foo", type, specialToken)); } } } @Test - public void requireThatTermAnnotationsAreLowerCased() { + public void requireThatTermAnnotationsPreserveCasing() { SpanTree expected = new SpanTree(SpanTrees.LINGUISTICS); - expected.spanList().span(0, 3).annotate(new Annotation(AnnotationTypes.TERM, new StringFieldValue("bar"))); + expected.spanList().span(0, 3).annotate(new Annotation(AnnotationTypes.TERM, new StringFieldValue("BaR"))); for (boolean specialToken : Arrays.asList(true, false)) { for (TokenType type : TokenType.values()) { if (!specialToken && !type.isIndexable()) { continue; } - assertAnnotations(expected, "foo", newToken("foo", "BAR", type, specialToken)); + assertAnnotations(expected, "foo", token("foo", "BaR", type, specialToken)); } } } @@ -102,11 +119,11 @@ public class LinguisticsAnnotatorTestCase { expected.spanList().span(3, 3).annotate(new Annotation(AnnotationTypes.TERM, new StringFieldValue("bar"))); expected.spanList().span(6, 3).annotate(new Annotation(AnnotationTypes.TERM, new StringFieldValue("baz"))); - SimpleToken token = newToken("FOOBARBAZ", "foobarbaz", TokenType.ALPHABETIC) - .addComponent(newToken("FOO", "foo", TokenType.ALPHABETIC).setOffset(0)) - .addComponent(newToken("BARBAZ", "barbaz", TokenType.ALPHABETIC).setOffset(3) - .addComponent(newToken("BAR", "bar", TokenType.ALPHABETIC).setOffset(3)) - .addComponent(newToken("BAZ", "baz", TokenType.ALPHABETIC).setOffset(6))); + SimpleToken token = token("FOOBARBAZ", "foobarbaz", TokenType.ALPHABETIC) + .addComponent(token("FOO", "foo", TokenType.ALPHABETIC).setOffset(0)) + .addComponent(token("BARBAZ", "barbaz", TokenType.ALPHABETIC).setOffset(3) + .addComponent(token("BAR", "bar", TokenType.ALPHABETIC).setOffset(3)) + .addComponent(token("BAZ", "baz", TokenType.ALPHABETIC).setOffset(6))); assertAnnotations(expected, "foobarbaz", token); } @@ -116,11 +133,11 @@ public class LinguisticsAnnotatorTestCase { expected.spanList().span(0, 9).annotate(new Annotation(AnnotationTypes.TERM, new StringFieldValue("foobarbaz"))); - SimpleToken token = newToken("FOOBARBAZ", "foobarbaz", TokenType.ALPHABETIC).setSpecialToken(true) - .addComponent(newToken("FOO", "foo", TokenType.ALPHABETIC).setOffset(0)) - .addComponent(newToken("BARBAZ", "barbaz", TokenType.ALPHABETIC).setOffset(3) - .addComponent(newToken("BAR", "bar", TokenType.ALPHABETIC).setOffset(3)) - .addComponent(newToken("BAZ", "baz", TokenType.ALPHABETIC).setOffset(6))); + SimpleToken token = token("FOOBARBAZ", "foobarbaz", TokenType.ALPHABETIC).setSpecialToken(true) + .addComponent(token("FOO", "foo", TokenType.ALPHABETIC).setOffset(0)) + .addComponent(token("BARBAZ", "barbaz", TokenType.ALPHABETIC).setOffset(3) + .addComponent(token("BAR", "bar", TokenType.ALPHABETIC).setOffset(3)) + .addComponent(token("BAZ", "baz", TokenType.ALPHABETIC).setOffset(6))); assertAnnotations(expected, "foobarbaz", token); } @@ -140,7 +157,8 @@ public class LinguisticsAnnotatorTestCase { continue; } assertAnnotations(expected, "foo", - newLinguistics(List.of(newToken("foo", "foo", type, specialToken)), + new AnnotatorConfig(), + newLinguistics(List.of(token("foo", "foo", type, specialToken)), Collections.singletonMap("foo", "bar"))); } } @@ -154,11 +172,9 @@ public class LinguisticsAnnotatorTestCase { StringFieldValue val = new StringFieldValue("foo"); val.setSpanTree(spanTree); - Linguistics linguistics = newLinguistics(List.of(newToken("foo", "bar", TokenType.ALPHABETIC, false)), + Linguistics linguistics = newLinguistics(List.of(token("foo", "bar", TokenType.ALPHABETIC, false)), Collections.<String, String>emptyMap()); - new LinguisticsAnnotator(linguistics, CONFIG).annotate(val); - - assertTrue(new LinguisticsAnnotator(linguistics, CONFIG).annotate(val)); + assertTrue(new LinguisticsAnnotator(linguistics, new AnnotatorConfig()).annotate(val)); assertEquals(spanTree, val.getSpanTree(SpanTrees.LINGUISTICS)); } @@ -186,7 +202,7 @@ public class LinguisticsAnnotatorTestCase { } @Test - public void requireThatMaxTermOccurencesIsHonored() { + public void requireThatMaxTermOccurrencesIsHonored() { final String inputTerm = "foo"; final String stemmedInputTerm = "bar"; // completely different from // inputTerm for safer test @@ -204,7 +220,7 @@ public class LinguisticsAnnotatorTestCase { StringBuilder input = new StringBuilder(); Token[] tokens = new Token[inputTermOccurence]; for (int i = 0; i < inputTermOccurence; ++i) { - SimpleToken t = newToken(inputTerm, stemmedInputTerm, type); + SimpleToken t = token(inputTerm, stemmedInputTerm, type); t.setOffset(i * paddedInputTerm.length()); tokens[i] = t; input.append(paddedInputTerm); @@ -214,28 +230,29 @@ public class LinguisticsAnnotatorTestCase { } // -------------------------------------------------------------------------------- - // // Utilities - // - // -------------------------------------------------------------------------------- - private static SimpleToken newToken(String orig, String stem, TokenType type) { - return newToken(orig, stem, type, false); + private static SimpleToken token(String orig, String stem, TokenType type) { + return token(orig, stem, type, false); } - private static SimpleToken newToken(String orig, String stem, TokenType type, boolean specialToken) { + private static SimpleToken token(String orig, String stem, TokenType type, boolean specialToken) { return new SimpleToken(orig).setTokenString(stem) .setType(type) .setSpecialToken(specialToken); } private static void assertAnnotations(SpanTree expected, String value, Token... tokens) { - assertAnnotations(expected, value, newLinguistics(Arrays.asList(tokens), Collections.<String, String>emptyMap())); + assertAnnotations(expected, value, new AnnotatorConfig(), newLinguistics(Arrays.asList(tokens), Collections.emptyMap())); + } + + private static void assertAnnotations(SpanTree expected, String value, AnnotatorConfig config, Token... tokens) { + assertAnnotations(expected, value, config, newLinguistics(Arrays.asList(tokens), Collections.emptyMap())); } - private static void assertAnnotations(SpanTree expected, String str, Linguistics linguistics) { + private static void assertAnnotations(SpanTree expected, String str, AnnotatorConfig config, Linguistics linguistics) { StringFieldValue val = new StringFieldValue(str); - assertEquals(expected != null, new LinguisticsAnnotator(linguistics, CONFIG).annotate(val)); + assertEquals(expected != null, new LinguisticsAnnotator(linguistics, config).annotate(val)); assertEquals(expected, val.getSpanTree(SpanTrees.LINGUISTICS)); } diff --git a/linguistics/src/main/java/com/yahoo/language/LinguisticsCase.java b/linguistics/src/main/java/com/yahoo/language/LinguisticsCase.java index 5ad6a382abd..f0439a21fec 100644 --- a/linguistics/src/main/java/com/yahoo/language/LinguisticsCase.java +++ b/linguistics/src/main/java/com/yahoo/language/LinguisticsCase.java @@ -26,6 +26,7 @@ public class LinguisticsCase { public static String toLowerCase(String in) { // def is picked from http://docs.oracle.com/javase/6/docs/api/java/lang/String.html#toLowerCase%28%29 // Also, at the time of writing, English is the default language for queries + if (in == null) return null; return Lowercase.toLowerCase(in); } diff --git a/linguistics/src/main/java/com/yahoo/language/process/GramSplitter.java b/linguistics/src/main/java/com/yahoo/language/process/GramSplitter.java index 33f5ee7e4bb..9178c2d7e09 100644 --- a/linguistics/src/main/java/com/yahoo/language/process/GramSplitter.java +++ b/linguistics/src/main/java/com/yahoo/language/process/GramSplitter.java @@ -189,9 +189,8 @@ public class GramSplitter { @Override public boolean equals(Object o) { if (this == o) return true; - if ( ! (o instanceof Gram)) return false; + if ( ! (o instanceof Gram gram)) return false; - Gram gram = (Gram)o; if (codePointCount != gram.codePointCount) return false; if (start != gram.start) return false; return true; diff --git a/linguistics/src/main/java/com/yahoo/language/simple/SimpleToken.java b/linguistics/src/main/java/com/yahoo/language/simple/SimpleToken.java index 6cc68c7ac14..809e9b8d133 100644 --- a/linguistics/src/main/java/com/yahoo/language/simple/SimpleToken.java +++ b/linguistics/src/main/java/com/yahoo/language/simple/SimpleToken.java @@ -15,35 +15,48 @@ import java.util.Objects; public class SimpleToken implements Token { private final List<Token> components = new ArrayList<>(); - private final String orig; + private final String original; private TokenType type = TokenType.UNKNOWN; private TokenScript script = TokenScript.UNKNOWN; private String tokenString; + private List<String> stems = null; // Any additional stems after tokenString private boolean specialToken = false; private long offset = 0; - public SimpleToken(String orig) { - this(orig, null); + public SimpleToken(String original) { + this(original, (String)null); } - public SimpleToken(String orig, String tokenString) { - this.orig = orig; + public SimpleToken(String original, String tokenString) { + this.original = original; this.tokenString = tokenString; } + /** Exposed as fromStems */ + private SimpleToken(String original, List<String> stems) { + this.type = TokenType.ALPHABETIC; // Only type which may have stems + this.original = original; + this.tokenString = stems.get(0); + this.stems = List.copyOf(stems.subList(1, stems.size())); + } + @Override public String getOrig() { - return orig; + return original; } @Override public int getNumStems() { - return tokenString != null ? 1 : 0; + return (tokenString != null ? 1 : 0) + (stems != null ? stems.size() : 0); } @Override public String getStem(int i) { - return tokenString; + if (i == 0) + return tokenString; + if (stems != null && i-1 < stems.size()) + return stems.get(i-1); + return tokenString; // TODO Vespa 9: throw new IllegalArgumentException() instead } @Override @@ -131,12 +144,12 @@ public class SimpleToken implements Token { @Override public int hashCode() { - return orig.hashCode(); + return original.hashCode(); } @Override public String toString() { - return "token '" + orig + "'"; + return "token '" + original + "'"; } public String toDetailString() { @@ -171,4 +184,8 @@ public class SimpleToken implements Token { return getType().isIndexable() && (getOrig().length() > 0); } + public static SimpleToken fromStems(String original, List<String> stems) { + return new SimpleToken(original, stems); + } + } diff --git a/linguistics/src/main/java/com/yahoo/language/simple/SimpleTokenizer.java b/linguistics/src/main/java/com/yahoo/language/simple/SimpleTokenizer.java index 98a84a48095..b72d2bd6d37 100644 --- a/linguistics/src/main/java/com/yahoo/language/simple/SimpleTokenizer.java +++ b/linguistics/src/main/java/com/yahoo/language/simple/SimpleTokenizer.java @@ -106,7 +106,7 @@ public class SimpleTokenizer implements Tokenizer { String oldToken = token; token = stemmer.stem(token); String newToken = token; - log.log(Level.FINEST, () -> "stem '" + oldToken+"' to '" + newToken+"'"); + log.log(Level.FINEST, () -> "stem '" + oldToken + "' to '" + newToken + "'"); } String result = token; log.log(Level.FINEST, () -> "processed token is: " + result); diff --git a/parent/pom.xml b/parent/pom.xml index aeb0c9c4aa9..15aee603f17 100644 --- a/parent/pom.xml +++ b/parent/pom.xml @@ -317,7 +317,7 @@ --> <groupId>org.openrewrite.maven</groupId> <artifactId>rewrite-maven-plugin</artifactId> - <version>5.10.0</version> + <version>5.11.0</version> <configuration> <activeRecipes> <recipe>org.openrewrite.java.testing.junit5.JUnit5BestPractices</recipe> @@ -1126,7 +1126,7 @@ See pluginManagement of rewrite-maven-plugin for more details --> <groupId>org.openrewrite.recipe</groupId> <artifactId>rewrite-recipe-bom</artifactId> - <version>2.4.1</version> + <version>2.5.0</version> <type>pom</type> <scope>import</scope> </dependency> diff --git a/searchcore/src/vespa/searchcore/proton/matchengine/matchengine.cpp b/searchcore/src/vespa/searchcore/proton/matchengine/matchengine.cpp index efe03c7b57e..1b891baa5c9 100644 --- a/searchcore/src/vespa/searchcore/proton/matchengine/matchengine.cpp +++ b/searchcore/src/vespa/searchcore/proton/matchengine/matchengine.cpp @@ -75,7 +75,7 @@ MatchEngine::close() LOG(debug, "Closing search interface."); { std::lock_guard<std::mutex> guard(_lock); - _closed = true; + _closed.store(true, std::memory_order_relaxed); } LOG(debug, "Handshaking with task manager."); @@ -108,7 +108,7 @@ SearchReply::UP MatchEngine::search(SearchRequest::Source request, SearchClient &client) { // We continue to allow searches if the node is in Maintenance mode - if (_closed || (!_nodeUp && !_nodeMaintenance.load(std::memory_order_relaxed))) { + if (_closed.load(std::memory_order_relaxed) || (!_nodeUp && !_nodeMaintenance.load(std::memory_order_relaxed))) { auto ret = std::make_unique<SearchReply>(); ret->setDistributionKey(_distributionKey); diff --git a/searchcore/src/vespa/searchcore/proton/matchengine/matchengine.h b/searchcore/src/vespa/searchcore/proton/matchengine/matchengine.h index 9d4bfc5cce5..5365cb6a290 100644 --- a/searchcore/src/vespa/searchcore/proton/matchengine/matchengine.h +++ b/searchcore/src/vespa/searchcore/proton/matchengine/matchengine.h @@ -20,7 +20,7 @@ private: std::mutex _lock; const uint32_t _distributionKey; bool _async; - bool _closed; + std::atomic<bool> _closed; std::atomic<bool> _forward_issues; HandlerMap<ISearchHandler> _handlers; vespalib::ThreadStackExecutor _executor; diff --git a/standalone-container/src/main/java/com/yahoo/container/standalone/StandaloneContainerApplication.java b/standalone-container/src/main/java/com/yahoo/container/standalone/StandaloneContainerApplication.java index 860d14a9333..4e7ae5418a1 100644 --- a/standalone-container/src/main/java/com/yahoo/container/standalone/StandaloneContainerApplication.java +++ b/standalone-container/src/main/java/com/yahoo/container/standalone/StandaloneContainerApplication.java @@ -49,7 +49,6 @@ import java.nio.file.Paths; import java.util.ArrayList; import java.util.List; import java.util.Optional; -import java.util.stream.Collectors; import static com.yahoo.collections.CollectionUtil.first; @@ -296,21 +295,21 @@ public class StandaloneContainerApplication implements Application { container.initService(deployState); } - private static Element getJDiscInServices(Element element) { - List<Element> jDiscElements = new ArrayList<>(); + private static Element getContainerElementInServices(Element element) { + List<Element> containerElements = new ArrayList<>(); for (ConfigModelId cid : ContainerModelBuilder.configModelIds) { List<Element> children = XML.getChildren(element, cid.getName()); - jDiscElements.addAll(children); + containerElements.addAll(children); } - if (jDiscElements.size() == 1) { - return jDiscElements.get(0); - } else if (jDiscElements.isEmpty()) { - throw new RuntimeException("No jdisc element found under services."); + if (containerElements.size() == 1) { + return containerElements.get(0); + } else if (containerElements.isEmpty()) { + throw new RuntimeException("No container element found under services."); } else { - List<String> nameAndId = jDiscElements.stream().map(e -> e.getNodeName() + " id='" + e.getAttribute("id") + "'") + List<String> nameAndId = containerElements.stream().map(e -> e.getNodeName() + " id='" + e.getAttribute("id") + "'") .toList(); - throw new RuntimeException("Found multiple JDisc elements: " + String.join(", ", nameAndId)); + throw new RuntimeException("Found multiple container elements: " + String.join(", ", nameAndId)); } } @@ -321,7 +320,7 @@ public class StandaloneContainerApplication implements Application { if (ContainerModelBuilder.configModelIds.stream().anyMatch(id -> id.getName().equals(nodeName))) { return element; } else { - return getJDiscInServices(element); + return getContainerElementInServices(element); } } diff --git a/storage/src/tests/storageserver/mergethrottlertest.cpp b/storage/src/tests/storageserver/mergethrottlertest.cpp index 6f80ffe0727..a480ba2740f 100644 --- a/storage/src/tests/storageserver/mergethrottlertest.cpp +++ b/storage/src/tests/storageserver/mergethrottlertest.cpp @@ -1603,6 +1603,39 @@ TEST_F(MergeThrottlerTest, queued_merges_are_not_counted_towards_memory_usage) { EXPECT_EQ(throttler(0).getMetrics().estimated_merge_memory_usage.getLast(), 0_Mi); } +TEST_F(MergeThrottlerTest, enqueued_merge_not_started_if_insufficient_memory_available) { + // See `queued_merges_are_not_counted_towards_memory_usage` test for magic number rationale + const auto max_pending = throttler_max_merges_pending(0); + ASSERT_LT(max_pending, 1000); + ASSERT_GT(max_pending, 1); + throttler(0).set_max_merge_memory_usage_bytes_locking(10_Mi); + + // Fill up entire active window and enqueue a single merge + fill_throttler_queue_with_n_commands(0, 0); + _topLinks[0]->sendDown(MergeBuilder(document::BucketId(16, 1000)).nodes(0, 1, 2).unordered(true).memory_usage(11_Mi).create()); + waitUntilMergeQueueIs(throttler(0), 1, _messageWaitTime); // Should end up in queue + + // Drain all active merges. As long as we have other active merges, the enqueued merge should not + // be allowed through since it's too large. Eventually it will hit the "at least one merge must + // be allowed at any time regardless of size" exception and is dequeued. + for (uint32_t i = 0; i < max_pending; ++i) { + auto fwd_cmd = _topLinks[0]->getAndRemoveMessage(MessageType::MERGEBUCKET); + auto fwd_reply = dynamic_cast<api::MergeBucketCommand&>(*fwd_cmd).makeReply(); + + ASSERT_NO_FATAL_FAILURE(send_and_expect_reply( + std::shared_ptr<api::StorageReply>(std::move(fwd_reply)), + MessageType::MERGEBUCKET_REPLY, ReturnCode::OK)); // Unwind reply for completed merge + + if (i < max_pending - 1) { + // Merge should still be in the queue, as it requires 11 MiB, and we only have 10 MiB. + // It will eventually be executed when the window is empty (see below). + waitUntilMergeQueueIs(throttler(0), 1, _messageWaitTime); + } + } + // We've freed up the entire send window, so the over-sized merge can finally squeeze through. + waitUntilMergeQueueIs(throttler(0), 0, _messageWaitTime); +} + namespace { vespalib::HwInfo make_mem_info(uint64_t mem_size) { diff --git a/storage/src/vespa/storage/storageserver/mergethrottler.cpp b/storage/src/vespa/storage/storageserver/mergethrottler.cpp index 54a4ddbc780..b99c238f9ab 100644 --- a/storage/src/vespa/storage/storageserver/mergethrottler.cpp +++ b/storage/src/vespa/storage/storageserver/mergethrottler.cpp @@ -419,6 +419,13 @@ MergeThrottler::getNextQueuedMerge() return entry._msg; } +const api::MergeBucketCommand& +MergeThrottler::peek_merge_queue() const noexcept +{ + assert(!_queue.empty()); + return dynamic_cast<const api::MergeBucketCommand&>(*_queue.begin()->_msg); +} + void MergeThrottler::enqueue_merge_for_later_processing( const api::StorageMessage::SP& msg, @@ -549,45 +556,40 @@ MergeThrottler::rejectOutdatedQueuedMerges( // If there's a merge queued and the throttling policy allows for // the merge to be processed, do so. bool -MergeThrottler::attemptProcessNextQueuedMerge( - MessageGuard& msgGuard) +MergeThrottler::attemptProcessNextQueuedMerge(MessageGuard& msgGuard) { - if (!canProcessNewMerge()) { + if (_queue.empty()) { + return false; + } + if ( ! (canProcessNewMerge() && accepting_merge_is_within_memory_limits(peek_merge_queue()))) { // Should never reach a non-sending state when there are // no to-be-replied merges that can trigger a new processing assert(!_merges.empty()); return false; } + // If we get here, there must be something to dequeue. api::StorageMessage::SP msg = getNextQueuedMerge(); - if (msg) { - // In case of resends and whatnot, it's possible for a merge - // command to be in the queue while another higher priority - // command for the same bucket sneaks in front of it and gets - // a slot. Send BUSY in this case to make the distributor retry - // later, at which point the existing merge has hopefully gone - // through and the new one will be effectively a no-op to perform - if (!isMergeAlreadyKnown(msg)) { - LOG(spam, "Processing queued merge %s", msg->toString().c_str()); - processNewMergeCommand(msg, msgGuard); - } else { - vespalib::asciistream oss; - oss << "Queued merge " << msg->toString() << " is out of date; it has already " - "been started by someone else since it was queued"; - LOG(debug, "%s", oss.c_str()); - sendReply(dynamic_cast<const api::MergeBucketCommand&>(*msg), - api::ReturnCode(api::ReturnCode::BUSY, oss.str()), - msgGuard, _metrics->chaining); - } - return true; + assert(msg); + // In case of resends and whatnot, it's possible for a merge + // command to be in the queue while another higher priority + // command for the same bucket sneaks in front of it and gets + // a slot. Send BUSY in this case to make the distributor retry + // later, at which point the existing merge has hopefully gone + // through and the new one will be effectively a no-op to perform + if (!isMergeAlreadyKnown(msg)) { + LOG(spam, "Processing queued merge %s", msg->toString().c_str()); + processNewMergeCommand(msg, msgGuard); } else { - if (_queue.empty()) { - LOG(spam, "Queue empty - no merges to process"); - } else { - LOG(spam, "Merges queued, but throttle policy disallows further merges at this time"); - } + vespalib::asciistream oss; + oss << "Queued merge " << msg->toString() << " is out of date; it has already " + "been started by someone else since it was queued"; + LOG(debug, "%s", oss.c_str()); + sendReply(dynamic_cast<const api::MergeBucketCommand&>(*msg), + api::ReturnCode(api::ReturnCode::BUSY, oss.str()), + msgGuard, _metrics->chaining); } - return false; + return true; } bool diff --git a/storage/src/vespa/storage/storageserver/mergethrottler.h b/storage/src/vespa/storage/storageserver/mergethrottler.h index a5559c159bf..e210a8bfb8b 100644 --- a/storage/src/vespa/storage/storageserver/mergethrottler.h +++ b/storage/src/vespa/storage/storageserver/mergethrottler.h @@ -372,6 +372,7 @@ private: [[nodiscard]] bool allow_merge_despite_full_window(const api::MergeBucketCommand& cmd) const noexcept; [[nodiscard]] bool accepting_merge_is_within_memory_limits(const api::MergeBucketCommand& cmd) const noexcept; [[nodiscard]] bool may_allow_into_queue(const api::MergeBucketCommand& cmd) const noexcept; + [[nodiscard]] const api::MergeBucketCommand& peek_merge_queue() const noexcept; void sendReply(const api::MergeBucketCommand& cmd, const api::ReturnCode& result, |