From f3a6f2dedc0f6c43b7a51d348b9096a676b168c5 Mon Sep 17 00:00:00 2001 From: Lester Solbakken Date: Thu, 2 May 2024 14:45:56 +0200 Subject: Restart on deploy for local LLMs --- .../model/application/validation/Validation.java | 2 + .../RestartOnDeployForLocalLLMValidator.java | 38 ++++++++++++ .../RestartOnDeployForLocalLLMValidatorTest.java | 69 ++++++++++++++++++++++ 3 files changed, 109 insertions(+) create mode 100644 config-model/src/main/java/com/yahoo/vespa/model/application/validation/change/RestartOnDeployForLocalLLMValidator.java create mode 100644 config-model/src/test/java/com/yahoo/vespa/model/application/validation/change/RestartOnDeployForLocalLLMValidatorTest.java (limited to 'config-model/src') diff --git a/config-model/src/main/java/com/yahoo/vespa/model/application/validation/Validation.java b/config-model/src/main/java/com/yahoo/vespa/model/application/validation/Validation.java index ed0804f7420..7f624032627 100644 --- a/config-model/src/main/java/com/yahoo/vespa/model/application/validation/Validation.java +++ b/config-model/src/main/java/com/yahoo/vespa/model/application/validation/Validation.java @@ -19,6 +19,7 @@ import com.yahoo.vespa.model.application.validation.change.IndexingModeChangeVal import com.yahoo.vespa.model.application.validation.change.NodeResourceChangeValidator; import com.yahoo.vespa.model.application.validation.change.RedundancyIncreaseValidator; import com.yahoo.vespa.model.application.validation.change.ResourcesReductionValidator; +import com.yahoo.vespa.model.application.validation.change.RestartOnDeployForLocalLLMValidator; import com.yahoo.vespa.model.application.validation.change.RestartOnDeployForOnnxModelChangesValidator; import com.yahoo.vespa.model.application.validation.change.StartupCommandChangeValidator; import com.yahoo.vespa.model.application.validation.change.StreamingSearchClusterChangeValidator; @@ -129,6 +130,7 @@ public class Validation { new CertificateRemovalChangeValidator().validate(execution); new RedundancyValidator().validate(execution); new RestartOnDeployForOnnxModelChangesValidator().validate(execution); + new RestartOnDeployForLocalLLMValidator().validate(execution); } public interface Context { diff --git a/config-model/src/main/java/com/yahoo/vespa/model/application/validation/change/RestartOnDeployForLocalLLMValidator.java b/config-model/src/main/java/com/yahoo/vespa/model/application/validation/change/RestartOnDeployForLocalLLMValidator.java new file mode 100644 index 00000000000..88cfcfaf67c --- /dev/null +++ b/config-model/src/main/java/com/yahoo/vespa/model/application/validation/change/RestartOnDeployForLocalLLMValidator.java @@ -0,0 +1,38 @@ +// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.vespa.model.application.validation.change; + +import com.yahoo.vespa.model.application.validation.Validation.ChangeContext; + +import java.util.logging.Logger; + +import static java.util.logging.Level.INFO; + +/** + * If using local LLMs, this validator will make sure that restartOnDeploy is set for + * configs for this cluster. + * + * @author lesters + */ +public class RestartOnDeployForLocalLLMValidator implements ChangeValidator { + + private static final Logger log = Logger.getLogger(RestartOnDeployForLocalLLMValidator.class.getName()); + + @Override + public void validate(ChangeContext context) { + + for (var cluster : context.model().getContainerClusters().values()) { + + // For now, if a local LLM is used, force a restart of the services + // Later, be more sophisticated and only restart if redeploy does not fit in (GPU) memory + cluster.getAllComponents().forEach(component -> { + if (component.getClassId().getName().equals("ai.vespa.llm.clients.LocalLLM")) { + String message = "Restarting services in %s because of local LLM use".formatted(cluster); + log.log(INFO, message); + context.require(new VespaRestartAction(cluster.id(), message)); + } + }); + + } + } + +} diff --git a/config-model/src/test/java/com/yahoo/vespa/model/application/validation/change/RestartOnDeployForLocalLLMValidatorTest.java b/config-model/src/test/java/com/yahoo/vespa/model/application/validation/change/RestartOnDeployForLocalLLMValidatorTest.java new file mode 100644 index 00000000000..30915ad02fc --- /dev/null +++ b/config-model/src/test/java/com/yahoo/vespa/model/application/validation/change/RestartOnDeployForLocalLLMValidatorTest.java @@ -0,0 +1,69 @@ +// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.vespa.model.application.validation.change; + +import com.yahoo.config.model.api.ConfigChangeAction; +import com.yahoo.config.model.deploy.DeployState; +import com.yahoo.config.model.deploy.TestProperties; +import com.yahoo.vespa.model.VespaModel; +import com.yahoo.vespa.model.application.validation.ValidationTester; +import com.yahoo.vespa.model.test.utils.VespaModelCreatorWithMockPkg; +import org.junit.jupiter.api.Test; + +import java.util.List; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertTrue; + +/** + * @author lesters + */ +public class RestartOnDeployForLocalLLMValidatorTest { + + @Test + void validate_no_restart_on_deploy() { + VespaModel current = createModelWithComponent("ai.vespa.llm.clients.OpenAI"); + VespaModel next = createModelWithComponent("ai.vespa.llm.clients.OpenAI"); + List result = validateModel(current, next); + assertEquals(0, result.size()); + } + + @Test + void validate_restart_on_deploy() { + VespaModel current = createModelWithComponent("ai.vespa.llm.clients.LocalLLM"); + VespaModel next = createModelWithComponent("ai.vespa.llm.clients.LocalLLM"); + List result = validateModel(current, next); + assertEquals(1, result.size()); + assertTrue(result.get(0).validationId().isEmpty()); + assertEquals("Restarting services in container cluster 'cluster1' because of local LLM use", result.get(0).getMessage()); + } + + private static List validateModel(VespaModel current, VespaModel next) { + return ValidationTester.validateChanges(new RestartOnDeployForLocalLLMValidator(), + next, + deployStateBuilder().previousModel(current).build()); + } + + private static VespaModel createModelWithComponent(String componentClass) { + var xml = """ + + + + + + + + + """.formatted(componentClass); + DeployState.Builder builder = deployStateBuilder(); + return new VespaModelCreatorWithMockPkg(null, xml).create(builder); + } + + private static DeployState.Builder deployStateBuilder() { + return new DeployState.Builder().properties(new TestProperties()); + } + + private static void assertStartsWith(String expected, List result) { + assertTrue(result.get(0).getMessage().startsWith(expected)); + } + +} -- cgit v1.2.3 From 1f22a18d27c99abbf81ce208d5584f58ea5b34ac Mon Sep 17 00:00:00 2001 From: Lester Solbakken Date: Fri, 3 May 2024 12:22:52 +0200 Subject: Only restart if local LLM is found in both previous and next generation --- .../RestartOnDeployForLocalLLMValidator.java | 41 +++++++++++++++------- .../RestartOnDeployForLocalLLMValidatorTest.java | 13 ++++--- 2 files changed, 37 insertions(+), 17 deletions(-) (limited to 'config-model/src') diff --git a/config-model/src/main/java/com/yahoo/vespa/model/application/validation/change/RestartOnDeployForLocalLLMValidator.java b/config-model/src/main/java/com/yahoo/vespa/model/application/validation/change/RestartOnDeployForLocalLLMValidator.java index 88cfcfaf67c..c9b67ca4240 100644 --- a/config-model/src/main/java/com/yahoo/vespa/model/application/validation/change/RestartOnDeployForLocalLLMValidator.java +++ b/config-model/src/main/java/com/yahoo/vespa/model/application/validation/change/RestartOnDeployForLocalLLMValidator.java @@ -1,11 +1,17 @@ // Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. package com.yahoo.vespa.model.application.validation.change; +import com.yahoo.config.provision.ClusterSpec; +import com.yahoo.vespa.model.VespaModel; import com.yahoo.vespa.model.application.validation.Validation.ChangeContext; +import com.yahoo.vespa.model.container.ApplicationContainerCluster; +import java.util.HashSet; +import java.util.Set; import java.util.logging.Logger; import static java.util.logging.Level.INFO; +import static java.util.stream.Collectors.toUnmodifiableSet; /** * If using local LLMs, this validator will make sure that restartOnDeploy is set for @@ -15,24 +21,35 @@ import static java.util.logging.Level.INFO; */ public class RestartOnDeployForLocalLLMValidator implements ChangeValidator { + private static final String LOCAL_LLM_COMPONENT = "ai.vespa.llm.clients.LocalLLM"; + private static final Logger log = Logger.getLogger(RestartOnDeployForLocalLLMValidator.class.getName()); @Override public void validate(ChangeContext context) { + var previousClustersWithLocalLLM = findClustersWithLocalLLMs(context.previousModel()); + var nextClustersWithLocalLLM = findClustersWithLocalLLMs(context.model()); + + // Only restart services if we use a local LLM in both the next and previous generation + for (var clusterId : intersect(previousClustersWithLocalLLM, nextClustersWithLocalLLM)) { + String message = "Need to restart services in %s due to use of local LLM".formatted(clusterId); + context.require(new VespaRestartAction(clusterId, message)); + log.log(INFO, message); + } + } - for (var cluster : context.model().getContainerClusters().values()) { - - // For now, if a local LLM is used, force a restart of the services - // Later, be more sophisticated and only restart if redeploy does not fit in (GPU) memory - cluster.getAllComponents().forEach(component -> { - if (component.getClassId().getName().equals("ai.vespa.llm.clients.LocalLLM")) { - String message = "Restarting services in %s because of local LLM use".formatted(cluster); - log.log(INFO, message); - context.require(new VespaRestartAction(cluster.id(), message)); - } - }); + private Set findClustersWithLocalLLMs(VespaModel model) { + return model.getContainerClusters().values().stream() + .filter(cluster -> cluster.getAllComponents().stream() + .anyMatch(component -> component.getClassId().getName().equals(LOCAL_LLM_COMPONENT))) + .map(ApplicationContainerCluster::id) + .collect(toUnmodifiableSet()); + } - } + private Set intersect(Set a, Set b) { + Set result = new HashSet<>(a); + result.retainAll(b); + return result; } } diff --git a/config-model/src/test/java/com/yahoo/vespa/model/application/validation/change/RestartOnDeployForLocalLLMValidatorTest.java b/config-model/src/test/java/com/yahoo/vespa/model/application/validation/change/RestartOnDeployForLocalLLMValidatorTest.java index 30915ad02fc..311d4f39fcd 100644 --- a/config-model/src/test/java/com/yahoo/vespa/model/application/validation/change/RestartOnDeployForLocalLLMValidatorTest.java +++ b/config-model/src/test/java/com/yahoo/vespa/model/application/validation/change/RestartOnDeployForLocalLLMValidatorTest.java @@ -19,22 +19,25 @@ import static org.junit.jupiter.api.Assertions.assertTrue; */ public class RestartOnDeployForLocalLLMValidatorTest { + public static final String OPENAI_LLM_COMPONENT = "ai.vespa.llm.clients.OpenAI"; + public static final String LOCAL_LLM_COMPONENT = "ai.vespa.llm.clients.LocalLLM"; + @Test void validate_no_restart_on_deploy() { - VespaModel current = createModelWithComponent("ai.vespa.llm.clients.OpenAI"); - VespaModel next = createModelWithComponent("ai.vespa.llm.clients.OpenAI"); + VespaModel current = createModelWithComponent(OPENAI_LLM_COMPONENT); + VespaModel next = createModelWithComponent(LOCAL_LLM_COMPONENT); List result = validateModel(current, next); assertEquals(0, result.size()); } @Test void validate_restart_on_deploy() { - VespaModel current = createModelWithComponent("ai.vespa.llm.clients.LocalLLM"); - VespaModel next = createModelWithComponent("ai.vespa.llm.clients.LocalLLM"); + VespaModel current = createModelWithComponent(LOCAL_LLM_COMPONENT); + VespaModel next = createModelWithComponent(LOCAL_LLM_COMPONENT); List result = validateModel(current, next); assertEquals(1, result.size()); assertTrue(result.get(0).validationId().isEmpty()); - assertEquals("Restarting services in container cluster 'cluster1' because of local LLM use", result.get(0).getMessage()); + assertEquals("Need to restart services in cluster 'cluster1' due to use of local LLM", result.get(0).getMessage()); } private static List validateModel(VespaModel current, VespaModel next) { -- cgit v1.2.3 From 0c0868e895c2ad0c1b82c1f57992e68378d1f3b0 Mon Sep 17 00:00:00 2001 From: Lester Solbakken Date: Fri, 3 May 2024 13:04:05 +0200 Subject: Use class.getName() instead of string --- .../RestartOnDeployForLocalLLMValidator.java | 2 +- .../RestartOnDeployForLocalLLMValidatorTest.java | 25 ++++++++++++++-------- 2 files changed, 17 insertions(+), 10 deletions(-) (limited to 'config-model/src') diff --git a/config-model/src/main/java/com/yahoo/vespa/model/application/validation/change/RestartOnDeployForLocalLLMValidator.java b/config-model/src/main/java/com/yahoo/vespa/model/application/validation/change/RestartOnDeployForLocalLLMValidator.java index c9b67ca4240..ccfc611c3dc 100644 --- a/config-model/src/main/java/com/yahoo/vespa/model/application/validation/change/RestartOnDeployForLocalLLMValidator.java +++ b/config-model/src/main/java/com/yahoo/vespa/model/application/validation/change/RestartOnDeployForLocalLLMValidator.java @@ -21,7 +21,7 @@ import static java.util.stream.Collectors.toUnmodifiableSet; */ public class RestartOnDeployForLocalLLMValidator implements ChangeValidator { - private static final String LOCAL_LLM_COMPONENT = "ai.vespa.llm.clients.LocalLLM"; + public static final String LOCAL_LLM_COMPONENT = ai.vespa.llm.clients.LocalLLM.class.getName(); private static final Logger log = Logger.getLogger(RestartOnDeployForLocalLLMValidator.class.getName()); diff --git a/config-model/src/test/java/com/yahoo/vespa/model/application/validation/change/RestartOnDeployForLocalLLMValidatorTest.java b/config-model/src/test/java/com/yahoo/vespa/model/application/validation/change/RestartOnDeployForLocalLLMValidatorTest.java index 311d4f39fcd..13e91f60712 100644 --- a/config-model/src/test/java/com/yahoo/vespa/model/application/validation/change/RestartOnDeployForLocalLLMValidatorTest.java +++ b/config-model/src/test/java/com/yahoo/vespa/model/application/validation/change/RestartOnDeployForLocalLLMValidatorTest.java @@ -19,21 +19,20 @@ import static org.junit.jupiter.api.Assertions.assertTrue; */ public class RestartOnDeployForLocalLLMValidatorTest { - public static final String OPENAI_LLM_COMPONENT = "ai.vespa.llm.clients.OpenAI"; - public static final String LOCAL_LLM_COMPONENT = "ai.vespa.llm.clients.LocalLLM"; + private static final String LOCAL_LLM_COMPONENT = RestartOnDeployForLocalLLMValidator.LOCAL_LLM_COMPONENT; @Test void validate_no_restart_on_deploy() { - VespaModel current = createModelWithComponent(OPENAI_LLM_COMPONENT); - VespaModel next = createModelWithComponent(LOCAL_LLM_COMPONENT); + VespaModel current = createModel(); + VespaModel next = createModel(withComponent(LOCAL_LLM_COMPONENT)); List result = validateModel(current, next); assertEquals(0, result.size()); } @Test void validate_restart_on_deploy() { - VespaModel current = createModelWithComponent(LOCAL_LLM_COMPONENT); - VespaModel next = createModelWithComponent(LOCAL_LLM_COMPONENT); + VespaModel current = createModel(withComponent(LOCAL_LLM_COMPONENT)); + VespaModel next = createModel(withComponent(LOCAL_LLM_COMPONENT)); List result = validateModel(current, next); assertEquals(1, result.size()); assertTrue(result.get(0).validationId().isEmpty()); @@ -46,21 +45,29 @@ public class RestartOnDeployForLocalLLMValidatorTest { deployStateBuilder().previousModel(current).build()); } - private static VespaModel createModelWithComponent(String componentClass) { + private static VespaModel createModel(String component) { var xml = """ - + %s - """.formatted(componentClass); + """.formatted(component); DeployState.Builder builder = deployStateBuilder(); return new VespaModelCreatorWithMockPkg(null, xml).create(builder); } + private static VespaModel createModel() { + return createModel(""); + } + + private static String withComponent(String componentClass) { + return "".formatted(componentClass); + } + private static DeployState.Builder deployStateBuilder() { return new DeployState.Builder().properties(new TestProperties()); } -- cgit v1.2.3 From f9902cd7b9b5a7b86f510f6680dc6739c7c3fa82 Mon Sep 17 00:00:00 2001 From: Henning Baldersheim Date: Mon, 6 May 2024 11:08:29 +0200 Subject: Avoid deprecated methods. --- .../validation/ConstantTensorJsonValidator.java | 20 ++++++------- .../yahoo/search/handler/Json2SingleLevelMap.java | 8 +++--- dependency-versions/pom.xml | 2 +- .../java/com/yahoo/document/json/JsonReader.java | 4 +-- .../com/yahoo/document/json/LazyTokenBuffer.java | 4 +-- .../java/com/yahoo/document/json/TokenBuffer.java | 2 +- .../document/json/document/DocumentParser.java | 13 +++++---- .../vespa/metricsproxy/service/MetricsParser.java | 33 +++++++++++----------- .../importer/lightgbm/LightGBMImporter.java | 4 +-- .../search/predicate/PredicateQueryParser.java | 11 ++++---- .../main/java/ai/vespa/feed/client/JsonFeeder.java | 14 ++++----- .../ai/vespa/feed/client/impl/HttpFeedClient.java | 4 +-- .../test/java/com/yahoo/slime/JsonBenchmark.java | 2 +- 13 files changed, 60 insertions(+), 61 deletions(-) (limited to 'config-model/src') diff --git a/config-model/src/main/java/com/yahoo/vespa/model/application/validation/ConstantTensorJsonValidator.java b/config-model/src/main/java/com/yahoo/vespa/model/application/validation/ConstantTensorJsonValidator.java index 40c9a03b126..02a6b243054 100644 --- a/config-model/src/main/java/com/yahoo/vespa/model/application/validation/ConstantTensorJsonValidator.java +++ b/config-model/src/main/java/com/yahoo/vespa/model/application/validation/ConstantTensorJsonValidator.java @@ -132,7 +132,7 @@ public class ConstantTensorJsonValidator { private void consumeTopObject() throws IOException { for (var cur = parser.nextToken(); cur != JsonToken.END_OBJECT; cur = parser.nextToken()) { assertCurrentTokenIs(JsonToken.FIELD_NAME); - String fieldName = parser.getCurrentName(); + String fieldName = parser.currentName(); switch (fieldName) { case FIELD_TYPE -> consumeTypeField(); case FIELD_VALUES -> consumeValuesField(); @@ -189,7 +189,7 @@ public class ConstantTensorJsonValidator { } for (var cur = parser.nextToken(); cur != JsonToken.END_OBJECT; cur = parser.nextToken()) { assertCurrentTokenIs(JsonToken.FIELD_NAME); - validateNumeric(parser.getCurrentName(), parser.nextToken()); + validateNumeric(parser.currentName(), parser.nextToken()); } } @@ -199,7 +199,7 @@ public class ConstantTensorJsonValidator { boolean seenValue = false; for (int i = 0; i < 2; i++) { assertNextTokenIs(JsonToken.FIELD_NAME); - String fieldName = parser.getCurrentName(); + String fieldName = parser.currentName(); switch (fieldName) { case FIELD_ADDRESS -> { validateTensorAddress(new HashSet<>(tensorDimensions.keySet())); @@ -228,13 +228,13 @@ public class ConstantTensorJsonValidator { // Iterate within the address key, value pairs while ((parser.nextToken() != JsonToken.END_OBJECT)) { assertCurrentTokenIs(JsonToken.FIELD_NAME); - String dimensionName = parser.getCurrentName(); + String dimensionName = parser.currentName(); TensorType.Dimension dimension = tensorDimensions.get(dimensionName); if (dimension == null) { - throw new InvalidConstantTensorException(parser, String.format("Tensor dimension '%s' does not exist", parser.getCurrentName())); + throw new InvalidConstantTensorException(parser, String.format("Tensor dimension '%s' does not exist", dimensionName)); } if (!cellDimensions.contains(dimensionName)) { - throw new InvalidConstantTensorException(parser, String.format("Duplicate tensor dimension '%s'", parser.getCurrentName())); + throw new InvalidConstantTensorException(parser, String.format("Duplicate tensor dimension '%s'", dimensionName)); } cellDimensions.remove(dimensionName); validateLabel(dimension); @@ -300,7 +300,7 @@ public class ConstantTensorJsonValidator { } private void assertCurrentTokenIs(JsonToken wantedToken) { - assertTokenIs(parser.getCurrentToken(), wantedToken); + assertTokenIs(parser.currentToken(), wantedToken); } private void assertNextTokenIs(JsonToken wantedToken) throws IOException { @@ -316,11 +316,11 @@ public class ConstantTensorJsonValidator { static class InvalidConstantTensorException extends IllegalArgumentException { InvalidConstantTensorException(JsonParser parser, String message) { - super(message + " " + parser.getCurrentLocation().toString()); + super(message + " " + parser.currentLocation().toString()); } InvalidConstantTensorException(JsonParser parser, Exception base) { - super("Failed to parse JSON stream " + parser.getCurrentLocation().toString(), base); + super("Failed to parse JSON stream " + parser.currentLocation().toString(), base); } InvalidConstantTensorException(IOException base) { @@ -412,7 +412,7 @@ public class ConstantTensorJsonValidator { boolean seenValues = false; for (int i = 0; i < 2; i++) { assertNextTokenIs(JsonToken.FIELD_NAME); - String fieldName = parser.getCurrentName(); + String fieldName = parser.currentName(); switch (fieldName) { case FIELD_ADDRESS -> { validateTensorAddress(new HashSet<>(mappedDims)); diff --git a/container-search/src/main/java/com/yahoo/search/handler/Json2SingleLevelMap.java b/container-search/src/main/java/com/yahoo/search/handler/Json2SingleLevelMap.java index 01167be6b8b..fdedbdc2fd9 100644 --- a/container-search/src/main/java/com/yahoo/search/handler/Json2SingleLevelMap.java +++ b/container-search/src/main/java/com/yahoo/search/handler/Json2SingleLevelMap.java @@ -64,8 +64,8 @@ class Json2SingleLevelMap { } void parse(Map map, String parent) throws IOException { - for (parser.nextToken(); parser.getCurrentToken() != JsonToken.END_OBJECT; parser.nextToken()) { - String fieldName = parent + parser.getCurrentName(); + for (parser.nextToken(); parser.currentToken() != JsonToken.END_OBJECT; parser.nextToken()) { + String fieldName = parent + parser.currentName(); JsonToken token = parser.nextToken(); if ((token == JsonToken.VALUE_STRING) || (token == JsonToken.VALUE_NUMBER_FLOAT) || @@ -89,9 +89,9 @@ class Json2SingleLevelMap { } private String skipChildren(JsonParser parser, byte [] input) throws IOException { - JsonLocation start = parser.getCurrentLocation(); + JsonLocation start = parser.currentLocation(); parser.skipChildren(); - JsonLocation end = parser.getCurrentLocation(); + JsonLocation end = parser.currentLocation(); int offset = (int)start.getByteOffset() - 1; return new String(input, offset, (int)(end.getByteOffset() - offset), StandardCharsets.UTF_8); } diff --git a/dependency-versions/pom.xml b/dependency-versions/pom.xml index f19ce87cc42..0d62fe138bc 100644 --- a/dependency-versions/pom.xml +++ b/dependency-versions/pom.xml @@ -37,7 +37,7 @@ 33.2.0-jre 6.0.0 3.0.0 - 2.17.0 + 2.17.1 ${jackson2.vespa.version} 2.0.1 1.2.0 diff --git a/document/src/main/java/com/yahoo/document/json/JsonReader.java b/document/src/main/java/com/yahoo/document/json/JsonReader.java index 358c0cb65e4..9c621c033bd 100644 --- a/document/src/main/java/com/yahoo/document/json/JsonReader.java +++ b/document/src/main/java/com/yahoo/document/json/JsonReader.java @@ -105,7 +105,7 @@ public class JsonReader { String condition = null; ParsedDocumentOperation operation = null; while (JsonToken.END_OBJECT != parser.nextValue()) { - switch (parser.getCurrentName()) { + switch (parser.currentName()) { case FIELDS -> { documentParseInfo.fieldsBuffer = new LazyTokenBuffer(parser); VespaJsonDocumentReader vespaJsonDocumentReader = new VespaJsonDocumentReader(typeManager.getIgnoreUndefinedFields()); @@ -177,7 +177,7 @@ public class JsonReader { state = END_OF_FEED; throw new IllegalArgumentException(r); } - if ( ! documentParseInfo.isPresent()) { + if (documentParseInfo.isEmpty()) { state = END_OF_FEED; return null; } diff --git a/document/src/main/java/com/yahoo/document/json/LazyTokenBuffer.java b/document/src/main/java/com/yahoo/document/json/LazyTokenBuffer.java index 0fbdd0b28c7..53ddacf6cc3 100644 --- a/document/src/main/java/com/yahoo/document/json/LazyTokenBuffer.java +++ b/document/src/main/java/com/yahoo/document/json/LazyTokenBuffer.java @@ -33,7 +33,7 @@ public class LazyTokenBuffer extends TokenBuffer { public Supplier lookahead() { return new Supplier<>() { int localNesting = nesting(); - Supplier buffered = LazyTokenBuffer.super.lookahead(); + final Supplier buffered = LazyTokenBuffer.super.lookahead(); @Override public Token get() { if (localNesting == 0) return null; @@ -54,7 +54,7 @@ public class LazyTokenBuffer extends TokenBuffer { JsonToken token = parser.nextValue(); if (token == null) throw new IllegalStateException("no more JSON tokens"); - return new Token(token, parser.getCurrentName(), parser.getText()); + return new Token(token, parser.currentName(), parser.getText()); } catch (IOException e) { throw new IllegalArgumentException("failed reading document JSON", e); diff --git a/document/src/main/java/com/yahoo/document/json/TokenBuffer.java b/document/src/main/java/com/yahoo/document/json/TokenBuffer.java index 3a48f71c4cd..c5c022370bf 100644 --- a/document/src/main/java/com/yahoo/document/json/TokenBuffer.java +++ b/document/src/main/java/com/yahoo/document/json/TokenBuffer.java @@ -99,7 +99,7 @@ public class TokenBuffer { } int addFromParser(JsonParser tokens) throws IOException { - add(tokens.currentToken(), tokens.getCurrentName(), tokens.getText()); + add(tokens.currentToken(), tokens.currentName(), tokens.getText()); return nestingOffset(tokens.currentToken()); } diff --git a/document/src/main/java/com/yahoo/document/json/document/DocumentParser.java b/document/src/main/java/com/yahoo/document/json/document/DocumentParser.java index 77e11dcf2a8..c5bcd356c94 100644 --- a/document/src/main/java/com/yahoo/document/json/document/DocumentParser.java +++ b/document/src/main/java/com/yahoo/document/json/document/DocumentParser.java @@ -61,7 +61,7 @@ public class DocumentParser { private boolean parseOneItem(DocumentParseInfo documentParseInfo, boolean docIdAndOperationIsSetExternally) throws IOException { parser.nextValue(); processIndent(); - if (parser.getCurrentName() == null) return false; + if (parser.currentName() == null) return false; if (indentLevel == 1L) { handleIdentLevelOne(documentParseInfo, docIdAndOperationIsSetExternally); } else if (indentLevel == 2L) { @@ -85,17 +85,18 @@ public class DocumentParser { private void handleIdentLevelOne(DocumentParseInfo documentParseInfo, boolean docIdAndOperationIsSetExternally) throws IOException { - JsonToken currentToken = parser.getCurrentToken(); + JsonToken currentToken = parser.currentToken(); + String currentName = parser.currentName(); if ((currentToken == JsonToken.VALUE_TRUE || currentToken == JsonToken.VALUE_FALSE) && - CREATE_IF_NON_EXISTENT.equals(parser.getCurrentName())) { + CREATE_IF_NON_EXISTENT.equals(currentName)) { documentParseInfo.create = Optional.of(currentToken == JsonToken.VALUE_TRUE); - } else if (currentToken == JsonToken.VALUE_STRING && CONDITION.equals(parser.getCurrentName())) { + } else if (currentToken == JsonToken.VALUE_STRING && CONDITION.equals(currentName)) { documentParseInfo.condition = Optional.of(parser.getText()); } else if (currentToken == JsonToken.VALUE_STRING) { // Value is expected to be set in the header not in the document. Ignore any unknown field // as well. if (! docIdAndOperationIsSetExternally) { - documentParseInfo.operationType = operationNameToOperationType(parser.getCurrentName()); + documentParseInfo.operationType = operationNameToOperationType(currentName); documentParseInfo.documentId = new DocumentId(parser.getText()); } } @@ -104,7 +105,7 @@ public class DocumentParser { private void handleIdentLevelTwo(DocumentParseInfo documentParseInfo) { try { // "fields" opens a dictionary and is therefore on level two which might be surprising. - if (parser.currentToken() == JsonToken.START_OBJECT && FIELDS.equals(parser.getCurrentName())) { + if (parser.currentToken() == JsonToken.START_OBJECT && FIELDS.equals(parser.currentName())) { documentParseInfo.fieldsBuffer.bufferObject(parser); processIndent(); } diff --git a/metrics-proxy/src/main/java/ai/vespa/metricsproxy/service/MetricsParser.java b/metrics-proxy/src/main/java/ai/vespa/metricsproxy/service/MetricsParser.java index 0e33d7dbf2f..052b8425a45 100644 --- a/metrics-proxy/src/main/java/ai/vespa/metricsproxy/service/MetricsParser.java +++ b/metrics-proxy/src/main/java/ai/vespa/metricsproxy/service/MetricsParser.java @@ -53,8 +53,8 @@ public class MetricsParser { throw new IOException("Expected start of object, got " + parser.currentToken()); } - for (parser.nextToken(); parser.getCurrentToken() != JsonToken.END_OBJECT; parser.nextToken()) { - String fieldName = parser.getCurrentName(); + for (parser.nextToken(); parser.currentToken() != JsonToken.END_OBJECT; parser.nextToken()) { + String fieldName = parser.currentName(); JsonToken token = parser.nextToken(); if (fieldName.equals("metrics")) { parseMetrics(parser, consumer); @@ -67,12 +67,12 @@ public class MetricsParser { } static private Instant parseSnapshot(JsonParser parser) throws IOException { - if (parser.getCurrentToken() != JsonToken.START_OBJECT) { + if (parser.currentToken() != JsonToken.START_OBJECT) { throw new IOException("Expected start of 'snapshot' object, got " + parser.currentToken()); } Instant timestamp = Instant.now(); - for (parser.nextToken(); parser.getCurrentToken() != JsonToken.END_OBJECT; parser.nextToken()) { - String fieldName = parser.getCurrentName(); + for (parser.nextToken(); parser.currentToken() != JsonToken.END_OBJECT; parser.nextToken()) { + String fieldName = parser.currentName(); JsonToken token = parser.nextToken(); if (fieldName.equals("to")) { timestamp = Instant.ofEpochSecond(parser.getLongValue()); @@ -88,12 +88,12 @@ public class MetricsParser { // 'metrics' object with 'snapshot' and 'values' arrays static private void parseMetrics(JsonParser parser, Collector consumer) throws IOException { - if (parser.getCurrentToken() != JsonToken.START_OBJECT) { + if (parser.currentToken() != JsonToken.START_OBJECT) { throw new IOException("Expected start of 'metrics' object, got " + parser.currentToken()); } Instant timestamp = Instant.now(); - for (parser.nextToken(); parser.getCurrentToken() != JsonToken.END_OBJECT; parser.nextToken()) { - String fieldName = parser.getCurrentName(); + for (parser.nextToken(); parser.currentToken() != JsonToken.END_OBJECT; parser.nextToken()) { + String fieldName = parser.currentName(); JsonToken token = parser.nextToken(); if (fieldName.equals("snapshot")) { timestamp = parseSnapshot(parser); @@ -109,7 +109,7 @@ public class MetricsParser { // 'values' array static private void parseMetricValues(JsonParser parser, Instant timestamp, Collector consumer) throws IOException { - if (parser.getCurrentToken() != JsonToken.START_ARRAY) { + if (parser.currentToken() != JsonToken.START_ARRAY) { throw new IOException("Expected start of 'metrics:values' array, got " + parser.currentToken()); } @@ -126,8 +126,8 @@ public class MetricsParser { String description = ""; Map dim = Map.of(); List> values = List.of(); - for (parser.nextToken(); parser.getCurrentToken() != JsonToken.END_OBJECT; parser.nextToken()) { - String fieldName = parser.getCurrentName(); + for (parser.nextToken(); parser.currentToken() != JsonToken.END_OBJECT; parser.nextToken()) { + String fieldName = parser.currentName(); JsonToken token = parser.nextToken(); switch (fieldName) { case "name" -> name = parser.getText(); @@ -154,8 +154,8 @@ public class MetricsParser { Set dimensions = new HashSet<>(); - for (parser.nextToken(); parser.getCurrentToken() != JsonToken.END_OBJECT; parser.nextToken()) { - String fieldName = parser.getCurrentName(); + for (parser.nextToken(); parser.currentToken() != JsonToken.END_OBJECT; parser.nextToken()) { + String fieldName = parser.currentName(); JsonToken token = parser.nextToken(); if (token == JsonToken.VALUE_STRING){ @@ -180,17 +180,16 @@ public class MetricsParser { private static List> parseValues(JsonParser parser) throws IOException { List> metrics = new ArrayList<>(); - for (parser.nextToken(); parser.getCurrentToken() != JsonToken.END_OBJECT; parser.nextToken()) { - String fieldName = parser.getCurrentName(); + for (parser.nextToken(); parser.currentToken() != JsonToken.END_OBJECT; parser.nextToken()) { + String metricName = parser.currentName(); JsonToken token = parser.nextToken(); - String metricName = fieldName; if (token == JsonToken.VALUE_NUMBER_INT) { metrics.add(Map.entry(metricName, parser.getLongValue())); } else if (token == JsonToken.VALUE_NUMBER_FLOAT) { double value = parser.getValueAsDouble(); metrics.add(Map.entry(metricName, value == ZERO_DOUBLE ? ZERO_DOUBLE : value)); } else { - throw new IllegalArgumentException("Value for aggregator '" + fieldName + "' is not a number"); + throw new IllegalArgumentException("Value for aggregator '" + metricName + "' is not a number"); } } return metrics; diff --git a/model-integration/src/main/java/ai/vespa/rankingexpression/importer/lightgbm/LightGBMImporter.java b/model-integration/src/main/java/ai/vespa/rankingexpression/importer/lightgbm/LightGBMImporter.java index e1d2f8802a6..6a1e2f2562a 100644 --- a/model-integration/src/main/java/ai/vespa/rankingexpression/importer/lightgbm/LightGBMImporter.java +++ b/model-integration/src/main/java/ai/vespa/rankingexpression/importer/lightgbm/LightGBMImporter.java @@ -34,9 +34,9 @@ public class LightGBMImporter extends ModelImporter { private boolean probe(File modelFile) { try (JsonParser parser = Jackson.mapper().createParser(modelFile)) { while (parser.nextToken() != null) { - JsonToken token = parser.getCurrentToken(); + JsonToken token = parser.currentToken(); if (token == JsonToken.FIELD_NAME) { - if ("tree_info".equals(parser.getCurrentName())) return true; + if ("tree_info".equals(parser.currentName())) return true; } } return false; diff --git a/predicate-search-core/src/main/java/com/yahoo/search/predicate/PredicateQueryParser.java b/predicate-search-core/src/main/java/com/yahoo/search/predicate/PredicateQueryParser.java index 09487506ffe..42b6195549e 100644 --- a/predicate-search-core/src/main/java/com/yahoo/search/predicate/PredicateQueryParser.java +++ b/predicate-search-core/src/main/java/com/yahoo/search/predicate/PredicateQueryParser.java @@ -10,7 +10,6 @@ import java.util.Arrays; /** * Parses predicate queries from JSON. - * * Input JSON is assumed to have the following format: * { * "features": [ @@ -46,7 +45,7 @@ public class PredicateQueryParser { try (JsonParser parser = factory.createParser(json)) { skipToken(parser, JsonToken.START_OBJECT); while (parser.nextToken() != JsonToken.END_OBJECT) { - String fieldName = parser.getCurrentName(); + String fieldName = parser.currentName(); switch (fieldName) { case "features": parseFeatures(parser, JsonParser::getText, featureHandler); @@ -82,7 +81,7 @@ public class PredicateQueryParser { long subqueryBitmap = SubqueryBitmap.DEFAULT_VALUE; // Specifying subquery bitmap is optional. while (parser.nextToken() != JsonToken.END_OBJECT) { - String fieldName = parser.getCurrentName(); + String fieldName = parser.currentName(); skipToken(parser, JsonToken.VALUE_STRING, JsonToken.VALUE_NUMBER_INT); switch (fieldName) { case "k": @@ -100,11 +99,11 @@ public class PredicateQueryParser { } if (key == null) { throw new IllegalArgumentException( - String.format("Feature key is missing! (%s)", parser.getCurrentLocation())); + String.format("Feature key is missing! (%s)", parser.currentLocation())); } if (value == null) { throw new IllegalArgumentException( - String.format("Feature value is missing! (%s)", parser.getCurrentLocation())); + String.format("Feature value is missing! (%s)", parser.currentLocation())); } featureHandler.accept(key, value, subqueryBitmap); } @@ -114,7 +113,7 @@ public class PredicateQueryParser { if (Arrays.stream(expected).noneMatch(e -> e.equals(actual))) { throw new IllegalArgumentException( String.format("Expected a token in %s, got %s (%s).", - Arrays.toString(expected), actual, parser.getTokenLocation())); + Arrays.toString(expected), actual, parser.currentTokenLocation())); } } diff --git a/vespa-feed-client-api/src/main/java/ai/vespa/feed/client/JsonFeeder.java b/vespa-feed-client-api/src/main/java/ai/vespa/feed/client/JsonFeeder.java index 11fb6526210..3111815b332 100644 --- a/vespa-feed-client-api/src/main/java/ai/vespa/feed/client/JsonFeeder.java +++ b/vespa-feed-client-api/src/main/java/ai/vespa/feed/client/JsonFeeder.java @@ -414,7 +414,7 @@ public class JsonFeeder implements Closeable { abstract String getDocumentJson(long start, long end); OperationParseException parseException(String error) { - JsonLocation location = parser.getTokenLocation(); + JsonLocation location = parser.currentLocation(); return new OperationParseException(error + " at offset " + location.getByteOffset() + " (line " + location.getLineNr() + ", column " + location.getColumnNr() + ")"); } @@ -444,13 +444,13 @@ public class JsonFeeder implements Closeable { case "create": parameters = parameters.createIfNonExistent(readBoolean()); break; case "fields": { expect(START_OBJECT); - start = parser.getTokenLocation().getByteOffset(); + start = parser.currentTokenLocation().getByteOffset(); int depth = 1; while (depth > 0) switch (parser.nextToken()) { case START_OBJECT: ++depth; break; case END_OBJECT: --depth; break; } - end = parser.getTokenLocation().getByteOffset() + 1; + end = parser.currentTokenLocation().getByteOffset() + 1; break; } default: throw parseException("Unexpected field name '" + parser.getText() + "'"); @@ -470,7 +470,7 @@ public class JsonFeeder implements Closeable { if (end >= start) throw parseException("Illegal 'fields' object for remove operation"); else - start = end = parser.getTokenLocation().getByteOffset(); // getDocumentJson advances buffer overwrite head. + start = end = parser.currentTokenLocation().getByteOffset(); // getDocumentJson advances buffer overwrite head. } else if (end < start) throw parseException("No 'fields' object for document"); @@ -486,14 +486,14 @@ public class JsonFeeder implements Closeable { private void expect(JsonToken token) throws IOException { if (parser.nextToken() != token) - throw new OperationParseException("Expected '" + token + "' at offset " + parser.getTokenLocation().getByteOffset() + + throw new OperationParseException("Expected '" + token + "' at offset " + parser.currentTokenLocation().getByteOffset() + ", but found '" + parser.currentToken() + "' (" + parser.getText() + ")"); } private String readString() throws IOException { String value = parser.nextTextValue(); if (value == null) - throw new OperationParseException("Expected '" + JsonToken.VALUE_STRING + "' at offset " + parser.getTokenLocation().getByteOffset() + + throw new OperationParseException("Expected '" + JsonToken.VALUE_STRING + "' at offset " + parser.currentTokenLocation().getByteOffset() + ", but found '" + parser.currentToken() + "' (" + parser.getText() + ")"); return value; @@ -502,7 +502,7 @@ public class JsonFeeder implements Closeable { private boolean readBoolean() throws IOException { Boolean value = parser.nextBooleanValue(); if (value == null) - throw new OperationParseException("Expected '" + JsonToken.VALUE_FALSE + "' or '" + JsonToken.VALUE_TRUE + "' at offset " + parser.getTokenLocation().getByteOffset() + + throw new OperationParseException("Expected '" + JsonToken.VALUE_FALSE + "' or '" + JsonToken.VALUE_TRUE + "' at offset " + parser.currentTokenLocation().getByteOffset() + ", but found '" + parser.currentToken() + "' (" + parser.getText() + ")"); return value; diff --git a/vespa-feed-client/src/main/java/ai/vespa/feed/client/impl/HttpFeedClient.java b/vespa-feed-client/src/main/java/ai/vespa/feed/client/impl/HttpFeedClient.java index a30cfd5ec39..9dd11113c0b 100644 --- a/vespa-feed-client/src/main/java/ai/vespa/feed/client/impl/HttpFeedClient.java +++ b/vespa-feed-client/src/main/java/ai/vespa/feed/client/impl/HttpFeedClient.java @@ -219,13 +219,13 @@ class HttpFeedClient implements FeedClient { throw new ResultParseException(documentId, "Expected 'trace' to be an array, but got '" + parser.currentToken() + "' in: " + new String(json, UTF_8)); - int start = (int) parser.getTokenLocation().getByteOffset(); + int start = (int) parser.currentTokenLocation().getByteOffset(); int depth = 1; while (depth > 0) switch (parser.nextToken()) { case START_ARRAY: ++depth; break; case END_ARRAY: --depth; break; } - int end = (int) parser.getTokenLocation().getByteOffset() + 1; + int end = (int) parser.currentTokenLocation().getByteOffset() + 1; trace = new String(json, start, end - start, UTF_8); break; default: diff --git a/vespajlib/src/test/java/com/yahoo/slime/JsonBenchmark.java b/vespajlib/src/test/java/com/yahoo/slime/JsonBenchmark.java index ee755a44010..cccc9667e11 100644 --- a/vespajlib/src/test/java/com/yahoo/slime/JsonBenchmark.java +++ b/vespajlib/src/test/java/com/yahoo/slime/JsonBenchmark.java @@ -43,7 +43,7 @@ public class JsonBenchmark { try (JsonParser jsonParser = jsonFactory.createParser(json)) { JsonToken array = jsonParser.nextToken(); for (JsonToken token = jsonParser.nextToken(); !JsonToken.END_ARRAY.equals(token); token = jsonParser.nextToken()) { - if (JsonToken.FIELD_NAME.equals(token) && "weight".equals(jsonParser.getCurrentName())) { + if (JsonToken.FIELD_NAME.equals(token) && "weight".equals(jsonParser.currentName())) { token = jsonParser.nextToken(); count += jsonParser.getLongValue(); } -- cgit v1.2.3 From 633782b31fed623d6d2f2a48d8bebbc5b9840be0 Mon Sep 17 00:00:00 2001 From: Bjørn Christian Seime Date: Mon, 6 May 2024 11:25:02 +0200 Subject: Configure signficance searcher through `SchemaInfo` --- .../main/java/com/yahoo/schema/RankProfile.java | 5 +-- .../com/yahoo/schema/derived/RawRankProfile.java | 5 --- .../java/com/yahoo/schema/derived/SchemaInfo.java | 13 ++++--- .../schema-info.cfg | 2 ++ .../neuralnet_noqueryprofile/schema-info.cfg | 4 +++ .../test/derived/rankingexpression/schema-info.cfg | 29 ++++++++++++++++ .../derived/rankprofilemodularity/schema-info.cfg | 8 +++++ .../test/derived/schemainheritance/schema-info.cfg | 4 +++ container-search/abi-spec.json | 2 ++ .../java/com/yahoo/search/schema/RankProfile.java | 11 +++++- .../search/significance/SignificanceSearcher.java | 40 ++++++++++++++-------- .../container.search.schema-info.def | 1 + .../test/SignificanceSearcherTest.java | 22 ++++++------ .../config/with_significance/rank-profiles.cfg | 3 -- 14 files changed, 107 insertions(+), 42 deletions(-) delete mode 100644 container-search/src/test/resources/config/with_significance/rank-profiles.cfg (limited to 'config-model/src') diff --git a/config-model/src/main/java/com/yahoo/schema/RankProfile.java b/config-model/src/main/java/com/yahoo/schema/RankProfile.java index cdefbbf8174..60674b5487c 100644 --- a/config-model/src/main/java/com/yahoo/schema/RankProfile.java +++ b/config-model/src/main/java/com/yahoo/schema/RankProfile.java @@ -222,9 +222,10 @@ public class RankProfile implements Cloneable { this.useSignificanceModel = useSignificanceModel; } - public Boolean useSignificanceModel() { + public boolean useSignificanceModel() { if (useSignificanceModel != null) return useSignificanceModel; - return uniquelyInherited(p -> p.useSignificanceModel(), "use-model").orElse(null); + return uniquelyInherited(p -> p.useSignificanceModel(), "use-model") + .orElse(false); // Disabled by default } /** diff --git a/config-model/src/main/java/com/yahoo/schema/derived/RawRankProfile.java b/config-model/src/main/java/com/yahoo/schema/derived/RawRankProfile.java index 42586fa7d75..b057624f055 100644 --- a/config-model/src/main/java/com/yahoo/schema/derived/RawRankProfile.java +++ b/config-model/src/main/java/com/yahoo/schema/derived/RawRankProfile.java @@ -186,7 +186,6 @@ public class RawRankProfile { private RankingExpression globalPhaseRanking; private final int globalPhaseRerankCount; private final SerializationContext functionSerializationContext; - private Boolean useSignificanceModel; /** * Creates a raw rank profile from the given rank profile @@ -222,7 +221,6 @@ public class RawRankProfile { rankScoreDropLimit = compiled.getRankScoreDropLimit(); ignoreDefaultRankFeatures = compiled.getIgnoreDefaultRankFeatures(); rankProperties = new ArrayList<>(compiled.getRankProperties()); - useSignificanceModel = compiled.useSignificanceModel(); Map functions = compiled.getFunctions(); List functionExpressions = functions.values().stream().map(RankProfile.RankingExpressionFunction::function).toList(); @@ -481,9 +479,6 @@ public class RawRankProfile { if (targetHitsMaxAdjustmentFactor.isPresent()) { properties.add(new Pair<>("vespa.matching.nns.target_hits_max_adjustment_factor", String.valueOf(targetHitsMaxAdjustmentFactor.getAsDouble()))); } - if (useSignificanceModel != null) { - properties.add(new Pair<>("vespa.significance.use_model", String.valueOf(useSignificanceModel))); - } if (matchPhaseSettings != null) { properties.add(new Pair<>("vespa.matchphase.degradation.attribute", matchPhaseSettings.getAttribute())); properties.add(new Pair<>("vespa.matchphase.degradation.ascendingorder", matchPhaseSettings.getAscending() + "")); diff --git a/config-model/src/main/java/com/yahoo/schema/derived/SchemaInfo.java b/config-model/src/main/java/com/yahoo/schema/derived/SchemaInfo.java index f996b2624db..b91404be2dd 100644 --- a/config-model/src/main/java/com/yahoo/schema/derived/SchemaInfo.java +++ b/config-model/src/main/java/com/yahoo/schema/derived/SchemaInfo.java @@ -183,10 +183,12 @@ public final class SchemaInfo extends Derived { private void addRankProfilesConfig(SchemaInfoConfig.Schema.Builder schemaBuilder) { for (RankProfileInfo rankProfile : rankProfiles().values()) { - var rankProfileConfig = new SchemaInfoConfig.Schema.Rankprofile.Builder(); - rankProfileConfig.name(rankProfile.name()); - rankProfileConfig.hasSummaryFeatures(rankProfile.hasSummaryFeatures()); - rankProfileConfig.hasRankFeatures(rankProfile.hasRankFeatures()); + var rankProfileConfig = new SchemaInfoConfig.Schema.Rankprofile.Builder() + .name(rankProfile.name()) + .hasSummaryFeatures(rankProfile.hasSummaryFeatures()) + .hasRankFeatures(rankProfile.hasRankFeatures()) + .significance(new SchemaInfoConfig.Schema.Rankprofile.Significance.Builder() + .useModel(rankProfile.useSignificanceModel())); for (var input : rankProfile.inputs().entrySet()) { var inputConfig = new SchemaInfoConfig.Schema.Rankprofile.Input.Builder(); inputConfig.name(input.getKey().toString()); @@ -226,6 +228,7 @@ public final class SchemaInfo extends Derived { private final String name; private final boolean hasSummaryFeatures; private final boolean hasRankFeatures; + private final boolean useSignificanceModel; private final Map inputs; public RankProfileInfo(RankProfile profile) { @@ -233,11 +236,13 @@ public final class SchemaInfo extends Derived { this.hasSummaryFeatures = ! profile.getSummaryFeatures().isEmpty(); this.hasRankFeatures = ! profile.getRankFeatures().isEmpty(); this.inputs = profile.inputs(); + useSignificanceModel = profile.useSignificanceModel(); } public String name() { return name; } public boolean hasSummaryFeatures() { return hasSummaryFeatures; } public boolean hasRankFeatures() { return hasRankFeatures; } + public boolean useSignificanceModel() { return useSignificanceModel; } public Map inputs() { return inputs; } } diff --git a/config-model/src/test/derived/imported_position_field_summary/schema-info.cfg b/config-model/src/test/derived/imported_position_field_summary/schema-info.cfg index f820ad9720b..5a474f62e07 100644 --- a/config-model/src/test/derived/imported_position_field_summary/schema-info.cfg +++ b/config-model/src/test/derived/imported_position_field_summary/schema-info.cfg @@ -53,6 +53,8 @@ schema[].summaryclass[].fields[].dynamic false schema[].rankprofile[].name "default" schema[].rankprofile[].hasSummaryFeatures false schema[].rankprofile[].hasRankFeatures false +schema[].rankprofile[].significance.useModel false schema[].rankprofile[].name "unranked" schema[].rankprofile[].hasSummaryFeatures false schema[].rankprofile[].hasRankFeatures false +schema[].rankprofile[].significance.useModel false diff --git a/config-model/src/test/derived/neuralnet_noqueryprofile/schema-info.cfg b/config-model/src/test/derived/neuralnet_noqueryprofile/schema-info.cfg index 728856abbf2..8f59c21e97f 100644 --- a/config-model/src/test/derived/neuralnet_noqueryprofile/schema-info.cfg +++ b/config-model/src/test/derived/neuralnet_noqueryprofile/schema-info.cfg @@ -156,6 +156,7 @@ schema[].summaryclass[].fields[].dynamic false schema[].rankprofile[].name "default" schema[].rankprofile[].hasSummaryFeatures false schema[].rankprofile[].hasRankFeatures false +schema[].rankprofile[].significance.useModel false schema[].rankprofile[].input[].name "query(W_0)" schema[].rankprofile[].input[].type "tensor(hidden[9],x[9])" schema[].rankprofile[].input[].name "query(b_0)" @@ -173,9 +174,11 @@ schema[].rankprofile[].input[].type "tensor()" schema[].rankprofile[].name "unranked" schema[].rankprofile[].hasSummaryFeatures false schema[].rankprofile[].hasRankFeatures false +schema[].rankprofile[].significance.useModel false schema[].rankprofile[].name "defaultRankProfile" schema[].rankprofile[].hasSummaryFeatures false schema[].rankprofile[].hasRankFeatures false +schema[].rankprofile[].significance.useModel false schema[].rankprofile[].input[].name "query(W_0)" schema[].rankprofile[].input[].type "tensor(hidden[9],x[9])" schema[].rankprofile[].input[].name "query(b_0)" @@ -193,6 +196,7 @@ schema[].rankprofile[].input[].type "tensor()" schema[].rankprofile[].name "neuralNetworkProfile" schema[].rankprofile[].hasSummaryFeatures false schema[].rankprofile[].hasRankFeatures false +schema[].rankprofile[].significance.useModel false schema[].rankprofile[].input[].name "query(W_0)" schema[].rankprofile[].input[].type "tensor(hidden[9],x[9])" schema[].rankprofile[].input[].name "query(b_0)" diff --git a/config-model/src/test/derived/rankingexpression/schema-info.cfg b/config-model/src/test/derived/rankingexpression/schema-info.cfg index 5bf01f10ede..f78eb7de310 100644 --- a/config-model/src/test/derived/rankingexpression/schema-info.cfg +++ b/config-model/src/test/derived/rankingexpression/schema-info.cfg @@ -148,96 +148,125 @@ schema[].summaryclass[].fields[].dynamic false schema[].rankprofile[].name "default" schema[].rankprofile[].hasSummaryFeatures false schema[].rankprofile[].hasRankFeatures true +schema[].rankprofile[].significance.useModel false schema[].rankprofile[].name "unranked" schema[].rankprofile[].hasSummaryFeatures false schema[].rankprofile[].hasRankFeatures false +schema[].rankprofile[].significance.useModel false schema[].rankprofile[].name "static" schema[].rankprofile[].hasSummaryFeatures true schema[].rankprofile[].hasRankFeatures false +schema[].rankprofile[].significance.useModel false schema[].rankprofile[].name "overflow" schema[].rankprofile[].hasSummaryFeatures false schema[].rankprofile[].hasRankFeatures false +schema[].rankprofile[].significance.useModel false schema[].rankprofile[].name "duplicates" schema[].rankprofile[].hasSummaryFeatures false schema[].rankprofile[].hasRankFeatures false +schema[].rankprofile[].significance.useModel false schema[].rankprofile[].name "whitespace1" schema[].rankprofile[].hasSummaryFeatures false schema[].rankprofile[].hasRankFeatures false +schema[].rankprofile[].significance.useModel false schema[].rankprofile[].name "whitespace2" schema[].rankprofile[].hasSummaryFeatures false schema[].rankprofile[].hasRankFeatures false +schema[].rankprofile[].significance.useModel false schema[].rankprofile[].name "macros" schema[].rankprofile[].hasSummaryFeatures true schema[].rankprofile[].hasRankFeatures false +schema[].rankprofile[].significance.useModel false schema[].rankprofile[].name "macros2" schema[].rankprofile[].hasSummaryFeatures true schema[].rankprofile[].hasRankFeatures false +schema[].rankprofile[].significance.useModel false schema[].rankprofile[].name "macros3" schema[].rankprofile[].hasSummaryFeatures true schema[].rankprofile[].hasRankFeatures false +schema[].rankprofile[].significance.useModel false schema[].rankprofile[].name "macros3-inherited" schema[].rankprofile[].hasSummaryFeatures true schema[].rankprofile[].hasRankFeatures false +schema[].rankprofile[].significance.useModel false schema[].rankprofile[].name "macros-inherited" schema[].rankprofile[].hasSummaryFeatures true schema[].rankprofile[].hasRankFeatures false +schema[].rankprofile[].significance.useModel false schema[].rankprofile[].name "macros-inherited2" schema[].rankprofile[].hasSummaryFeatures true schema[].rankprofile[].hasRankFeatures false +schema[].rankprofile[].significance.useModel false schema[].rankprofile[].name "macros-inherited3" schema[].rankprofile[].hasSummaryFeatures true schema[].rankprofile[].hasRankFeatures false +schema[].rankprofile[].significance.useModel false schema[].rankprofile[].name "macros-refering-macros" schema[].rankprofile[].hasSummaryFeatures false schema[].rankprofile[].hasRankFeatures false +schema[].rankprofile[].significance.useModel false schema[].rankprofile[].name "macros-refering-macros-inherited" schema[].rankprofile[].hasSummaryFeatures false schema[].rankprofile[].hasRankFeatures false +schema[].rankprofile[].significance.useModel false schema[].rankprofile[].name "macros-refering-macros-inherited2" schema[].rankprofile[].hasSummaryFeatures false schema[].rankprofile[].hasRankFeatures false +schema[].rankprofile[].significance.useModel false schema[].rankprofile[].name "macros-refering-macros-inherited-two-levels" schema[].rankprofile[].hasSummaryFeatures false schema[].rankprofile[].hasRankFeatures false +schema[].rankprofile[].significance.useModel false schema[].rankprofile[].name "withmf" schema[].rankprofile[].hasSummaryFeatures false schema[].rankprofile[].hasRankFeatures false +schema[].rankprofile[].significance.useModel false schema[].rankprofile[].name "withboolean" schema[].rankprofile[].hasSummaryFeatures false schema[].rankprofile[].hasRankFeatures false +schema[].rankprofile[].significance.useModel false schema[].rankprofile[].name "withglobalphase" schema[].rankprofile[].hasSummaryFeatures false schema[].rankprofile[].hasRankFeatures false +schema[].rankprofile[].significance.useModel false schema[].rankprofile[].name "layered" schema[].rankprofile[].hasSummaryFeatures false schema[].rankprofile[].hasRankFeatures false +schema[].rankprofile[].significance.useModel false schema[].rankprofile[].input[].name "query(v)" schema[].rankprofile[].input[].type "tensor(v[3])" schema[].rankprofile[].name "withtfl" schema[].rankprofile[].hasSummaryFeatures false schema[].rankprofile[].hasRankFeatures false +schema[].rankprofile[].significance.useModel false schema[].rankprofile[].input[].name "query(v)" schema[].rankprofile[].input[].type "tensor(v[3])" schema[].rankprofile[].name "withtfl2" schema[].rankprofile[].hasSummaryFeatures false schema[].rankprofile[].hasRankFeatures false +schema[].rankprofile[].significance.useModel false schema[].rankprofile[].input[].name "query(v)" schema[].rankprofile[].input[].type "tensor(v[3])" schema[].rankprofile[].name "withnorm" schema[].rankprofile[].hasSummaryFeatures false schema[].rankprofile[].hasRankFeatures false +schema[].rankprofile[].significance.useModel false schema[].rankprofile[].name "withfusion" schema[].rankprofile[].hasSummaryFeatures false schema[].rankprofile[].hasRankFeatures false +schema[].rankprofile[].significance.useModel false schema[].rankprofile[].name "function-with-arg-as-summary-feature" schema[].rankprofile[].hasSummaryFeatures true schema[].rankprofile[].hasRankFeatures false +schema[].rankprofile[].significance.useModel false schema[].rankprofile[].name "function-with-arg-in-global-phase" schema[].rankprofile[].hasSummaryFeatures false schema[].rankprofile[].hasRankFeatures false +schema[].rankprofile[].significance.useModel false schema[].rankprofile[].name "withstringcompare" schema[].rankprofile[].hasSummaryFeatures false schema[].rankprofile[].hasRankFeatures false +schema[].rankprofile[].significance.useModel false schema[].rankprofile[].input[].name "query(myquerystring)" schema[].rankprofile[].input[].type "string" schema[].rankprofile[].input[].name "query(mybadlong)" diff --git a/config-model/src/test/derived/rankprofilemodularity/schema-info.cfg b/config-model/src/test/derived/rankprofilemodularity/schema-info.cfg index 377c10d3293..68892737e63 100644 --- a/config-model/src/test/derived/rankprofilemodularity/schema-info.cfg +++ b/config-model/src/test/derived/rankprofilemodularity/schema-info.cfg @@ -18,24 +18,32 @@ schema[].summaryclass[].fields[].dynamic false schema[].rankprofile[].name "default" schema[].rankprofile[].hasSummaryFeatures false schema[].rankprofile[].hasRankFeatures false +schema[].rankprofile[].significance.useModel false schema[].rankprofile[].name "unranked" schema[].rankprofile[].hasSummaryFeatures false schema[].rankprofile[].hasRankFeatures false +schema[].rankprofile[].significance.useModel false schema[].rankprofile[].name "in_schema0" schema[].rankprofile[].hasSummaryFeatures false schema[].rankprofile[].hasRankFeatures false +schema[].rankprofile[].significance.useModel false schema[].rankprofile[].name "in_schema1" schema[].rankprofile[].hasSummaryFeatures false schema[].rankprofile[].hasRankFeatures false +schema[].rankprofile[].significance.useModel false schema[].rankprofile[].name "in_schema2" schema[].rankprofile[].hasSummaryFeatures false schema[].rankprofile[].hasRankFeatures false +schema[].rankprofile[].significance.useModel false schema[].rankprofile[].name "in_schema3" schema[].rankprofile[].hasSummaryFeatures false schema[].rankprofile[].hasRankFeatures false +schema[].rankprofile[].significance.useModel false schema[].rankprofile[].name "outside_schema1" schema[].rankprofile[].hasSummaryFeatures false schema[].rankprofile[].hasRankFeatures false +schema[].rankprofile[].significance.useModel false schema[].rankprofile[].name "outside_schema2" schema[].rankprofile[].hasSummaryFeatures false schema[].rankprofile[].hasRankFeatures false +schema[].rankprofile[].significance.useModel false diff --git a/config-model/src/test/derived/schemainheritance/schema-info.cfg b/config-model/src/test/derived/schemainheritance/schema-info.cfg index 9fe71780c7a..466e66ad0bb 100644 --- a/config-model/src/test/derived/schemainheritance/schema-info.cfg +++ b/config-model/src/test/derived/schemainheritance/schema-info.cfg @@ -116,12 +116,16 @@ schema[].summaryclass[].fields[].dynamic false schema[].rankprofile[].name "default" schema[].rankprofile[].hasSummaryFeatures false schema[].rankprofile[].hasRankFeatures false +schema[].rankprofile[].significance.useModel false schema[].rankprofile[].name "unranked" schema[].rankprofile[].hasSummaryFeatures false schema[].rankprofile[].hasRankFeatures false +schema[].rankprofile[].significance.useModel false schema[].rankprofile[].name "child_profile" schema[].rankprofile[].hasSummaryFeatures false schema[].rankprofile[].hasRankFeatures false +schema[].rankprofile[].significance.useModel false schema[].rankprofile[].name "parent_profile" schema[].rankprofile[].hasSummaryFeatures false schema[].rankprofile[].hasRankFeatures false +schema[].rankprofile[].significance.useModel false diff --git a/container-search/abi-spec.json b/container-search/abi-spec.json index d85f1844b18..1c6c773afd9 100644 --- a/container-search/abi-spec.json +++ b/container-search/abi-spec.json @@ -8539,6 +8539,7 @@ "public com.yahoo.search.schema.RankProfile$Builder setHasSummaryFeatures(boolean)", "public com.yahoo.search.schema.RankProfile$Builder setHasRankFeatures(boolean)", "public com.yahoo.search.schema.RankProfile$Builder addInput(java.lang.String, com.yahoo.search.schema.RankProfile$InputType)", + "public com.yahoo.search.schema.RankProfile$Builder setUseSignificanceModel(boolean)", "public com.yahoo.search.schema.RankProfile build()" ], "fields" : [ ] @@ -8573,6 +8574,7 @@ "public com.yahoo.search.schema.Schema schema()", "public boolean hasSummaryFeatures()", "public boolean hasRankFeatures()", + "public boolean useSignificanceModel()", "public java.util.Map inputs()", "public boolean equals(java.lang.Object)", "public int hashCode()", diff --git a/container-search/src/main/java/com/yahoo/search/schema/RankProfile.java b/container-search/src/main/java/com/yahoo/search/schema/RankProfile.java index a5b8d328a7a..9583e9885e7 100644 --- a/container-search/src/main/java/com/yahoo/search/schema/RankProfile.java +++ b/container-search/src/main/java/com/yahoo/search/schema/RankProfile.java @@ -36,6 +36,7 @@ public class RankProfile { private final String name; private final boolean hasSummaryFeatures; private final boolean hasRankFeatures; + private final boolean useSignificanceModel; private final Map inputs; // Assigned when this is added to a schema @@ -45,6 +46,7 @@ public class RankProfile { this.name = builder.name; this.hasSummaryFeatures = builder.hasSummaryFeatures; this.hasRankFeatures = builder.hasRankFeatures; + this.useSignificanceModel = builder.useSignificanceModel; this.inputs = Collections.unmodifiableMap(builder.inputs); } @@ -66,6 +68,9 @@ public class RankProfile { /** Returns true if this rank profile has rank features. */ public boolean hasRankFeatures() { return hasRankFeatures; } + /** Returns true if this rank profile should use significance models. */ + public boolean useSignificanceModel() { return useSignificanceModel; } + /** Returns the inputs explicitly declared in this rank profile. */ public Map inputs() { return inputs; } @@ -76,13 +81,14 @@ public class RankProfile { if ( ! other.name.equals(this.name)) return false; if ( other.hasSummaryFeatures != this.hasSummaryFeatures) return false; if ( other.hasRankFeatures != this.hasRankFeatures) return false; + if ( other.useSignificanceModel != this.useSignificanceModel) return false; if ( ! other.inputs.equals(this.inputs)) return false; return true; } @Override public int hashCode() { - return Objects.hash(name, hasSummaryFeatures, hasRankFeatures, inputs); + return Objects.hash(name, hasSummaryFeatures, hasRankFeatures, useSignificanceModel, inputs); } @Override @@ -95,6 +101,7 @@ public class RankProfile { private final String name; private boolean hasSummaryFeatures = true; private boolean hasRankFeatures = true; + private boolean useSignificanceModel = false; private final Map inputs = new LinkedHashMap<>(); public Builder(String name) { @@ -116,6 +123,8 @@ public class RankProfile { return this; } + public Builder setUseSignificanceModel(boolean use) { this.useSignificanceModel = use; return this; } + public RankProfile build() { return new RankProfile(this); } diff --git a/container-search/src/main/java/com/yahoo/search/significance/SignificanceSearcher.java b/container-search/src/main/java/com/yahoo/search/significance/SignificanceSearcher.java index 6cef576f967..d2e48538eee 100644 --- a/container-search/src/main/java/com/yahoo/search/significance/SignificanceSearcher.java +++ b/container-search/src/main/java/com/yahoo/search/significance/SignificanceSearcher.java @@ -15,11 +15,17 @@ import com.yahoo.search.Query; import com.yahoo.search.Result; import com.yahoo.search.Searcher; import com.yahoo.search.query.Ranking; +import com.yahoo.search.schema.RankProfile; +import com.yahoo.search.schema.Schema; +import com.yahoo.search.schema.SchemaInfo; import com.yahoo.search.searchchain.Execution; import com.yahoo.vespa.config.search.RankProfilesConfig; +import java.util.Collection; import java.util.HashMap; +import java.util.Objects; import java.util.Optional; +import java.util.logging.Logger; import static com.yahoo.prelude.querytransform.StemmingSearcher.STEMMING; @@ -34,30 +40,34 @@ import static com.yahoo.prelude.querytransform.StemmingSearcher.STEMMING; public class SignificanceSearcher extends Searcher { public final static String SIGNIFICANCE = "Significance"; - private final SignificanceModelRegistry significanceModelRegistry; - private final RankProfilesConfig rankProfilesConfig; - private final HashMap useModel = new HashMap<>(); + private static final Logger log = Logger.getLogger(SignificanceSearcher.class.getName()); + private final SignificanceModelRegistry significanceModelRegistry; + private final SchemaInfo schemaInfo; @Inject - public SignificanceSearcher(SignificanceModelRegistry significanceModelRegistry, RankProfilesConfig rankProfilesConfig) { + public SignificanceSearcher(SignificanceModelRegistry significanceModelRegistry, SchemaInfo schemaInfo) { this.significanceModelRegistry = significanceModelRegistry; - this.rankProfilesConfig = rankProfilesConfig; - - for (RankProfilesConfig.Rankprofile profile : rankProfilesConfig.rankprofile()) { - for (RankProfilesConfig.Rankprofile.Fef.Property property : profile.fef().property()) { - if (property.name().equals("vespa.significance.use_model")) { - useModel.put(profile.name(), Boolean.parseBoolean(property.value())); - } - } - } + this.schemaInfo = schemaInfo; } @Override public Result search(Query query, Execution execution) { - Ranking ranking = query.getRanking(); - if (!useModel.containsKey(ranking.getProfile()) || !useModel.get(ranking.getProfile())) return execution.search(query); + var rankProfileName = query.getRanking().getProfile(); + var schemas = schemaInfo.newSession(query).schemas(); + var useSignficanceConfiguration = schemas.stream() + .map(schema -> schema.rankProfiles().get(rankProfileName)) + .filter(Objects::nonNull) + .map(RankProfile::useSignificanceModel) + .distinct().toList(); + if (useSignficanceConfiguration.size() != 1) { + log.fine(() -> "Inconsistent 'signficance.use-model' configuration for rank profile '%s' for schemas %s. Fallback to disabled" + .formatted(rankProfileName, schemas.stream().map(Schema::name).toList())); + return execution.search(query); + } + + if (!useSignficanceConfiguration.get(0)) return execution.search(query); Language language = query.getModel().getParsingLanguage(); Optional model = significanceModelRegistry.getModel(language); diff --git a/container-search/src/main/resources/configdefinitions/container.search.schema-info.def b/container-search/src/main/resources/configdefinitions/container.search.schema-info.def index 989fbb16973..086b47f5ae5 100644 --- a/container-search/src/main/resources/configdefinitions/container.search.schema-info.def +++ b/container-search/src/main/resources/configdefinitions/container.search.schema-info.def @@ -28,6 +28,7 @@ schema[].summaryclass[].fields[].dynamic bool default=false schema[].rankprofile[].name string schema[].rankprofile[].hasSummaryFeatures bool default=true schema[].rankprofile[].hasRankFeatures bool default=true +schema[].rankprofile[].significance.useModel bool default=false # The name of an input (query rank feature) accepted by this profile schema[].rankprofile[].input[].name string # The tensor type of an input (query rank feature) accepted by this profile diff --git a/container-search/src/test/java/com/yahoo/search/significance/test/SignificanceSearcherTest.java b/container-search/src/test/java/com/yahoo/search/significance/test/SignificanceSearcherTest.java index ed67798b4b1..abda1df473f 100644 --- a/container-search/src/test/java/com/yahoo/search/significance/test/SignificanceSearcherTest.java +++ b/container-search/src/test/java/com/yahoo/search/significance/test/SignificanceSearcherTest.java @@ -11,6 +11,10 @@ import com.yahoo.prelude.query.AndItem; import com.yahoo.prelude.query.WordItem; import com.yahoo.search.Query; import com.yahoo.search.Result; +import com.yahoo.search.schema.DocumentSummary; +import com.yahoo.search.schema.RankProfile; +import com.yahoo.search.schema.Schema; +import com.yahoo.search.schema.SchemaInfo; import com.yahoo.search.searchchain.Execution; import com.yahoo.search.significance.SignificanceSearcher; import com.yahoo.vespa.config.search.RankProfilesConfig; @@ -33,24 +37,18 @@ public class SignificanceSearcherTest { SignificanceModelRegistry significanceModelRegistry; SignificanceSearcher searcher; - private static final String CONFIG_DIR = "src/test/resources/config/"; public SignificanceSearcherTest() { List models = new ArrayList<>(); models.add( Path.of("src/test/java/com/yahoo/search/significance/model/en.json")); - RankProfilesConfig rpCfg = readConfig("with_significance"); - - assertEquals(1, rpCfg.rankprofile().size()); - + var schema = new Schema.Builder("music") + .add(new DocumentSummary.Builder("default").build()) + .add(new RankProfile.Builder("significance-ranking") + .setUseSignificanceModel(true) + .build()); significanceModelRegistry = new DefaultSignificanceModelRegistry(models); - searcher = new SignificanceSearcher(significanceModelRegistry, rpCfg); - } - - @SuppressWarnings("deprecation") - private RankProfilesConfig readConfig(String subDir) { - String cfgId = "file:" + CONFIG_DIR + subDir + "/rank-profiles.cfg"; - return ConfigGetter.getConfig(RankProfilesConfig.class, cfgId); + searcher = new SignificanceSearcher(significanceModelRegistry, new SchemaInfo(List.of(schema.build()), List.of())); } private Execution createExecution(SignificanceSearcher searcher) { diff --git a/container-search/src/test/resources/config/with_significance/rank-profiles.cfg b/container-search/src/test/resources/config/with_significance/rank-profiles.cfg deleted file mode 100644 index 1dc1be62862..00000000000 --- a/container-search/src/test/resources/config/with_significance/rank-profiles.cfg +++ /dev/null @@ -1,3 +0,0 @@ -rankprofile[0].name "significance-ranking" -rankprofile[0].fef.property[0].name "vespa.significance.use_model" -rankprofile[0].fef.property[0].value "true" \ No newline at end of file -- cgit v1.2.3 From 2a08cdf582e5bbd361863e8e7bf27048e8b00389 Mon Sep 17 00:00:00 2001 From: Tor Egge Date: Mon, 6 May 2024 12:37:44 +0200 Subject: Rename max token length to max tokenize length in linguistics annotator config. --- config-model/src/main/java/com/yahoo/schema/processing/TextMatch.java | 2 +- docprocs/src/main/java/com/yahoo/docprocs/indexing/ScriptManager.java | 2 +- .../yahoo/vespa/indexinglanguage/expressions/TokenizeExpression.java | 2 +- .../com/yahoo/vespa/indexinglanguage/linguistics/AnnotatorConfig.java | 4 ++-- indexinglanguage/src/main/javacc/IndexingParser.jj | 2 +- .../indexinglanguage/linguistics/LinguisticsAnnotatorTestCase.java | 2 +- 6 files changed, 7 insertions(+), 7 deletions(-) (limited to 'config-model/src') diff --git a/config-model/src/main/java/com/yahoo/schema/processing/TextMatch.java b/config-model/src/main/java/com/yahoo/schema/processing/TextMatch.java index e29f683761f..1ff019038fc 100644 --- a/config-model/src/main/java/com/yahoo/schema/processing/TextMatch.java +++ b/config-model/src/main/java/com/yahoo/schema/processing/TextMatch.java @@ -64,7 +64,7 @@ public class TextMatch extends Processor { if (fieldMatching != null) { var maxLength = fieldMatching.maxLength(); if (maxLength != null) { - ret.setMaxTokenLength(maxLength); + ret.setMaxTokenizeLength(maxLength); } var maxTermOccurrences = fieldMatching.maxTermOccurrences(); if (maxTermOccurrences != null) { diff --git a/docprocs/src/main/java/com/yahoo/docprocs/indexing/ScriptManager.java b/docprocs/src/main/java/com/yahoo/docprocs/indexing/ScriptManager.java index 86b0a2e78ad..3088083912b 100644 --- a/docprocs/src/main/java/com/yahoo/docprocs/indexing/ScriptManager.java +++ b/docprocs/src/main/java/com/yahoo/docprocs/indexing/ScriptManager.java @@ -72,7 +72,7 @@ public class ScriptManager { Map> documentFieldScripts = new HashMap<>(config.ilscript().size()); ScriptParserContext parserContext = new ScriptParserContext(linguistics, embedders); parserContext.getAnnotatorConfig().setMaxTermOccurrences(config.maxtermoccurrences()); - parserContext.getAnnotatorConfig().setMaxTokenLength(config.fieldmatchmaxlength()); + parserContext.getAnnotatorConfig().setMaxTokenizeLength(config.fieldmatchmaxlength()); for (IlscriptsConfig.Ilscript ilscript : config.ilscript()) { DocumentType documentType = docTypeMgr.getDocumentType(ilscript.doctype()); diff --git a/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/expressions/TokenizeExpression.java b/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/expressions/TokenizeExpression.java index b807ad4cb65..849bc075a64 100644 --- a/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/expressions/TokenizeExpression.java +++ b/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/expressions/TokenizeExpression.java @@ -66,7 +66,7 @@ public final class TokenizeExpression extends Expression { if (config.getStemMode() != StemMode.NONE) { ret.append(" stem:\""+config.getStemMode()+"\""); } - if (config.hasNonDefaultMaxTokenLength()) { + if (config.hasNonDefaultMaxTokenizeLength()) { ret.append(" max-length:" + config.getMaxTokenizeLength()); } if (config.hasNonDefaultMaxTermOccurrences()) { diff --git a/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/linguistics/AnnotatorConfig.java b/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/linguistics/AnnotatorConfig.java index 7b6f350d831..4e5ef0d90df 100644 --- a/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/linguistics/AnnotatorConfig.java +++ b/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/linguistics/AnnotatorConfig.java @@ -82,7 +82,7 @@ public class AnnotatorConfig implements Cloneable { return this; } - public AnnotatorConfig setMaxTokenLength(int maxTokenizeLength) { + public AnnotatorConfig setMaxTokenizeLength(int maxTokenizeLength) { this.maxTokenizeLength = maxTokenizeLength; return this; } @@ -91,7 +91,7 @@ public class AnnotatorConfig implements Cloneable { return maxTokenizeLength; } - public boolean hasNonDefaultMaxTokenLength() { + public boolean hasNonDefaultMaxTokenizeLength() { return maxTokenizeLength != DEFAULT_MAX_TOKENIZE_LENGTH; } diff --git a/indexinglanguage/src/main/javacc/IndexingParser.jj b/indexinglanguage/src/main/javacc/IndexingParser.jj index 469d96ead60..77591d3e54e 100644 --- a/indexinglanguage/src/main/javacc/IndexingParser.jj +++ b/indexinglanguage/src/main/javacc/IndexingParser.jj @@ -689,7 +689,7 @@ AnnotatorConfig tokenizeCfg() : } { ( ( str = string() ) ? { val.setStemMode(str); } | - maxLength = integer() { val.setMaxTokenLength(maxLength); } | + maxLength = integer() { val.setMaxTokenizeLength(maxLength); } | maxTermOccurrences = integer() { val.setMaxTermOccurrences(maxTermOccurrences); } | { val.setRemoveAccents(true); } )+ { return val; } diff --git a/indexinglanguage/src/test/java/com/yahoo/vespa/indexinglanguage/linguistics/LinguisticsAnnotatorTestCase.java b/indexinglanguage/src/test/java/com/yahoo/vespa/indexinglanguage/linguistics/LinguisticsAnnotatorTestCase.java index 136e71564d8..461c915acef 100644 --- a/indexinglanguage/src/test/java/com/yahoo/vespa/indexinglanguage/linguistics/LinguisticsAnnotatorTestCase.java +++ b/indexinglanguage/src/test/java/com/yahoo/vespa/indexinglanguage/linguistics/LinguisticsAnnotatorTestCase.java @@ -194,7 +194,7 @@ public class LinguisticsAnnotatorTestCase { Linguistics linguistics = new SimpleLinguistics(); - LinguisticsAnnotator annotator = new LinguisticsAnnotator(linguistics, new AnnotatorConfig().setMaxTokenLength(12)); + LinguisticsAnnotator annotator = new LinguisticsAnnotator(linguistics, new AnnotatorConfig().setMaxTokenizeLength(12)); assertTrue(annotator.annotate(shortValue)); assertEquals(spanTree, shortValue.getSpanTree(SpanTrees.LINGUISTICS)); -- cgit v1.2.3 From 60ed9828f52f7e9851280faa5feeaaf90bab3686 Mon Sep 17 00:00:00 2001 From: Tor Egge Date: Mon, 6 May 2024 14:17:37 +0200 Subject: Add max token length to ilscripts config. --- config-model/src/test/derived/advanced/ilscripts.cfg | 1 + config-model/src/test/derived/annotationsimplicitstruct/ilscripts.cfg | 1 + config-model/src/test/derived/annotationsinheritance/ilscripts.cfg | 1 + config-model/src/test/derived/annotationsinheritance2/ilscripts.cfg | 1 + config-model/src/test/derived/annotationsreference/ilscripts.cfg | 1 + config-model/src/test/derived/annotationssimple/ilscripts.cfg | 1 + config-model/src/test/derived/arrays/ilscripts.cfg | 1 + config-model/src/test/derived/attributeprefetch/ilscripts.cfg | 1 + config-model/src/test/derived/attributes/ilscripts.cfg | 1 + config-model/src/test/derived/bolding_dynamic_summary/ilscripts.cfg | 1 + config-model/src/test/derived/complex/ilscripts.cfg | 1 + config-model/src/test/derived/emptydefault/ilscripts.cfg | 1 + config-model/src/test/derived/exactmatch/ilscripts.cfg | 1 + config-model/src/test/derived/hnsw_index/ilscripts.cfg | 1 + config-model/src/test/derived/id/ilscripts.cfg | 1 + config-model/src/test/derived/indexswitches/ilscripts.cfg | 1 + config-model/src/test/derived/inheritance/ilscripts.cfg | 1 + config-model/src/test/derived/language/ilscripts.cfg | 1 + config-model/src/test/derived/lowercase/ilscripts.cfg | 1 + config-model/src/test/derived/multiplesummaries/ilscripts.cfg | 1 + config-model/src/test/derived/music/ilscripts.cfg | 1 + config-model/src/test/derived/newrank/ilscripts.cfg | 1 + config-model/src/test/derived/orderilscripts/ilscripts.cfg | 1 + config-model/src/test/derived/position_array/ilscripts.cfg | 1 + config-model/src/test/derived/position_attribute/ilscripts.cfg | 1 + config-model/src/test/derived/position_extra/ilscripts.cfg | 1 + config-model/src/test/derived/prefixexactattribute/ilscripts.cfg | 1 + config-model/src/test/derived/ranktypes/ilscripts.cfg | 1 + config-model/src/test/derived/schemainheritance/ilscripts.cfg | 1 + config-model/src/test/derived/structanyorder/ilscripts.cfg | 1 + config-model/src/test/derived/tokenization/ilscripts.cfg | 1 + config-model/src/test/derived/types/ilscripts.cfg | 1 + config-model/src/test/derived/uri_array/ilscripts.cfg | 1 + config-model/src/test/derived/uri_wset/ilscripts.cfg | 1 + configdefinitions/src/vespa/ilscripts.def | 2 ++ 35 files changed, 36 insertions(+) (limited to 'config-model/src') diff --git a/config-model/src/test/derived/advanced/ilscripts.cfg b/config-model/src/test/derived/advanced/ilscripts.cfg index 51a49502b64..d633cd97f0c 100644 --- a/config-model/src/test/derived/advanced/ilscripts.cfg +++ b/config-model/src/test/derived/advanced/ilscripts.cfg @@ -1,4 +1,5 @@ maxtermoccurrences 10000 +maxtokenlength 1000 fieldmatchmaxlength 1000000 ilscript[].doctype "advanced" ilscript[].docfield[] "debug_src" diff --git a/config-model/src/test/derived/annotationsimplicitstruct/ilscripts.cfg b/config-model/src/test/derived/annotationsimplicitstruct/ilscripts.cfg index 767c3af3c19..53dc789fbb7 100644 --- a/config-model/src/test/derived/annotationsimplicitstruct/ilscripts.cfg +++ b/config-model/src/test/derived/annotationsimplicitstruct/ilscripts.cfg @@ -1,3 +1,4 @@ maxtermoccurrences 10000 +maxtokenlength 1000 fieldmatchmaxlength 1000000 ilscript[].doctype "annotationsimplicitstruct" diff --git a/config-model/src/test/derived/annotationsinheritance/ilscripts.cfg b/config-model/src/test/derived/annotationsinheritance/ilscripts.cfg index d8e6c882b80..b0a69c5408a 100644 --- a/config-model/src/test/derived/annotationsinheritance/ilscripts.cfg +++ b/config-model/src/test/derived/annotationsinheritance/ilscripts.cfg @@ -1,3 +1,4 @@ maxtermoccurrences 10000 +maxtokenlength 1000 fieldmatchmaxlength 1000000 ilscript[].doctype "annotationsinheritance" diff --git a/config-model/src/test/derived/annotationsinheritance2/ilscripts.cfg b/config-model/src/test/derived/annotationsinheritance2/ilscripts.cfg index ae4ea621583..5ec1f839429 100644 --- a/config-model/src/test/derived/annotationsinheritance2/ilscripts.cfg +++ b/config-model/src/test/derived/annotationsinheritance2/ilscripts.cfg @@ -1,3 +1,4 @@ maxtermoccurrences 10000 +maxtokenlength 1000 fieldmatchmaxlength 1000000 ilscript[].doctype "annotationsinheritance2" diff --git a/config-model/src/test/derived/annotationsreference/ilscripts.cfg b/config-model/src/test/derived/annotationsreference/ilscripts.cfg index 812f5e44545..eaa20043be8 100644 --- a/config-model/src/test/derived/annotationsreference/ilscripts.cfg +++ b/config-model/src/test/derived/annotationsreference/ilscripts.cfg @@ -1,3 +1,4 @@ maxtermoccurrences 10000 +maxtokenlength 1000 fieldmatchmaxlength 1000000 ilscript[].doctype "annotationsreference" diff --git a/config-model/src/test/derived/annotationssimple/ilscripts.cfg b/config-model/src/test/derived/annotationssimple/ilscripts.cfg index 9d0962df5be..af179221eb4 100644 --- a/config-model/src/test/derived/annotationssimple/ilscripts.cfg +++ b/config-model/src/test/derived/annotationssimple/ilscripts.cfg @@ -1,3 +1,4 @@ maxtermoccurrences 10000 +maxtokenlength 1000 fieldmatchmaxlength 1000000 ilscript[].doctype "annotationssimple" diff --git a/config-model/src/test/derived/arrays/ilscripts.cfg b/config-model/src/test/derived/arrays/ilscripts.cfg index 98cff642d9e..3f2dae48552 100644 --- a/config-model/src/test/derived/arrays/ilscripts.cfg +++ b/config-model/src/test/derived/arrays/ilscripts.cfg @@ -1,4 +1,5 @@ maxtermoccurrences 10000 +maxtokenlength 1000 fieldmatchmaxlength 1000000 ilscript[].doctype "arrays" ilscript[].docfield[] "tags" diff --git a/config-model/src/test/derived/attributeprefetch/ilscripts.cfg b/config-model/src/test/derived/attributeprefetch/ilscripts.cfg index dec054b33f0..5a3784f7cb9 100644 --- a/config-model/src/test/derived/attributeprefetch/ilscripts.cfg +++ b/config-model/src/test/derived/attributeprefetch/ilscripts.cfg @@ -1,4 +1,5 @@ maxtermoccurrences 10000 +maxtokenlength 1000 fieldmatchmaxlength 1000000 ilscript[].doctype "prefetch" ilscript[].docfield[] "singlebyte" diff --git a/config-model/src/test/derived/attributes/ilscripts.cfg b/config-model/src/test/derived/attributes/ilscripts.cfg index 6d3ef2799d9..58279759e5f 100644 --- a/config-model/src/test/derived/attributes/ilscripts.cfg +++ b/config-model/src/test/derived/attributes/ilscripts.cfg @@ -1,4 +1,5 @@ maxtermoccurrences 10000 +maxtokenlength 1000 fieldmatchmaxlength 1000000 ilscript[].doctype "attributes" ilscript[].docfield[] "a1" diff --git a/config-model/src/test/derived/bolding_dynamic_summary/ilscripts.cfg b/config-model/src/test/derived/bolding_dynamic_summary/ilscripts.cfg index c20c321ebcf..0b925da4778 100644 --- a/config-model/src/test/derived/bolding_dynamic_summary/ilscripts.cfg +++ b/config-model/src/test/derived/bolding_dynamic_summary/ilscripts.cfg @@ -1,4 +1,5 @@ maxtermoccurrences 10000 +maxtokenlength 1000 fieldmatchmaxlength 1000000 ilscript[].doctype "test" ilscript[].docfield[] "str_1" diff --git a/config-model/src/test/derived/complex/ilscripts.cfg b/config-model/src/test/derived/complex/ilscripts.cfg index 4405d2fda40..7d025e15703 100644 --- a/config-model/src/test/derived/complex/ilscripts.cfg +++ b/config-model/src/test/derived/complex/ilscripts.cfg @@ -1,4 +1,5 @@ maxtermoccurrences 10000 +maxtokenlength 1000 fieldmatchmaxlength 1000000 ilscript[].doctype "complex" ilscript[].docfield[] "title" diff --git a/config-model/src/test/derived/emptydefault/ilscripts.cfg b/config-model/src/test/derived/emptydefault/ilscripts.cfg index e4242153bce..bbb8e5c556c 100644 --- a/config-model/src/test/derived/emptydefault/ilscripts.cfg +++ b/config-model/src/test/derived/emptydefault/ilscripts.cfg @@ -1,4 +1,5 @@ maxtermoccurrences 10000 +maxtokenlength 1000 fieldmatchmaxlength 1000000 ilscript[].doctype "emptydefault" ilscript[].docfield[] "one" diff --git a/config-model/src/test/derived/exactmatch/ilscripts.cfg b/config-model/src/test/derived/exactmatch/ilscripts.cfg index 21dfbd1371b..1d1bd6d5e8a 100644 --- a/config-model/src/test/derived/exactmatch/ilscripts.cfg +++ b/config-model/src/test/derived/exactmatch/ilscripts.cfg @@ -1,4 +1,5 @@ maxtermoccurrences 10000 +maxtokenlength 1000 fieldmatchmaxlength 1000000 ilscript[].doctype "exactmatch" ilscript[].docfield[] "tag" diff --git a/config-model/src/test/derived/hnsw_index/ilscripts.cfg b/config-model/src/test/derived/hnsw_index/ilscripts.cfg index e48f116f468..c811b93c3df 100644 --- a/config-model/src/test/derived/hnsw_index/ilscripts.cfg +++ b/config-model/src/test/derived/hnsw_index/ilscripts.cfg @@ -1,4 +1,5 @@ maxtermoccurrences 10000 +maxtokenlength 1000 fieldmatchmaxlength 1000000 ilscript[].doctype "test" ilscript[].docfield[] "t1" diff --git a/config-model/src/test/derived/id/ilscripts.cfg b/config-model/src/test/derived/id/ilscripts.cfg index d3ab29f6cd8..121e305059e 100644 --- a/config-model/src/test/derived/id/ilscripts.cfg +++ b/config-model/src/test/derived/id/ilscripts.cfg @@ -1,4 +1,5 @@ maxtermoccurrences 10000 +maxtokenlength 1000 fieldmatchmaxlength 1000000 ilscript[].doctype "id" ilscript[].docfield[] "uri" diff --git a/config-model/src/test/derived/indexswitches/ilscripts.cfg b/config-model/src/test/derived/indexswitches/ilscripts.cfg index 472c1f95cb0..454f675c0a2 100644 --- a/config-model/src/test/derived/indexswitches/ilscripts.cfg +++ b/config-model/src/test/derived/indexswitches/ilscripts.cfg @@ -1,4 +1,5 @@ maxtermoccurrences 10000 +maxtokenlength 1000 fieldmatchmaxlength 1000000 ilscript[].doctype "indexswitches" ilscript[].docfield[] "title" diff --git a/config-model/src/test/derived/inheritance/ilscripts.cfg b/config-model/src/test/derived/inheritance/ilscripts.cfg index d4c804773f0..c966f32a502 100644 --- a/config-model/src/test/derived/inheritance/ilscripts.cfg +++ b/config-model/src/test/derived/inheritance/ilscripts.cfg @@ -1,4 +1,5 @@ maxtermoccurrences 10000 +maxtokenlength 1000 fieldmatchmaxlength 1000000 ilscript[].doctype "child" ilscript[].docfield[] "onlygrandparent" diff --git a/config-model/src/test/derived/language/ilscripts.cfg b/config-model/src/test/derived/language/ilscripts.cfg index 1860f180839..d0abc08f1e0 100644 --- a/config-model/src/test/derived/language/ilscripts.cfg +++ b/config-model/src/test/derived/language/ilscripts.cfg @@ -1,4 +1,5 @@ maxtermoccurrences 10000 +maxtokenlength 1000 fieldmatchmaxlength 1000000 ilscript[].doctype "language" ilscript[].docfield[] "language" diff --git a/config-model/src/test/derived/lowercase/ilscripts.cfg b/config-model/src/test/derived/lowercase/ilscripts.cfg index 8ba4bfa3349..49515e50df4 100644 --- a/config-model/src/test/derived/lowercase/ilscripts.cfg +++ b/config-model/src/test/derived/lowercase/ilscripts.cfg @@ -1,4 +1,5 @@ maxtermoccurrences 10000 +maxtokenlength 1000 fieldmatchmaxlength 1000000 ilscript[].doctype "lowercase" ilscript[].docfield[] "single_field_source" diff --git a/config-model/src/test/derived/multiplesummaries/ilscripts.cfg b/config-model/src/test/derived/multiplesummaries/ilscripts.cfg index 0cdf921de25..4a6de4154f8 100644 --- a/config-model/src/test/derived/multiplesummaries/ilscripts.cfg +++ b/config-model/src/test/derived/multiplesummaries/ilscripts.cfg @@ -1,4 +1,5 @@ maxtermoccurrences 10000 +maxtokenlength 1000 fieldmatchmaxlength 1000000 ilscript[].doctype "multiplesummaries" ilscript[].docfield[] "a" diff --git a/config-model/src/test/derived/music/ilscripts.cfg b/config-model/src/test/derived/music/ilscripts.cfg index f90cdb15baa..f79e8824b69 100644 --- a/config-model/src/test/derived/music/ilscripts.cfg +++ b/config-model/src/test/derived/music/ilscripts.cfg @@ -1,4 +1,5 @@ maxtermoccurrences 10000 +maxtokenlength 1000 fieldmatchmaxlength 1000000 ilscript[].doctype "music" ilscript[].docfield[] "bgndata" diff --git a/config-model/src/test/derived/newrank/ilscripts.cfg b/config-model/src/test/derived/newrank/ilscripts.cfg index b02e09a0496..487d2fca902 100644 --- a/config-model/src/test/derived/newrank/ilscripts.cfg +++ b/config-model/src/test/derived/newrank/ilscripts.cfg @@ -1,4 +1,5 @@ maxtermoccurrences 10000 +maxtokenlength 1000 fieldmatchmaxlength 1000000 ilscript[].doctype "newrank" ilscript[].docfield[] "bgndata" diff --git a/config-model/src/test/derived/orderilscripts/ilscripts.cfg b/config-model/src/test/derived/orderilscripts/ilscripts.cfg index 0ed1589af0a..4918e23efc6 100644 --- a/config-model/src/test/derived/orderilscripts/ilscripts.cfg +++ b/config-model/src/test/derived/orderilscripts/ilscripts.cfg @@ -1,4 +1,5 @@ maxtermoccurrences 10000 +maxtokenlength 1000 fieldmatchmaxlength 1000000 ilscript[].doctype "orderilscripts" ilscript[].docfield[] "foo" diff --git a/config-model/src/test/derived/position_array/ilscripts.cfg b/config-model/src/test/derived/position_array/ilscripts.cfg index ecafbc4a025..3f7611b25d8 100644 --- a/config-model/src/test/derived/position_array/ilscripts.cfg +++ b/config-model/src/test/derived/position_array/ilscripts.cfg @@ -1,4 +1,5 @@ maxtermoccurrences 10000 +maxtokenlength 1000 fieldmatchmaxlength 1000000 ilscript[].doctype "position_array" ilscript[].docfield[] "pos" diff --git a/config-model/src/test/derived/position_attribute/ilscripts.cfg b/config-model/src/test/derived/position_attribute/ilscripts.cfg index d2fc8503ce5..fbd1a293418 100644 --- a/config-model/src/test/derived/position_attribute/ilscripts.cfg +++ b/config-model/src/test/derived/position_attribute/ilscripts.cfg @@ -1,4 +1,5 @@ maxtermoccurrences 10000 +maxtokenlength 1000 fieldmatchmaxlength 1000000 ilscript[].doctype "position_attribute" ilscript[].docfield[] "pos" diff --git a/config-model/src/test/derived/position_extra/ilscripts.cfg b/config-model/src/test/derived/position_extra/ilscripts.cfg index a86dcec92ec..4645798723c 100644 --- a/config-model/src/test/derived/position_extra/ilscripts.cfg +++ b/config-model/src/test/derived/position_extra/ilscripts.cfg @@ -1,4 +1,5 @@ maxtermoccurrences 10000 +maxtokenlength 1000 fieldmatchmaxlength 1000000 ilscript[].doctype "position_extra" ilscript[].docfield[] "pos_str" diff --git a/config-model/src/test/derived/prefixexactattribute/ilscripts.cfg b/config-model/src/test/derived/prefixexactattribute/ilscripts.cfg index 40c7843a0a4..2d1904cf9d8 100644 --- a/config-model/src/test/derived/prefixexactattribute/ilscripts.cfg +++ b/config-model/src/test/derived/prefixexactattribute/ilscripts.cfg @@ -1,4 +1,5 @@ maxtermoccurrences 10000 +maxtokenlength 1000 fieldmatchmaxlength 1000000 ilscript[].doctype "prefixexactattribute" ilscript[].docfield[] "indexfield0" diff --git a/config-model/src/test/derived/ranktypes/ilscripts.cfg b/config-model/src/test/derived/ranktypes/ilscripts.cfg index adcd2f70c70..22526d1aa23 100644 --- a/config-model/src/test/derived/ranktypes/ilscripts.cfg +++ b/config-model/src/test/derived/ranktypes/ilscripts.cfg @@ -1,4 +1,5 @@ maxtermoccurrences 10000 +maxtokenlength 1000 fieldmatchmaxlength 1000000 ilscript[].doctype "ranktypes" ilscript[].docfield[] "title" diff --git a/config-model/src/test/derived/schemainheritance/ilscripts.cfg b/config-model/src/test/derived/schemainheritance/ilscripts.cfg index f7324920fe7..b1ba947f1dc 100644 --- a/config-model/src/test/derived/schemainheritance/ilscripts.cfg +++ b/config-model/src/test/derived/schemainheritance/ilscripts.cfg @@ -1,4 +1,5 @@ maxtermoccurrences 10000 +maxtokenlength 1000 fieldmatchmaxlength 1000000 ilscript[].doctype "child" ilscript[].docfield[] "pf1" diff --git a/config-model/src/test/derived/structanyorder/ilscripts.cfg b/config-model/src/test/derived/structanyorder/ilscripts.cfg index c07f04b3021..a806bc1b712 100644 --- a/config-model/src/test/derived/structanyorder/ilscripts.cfg +++ b/config-model/src/test/derived/structanyorder/ilscripts.cfg @@ -1,4 +1,5 @@ maxtermoccurrences 10000 +maxtokenlength 1000 fieldmatchmaxlength 1000000 ilscript[].doctype "annotationsimplicitstruct" ilscript[].docfield[] "structfield" diff --git a/config-model/src/test/derived/tokenization/ilscripts.cfg b/config-model/src/test/derived/tokenization/ilscripts.cfg index c08b6a54c83..cad8ec81879 100644 --- a/config-model/src/test/derived/tokenization/ilscripts.cfg +++ b/config-model/src/test/derived/tokenization/ilscripts.cfg @@ -1,4 +1,5 @@ maxtermoccurrences 10000 +maxtokenlength 1000 fieldmatchmaxlength 1000000 ilscript[].doctype "tokenization" ilscript[].docfield[] "text" diff --git a/config-model/src/test/derived/types/ilscripts.cfg b/config-model/src/test/derived/types/ilscripts.cfg index 17bed90deb4..73befb221ce 100644 --- a/config-model/src/test/derived/types/ilscripts.cfg +++ b/config-model/src/test/derived/types/ilscripts.cfg @@ -1,4 +1,5 @@ maxtermoccurrences 10000 +maxtokenlength 1000 fieldmatchmaxlength 1000000 ilscript[].doctype "types" ilscript[].docfield[] "abyte" diff --git a/config-model/src/test/derived/uri_array/ilscripts.cfg b/config-model/src/test/derived/uri_array/ilscripts.cfg index 3dd97e5c11f..0dc87b513ce 100644 --- a/config-model/src/test/derived/uri_array/ilscripts.cfg +++ b/config-model/src/test/derived/uri_array/ilscripts.cfg @@ -1,4 +1,5 @@ maxtermoccurrences 10000 +maxtokenlength 1000 fieldmatchmaxlength 1000000 ilscript[].doctype "uri_array" ilscript[].docfield[] "my_uri" diff --git a/config-model/src/test/derived/uri_wset/ilscripts.cfg b/config-model/src/test/derived/uri_wset/ilscripts.cfg index 48e07ef9959..cc45ee5ad8f 100644 --- a/config-model/src/test/derived/uri_wset/ilscripts.cfg +++ b/config-model/src/test/derived/uri_wset/ilscripts.cfg @@ -1,4 +1,5 @@ maxtermoccurrences 10000 +maxtokenlength 1000 fieldmatchmaxlength 1000000 ilscript[].doctype "uri_wset" ilscript[].docfield[] "my_uri" diff --git a/configdefinitions/src/vespa/ilscripts.def b/configdefinitions/src/vespa/ilscripts.def index acb06abb755..95dd1559106 100644 --- a/configdefinitions/src/vespa/ilscripts.def +++ b/configdefinitions/src/vespa/ilscripts.def @@ -3,6 +3,8 @@ namespace=vespa.configdefinition ## The maximum number of occurrences of a given term to index per field maxtermoccurrences int default=10000 +## The maximum number of bytes for a token +maxtokenlength int default=1000 fieldmatchmaxlength int default=1000000 ilscript[].doctype string -- cgit v1.2.3 From 4567057679e35b925e0939c7c4efe32f15bdd4d8 Mon Sep 17 00:00:00 2001 From: Henning Baldersheim Date: Mon, 6 May 2024 15:07:45 +0200 Subject: Revert "Update jackson2.vespa.version to v2.17.0" --- .../validation/ConstantTensorJsonValidator.java | 20 ++++++------- .../yahoo/search/handler/Json2SingleLevelMap.java | 8 +++--- .../search/rendering/JsonRendererTestCase.java | 2 +- dependency-versions/pom.xml | 2 +- .../java/com/yahoo/document/json/JsonReader.java | 4 +-- .../com/yahoo/document/json/LazyTokenBuffer.java | 4 +-- .../java/com/yahoo/document/json/TokenBuffer.java | 2 +- .../document/json/document/DocumentParser.java | 13 ++++----- .../vespa/metricsproxy/service/MetricsParser.java | 33 +++++++++++----------- .../importer/lightgbm/LightGBMImporter.java | 4 +-- .../search/predicate/PredicateQueryParser.java | 11 ++++---- .../main/java/ai/vespa/feed/client/JsonFeeder.java | 14 ++++----- .../ai/vespa/feed/client/impl/HttpFeedClient.java | 4 +-- .../test/java/com/yahoo/slime/JsonBenchmark.java | 2 +- 14 files changed, 62 insertions(+), 61 deletions(-) (limited to 'config-model/src') diff --git a/config-model/src/main/java/com/yahoo/vespa/model/application/validation/ConstantTensorJsonValidator.java b/config-model/src/main/java/com/yahoo/vespa/model/application/validation/ConstantTensorJsonValidator.java index 02a6b243054..40c9a03b126 100644 --- a/config-model/src/main/java/com/yahoo/vespa/model/application/validation/ConstantTensorJsonValidator.java +++ b/config-model/src/main/java/com/yahoo/vespa/model/application/validation/ConstantTensorJsonValidator.java @@ -132,7 +132,7 @@ public class ConstantTensorJsonValidator { private void consumeTopObject() throws IOException { for (var cur = parser.nextToken(); cur != JsonToken.END_OBJECT; cur = parser.nextToken()) { assertCurrentTokenIs(JsonToken.FIELD_NAME); - String fieldName = parser.currentName(); + String fieldName = parser.getCurrentName(); switch (fieldName) { case FIELD_TYPE -> consumeTypeField(); case FIELD_VALUES -> consumeValuesField(); @@ -189,7 +189,7 @@ public class ConstantTensorJsonValidator { } for (var cur = parser.nextToken(); cur != JsonToken.END_OBJECT; cur = parser.nextToken()) { assertCurrentTokenIs(JsonToken.FIELD_NAME); - validateNumeric(parser.currentName(), parser.nextToken()); + validateNumeric(parser.getCurrentName(), parser.nextToken()); } } @@ -199,7 +199,7 @@ public class ConstantTensorJsonValidator { boolean seenValue = false; for (int i = 0; i < 2; i++) { assertNextTokenIs(JsonToken.FIELD_NAME); - String fieldName = parser.currentName(); + String fieldName = parser.getCurrentName(); switch (fieldName) { case FIELD_ADDRESS -> { validateTensorAddress(new HashSet<>(tensorDimensions.keySet())); @@ -228,13 +228,13 @@ public class ConstantTensorJsonValidator { // Iterate within the address key, value pairs while ((parser.nextToken() != JsonToken.END_OBJECT)) { assertCurrentTokenIs(JsonToken.FIELD_NAME); - String dimensionName = parser.currentName(); + String dimensionName = parser.getCurrentName(); TensorType.Dimension dimension = tensorDimensions.get(dimensionName); if (dimension == null) { - throw new InvalidConstantTensorException(parser, String.format("Tensor dimension '%s' does not exist", dimensionName)); + throw new InvalidConstantTensorException(parser, String.format("Tensor dimension '%s' does not exist", parser.getCurrentName())); } if (!cellDimensions.contains(dimensionName)) { - throw new InvalidConstantTensorException(parser, String.format("Duplicate tensor dimension '%s'", dimensionName)); + throw new InvalidConstantTensorException(parser, String.format("Duplicate tensor dimension '%s'", parser.getCurrentName())); } cellDimensions.remove(dimensionName); validateLabel(dimension); @@ -300,7 +300,7 @@ public class ConstantTensorJsonValidator { } private void assertCurrentTokenIs(JsonToken wantedToken) { - assertTokenIs(parser.currentToken(), wantedToken); + assertTokenIs(parser.getCurrentToken(), wantedToken); } private void assertNextTokenIs(JsonToken wantedToken) throws IOException { @@ -316,11 +316,11 @@ public class ConstantTensorJsonValidator { static class InvalidConstantTensorException extends IllegalArgumentException { InvalidConstantTensorException(JsonParser parser, String message) { - super(message + " " + parser.currentLocation().toString()); + super(message + " " + parser.getCurrentLocation().toString()); } InvalidConstantTensorException(JsonParser parser, Exception base) { - super("Failed to parse JSON stream " + parser.currentLocation().toString(), base); + super("Failed to parse JSON stream " + parser.getCurrentLocation().toString(), base); } InvalidConstantTensorException(IOException base) { @@ -412,7 +412,7 @@ public class ConstantTensorJsonValidator { boolean seenValues = false; for (int i = 0; i < 2; i++) { assertNextTokenIs(JsonToken.FIELD_NAME); - String fieldName = parser.currentName(); + String fieldName = parser.getCurrentName(); switch (fieldName) { case FIELD_ADDRESS -> { validateTensorAddress(new HashSet<>(mappedDims)); diff --git a/container-search/src/main/java/com/yahoo/search/handler/Json2SingleLevelMap.java b/container-search/src/main/java/com/yahoo/search/handler/Json2SingleLevelMap.java index fdedbdc2fd9..01167be6b8b 100644 --- a/container-search/src/main/java/com/yahoo/search/handler/Json2SingleLevelMap.java +++ b/container-search/src/main/java/com/yahoo/search/handler/Json2SingleLevelMap.java @@ -64,8 +64,8 @@ class Json2SingleLevelMap { } void parse(Map map, String parent) throws IOException { - for (parser.nextToken(); parser.currentToken() != JsonToken.END_OBJECT; parser.nextToken()) { - String fieldName = parent + parser.currentName(); + for (parser.nextToken(); parser.getCurrentToken() != JsonToken.END_OBJECT; parser.nextToken()) { + String fieldName = parent + parser.getCurrentName(); JsonToken token = parser.nextToken(); if ((token == JsonToken.VALUE_STRING) || (token == JsonToken.VALUE_NUMBER_FLOAT) || @@ -89,9 +89,9 @@ class Json2SingleLevelMap { } private String skipChildren(JsonParser parser, byte [] input) throws IOException { - JsonLocation start = parser.currentLocation(); + JsonLocation start = parser.getCurrentLocation(); parser.skipChildren(); - JsonLocation end = parser.currentLocation(); + JsonLocation end = parser.getCurrentLocation(); int offset = (int)start.getByteOffset() - 1; return new String(input, offset, (int)(end.getByteOffset() - offset), StandardCharsets.UTF_8); } diff --git a/container-search/src/test/java/com/yahoo/search/rendering/JsonRendererTestCase.java b/container-search/src/test/java/com/yahoo/search/rendering/JsonRendererTestCase.java index 611df6ad284..ffa6c82e941 100644 --- a/container-search/src/test/java/com/yahoo/search/rendering/JsonRendererTestCase.java +++ b/container-search/src/test/java/com/yahoo/search/rendering/JsonRendererTestCase.java @@ -1532,7 +1532,7 @@ public class JsonRendererTestCase { + "}"; assertEquals( "Unexpected character ('a' (code 97)): was expecting comma to separate Object entries\n" + - " at [Source: (String)\"{ \"root\": { \"invalidvalue\": 1adsf, }}\"; line: 1, column: 40]", + " at [Source: (String)\"{ \"root\": { \"invalidvalue\": 1adsf, }}\"; line: 1, column: 41]", validateJSON(json)); } diff --git a/dependency-versions/pom.xml b/dependency-versions/pom.xml index d27318aaaf9..0876674e8c6 100644 --- a/dependency-versions/pom.xml +++ b/dependency-versions/pom.xml @@ -37,7 +37,7 @@ 33.2.0-jre 6.0.0 3.0.0 - 2.17.1 + 2.16.2 ${jackson2.vespa.version} 2.0.1 1.2.0 diff --git a/document/src/main/java/com/yahoo/document/json/JsonReader.java b/document/src/main/java/com/yahoo/document/json/JsonReader.java index 9c621c033bd..358c0cb65e4 100644 --- a/document/src/main/java/com/yahoo/document/json/JsonReader.java +++ b/document/src/main/java/com/yahoo/document/json/JsonReader.java @@ -105,7 +105,7 @@ public class JsonReader { String condition = null; ParsedDocumentOperation operation = null; while (JsonToken.END_OBJECT != parser.nextValue()) { - switch (parser.currentName()) { + switch (parser.getCurrentName()) { case FIELDS -> { documentParseInfo.fieldsBuffer = new LazyTokenBuffer(parser); VespaJsonDocumentReader vespaJsonDocumentReader = new VespaJsonDocumentReader(typeManager.getIgnoreUndefinedFields()); @@ -177,7 +177,7 @@ public class JsonReader { state = END_OF_FEED; throw new IllegalArgumentException(r); } - if (documentParseInfo.isEmpty()) { + if ( ! documentParseInfo.isPresent()) { state = END_OF_FEED; return null; } diff --git a/document/src/main/java/com/yahoo/document/json/LazyTokenBuffer.java b/document/src/main/java/com/yahoo/document/json/LazyTokenBuffer.java index 53ddacf6cc3..0fbdd0b28c7 100644 --- a/document/src/main/java/com/yahoo/document/json/LazyTokenBuffer.java +++ b/document/src/main/java/com/yahoo/document/json/LazyTokenBuffer.java @@ -33,7 +33,7 @@ public class LazyTokenBuffer extends TokenBuffer { public Supplier lookahead() { return new Supplier<>() { int localNesting = nesting(); - final Supplier buffered = LazyTokenBuffer.super.lookahead(); + Supplier buffered = LazyTokenBuffer.super.lookahead(); @Override public Token get() { if (localNesting == 0) return null; @@ -54,7 +54,7 @@ public class LazyTokenBuffer extends TokenBuffer { JsonToken token = parser.nextValue(); if (token == null) throw new IllegalStateException("no more JSON tokens"); - return new Token(token, parser.currentName(), parser.getText()); + return new Token(token, parser.getCurrentName(), parser.getText()); } catch (IOException e) { throw new IllegalArgumentException("failed reading document JSON", e); diff --git a/document/src/main/java/com/yahoo/document/json/TokenBuffer.java b/document/src/main/java/com/yahoo/document/json/TokenBuffer.java index c5c022370bf..3a48f71c4cd 100644 --- a/document/src/main/java/com/yahoo/document/json/TokenBuffer.java +++ b/document/src/main/java/com/yahoo/document/json/TokenBuffer.java @@ -99,7 +99,7 @@ public class TokenBuffer { } int addFromParser(JsonParser tokens) throws IOException { - add(tokens.currentToken(), tokens.currentName(), tokens.getText()); + add(tokens.currentToken(), tokens.getCurrentName(), tokens.getText()); return nestingOffset(tokens.currentToken()); } diff --git a/document/src/main/java/com/yahoo/document/json/document/DocumentParser.java b/document/src/main/java/com/yahoo/document/json/document/DocumentParser.java index c5bcd356c94..77e11dcf2a8 100644 --- a/document/src/main/java/com/yahoo/document/json/document/DocumentParser.java +++ b/document/src/main/java/com/yahoo/document/json/document/DocumentParser.java @@ -61,7 +61,7 @@ public class DocumentParser { private boolean parseOneItem(DocumentParseInfo documentParseInfo, boolean docIdAndOperationIsSetExternally) throws IOException { parser.nextValue(); processIndent(); - if (parser.currentName() == null) return false; + if (parser.getCurrentName() == null) return false; if (indentLevel == 1L) { handleIdentLevelOne(documentParseInfo, docIdAndOperationIsSetExternally); } else if (indentLevel == 2L) { @@ -85,18 +85,17 @@ public class DocumentParser { private void handleIdentLevelOne(DocumentParseInfo documentParseInfo, boolean docIdAndOperationIsSetExternally) throws IOException { - JsonToken currentToken = parser.currentToken(); - String currentName = parser.currentName(); + JsonToken currentToken = parser.getCurrentToken(); if ((currentToken == JsonToken.VALUE_TRUE || currentToken == JsonToken.VALUE_FALSE) && - CREATE_IF_NON_EXISTENT.equals(currentName)) { + CREATE_IF_NON_EXISTENT.equals(parser.getCurrentName())) { documentParseInfo.create = Optional.of(currentToken == JsonToken.VALUE_TRUE); - } else if (currentToken == JsonToken.VALUE_STRING && CONDITION.equals(currentName)) { + } else if (currentToken == JsonToken.VALUE_STRING && CONDITION.equals(parser.getCurrentName())) { documentParseInfo.condition = Optional.of(parser.getText()); } else if (currentToken == JsonToken.VALUE_STRING) { // Value is expected to be set in the header not in the document. Ignore any unknown field // as well. if (! docIdAndOperationIsSetExternally) { - documentParseInfo.operationType = operationNameToOperationType(currentName); + documentParseInfo.operationType = operationNameToOperationType(parser.getCurrentName()); documentParseInfo.documentId = new DocumentId(parser.getText()); } } @@ -105,7 +104,7 @@ public class DocumentParser { private void handleIdentLevelTwo(DocumentParseInfo documentParseInfo) { try { // "fields" opens a dictionary and is therefore on level two which might be surprising. - if (parser.currentToken() == JsonToken.START_OBJECT && FIELDS.equals(parser.currentName())) { + if (parser.currentToken() == JsonToken.START_OBJECT && FIELDS.equals(parser.getCurrentName())) { documentParseInfo.fieldsBuffer.bufferObject(parser); processIndent(); } diff --git a/metrics-proxy/src/main/java/ai/vespa/metricsproxy/service/MetricsParser.java b/metrics-proxy/src/main/java/ai/vespa/metricsproxy/service/MetricsParser.java index 052b8425a45..0e33d7dbf2f 100644 --- a/metrics-proxy/src/main/java/ai/vespa/metricsproxy/service/MetricsParser.java +++ b/metrics-proxy/src/main/java/ai/vespa/metricsproxy/service/MetricsParser.java @@ -53,8 +53,8 @@ public class MetricsParser { throw new IOException("Expected start of object, got " + parser.currentToken()); } - for (parser.nextToken(); parser.currentToken() != JsonToken.END_OBJECT; parser.nextToken()) { - String fieldName = parser.currentName(); + for (parser.nextToken(); parser.getCurrentToken() != JsonToken.END_OBJECT; parser.nextToken()) { + String fieldName = parser.getCurrentName(); JsonToken token = parser.nextToken(); if (fieldName.equals("metrics")) { parseMetrics(parser, consumer); @@ -67,12 +67,12 @@ public class MetricsParser { } static private Instant parseSnapshot(JsonParser parser) throws IOException { - if (parser.currentToken() != JsonToken.START_OBJECT) { + if (parser.getCurrentToken() != JsonToken.START_OBJECT) { throw new IOException("Expected start of 'snapshot' object, got " + parser.currentToken()); } Instant timestamp = Instant.now(); - for (parser.nextToken(); parser.currentToken() != JsonToken.END_OBJECT; parser.nextToken()) { - String fieldName = parser.currentName(); + for (parser.nextToken(); parser.getCurrentToken() != JsonToken.END_OBJECT; parser.nextToken()) { + String fieldName = parser.getCurrentName(); JsonToken token = parser.nextToken(); if (fieldName.equals("to")) { timestamp = Instant.ofEpochSecond(parser.getLongValue()); @@ -88,12 +88,12 @@ public class MetricsParser { // 'metrics' object with 'snapshot' and 'values' arrays static private void parseMetrics(JsonParser parser, Collector consumer) throws IOException { - if (parser.currentToken() != JsonToken.START_OBJECT) { + if (parser.getCurrentToken() != JsonToken.START_OBJECT) { throw new IOException("Expected start of 'metrics' object, got " + parser.currentToken()); } Instant timestamp = Instant.now(); - for (parser.nextToken(); parser.currentToken() != JsonToken.END_OBJECT; parser.nextToken()) { - String fieldName = parser.currentName(); + for (parser.nextToken(); parser.getCurrentToken() != JsonToken.END_OBJECT; parser.nextToken()) { + String fieldName = parser.getCurrentName(); JsonToken token = parser.nextToken(); if (fieldName.equals("snapshot")) { timestamp = parseSnapshot(parser); @@ -109,7 +109,7 @@ public class MetricsParser { // 'values' array static private void parseMetricValues(JsonParser parser, Instant timestamp, Collector consumer) throws IOException { - if (parser.currentToken() != JsonToken.START_ARRAY) { + if (parser.getCurrentToken() != JsonToken.START_ARRAY) { throw new IOException("Expected start of 'metrics:values' array, got " + parser.currentToken()); } @@ -126,8 +126,8 @@ public class MetricsParser { String description = ""; Map dim = Map.of(); List> values = List.of(); - for (parser.nextToken(); parser.currentToken() != JsonToken.END_OBJECT; parser.nextToken()) { - String fieldName = parser.currentName(); + for (parser.nextToken(); parser.getCurrentToken() != JsonToken.END_OBJECT; parser.nextToken()) { + String fieldName = parser.getCurrentName(); JsonToken token = parser.nextToken(); switch (fieldName) { case "name" -> name = parser.getText(); @@ -154,8 +154,8 @@ public class MetricsParser { Set dimensions = new HashSet<>(); - for (parser.nextToken(); parser.currentToken() != JsonToken.END_OBJECT; parser.nextToken()) { - String fieldName = parser.currentName(); + for (parser.nextToken(); parser.getCurrentToken() != JsonToken.END_OBJECT; parser.nextToken()) { + String fieldName = parser.getCurrentName(); JsonToken token = parser.nextToken(); if (token == JsonToken.VALUE_STRING){ @@ -180,16 +180,17 @@ public class MetricsParser { private static List> parseValues(JsonParser parser) throws IOException { List> metrics = new ArrayList<>(); - for (parser.nextToken(); parser.currentToken() != JsonToken.END_OBJECT; parser.nextToken()) { - String metricName = parser.currentName(); + for (parser.nextToken(); parser.getCurrentToken() != JsonToken.END_OBJECT; parser.nextToken()) { + String fieldName = parser.getCurrentName(); JsonToken token = parser.nextToken(); + String metricName = fieldName; if (token == JsonToken.VALUE_NUMBER_INT) { metrics.add(Map.entry(metricName, parser.getLongValue())); } else if (token == JsonToken.VALUE_NUMBER_FLOAT) { double value = parser.getValueAsDouble(); metrics.add(Map.entry(metricName, value == ZERO_DOUBLE ? ZERO_DOUBLE : value)); } else { - throw new IllegalArgumentException("Value for aggregator '" + metricName + "' is not a number"); + throw new IllegalArgumentException("Value for aggregator '" + fieldName + "' is not a number"); } } return metrics; diff --git a/model-integration/src/main/java/ai/vespa/rankingexpression/importer/lightgbm/LightGBMImporter.java b/model-integration/src/main/java/ai/vespa/rankingexpression/importer/lightgbm/LightGBMImporter.java index 6a1e2f2562a..e1d2f8802a6 100644 --- a/model-integration/src/main/java/ai/vespa/rankingexpression/importer/lightgbm/LightGBMImporter.java +++ b/model-integration/src/main/java/ai/vespa/rankingexpression/importer/lightgbm/LightGBMImporter.java @@ -34,9 +34,9 @@ public class LightGBMImporter extends ModelImporter { private boolean probe(File modelFile) { try (JsonParser parser = Jackson.mapper().createParser(modelFile)) { while (parser.nextToken() != null) { - JsonToken token = parser.currentToken(); + JsonToken token = parser.getCurrentToken(); if (token == JsonToken.FIELD_NAME) { - if ("tree_info".equals(parser.currentName())) return true; + if ("tree_info".equals(parser.getCurrentName())) return true; } } return false; diff --git a/predicate-search-core/src/main/java/com/yahoo/search/predicate/PredicateQueryParser.java b/predicate-search-core/src/main/java/com/yahoo/search/predicate/PredicateQueryParser.java index 42b6195549e..09487506ffe 100644 --- a/predicate-search-core/src/main/java/com/yahoo/search/predicate/PredicateQueryParser.java +++ b/predicate-search-core/src/main/java/com/yahoo/search/predicate/PredicateQueryParser.java @@ -10,6 +10,7 @@ import java.util.Arrays; /** * Parses predicate queries from JSON. + * * Input JSON is assumed to have the following format: * { * "features": [ @@ -45,7 +46,7 @@ public class PredicateQueryParser { try (JsonParser parser = factory.createParser(json)) { skipToken(parser, JsonToken.START_OBJECT); while (parser.nextToken() != JsonToken.END_OBJECT) { - String fieldName = parser.currentName(); + String fieldName = parser.getCurrentName(); switch (fieldName) { case "features": parseFeatures(parser, JsonParser::getText, featureHandler); @@ -81,7 +82,7 @@ public class PredicateQueryParser { long subqueryBitmap = SubqueryBitmap.DEFAULT_VALUE; // Specifying subquery bitmap is optional. while (parser.nextToken() != JsonToken.END_OBJECT) { - String fieldName = parser.currentName(); + String fieldName = parser.getCurrentName(); skipToken(parser, JsonToken.VALUE_STRING, JsonToken.VALUE_NUMBER_INT); switch (fieldName) { case "k": @@ -99,11 +100,11 @@ public class PredicateQueryParser { } if (key == null) { throw new IllegalArgumentException( - String.format("Feature key is missing! (%s)", parser.currentLocation())); + String.format("Feature key is missing! (%s)", parser.getCurrentLocation())); } if (value == null) { throw new IllegalArgumentException( - String.format("Feature value is missing! (%s)", parser.currentLocation())); + String.format("Feature value is missing! (%s)", parser.getCurrentLocation())); } featureHandler.accept(key, value, subqueryBitmap); } @@ -113,7 +114,7 @@ public class PredicateQueryParser { if (Arrays.stream(expected).noneMatch(e -> e.equals(actual))) { throw new IllegalArgumentException( String.format("Expected a token in %s, got %s (%s).", - Arrays.toString(expected), actual, parser.currentTokenLocation())); + Arrays.toString(expected), actual, parser.getTokenLocation())); } } diff --git a/vespa-feed-client-api/src/main/java/ai/vespa/feed/client/JsonFeeder.java b/vespa-feed-client-api/src/main/java/ai/vespa/feed/client/JsonFeeder.java index 3111815b332..11fb6526210 100644 --- a/vespa-feed-client-api/src/main/java/ai/vespa/feed/client/JsonFeeder.java +++ b/vespa-feed-client-api/src/main/java/ai/vespa/feed/client/JsonFeeder.java @@ -414,7 +414,7 @@ public class JsonFeeder implements Closeable { abstract String getDocumentJson(long start, long end); OperationParseException parseException(String error) { - JsonLocation location = parser.currentLocation(); + JsonLocation location = parser.getTokenLocation(); return new OperationParseException(error + " at offset " + location.getByteOffset() + " (line " + location.getLineNr() + ", column " + location.getColumnNr() + ")"); } @@ -444,13 +444,13 @@ public class JsonFeeder implements Closeable { case "create": parameters = parameters.createIfNonExistent(readBoolean()); break; case "fields": { expect(START_OBJECT); - start = parser.currentTokenLocation().getByteOffset(); + start = parser.getTokenLocation().getByteOffset(); int depth = 1; while (depth > 0) switch (parser.nextToken()) { case START_OBJECT: ++depth; break; case END_OBJECT: --depth; break; } - end = parser.currentTokenLocation().getByteOffset() + 1; + end = parser.getTokenLocation().getByteOffset() + 1; break; } default: throw parseException("Unexpected field name '" + parser.getText() + "'"); @@ -470,7 +470,7 @@ public class JsonFeeder implements Closeable { if (end >= start) throw parseException("Illegal 'fields' object for remove operation"); else - start = end = parser.currentTokenLocation().getByteOffset(); // getDocumentJson advances buffer overwrite head. + start = end = parser.getTokenLocation().getByteOffset(); // getDocumentJson advances buffer overwrite head. } else if (end < start) throw parseException("No 'fields' object for document"); @@ -486,14 +486,14 @@ public class JsonFeeder implements Closeable { private void expect(JsonToken token) throws IOException { if (parser.nextToken() != token) - throw new OperationParseException("Expected '" + token + "' at offset " + parser.currentTokenLocation().getByteOffset() + + throw new OperationParseException("Expected '" + token + "' at offset " + parser.getTokenLocation().getByteOffset() + ", but found '" + parser.currentToken() + "' (" + parser.getText() + ")"); } private String readString() throws IOException { String value = parser.nextTextValue(); if (value == null) - throw new OperationParseException("Expected '" + JsonToken.VALUE_STRING + "' at offset " + parser.currentTokenLocation().getByteOffset() + + throw new OperationParseException("Expected '" + JsonToken.VALUE_STRING + "' at offset " + parser.getTokenLocation().getByteOffset() + ", but found '" + parser.currentToken() + "' (" + parser.getText() + ")"); return value; @@ -502,7 +502,7 @@ public class JsonFeeder implements Closeable { private boolean readBoolean() throws IOException { Boolean value = parser.nextBooleanValue(); if (value == null) - throw new OperationParseException("Expected '" + JsonToken.VALUE_FALSE + "' or '" + JsonToken.VALUE_TRUE + "' at offset " + parser.currentTokenLocation().getByteOffset() + + throw new OperationParseException("Expected '" + JsonToken.VALUE_FALSE + "' or '" + JsonToken.VALUE_TRUE + "' at offset " + parser.getTokenLocation().getByteOffset() + ", but found '" + parser.currentToken() + "' (" + parser.getText() + ")"); return value; diff --git a/vespa-feed-client/src/main/java/ai/vespa/feed/client/impl/HttpFeedClient.java b/vespa-feed-client/src/main/java/ai/vespa/feed/client/impl/HttpFeedClient.java index 9dd11113c0b..a30cfd5ec39 100644 --- a/vespa-feed-client/src/main/java/ai/vespa/feed/client/impl/HttpFeedClient.java +++ b/vespa-feed-client/src/main/java/ai/vespa/feed/client/impl/HttpFeedClient.java @@ -219,13 +219,13 @@ class HttpFeedClient implements FeedClient { throw new ResultParseException(documentId, "Expected 'trace' to be an array, but got '" + parser.currentToken() + "' in: " + new String(json, UTF_8)); - int start = (int) parser.currentTokenLocation().getByteOffset(); + int start = (int) parser.getTokenLocation().getByteOffset(); int depth = 1; while (depth > 0) switch (parser.nextToken()) { case START_ARRAY: ++depth; break; case END_ARRAY: --depth; break; } - int end = (int) parser.currentTokenLocation().getByteOffset() + 1; + int end = (int) parser.getTokenLocation().getByteOffset() + 1; trace = new String(json, start, end - start, UTF_8); break; default: diff --git a/vespajlib/src/test/java/com/yahoo/slime/JsonBenchmark.java b/vespajlib/src/test/java/com/yahoo/slime/JsonBenchmark.java index cccc9667e11..ee755a44010 100644 --- a/vespajlib/src/test/java/com/yahoo/slime/JsonBenchmark.java +++ b/vespajlib/src/test/java/com/yahoo/slime/JsonBenchmark.java @@ -43,7 +43,7 @@ public class JsonBenchmark { try (JsonParser jsonParser = jsonFactory.createParser(json)) { JsonToken array = jsonParser.nextToken(); for (JsonToken token = jsonParser.nextToken(); !JsonToken.END_ARRAY.equals(token); token = jsonParser.nextToken()) { - if (JsonToken.FIELD_NAME.equals(token) && "weight".equals(jsonParser.currentName())) { + if (JsonToken.FIELD_NAME.equals(token) && "weight".equals(jsonParser.getCurrentName())) { token = jsonParser.nextToken(); count += jsonParser.getLongValue(); } -- cgit v1.2.3 From 01dd9fd4a69d49922089c46cd6c9be66dced9687 Mon Sep 17 00:00:00 2001 From: Henning Baldersheim Date: Mon, 6 May 2024 13:31:38 +0000 Subject: Avoid methods deprecated in jackson 2.17.1 --- .../validation/ConstantTensorJsonValidator.java | 20 ++++++------- .../yahoo/search/handler/Json2SingleLevelMap.java | 8 +++--- .../java/com/yahoo/document/json/JsonReader.java | 4 +-- .../com/yahoo/document/json/LazyTokenBuffer.java | 4 +-- .../java/com/yahoo/document/json/TokenBuffer.java | 2 +- .../document/json/document/DocumentParser.java | 13 +++++---- .../vespa/metricsproxy/service/MetricsParser.java | 33 +++++++++++----------- .../importer/lightgbm/LightGBMImporter.java | 4 +-- .../search/predicate/PredicateQueryParser.java | 11 ++++---- .../main/java/ai/vespa/feed/client/JsonFeeder.java | 14 ++++----- .../ai/vespa/feed/client/impl/HttpFeedClient.java | 4 +-- .../test/java/com/yahoo/slime/JsonBenchmark.java | 2 +- 12 files changed, 59 insertions(+), 60 deletions(-) (limited to 'config-model/src') diff --git a/config-model/src/main/java/com/yahoo/vespa/model/application/validation/ConstantTensorJsonValidator.java b/config-model/src/main/java/com/yahoo/vespa/model/application/validation/ConstantTensorJsonValidator.java index 40c9a03b126..02a6b243054 100644 --- a/config-model/src/main/java/com/yahoo/vespa/model/application/validation/ConstantTensorJsonValidator.java +++ b/config-model/src/main/java/com/yahoo/vespa/model/application/validation/ConstantTensorJsonValidator.java @@ -132,7 +132,7 @@ public class ConstantTensorJsonValidator { private void consumeTopObject() throws IOException { for (var cur = parser.nextToken(); cur != JsonToken.END_OBJECT; cur = parser.nextToken()) { assertCurrentTokenIs(JsonToken.FIELD_NAME); - String fieldName = parser.getCurrentName(); + String fieldName = parser.currentName(); switch (fieldName) { case FIELD_TYPE -> consumeTypeField(); case FIELD_VALUES -> consumeValuesField(); @@ -189,7 +189,7 @@ public class ConstantTensorJsonValidator { } for (var cur = parser.nextToken(); cur != JsonToken.END_OBJECT; cur = parser.nextToken()) { assertCurrentTokenIs(JsonToken.FIELD_NAME); - validateNumeric(parser.getCurrentName(), parser.nextToken()); + validateNumeric(parser.currentName(), parser.nextToken()); } } @@ -199,7 +199,7 @@ public class ConstantTensorJsonValidator { boolean seenValue = false; for (int i = 0; i < 2; i++) { assertNextTokenIs(JsonToken.FIELD_NAME); - String fieldName = parser.getCurrentName(); + String fieldName = parser.currentName(); switch (fieldName) { case FIELD_ADDRESS -> { validateTensorAddress(new HashSet<>(tensorDimensions.keySet())); @@ -228,13 +228,13 @@ public class ConstantTensorJsonValidator { // Iterate within the address key, value pairs while ((parser.nextToken() != JsonToken.END_OBJECT)) { assertCurrentTokenIs(JsonToken.FIELD_NAME); - String dimensionName = parser.getCurrentName(); + String dimensionName = parser.currentName(); TensorType.Dimension dimension = tensorDimensions.get(dimensionName); if (dimension == null) { - throw new InvalidConstantTensorException(parser, String.format("Tensor dimension '%s' does not exist", parser.getCurrentName())); + throw new InvalidConstantTensorException(parser, String.format("Tensor dimension '%s' does not exist", dimensionName)); } if (!cellDimensions.contains(dimensionName)) { - throw new InvalidConstantTensorException(parser, String.format("Duplicate tensor dimension '%s'", parser.getCurrentName())); + throw new InvalidConstantTensorException(parser, String.format("Duplicate tensor dimension '%s'", dimensionName)); } cellDimensions.remove(dimensionName); validateLabel(dimension); @@ -300,7 +300,7 @@ public class ConstantTensorJsonValidator { } private void assertCurrentTokenIs(JsonToken wantedToken) { - assertTokenIs(parser.getCurrentToken(), wantedToken); + assertTokenIs(parser.currentToken(), wantedToken); } private void assertNextTokenIs(JsonToken wantedToken) throws IOException { @@ -316,11 +316,11 @@ public class ConstantTensorJsonValidator { static class InvalidConstantTensorException extends IllegalArgumentException { InvalidConstantTensorException(JsonParser parser, String message) { - super(message + " " + parser.getCurrentLocation().toString()); + super(message + " " + parser.currentLocation().toString()); } InvalidConstantTensorException(JsonParser parser, Exception base) { - super("Failed to parse JSON stream " + parser.getCurrentLocation().toString(), base); + super("Failed to parse JSON stream " + parser.currentLocation().toString(), base); } InvalidConstantTensorException(IOException base) { @@ -412,7 +412,7 @@ public class ConstantTensorJsonValidator { boolean seenValues = false; for (int i = 0; i < 2; i++) { assertNextTokenIs(JsonToken.FIELD_NAME); - String fieldName = parser.getCurrentName(); + String fieldName = parser.currentName(); switch (fieldName) { case FIELD_ADDRESS -> { validateTensorAddress(new HashSet<>(mappedDims)); diff --git a/container-search/src/main/java/com/yahoo/search/handler/Json2SingleLevelMap.java b/container-search/src/main/java/com/yahoo/search/handler/Json2SingleLevelMap.java index 01167be6b8b..fdedbdc2fd9 100644 --- a/container-search/src/main/java/com/yahoo/search/handler/Json2SingleLevelMap.java +++ b/container-search/src/main/java/com/yahoo/search/handler/Json2SingleLevelMap.java @@ -64,8 +64,8 @@ class Json2SingleLevelMap { } void parse(Map map, String parent) throws IOException { - for (parser.nextToken(); parser.getCurrentToken() != JsonToken.END_OBJECT; parser.nextToken()) { - String fieldName = parent + parser.getCurrentName(); + for (parser.nextToken(); parser.currentToken() != JsonToken.END_OBJECT; parser.nextToken()) { + String fieldName = parent + parser.currentName(); JsonToken token = parser.nextToken(); if ((token == JsonToken.VALUE_STRING) || (token == JsonToken.VALUE_NUMBER_FLOAT) || @@ -89,9 +89,9 @@ class Json2SingleLevelMap { } private String skipChildren(JsonParser parser, byte [] input) throws IOException { - JsonLocation start = parser.getCurrentLocation(); + JsonLocation start = parser.currentLocation(); parser.skipChildren(); - JsonLocation end = parser.getCurrentLocation(); + JsonLocation end = parser.currentLocation(); int offset = (int)start.getByteOffset() - 1; return new String(input, offset, (int)(end.getByteOffset() - offset), StandardCharsets.UTF_8); } diff --git a/document/src/main/java/com/yahoo/document/json/JsonReader.java b/document/src/main/java/com/yahoo/document/json/JsonReader.java index 358c0cb65e4..9c621c033bd 100644 --- a/document/src/main/java/com/yahoo/document/json/JsonReader.java +++ b/document/src/main/java/com/yahoo/document/json/JsonReader.java @@ -105,7 +105,7 @@ public class JsonReader { String condition = null; ParsedDocumentOperation operation = null; while (JsonToken.END_OBJECT != parser.nextValue()) { - switch (parser.getCurrentName()) { + switch (parser.currentName()) { case FIELDS -> { documentParseInfo.fieldsBuffer = new LazyTokenBuffer(parser); VespaJsonDocumentReader vespaJsonDocumentReader = new VespaJsonDocumentReader(typeManager.getIgnoreUndefinedFields()); @@ -177,7 +177,7 @@ public class JsonReader { state = END_OF_FEED; throw new IllegalArgumentException(r); } - if ( ! documentParseInfo.isPresent()) { + if (documentParseInfo.isEmpty()) { state = END_OF_FEED; return null; } diff --git a/document/src/main/java/com/yahoo/document/json/LazyTokenBuffer.java b/document/src/main/java/com/yahoo/document/json/LazyTokenBuffer.java index 0fbdd0b28c7..53ddacf6cc3 100644 --- a/document/src/main/java/com/yahoo/document/json/LazyTokenBuffer.java +++ b/document/src/main/java/com/yahoo/document/json/LazyTokenBuffer.java @@ -33,7 +33,7 @@ public class LazyTokenBuffer extends TokenBuffer { public Supplier lookahead() { return new Supplier<>() { int localNesting = nesting(); - Supplier buffered = LazyTokenBuffer.super.lookahead(); + final Supplier buffered = LazyTokenBuffer.super.lookahead(); @Override public Token get() { if (localNesting == 0) return null; @@ -54,7 +54,7 @@ public class LazyTokenBuffer extends TokenBuffer { JsonToken token = parser.nextValue(); if (token == null) throw new IllegalStateException("no more JSON tokens"); - return new Token(token, parser.getCurrentName(), parser.getText()); + return new Token(token, parser.currentName(), parser.getText()); } catch (IOException e) { throw new IllegalArgumentException("failed reading document JSON", e); diff --git a/document/src/main/java/com/yahoo/document/json/TokenBuffer.java b/document/src/main/java/com/yahoo/document/json/TokenBuffer.java index 3a48f71c4cd..c5c022370bf 100644 --- a/document/src/main/java/com/yahoo/document/json/TokenBuffer.java +++ b/document/src/main/java/com/yahoo/document/json/TokenBuffer.java @@ -99,7 +99,7 @@ public class TokenBuffer { } int addFromParser(JsonParser tokens) throws IOException { - add(tokens.currentToken(), tokens.getCurrentName(), tokens.getText()); + add(tokens.currentToken(), tokens.currentName(), tokens.getText()); return nestingOffset(tokens.currentToken()); } diff --git a/document/src/main/java/com/yahoo/document/json/document/DocumentParser.java b/document/src/main/java/com/yahoo/document/json/document/DocumentParser.java index 77e11dcf2a8..c5bcd356c94 100644 --- a/document/src/main/java/com/yahoo/document/json/document/DocumentParser.java +++ b/document/src/main/java/com/yahoo/document/json/document/DocumentParser.java @@ -61,7 +61,7 @@ public class DocumentParser { private boolean parseOneItem(DocumentParseInfo documentParseInfo, boolean docIdAndOperationIsSetExternally) throws IOException { parser.nextValue(); processIndent(); - if (parser.getCurrentName() == null) return false; + if (parser.currentName() == null) return false; if (indentLevel == 1L) { handleIdentLevelOne(documentParseInfo, docIdAndOperationIsSetExternally); } else if (indentLevel == 2L) { @@ -85,17 +85,18 @@ public class DocumentParser { private void handleIdentLevelOne(DocumentParseInfo documentParseInfo, boolean docIdAndOperationIsSetExternally) throws IOException { - JsonToken currentToken = parser.getCurrentToken(); + JsonToken currentToken = parser.currentToken(); + String currentName = parser.currentName(); if ((currentToken == JsonToken.VALUE_TRUE || currentToken == JsonToken.VALUE_FALSE) && - CREATE_IF_NON_EXISTENT.equals(parser.getCurrentName())) { + CREATE_IF_NON_EXISTENT.equals(currentName)) { documentParseInfo.create = Optional.of(currentToken == JsonToken.VALUE_TRUE); - } else if (currentToken == JsonToken.VALUE_STRING && CONDITION.equals(parser.getCurrentName())) { + } else if (currentToken == JsonToken.VALUE_STRING && CONDITION.equals(currentName)) { documentParseInfo.condition = Optional.of(parser.getText()); } else if (currentToken == JsonToken.VALUE_STRING) { // Value is expected to be set in the header not in the document. Ignore any unknown field // as well. if (! docIdAndOperationIsSetExternally) { - documentParseInfo.operationType = operationNameToOperationType(parser.getCurrentName()); + documentParseInfo.operationType = operationNameToOperationType(currentName); documentParseInfo.documentId = new DocumentId(parser.getText()); } } @@ -104,7 +105,7 @@ public class DocumentParser { private void handleIdentLevelTwo(DocumentParseInfo documentParseInfo) { try { // "fields" opens a dictionary and is therefore on level two which might be surprising. - if (parser.currentToken() == JsonToken.START_OBJECT && FIELDS.equals(parser.getCurrentName())) { + if (parser.currentToken() == JsonToken.START_OBJECT && FIELDS.equals(parser.currentName())) { documentParseInfo.fieldsBuffer.bufferObject(parser); processIndent(); } diff --git a/metrics-proxy/src/main/java/ai/vespa/metricsproxy/service/MetricsParser.java b/metrics-proxy/src/main/java/ai/vespa/metricsproxy/service/MetricsParser.java index 0e33d7dbf2f..052b8425a45 100644 --- a/metrics-proxy/src/main/java/ai/vespa/metricsproxy/service/MetricsParser.java +++ b/metrics-proxy/src/main/java/ai/vespa/metricsproxy/service/MetricsParser.java @@ -53,8 +53,8 @@ public class MetricsParser { throw new IOException("Expected start of object, got " + parser.currentToken()); } - for (parser.nextToken(); parser.getCurrentToken() != JsonToken.END_OBJECT; parser.nextToken()) { - String fieldName = parser.getCurrentName(); + for (parser.nextToken(); parser.currentToken() != JsonToken.END_OBJECT; parser.nextToken()) { + String fieldName = parser.currentName(); JsonToken token = parser.nextToken(); if (fieldName.equals("metrics")) { parseMetrics(parser, consumer); @@ -67,12 +67,12 @@ public class MetricsParser { } static private Instant parseSnapshot(JsonParser parser) throws IOException { - if (parser.getCurrentToken() != JsonToken.START_OBJECT) { + if (parser.currentToken() != JsonToken.START_OBJECT) { throw new IOException("Expected start of 'snapshot' object, got " + parser.currentToken()); } Instant timestamp = Instant.now(); - for (parser.nextToken(); parser.getCurrentToken() != JsonToken.END_OBJECT; parser.nextToken()) { - String fieldName = parser.getCurrentName(); + for (parser.nextToken(); parser.currentToken() != JsonToken.END_OBJECT; parser.nextToken()) { + String fieldName = parser.currentName(); JsonToken token = parser.nextToken(); if (fieldName.equals("to")) { timestamp = Instant.ofEpochSecond(parser.getLongValue()); @@ -88,12 +88,12 @@ public class MetricsParser { // 'metrics' object with 'snapshot' and 'values' arrays static private void parseMetrics(JsonParser parser, Collector consumer) throws IOException { - if (parser.getCurrentToken() != JsonToken.START_OBJECT) { + if (parser.currentToken() != JsonToken.START_OBJECT) { throw new IOException("Expected start of 'metrics' object, got " + parser.currentToken()); } Instant timestamp = Instant.now(); - for (parser.nextToken(); parser.getCurrentToken() != JsonToken.END_OBJECT; parser.nextToken()) { - String fieldName = parser.getCurrentName(); + for (parser.nextToken(); parser.currentToken() != JsonToken.END_OBJECT; parser.nextToken()) { + String fieldName = parser.currentName(); JsonToken token = parser.nextToken(); if (fieldName.equals("snapshot")) { timestamp = parseSnapshot(parser); @@ -109,7 +109,7 @@ public class MetricsParser { // 'values' array static private void parseMetricValues(JsonParser parser, Instant timestamp, Collector consumer) throws IOException { - if (parser.getCurrentToken() != JsonToken.START_ARRAY) { + if (parser.currentToken() != JsonToken.START_ARRAY) { throw new IOException("Expected start of 'metrics:values' array, got " + parser.currentToken()); } @@ -126,8 +126,8 @@ public class MetricsParser { String description = ""; Map dim = Map.of(); List> values = List.of(); - for (parser.nextToken(); parser.getCurrentToken() != JsonToken.END_OBJECT; parser.nextToken()) { - String fieldName = parser.getCurrentName(); + for (parser.nextToken(); parser.currentToken() != JsonToken.END_OBJECT; parser.nextToken()) { + String fieldName = parser.currentName(); JsonToken token = parser.nextToken(); switch (fieldName) { case "name" -> name = parser.getText(); @@ -154,8 +154,8 @@ public class MetricsParser { Set dimensions = new HashSet<>(); - for (parser.nextToken(); parser.getCurrentToken() != JsonToken.END_OBJECT; parser.nextToken()) { - String fieldName = parser.getCurrentName(); + for (parser.nextToken(); parser.currentToken() != JsonToken.END_OBJECT; parser.nextToken()) { + String fieldName = parser.currentName(); JsonToken token = parser.nextToken(); if (token == JsonToken.VALUE_STRING){ @@ -180,17 +180,16 @@ public class MetricsParser { private static List> parseValues(JsonParser parser) throws IOException { List> metrics = new ArrayList<>(); - for (parser.nextToken(); parser.getCurrentToken() != JsonToken.END_OBJECT; parser.nextToken()) { - String fieldName = parser.getCurrentName(); + for (parser.nextToken(); parser.currentToken() != JsonToken.END_OBJECT; parser.nextToken()) { + String metricName = parser.currentName(); JsonToken token = parser.nextToken(); - String metricName = fieldName; if (token == JsonToken.VALUE_NUMBER_INT) { metrics.add(Map.entry(metricName, parser.getLongValue())); } else if (token == JsonToken.VALUE_NUMBER_FLOAT) { double value = parser.getValueAsDouble(); metrics.add(Map.entry(metricName, value == ZERO_DOUBLE ? ZERO_DOUBLE : value)); } else { - throw new IllegalArgumentException("Value for aggregator '" + fieldName + "' is not a number"); + throw new IllegalArgumentException("Value for aggregator '" + metricName + "' is not a number"); } } return metrics; diff --git a/model-integration/src/main/java/ai/vespa/rankingexpression/importer/lightgbm/LightGBMImporter.java b/model-integration/src/main/java/ai/vespa/rankingexpression/importer/lightgbm/LightGBMImporter.java index e1d2f8802a6..6a1e2f2562a 100644 --- a/model-integration/src/main/java/ai/vespa/rankingexpression/importer/lightgbm/LightGBMImporter.java +++ b/model-integration/src/main/java/ai/vespa/rankingexpression/importer/lightgbm/LightGBMImporter.java @@ -34,9 +34,9 @@ public class LightGBMImporter extends ModelImporter { private boolean probe(File modelFile) { try (JsonParser parser = Jackson.mapper().createParser(modelFile)) { while (parser.nextToken() != null) { - JsonToken token = parser.getCurrentToken(); + JsonToken token = parser.currentToken(); if (token == JsonToken.FIELD_NAME) { - if ("tree_info".equals(parser.getCurrentName())) return true; + if ("tree_info".equals(parser.currentName())) return true; } } return false; diff --git a/predicate-search-core/src/main/java/com/yahoo/search/predicate/PredicateQueryParser.java b/predicate-search-core/src/main/java/com/yahoo/search/predicate/PredicateQueryParser.java index 09487506ffe..42b6195549e 100644 --- a/predicate-search-core/src/main/java/com/yahoo/search/predicate/PredicateQueryParser.java +++ b/predicate-search-core/src/main/java/com/yahoo/search/predicate/PredicateQueryParser.java @@ -10,7 +10,6 @@ import java.util.Arrays; /** * Parses predicate queries from JSON. - * * Input JSON is assumed to have the following format: * { * "features": [ @@ -46,7 +45,7 @@ public class PredicateQueryParser { try (JsonParser parser = factory.createParser(json)) { skipToken(parser, JsonToken.START_OBJECT); while (parser.nextToken() != JsonToken.END_OBJECT) { - String fieldName = parser.getCurrentName(); + String fieldName = parser.currentName(); switch (fieldName) { case "features": parseFeatures(parser, JsonParser::getText, featureHandler); @@ -82,7 +81,7 @@ public class PredicateQueryParser { long subqueryBitmap = SubqueryBitmap.DEFAULT_VALUE; // Specifying subquery bitmap is optional. while (parser.nextToken() != JsonToken.END_OBJECT) { - String fieldName = parser.getCurrentName(); + String fieldName = parser.currentName(); skipToken(parser, JsonToken.VALUE_STRING, JsonToken.VALUE_NUMBER_INT); switch (fieldName) { case "k": @@ -100,11 +99,11 @@ public class PredicateQueryParser { } if (key == null) { throw new IllegalArgumentException( - String.format("Feature key is missing! (%s)", parser.getCurrentLocation())); + String.format("Feature key is missing! (%s)", parser.currentLocation())); } if (value == null) { throw new IllegalArgumentException( - String.format("Feature value is missing! (%s)", parser.getCurrentLocation())); + String.format("Feature value is missing! (%s)", parser.currentLocation())); } featureHandler.accept(key, value, subqueryBitmap); } @@ -114,7 +113,7 @@ public class PredicateQueryParser { if (Arrays.stream(expected).noneMatch(e -> e.equals(actual))) { throw new IllegalArgumentException( String.format("Expected a token in %s, got %s (%s).", - Arrays.toString(expected), actual, parser.getTokenLocation())); + Arrays.toString(expected), actual, parser.currentTokenLocation())); } } diff --git a/vespa-feed-client-api/src/main/java/ai/vespa/feed/client/JsonFeeder.java b/vespa-feed-client-api/src/main/java/ai/vespa/feed/client/JsonFeeder.java index 11fb6526210..3111815b332 100644 --- a/vespa-feed-client-api/src/main/java/ai/vespa/feed/client/JsonFeeder.java +++ b/vespa-feed-client-api/src/main/java/ai/vespa/feed/client/JsonFeeder.java @@ -414,7 +414,7 @@ public class JsonFeeder implements Closeable { abstract String getDocumentJson(long start, long end); OperationParseException parseException(String error) { - JsonLocation location = parser.getTokenLocation(); + JsonLocation location = parser.currentLocation(); return new OperationParseException(error + " at offset " + location.getByteOffset() + " (line " + location.getLineNr() + ", column " + location.getColumnNr() + ")"); } @@ -444,13 +444,13 @@ public class JsonFeeder implements Closeable { case "create": parameters = parameters.createIfNonExistent(readBoolean()); break; case "fields": { expect(START_OBJECT); - start = parser.getTokenLocation().getByteOffset(); + start = parser.currentTokenLocation().getByteOffset(); int depth = 1; while (depth > 0) switch (parser.nextToken()) { case START_OBJECT: ++depth; break; case END_OBJECT: --depth; break; } - end = parser.getTokenLocation().getByteOffset() + 1; + end = parser.currentTokenLocation().getByteOffset() + 1; break; } default: throw parseException("Unexpected field name '" + parser.getText() + "'"); @@ -470,7 +470,7 @@ public class JsonFeeder implements Closeable { if (end >= start) throw parseException("Illegal 'fields' object for remove operation"); else - start = end = parser.getTokenLocation().getByteOffset(); // getDocumentJson advances buffer overwrite head. + start = end = parser.currentTokenLocation().getByteOffset(); // getDocumentJson advances buffer overwrite head. } else if (end < start) throw parseException("No 'fields' object for document"); @@ -486,14 +486,14 @@ public class JsonFeeder implements Closeable { private void expect(JsonToken token) throws IOException { if (parser.nextToken() != token) - throw new OperationParseException("Expected '" + token + "' at offset " + parser.getTokenLocation().getByteOffset() + + throw new OperationParseException("Expected '" + token + "' at offset " + parser.currentTokenLocation().getByteOffset() + ", but found '" + parser.currentToken() + "' (" + parser.getText() + ")"); } private String readString() throws IOException { String value = parser.nextTextValue(); if (value == null) - throw new OperationParseException("Expected '" + JsonToken.VALUE_STRING + "' at offset " + parser.getTokenLocation().getByteOffset() + + throw new OperationParseException("Expected '" + JsonToken.VALUE_STRING + "' at offset " + parser.currentTokenLocation().getByteOffset() + ", but found '" + parser.currentToken() + "' (" + parser.getText() + ")"); return value; @@ -502,7 +502,7 @@ public class JsonFeeder implements Closeable { private boolean readBoolean() throws IOException { Boolean value = parser.nextBooleanValue(); if (value == null) - throw new OperationParseException("Expected '" + JsonToken.VALUE_FALSE + "' or '" + JsonToken.VALUE_TRUE + "' at offset " + parser.getTokenLocation().getByteOffset() + + throw new OperationParseException("Expected '" + JsonToken.VALUE_FALSE + "' or '" + JsonToken.VALUE_TRUE + "' at offset " + parser.currentTokenLocation().getByteOffset() + ", but found '" + parser.currentToken() + "' (" + parser.getText() + ")"); return value; diff --git a/vespa-feed-client/src/main/java/ai/vespa/feed/client/impl/HttpFeedClient.java b/vespa-feed-client/src/main/java/ai/vespa/feed/client/impl/HttpFeedClient.java index a30cfd5ec39..9dd11113c0b 100644 --- a/vespa-feed-client/src/main/java/ai/vespa/feed/client/impl/HttpFeedClient.java +++ b/vespa-feed-client/src/main/java/ai/vespa/feed/client/impl/HttpFeedClient.java @@ -219,13 +219,13 @@ class HttpFeedClient implements FeedClient { throw new ResultParseException(documentId, "Expected 'trace' to be an array, but got '" + parser.currentToken() + "' in: " + new String(json, UTF_8)); - int start = (int) parser.getTokenLocation().getByteOffset(); + int start = (int) parser.currentTokenLocation().getByteOffset(); int depth = 1; while (depth > 0) switch (parser.nextToken()) { case START_ARRAY: ++depth; break; case END_ARRAY: --depth; break; } - int end = (int) parser.getTokenLocation().getByteOffset() + 1; + int end = (int) parser.currentTokenLocation().getByteOffset() + 1; trace = new String(json, start, end - start, UTF_8); break; default: diff --git a/vespajlib/src/test/java/com/yahoo/slime/JsonBenchmark.java b/vespajlib/src/test/java/com/yahoo/slime/JsonBenchmark.java index ee755a44010..cccc9667e11 100644 --- a/vespajlib/src/test/java/com/yahoo/slime/JsonBenchmark.java +++ b/vespajlib/src/test/java/com/yahoo/slime/JsonBenchmark.java @@ -43,7 +43,7 @@ public class JsonBenchmark { try (JsonParser jsonParser = jsonFactory.createParser(json)) { JsonToken array = jsonParser.nextToken(); for (JsonToken token = jsonParser.nextToken(); !JsonToken.END_ARRAY.equals(token); token = jsonParser.nextToken()) { - if (JsonToken.FIELD_NAME.equals(token) && "weight".equals(jsonParser.getCurrentName())) { + if (JsonToken.FIELD_NAME.equals(token) && "weight".equals(jsonParser.currentName())) { token = jsonParser.nextToken(); count += jsonParser.getLongValue(); } -- cgit v1.2.3 From 548ee1a5950a972ec5ef6a3be3d18f68035322a5 Mon Sep 17 00:00:00 2001 From: Tor Egge Date: Tue, 7 May 2024 11:01:33 +0200 Subject: Enable setting max-token-length in field match. --- .../java/com/yahoo/schema/document/Matching.java | 4 +++ .../yahoo/schema/parser/ConvertParsedFields.java | 1 + .../yahoo/schema/parser/ParsedMatchSettings.java | 3 ++ .../com/yahoo/schema/processing/ExactMatch.java | 15 ++++++++-- .../com/yahoo/schema/processing/TextMatch.java | 4 +++ config-model/src/main/javacc/SchemaParser.jj | 14 +++++++++- .../yahoo/schema/parser/SchemaParserTestCase.java | 17 ++++++++++++ .../processing/IndexingScriptRewriterTestCase.java | 19 +++++++++++++ .../expressions/ExactExpression.java | 32 ++++++++++++++++++++-- .../expressions/TokenizeExpression.java | 3 ++ .../linguistics/AnnotatorConfig.java | 25 ++++++++++++++++- .../linguistics/LinguisticsAnnotator.java | 23 +++++++++++----- indexinglanguage/src/main/javacc/IndexingParser.jj | 12 ++++++-- .../expressions/ExactTestCase.java | 9 ++++++ .../expressions/TokenizeTestCase.java | 11 ++++++++ .../linguistics/AnnotatorConfigTestCase.java | 5 ++++ .../parser/ExpressionTestCase.java | 2 ++ 17 files changed, 181 insertions(+), 18 deletions(-) (limited to 'config-model/src') diff --git a/config-model/src/main/java/com/yahoo/schema/document/Matching.java b/config-model/src/main/java/com/yahoo/schema/document/Matching.java index 9d68553fa80..33256fa8586 100644 --- a/config-model/src/main/java/com/yahoo/schema/document/Matching.java +++ b/config-model/src/main/java/com/yahoo/schema/document/Matching.java @@ -33,6 +33,8 @@ public class Matching implements Cloneable, Serializable { private Integer maxLength; /** Maximum number of occurrences for each term */ private Integer maxTermOccurrences; + /** Maximum number of characters in a token. */ + private Integer maxTokenLength; private String exactMatchTerminator = null; @@ -61,6 +63,8 @@ public class Matching implements Cloneable, Serializable { public Matching maxLength(int maxLength) { this.maxLength = maxLength; return this; } public Integer maxTermOccurrences() { return maxTermOccurrences; } public Matching maxTermOccurrences(int maxTermOccurrences) { this.maxTermOccurrences = maxTermOccurrences; return this; } + public Integer maxTokenLength() { return maxTokenLength; } + public Matching maxTokenLength(int maxTokenLength) { this.maxTokenLength = maxTokenLength; return this; } public boolean isTypeUserSet() { return typeUserSet; } public MatchAlgorithm getAlgorithm() { return algorithm; } diff --git a/config-model/src/main/java/com/yahoo/schema/parser/ConvertParsedFields.java b/config-model/src/main/java/com/yahoo/schema/parser/ConvertParsedFields.java index 7659a1e6562..173eebe2a94 100644 --- a/config-model/src/main/java/com/yahoo/schema/parser/ConvertParsedFields.java +++ b/config-model/src/main/java/com/yahoo/schema/parser/ConvertParsedFields.java @@ -44,6 +44,7 @@ public class ConvertParsedFields { parsed.getGramSize().ifPresent(gramSize -> field.getMatching().setGramSize(gramSize)); parsed.getMaxLength().ifPresent(maxLength -> field.getMatching().maxLength(maxLength)); parsed.getMaxTermOccurrences().ifPresent(maxTermOccurrences -> field.getMatching().maxTermOccurrences(maxTermOccurrences)); + parsed.getMaxTokenLength().ifPresent(maxTokenLength -> field.getMatching().maxTokenLength(maxTokenLength)); parsed.getMatchAlgorithm().ifPresent (matchingAlgorithm -> field.setMatchingAlgorithm(matchingAlgorithm)); parsed.getExactTerminator().ifPresent diff --git a/config-model/src/main/java/com/yahoo/schema/parser/ParsedMatchSettings.java b/config-model/src/main/java/com/yahoo/schema/parser/ParsedMatchSettings.java index c7d1a215ce3..bac2c894283 100644 --- a/config-model/src/main/java/com/yahoo/schema/parser/ParsedMatchSettings.java +++ b/config-model/src/main/java/com/yahoo/schema/parser/ParsedMatchSettings.java @@ -23,6 +23,7 @@ public class ParsedMatchSettings { private Integer gramSize = null; private Integer maxLength = null; private Integer maxTermOccurrences = null; + private Integer maxTokenLength = null; Optional getMatchType() { return Optional.ofNullable(matchType); } Optional getMatchCase() { return Optional.ofNullable(matchCase); } @@ -31,6 +32,7 @@ public class ParsedMatchSettings { Optional getGramSize() { return Optional.ofNullable(gramSize); } Optional getMaxLength() { return Optional.ofNullable(maxLength); } Optional getMaxTermOccurrences() { return Optional.ofNullable(maxTermOccurrences); } + Optional getMaxTokenLength() { return Optional.ofNullable(maxTokenLength); } // TODO - consider allowing each set only once: void setType(MatchType value) { this.matchType = value; } @@ -40,5 +42,6 @@ public class ParsedMatchSettings { void setGramSize(int value) { this.gramSize = value; } void setMaxLength(int value) { this.maxLength = value; } void setMaxTermOccurrences(int value) { this.maxTermOccurrences = value; } + void setMaxTokenLength(int value) { this.maxTokenLength = value; } } diff --git a/config-model/src/main/java/com/yahoo/schema/processing/ExactMatch.java b/config-model/src/main/java/com/yahoo/schema/processing/ExactMatch.java index 056c37a9830..4313ceb4be1 100644 --- a/config-model/src/main/java/com/yahoo/schema/processing/ExactMatch.java +++ b/config-model/src/main/java/com/yahoo/schema/processing/ExactMatch.java @@ -16,6 +16,7 @@ import com.yahoo.vespa.indexinglanguage.expressions.ForEachExpression; import com.yahoo.vespa.indexinglanguage.expressions.IndexExpression; import com.yahoo.vespa.indexinglanguage.expressions.OutputExpression; import com.yahoo.vespa.indexinglanguage.expressions.ScriptExpression; +import com.yahoo.vespa.indexinglanguage.linguistics.AnnotatorConfig; import com.yahoo.vespa.model.container.search.QueryProfiles; /** @@ -75,7 +76,11 @@ public class ExactMatch extends Processor { } ScriptExpression script = field.getIndexingScript(); if (new ExpressionSearcher<>(IndexExpression.class).containedIn(script)) { - field.setIndexingScript(schema.getName(), (ScriptExpression)new MyProvider(schema).convert(field.getIndexingScript())); + var maxTokenLength = field.getMatching().maxTokenLength(); + if (maxTokenLength == null) { + maxTokenLength = AnnotatorConfig.getDefaultMaxTokenLength(); + } + field.setIndexingScript(schema.getName(), (ScriptExpression)new MyProvider(schema, maxTokenLength).convert(field.getIndexingScript())); } } @@ -85,8 +90,12 @@ public class ExactMatch extends Processor { private static class MyProvider extends TypedTransformProvider { - MyProvider(Schema schema) { + private int maxTokenLength; + + MyProvider(Schema schema, int maxTokenLength) + { super(ExactExpression.class, schema); + this.maxTokenLength = maxTokenLength; } @Override @@ -96,7 +105,7 @@ public class ExactMatch extends Processor { @Override protected Expression newTransform(DataType fieldType) { - Expression exp = new ExactExpression(); + Expression exp = new ExactExpression(maxTokenLength); if (fieldType instanceof CollectionDataType) { exp = new ForEachExpression(exp); } diff --git a/config-model/src/main/java/com/yahoo/schema/processing/TextMatch.java b/config-model/src/main/java/com/yahoo/schema/processing/TextMatch.java index 1ff019038fc..3f23cbc9b2d 100644 --- a/config-model/src/main/java/com/yahoo/schema/processing/TextMatch.java +++ b/config-model/src/main/java/com/yahoo/schema/processing/TextMatch.java @@ -70,6 +70,10 @@ public class TextMatch extends Processor { if (maxTermOccurrences != null) { ret.setMaxTermOccurrences(maxTermOccurrences); } + var maxTokenLength = fieldMatching.maxTokenLength(); + if (maxTokenLength != null) { + ret.setMaxTokenLength(maxTokenLength); + } } return ret; } diff --git a/config-model/src/main/javacc/SchemaParser.jj b/config-model/src/main/javacc/SchemaParser.jj index b40f2d0796d..1365c133932 100644 --- a/config-model/src/main/javacc/SchemaParser.jj +++ b/config-model/src/main/javacc/SchemaParser.jj @@ -183,6 +183,7 @@ TOKEN : | < GRAM_SIZE: "gram-size" > | < MAX_LENGTH: "max-length" > | < MAX_OCCURRENCES: "max-occurrences" > +| < MAX_TOKEN_LENGTH: "max-token-length" > | < PREFIX: "prefix" > | < SUBSTRING: "substring" > | < SUFFIX: "suffix" > @@ -1368,7 +1369,8 @@ void matchType(ParsedMatchSettings matchInfo) : { } */ void matchItem(ParsedMatchSettings matchInfo) : { } { - ( matchType(matchInfo) | exactTerminator(matchInfo) | gramSize(matchInfo) | matchSize(matchInfo) | maxTermOccurrences(matchInfo)) + ( matchType(matchInfo) | exactTerminator(matchInfo) | gramSize(matchInfo) | matchSize(matchInfo) | + maxTermOccurrences(matchInfo) | maxTokenLength(matchInfo) ) } void exactTerminator(ParsedMatchSettings matchInfo) : @@ -1413,6 +1415,16 @@ void maxTermOccurrences(ParsedMatchSettings matchInfo) : } } +void maxTokenLength(ParsedMatchSettings matchInfo) : +{ + int maxTokenLength; +} +{ + maxTokenLength = integer() { + matchInfo.setMaxTokenLength(maxTokenLength); + } +} + /** * Consumes a rank statement of a field element. * diff --git a/config-model/src/test/java/com/yahoo/schema/parser/SchemaParserTestCase.java b/config-model/src/test/java/com/yahoo/schema/parser/SchemaParserTestCase.java index 34ca6c30a61..4186e352388 100644 --- a/config-model/src/test/java/com/yahoo/schema/parser/SchemaParserTestCase.java +++ b/config-model/src/test/java/com/yahoo/schema/parser/SchemaParserTestCase.java @@ -170,6 +170,23 @@ public class SchemaParserTestCase { assertEquals(11, field.matchSettings().getMaxTermOccurrences().get()); } + @Test + void maxTokenLengthCanBeParsed() throws Exception { + String input = joinLines + ("schema foo {", + " document foo {", + " field bar type string {", + " indexing: summary | index", + " match { max-token-length: 11 }", + " }", + " }", + "}"); + ParsedSchema schema = parseString(input); + var field = schema.getDocument().getFields().get(0); + assertEquals("bar", field.name()); + assertEquals(11, field.matchSettings().getMaxTokenLength().get()); + } + void checkFileParses(String fileName) throws Exception { var schema = parseFile(fileName); assertNotNull(schema); diff --git a/config-model/src/test/java/com/yahoo/schema/processing/IndexingScriptRewriterTestCase.java b/config-model/src/test/java/com/yahoo/schema/processing/IndexingScriptRewriterTestCase.java index de99d46b9ca..355a810f5ff 100644 --- a/config-model/src/test/java/com/yahoo/schema/processing/IndexingScriptRewriterTestCase.java +++ b/config-model/src/test/java/com/yahoo/schema/processing/IndexingScriptRewriterTestCase.java @@ -10,6 +10,7 @@ import com.yahoo.schema.Schema; import com.yahoo.schema.ApplicationBuilder; import com.yahoo.schema.AbstractSchemaTestCase; import com.yahoo.schema.document.BooleanIndexDefinition; +import com.yahoo.schema.document.MatchType; import com.yahoo.schema.document.SDDocumentType; import com.yahoo.schema.document.SDField; import com.yahoo.vespa.documentmodel.SummaryField; @@ -155,6 +156,24 @@ public class IndexingScriptRewriterTestCase extends AbstractSchemaTestCase { field); } + @Test + void requireThatMaxTokenLengthIsPropagated() { + var field = new SDField("test", DataType.STRING); + field.getMatching().maxTokenLength(10); + field.parseIndexingScript("test", "{ summary | index }"); + assertIndexingScript("{ input test | tokenize normalize stem:\"BEST\" max-token-length:10 | summary test | index test; }", + field); + } + + @Test + void requireThatMaxTokenLengthIsPropagatedForWordMatch() { + var field = new SDField("test", DataType.STRING); + field.getMatching().maxTokenLength(10).setType(MatchType.WORD); + field.parseIndexingScript("test", "{ summary | index }"); + assertIndexingScript("{ input test | exact max-token-length:10 | summary test | index test; }", + field); + } + private static void assertIndexingScript(String expectedScript, SDField unprocessedField) { assertEquals(expectedScript, processField(unprocessedField).toString()); diff --git a/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/expressions/ExactExpression.java b/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/expressions/ExactExpression.java index 855430f45fc..7481363b737 100644 --- a/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/expressions/ExactExpression.java +++ b/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/expressions/ExactExpression.java @@ -12,6 +12,9 @@ import com.yahoo.document.annotation.SpanTrees; import com.yahoo.document.datatypes.IntegerFieldValue; import com.yahoo.document.datatypes.StringFieldValue; import com.yahoo.language.process.TokenType; +import com.yahoo.vespa.indexinglanguage.linguistics.AnnotatorConfig; + +import java.util.OptionalInt; import static com.yahoo.language.LinguisticsCase.toLowerCase; @@ -20,8 +23,19 @@ import static com.yahoo.language.LinguisticsCase.toLowerCase; */ public final class ExactExpression extends Expression { - public ExactExpression() { + private int maxTokenLength; + + private ExactExpression(OptionalInt maxTokenLength) { super(DataType.STRING); + this.maxTokenLength = maxTokenLength.isPresent() ? maxTokenLength.getAsInt() : AnnotatorConfig.getDefaultMaxTokenLength(); + } + + public ExactExpression() { + this(OptionalInt.empty());; + } + + public ExactExpression(int maxTokenLength) { + this(OptionalInt.of(maxTokenLength)); } @Override @@ -36,6 +50,12 @@ public final class ExactExpression extends Expression { String next = toLowerCase(prev); SpanTree tree = output.getSpanTree(SpanTrees.LINGUISTICS); + if (next.length() > maxTokenLength) { + if (tree != null) { + output.removeSpanTree(SpanTrees.LINGUISTICS); + } + return; + } SpanList root; if (tree == null) { root = new SpanList(); @@ -64,8 +84,14 @@ public final class ExactExpression extends Expression { } @Override - public String toString() { - return "exact"; + public String toString() + { + StringBuilder ret = new StringBuilder(); + ret.append("exact"); + if (maxTokenLength != AnnotatorConfig.getDefaultMaxTokenLength()) { + ret.append(" max-token-length:" + maxTokenLength); + } + return ret.toString(); } @Override diff --git a/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/expressions/TokenizeExpression.java b/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/expressions/TokenizeExpression.java index 849bc075a64..a3c404e50c3 100644 --- a/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/expressions/TokenizeExpression.java +++ b/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/expressions/TokenizeExpression.java @@ -69,6 +69,9 @@ public final class TokenizeExpression extends Expression { if (config.hasNonDefaultMaxTokenizeLength()) { ret.append(" max-length:" + config.getMaxTokenizeLength()); } + if (config.hasNonDefaultMaxTokenLength()) { + ret.append(" max-token-length:" + config.getMaxTokenLength()); + } if (config.hasNonDefaultMaxTermOccurrences()) { ret.append(" max-occurrences:" + config.getMaxTermOccurrences()); } diff --git a/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/linguistics/AnnotatorConfig.java b/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/linguistics/AnnotatorConfig.java index 4e5ef0d90df..6522e284fc8 100644 --- a/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/linguistics/AnnotatorConfig.java +++ b/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/linguistics/AnnotatorConfig.java @@ -14,14 +14,17 @@ public class AnnotatorConfig implements Cloneable { private StemMode stemMode; private boolean removeAccents; private int maxTermOccurrences; + private int maxTokenLength; private int maxTokenizeLength; public static final int DEFAULT_MAX_TERM_OCCURRENCES; + private static final int DEFAULT_MAX_TOKEN_LENGTH; private static final int DEFAULT_MAX_TOKENIZE_LENGTH; static { IlscriptsConfig defaults = new IlscriptsConfig(new IlscriptsConfig.Builder()); DEFAULT_MAX_TERM_OCCURRENCES = defaults.maxtermoccurrences(); + DEFAULT_MAX_TOKEN_LENGTH = defaults.maxtokenlength(); DEFAULT_MAX_TOKENIZE_LENGTH = defaults.fieldmatchmaxlength(); } @@ -30,6 +33,7 @@ public class AnnotatorConfig implements Cloneable { stemMode = StemMode.NONE; removeAccents = false; maxTermOccurrences = DEFAULT_MAX_TERM_OCCURRENCES; + maxTokenLength = DEFAULT_MAX_TOKEN_LENGTH; maxTokenizeLength = DEFAULT_MAX_TOKENIZE_LENGTH; } @@ -38,6 +42,7 @@ public class AnnotatorConfig implements Cloneable { stemMode = rhs.stemMode; removeAccents = rhs.removeAccents; maxTermOccurrences = rhs.maxTermOccurrences; + maxTokenLength = rhs.maxTokenLength; maxTokenizeLength = rhs.maxTokenizeLength; } @@ -82,6 +87,17 @@ public class AnnotatorConfig implements Cloneable { return this; } + public AnnotatorConfig setMaxTokenLength(int maxTokenLength) { + this.maxTokenLength = maxTokenLength; + return this; + } + + public int getMaxTokenLength() { + return maxTokenLength; + } + + public static int getDefaultMaxTokenLength() { return DEFAULT_MAX_TOKEN_LENGTH; } + public AnnotatorConfig setMaxTokenizeLength(int maxTokenizeLength) { this.maxTokenizeLength = maxTokenizeLength; return this; @@ -91,6 +107,10 @@ public class AnnotatorConfig implements Cloneable { return maxTokenizeLength; } + public boolean hasNonDefaultMaxTokenLength() { + return maxTokenLength != DEFAULT_MAX_TOKEN_LENGTH; + } + public boolean hasNonDefaultMaxTokenizeLength() { return maxTokenizeLength != DEFAULT_MAX_TOKENIZE_LENGTH; } @@ -116,6 +136,9 @@ public class AnnotatorConfig implements Cloneable { if (maxTermOccurrences != rhs.maxTermOccurrences) { return false; } + if (maxTokenLength != rhs.maxTokenLength) { + return false; + } if (maxTokenizeLength != rhs.maxTokenizeLength) { return false; } @@ -125,7 +148,7 @@ public class AnnotatorConfig implements Cloneable { @Override public int hashCode() { return getClass().hashCode() + language.hashCode() + stemMode.hashCode() + - Boolean.valueOf(removeAccents).hashCode() + maxTermOccurrences + maxTokenizeLength; + Boolean.valueOf(removeAccents).hashCode() + maxTermOccurrences + maxTokenLength + maxTokenizeLength; } } diff --git a/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/linguistics/LinguisticsAnnotator.java b/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/linguistics/LinguisticsAnnotator.java index 86d4e91a567..913b874c6f6 100644 --- a/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/linguistics/LinguisticsAnnotator.java +++ b/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/linguistics/LinguisticsAnnotator.java @@ -78,7 +78,8 @@ public class LinguisticsAnnotator { TermOccurrences termOccurrences = new TermOccurrences(config.getMaxTermOccurrences()); SpanTree tree = new SpanTree(SpanTrees.LINGUISTICS); for (Token token : tokens) - addAnnotationSpan(text.getString(), tree.spanList(), token, config.getStemMode(), termOccurrences); + addAnnotationSpan(text.getString(), tree.spanList(), token, config.getStemMode(), termOccurrences, + config.getMaxTokenLength()); if (tree.numAnnotations() == 0) return false; text.setSpanTree(tree); @@ -100,17 +101,22 @@ public class LinguisticsAnnotator { return new Annotation(AnnotationTypes.TERM, new StringFieldValue(term)); } - private static void addAnnotation(Span here, String term, String orig, TermOccurrences termOccurrences) { + private static void addAnnotation(Span here, String term, String orig, TermOccurrences termOccurrences, + int maxTokenLength) { + if (term.length() > maxTokenLength) { + return; + } if (termOccurrences.termCountBelowLimit(term)) { here.annotate(termAnnotation(term, orig)); } } - private static void addAnnotationSpan(String input, SpanList parent, Token token, StemMode mode, TermOccurrences termOccurrences) { + private static void addAnnotationSpan(String input, SpanList parent, Token token, StemMode mode, + TermOccurrences termOccurrences, int maxTokenLength) { if ( ! token.isSpecialToken()) { if (token.getNumComponents() > 0) { for (int i = 0; i < token.getNumComponents(); ++i) { - addAnnotationSpan(input, parent, token.getComponent(i), mode, termOccurrences); + addAnnotationSpan(input, parent, token.getComponent(i), mode, termOccurrences, maxTokenLength); } return; } @@ -130,18 +136,21 @@ public class LinguisticsAnnotator { String lowercasedOrig = toLowerCase(token.getOrig()); String term = token.getTokenString(); if (term != null) { - addAnnotation(where, term, token.getOrig(), termOccurrences); + addAnnotation(where, term, token.getOrig(), termOccurrences, maxTokenLength); if ( ! term.equals(lowercasedOrig)) - addAnnotation(where, lowercasedOrig, token.getOrig(), termOccurrences); + addAnnotation(where, lowercasedOrig, token.getOrig(), termOccurrences, maxTokenLength); } for (int i = 0; i < token.getNumStems(); i++) { String stem = token.getStem(i); if (! (stem.equals(lowercasedOrig) || stem.equals(term))) - addAnnotation(where, stem, token.getOrig(), termOccurrences); + addAnnotation(where, stem, token.getOrig(), termOccurrences, maxTokenLength); } } else { String term = token.getTokenString(); if (term == null || term.trim().isEmpty()) return; + if (term.length() > maxTokenLength) { + return; + } if (termOccurrences.termCountBelowLimit(term)) { parent.span((int)token.getOffset(), token.getOrig().length()).annotate(termAnnotation(term, token.getOrig())); } diff --git a/indexinglanguage/src/main/javacc/IndexingParser.jj b/indexinglanguage/src/main/javacc/IndexingParser.jj index 77591d3e54e..29ca5270db8 100644 --- a/indexinglanguage/src/main/javacc/IndexingParser.jj +++ b/indexinglanguage/src/main/javacc/IndexingParser.jj @@ -174,6 +174,7 @@ TOKEN : | | | + | | | | @@ -407,10 +408,13 @@ Expression embedExp() : { return new EmbedExpression(embedders, embedderId, embedderArguments); } } -Expression exactExp() : { } +Expression exactExp() : { - ( ) - { return new ExactExpression(); } + int maxTokenLength = annotatorCfg.getMaxTokenLength(); +} +{ + ( [ maxTokenLength = integer() ] ) + { return new ExactExpression(maxTokenLength); } } Expression flattenExp() : { } @@ -686,11 +690,13 @@ AnnotatorConfig tokenizeCfg() : String str = "SHORTEST"; Integer maxLength; Integer maxTermOccurrences; + Integer maxTokenLength; } { ( ( str = string() ) ? { val.setStemMode(str); } | maxLength = integer() { val.setMaxTokenizeLength(maxLength); } | maxTermOccurrences = integer() { val.setMaxTermOccurrences(maxTermOccurrences); } | + maxTokenLength = integer() { val.setMaxTokenLength(maxTokenLength); } | { val.setRemoveAccents(true); } )+ { return val; } } diff --git a/indexinglanguage/src/test/java/com/yahoo/vespa/indexinglanguage/expressions/ExactTestCase.java b/indexinglanguage/src/test/java/com/yahoo/vespa/indexinglanguage/expressions/ExactTestCase.java index 403d1820f70..b338c45f7a4 100644 --- a/indexinglanguage/src/test/java/com/yahoo/vespa/indexinglanguage/expressions/ExactTestCase.java +++ b/indexinglanguage/src/test/java/com/yahoo/vespa/indexinglanguage/expressions/ExactTestCase.java @@ -62,6 +62,15 @@ public class ExactTestCase { assertAnnotation(0, 3, null, (StringFieldValue)ctx.getValue()); } + @Test + public void requireThatLongStringsAreNotAnnotated() { + ExecutionContext ctx = new ExecutionContext(new SimpleTestAdapter()); + ctx.setValue(new StringFieldValue("foo")); + new ExactExpression(2).execute(ctx); + + assertNull(((StringFieldValue)ctx.getValue()).getSpanTree(SpanTrees.LINGUISTICS)); + } + @Test public void requireThatEmptyStringsAreNotAnnotated() { ExecutionContext ctx = new ExecutionContext(new SimpleTestAdapter()); diff --git a/indexinglanguage/src/test/java/com/yahoo/vespa/indexinglanguage/expressions/TokenizeTestCase.java b/indexinglanguage/src/test/java/com/yahoo/vespa/indexinglanguage/expressions/TokenizeTestCase.java index 01ffbe359f3..7ed3ab410a3 100644 --- a/indexinglanguage/src/test/java/com/yahoo/vespa/indexinglanguage/expressions/TokenizeTestCase.java +++ b/indexinglanguage/src/test/java/com/yahoo/vespa/indexinglanguage/expressions/TokenizeTestCase.java @@ -62,4 +62,15 @@ public class TokenizeTestCase { assertTrue(val instanceof StringFieldValue); assertNotNull(((StringFieldValue)val).getSpanTree(SpanTrees.LINGUISTICS)); } + + @Test + public void requireThatLongWordIsDropped() { + ExecutionContext ctx = new ExecutionContext(new SimpleTestAdapter()); + ctx.setValue(new StringFieldValue("foo")); + new TokenizeExpression(new SimpleLinguistics(), new AnnotatorConfig().setMaxTokenLength(2)).execute(ctx); + + FieldValue val = ctx.getValue(); + assertTrue(val instanceof StringFieldValue); + assertNull(((StringFieldValue)val).getSpanTree(SpanTrees.LINGUISTICS)); + } } diff --git a/indexinglanguage/src/test/java/com/yahoo/vespa/indexinglanguage/linguistics/AnnotatorConfigTestCase.java b/indexinglanguage/src/test/java/com/yahoo/vespa/indexinglanguage/linguistics/AnnotatorConfigTestCase.java index 0d34d2841fd..c3131e28906 100644 --- a/indexinglanguage/src/test/java/com/yahoo/vespa/indexinglanguage/linguistics/AnnotatorConfigTestCase.java +++ b/indexinglanguage/src/test/java/com/yahoo/vespa/indexinglanguage/linguistics/AnnotatorConfigTestCase.java @@ -27,6 +27,8 @@ public class AnnotatorConfigTestCase { assertTrue(config.getRemoveAccents()); config.setRemoveAccents(false); assertFalse(config.getRemoveAccents()); + config.setMaxTokenLength(10); + assertEquals(10, config.getMaxTokenLength()); } @Test @@ -35,11 +37,13 @@ public class AnnotatorConfigTestCase { config.setLanguage(Language.ARABIC); config.setStemMode(StemMode.SHORTEST); config.setRemoveAccents(!config.getRemoveAccents()); + config.setMaxTokenLength(11); AnnotatorConfig other = new AnnotatorConfig(config); assertEquals(config.getLanguage(), other.getLanguage()); assertEquals(config.getStemMode(), other.getStemMode()); assertEquals(config.getRemoveAccents(), other.getRemoveAccents()); + assertEquals(config.getMaxTokenLength(), other.getMaxTokenLength()); } @Test @@ -49,6 +53,7 @@ public class AnnotatorConfigTestCase { assertFalse(config.equals(newConfig(Language.SPANISH, StemMode.SHORTEST, false))); assertFalse(config.equals(newConfig(Language.DUTCH, StemMode.SHORTEST, false))); assertFalse(config.equals(newConfig(Language.DUTCH, StemMode.NONE, false))); + assertNotEquals(config, newConfig(Language.DUTCH, StemMode.NONE, true).setMaxTokenLength(10)); assertEquals(config, newConfig(Language.DUTCH, StemMode.NONE, true)); assertEquals(config.hashCode(), newConfig(Language.DUTCH, StemMode.NONE, true).hashCode()); } diff --git a/indexinglanguage/src/test/java/com/yahoo/vespa/indexinglanguage/parser/ExpressionTestCase.java b/indexinglanguage/src/test/java/com/yahoo/vespa/indexinglanguage/parser/ExpressionTestCase.java index a7ed7ae3e72..1b7c6973f1e 100644 --- a/indexinglanguage/src/test/java/com/yahoo/vespa/indexinglanguage/parser/ExpressionTestCase.java +++ b/indexinglanguage/src/test/java/com/yahoo/vespa/indexinglanguage/parser/ExpressionTestCase.java @@ -27,6 +27,7 @@ public class ExpressionTestCase { assertExpression(ClearStateExpression.class, "clear_state"); assertExpression(EchoExpression.class, "echo"); assertExpression(ExactExpression.class, "exact"); + assertExpression(ExactExpression.class, "exact max-token-length: 10", Optional.of("exact max-token-length:10")); assertExpression(FlattenExpression.class, "flatten"); assertExpression(ForEachExpression.class, "for_each { 1 }"); assertExpression(GetFieldExpression.class, "get_field field1"); @@ -73,6 +74,7 @@ public class ExpressionTestCase { assertExpression(TokenizeExpression.class, "tokenize stem:\"ALL\""); assertExpression(TokenizeExpression.class, "tokenize normalize"); assertExpression(TokenizeExpression.class, "tokenize max-occurrences: 15", Optional.of("tokenize max-occurrences:15")); + assertExpression(TokenizeExpression.class, "tokenize max-token-length: 15", Optional.of("tokenize max-token-length:15")); assertExpression(ToLongExpression.class, "to_long"); assertExpression(ToPositionExpression.class, "to_pos"); assertExpression(ToStringExpression.class, "to_string"); -- cgit v1.2.3 From 96b9b3006a73715e1caa8c4caef81ed7aa5ef09f Mon Sep 17 00:00:00 2001 From: Tor Egge Date: Tue, 7 May 2024 16:21:23 +0200 Subject: Unit test that struct field with index and exact match gives expected log warning. --- .../schema/processing/TypedTransformProvider.java | 4 ++++ .../validation/ComplexFieldsValidatorTestCase.java | 26 ++++++++++++++++++++++ 2 files changed, 30 insertions(+) (limited to 'config-model/src') diff --git a/config-model/src/main/java/com/yahoo/schema/processing/TypedTransformProvider.java b/config-model/src/main/java/com/yahoo/schema/processing/TypedTransformProvider.java index 8ccc8870419..3d4934ed841 100644 --- a/config-model/src/main/java/com/yahoo/schema/processing/TypedTransformProvider.java +++ b/config-model/src/main/java/com/yahoo/schema/processing/TypedTransformProvider.java @@ -29,6 +29,10 @@ public abstract class TypedTransformProvider extends ValueTransformProvider { protected final boolean requiresTransform(Expression exp) { if (exp instanceof OutputExpression) { String fieldName = ((OutputExpression)exp).getFieldName(); + if (fieldName == null) { + // Incomplete output expressions never require a transform. + return false; + } if (exp instanceof AttributeExpression) { Attribute attribute = schema.getAttribute(fieldName); if (attribute == null) diff --git a/config-model/src/test/java/com/yahoo/vespa/model/application/validation/ComplexFieldsValidatorTestCase.java b/config-model/src/test/java/com/yahoo/vespa/model/application/validation/ComplexFieldsValidatorTestCase.java index ae1db366c9f..2e51a425f6d 100644 --- a/config-model/src/test/java/com/yahoo/vespa/model/application/validation/ComplexFieldsValidatorTestCase.java +++ b/config-model/src/test/java/com/yahoo/vespa/model/application/validation/ComplexFieldsValidatorTestCase.java @@ -142,6 +142,32 @@ public class ComplexFieldsValidatorTestCase { "Remove setting or change to 'indexing: attribute' if needed for matching.")); } + @Test + void logs_warning_when_complex_fields_have_struct_fields_with_index_and_exact_match() throws IOException, SAXException { + var logger = new MyLogger(); + createModelAndValidate(joinLines( + "schema test {", + " document test {", + " field nesteds type array {", + " struct-field foo {", + " indexing: attribute | index", + " match {", + " exact", + " exact-terminator: '@@'", + " }", + " }", + " }", + " struct nested {", + " field foo type string {}", + " }", + " }", + "}"), logger); + assertTrue(logger.message.toString().contains("For cluster 'mycluster', schema 'test': " + + "The following complex fields have struct fields with 'indexing: index' which is " + + "not supported and has no effect: nesteds (nesteds.foo). " + + "Remove setting or change to 'indexing: attribute' if needed for matching.")); + } + @Test void validation_passes_when_only_supported_struct_field_attributes_are_used() throws IOException, SAXException { createModelAndValidate(joinLines("search test {", -- cgit v1.2.3 From 306c16b72b60c1d2a0cad29ce347f1f9ff368b7f Mon Sep 17 00:00:00 2001 From: Henning Baldersheim Date: Mon, 13 May 2024 15:53:33 +0200 Subject: No need to implement TypedKey interface naymore. --- .../main/java/com/yahoo/vespa/documentmodel/SummaryField.java | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'config-model/src') diff --git a/config-model/src/main/java/com/yahoo/vespa/documentmodel/SummaryField.java b/config-model/src/main/java/com/yahoo/vespa/documentmodel/SummaryField.java index d50d5e36134..785b45d8def 100644 --- a/config-model/src/main/java/com/yahoo/vespa/documentmodel/SummaryField.java +++ b/config-model/src/main/java/com/yahoo/vespa/documentmodel/SummaryField.java @@ -3,10 +3,12 @@ package com.yahoo.vespa.documentmodel; import com.yahoo.document.DataType; import com.yahoo.document.Field; -import com.yahoo.schema.document.TypedKey; import java.io.Serializable; -import java.util.*; +import java.util.Collection; +import java.util.Iterator; +import java.util.LinkedHashSet; +import java.util.Set; import java.util.stream.Collectors; import static com.yahoo.text.Lowercase.toLowerCase; @@ -16,7 +18,7 @@ import static com.yahoo.text.Lowercase.toLowerCase; * * @author bratseth */ -public class SummaryField extends Field implements Cloneable, TypedKey { +public class SummaryField extends Field implements Cloneable { /** A source (field name). */ public static class Source implements Serializable { @@ -62,7 +64,7 @@ public class SummaryField extends Field implements Cloneable, TypedKey { */ private Set sources = new java.util.LinkedHashSet<>(); - private Set destinations =new java.util.LinkedHashSet<>(); + private Set destinations = new java.util.LinkedHashSet<>(); /** True if this field was defined implicitly */ private boolean implicit = false; -- cgit v1.2.3 From 2b8a5e1671934ae4749484157d3d1a8acddcacb1 Mon Sep 17 00:00:00 2001 From: Henning Baldersheim Date: Mon, 13 May 2024 15:57:14 +0200 Subject: GC TypedKey --- .../main/java/com/yahoo/schema/document/SDField.java | 6 +++--- .../java/com/yahoo/schema/document/TypedKey.java | 20 -------------------- 2 files changed, 3 insertions(+), 23 deletions(-) delete mode 100644 config-model/src/main/java/com/yahoo/schema/document/TypedKey.java (limited to 'config-model/src') diff --git a/config-model/src/main/java/com/yahoo/schema/document/SDField.java b/config-model/src/main/java/com/yahoo/schema/document/SDField.java index f165141b16e..2483fa47667 100644 --- a/config-model/src/main/java/com/yahoo/schema/document/SDField.java +++ b/config-model/src/main/java/com/yahoo/schema/document/SDField.java @@ -46,7 +46,7 @@ import java.util.TreeMap; * * @author bratseth */ -public class SDField extends Field implements TypedKey, ImmutableSDField { +public class SDField extends Field implements ImmutableSDField { /** Use this field for modifying index-structure, even if it doesn't have any indexing code */ private boolean indexStructureField = false; @@ -315,7 +315,7 @@ public class SDField extends Field implements TypedKey, ImmutableSDField { supplyStructField.accept(field.getName(), field.getDataType()); } } - if ((subType == null) && (structFields.size() > 0)) { + if ((subType == null) && (!structFields.isEmpty())) { throw new IllegalArgumentException("Cannot find matching (repo=" + sdoc + ") for subfields in " + this + " [" + getDataType() + getDataType().getClass() + "] with " + structFields.size() + " struct fields"); @@ -627,7 +627,7 @@ public class SDField extends Field implements TypedKey, ImmutableSDField { public Attribute addAttribute(Attribute attribute) { String name = attribute.getName(); - if (name == null || "".equals(name)) { + if (name == null || name.isEmpty()) { name = getName(); attribute.setName(name); } diff --git a/config-model/src/main/java/com/yahoo/schema/document/TypedKey.java b/config-model/src/main/java/com/yahoo/schema/document/TypedKey.java deleted file mode 100644 index 652d21d7f7d..00000000000 --- a/config-model/src/main/java/com/yahoo/schema/document/TypedKey.java +++ /dev/null @@ -1,20 +0,0 @@ -// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -package com.yahoo.schema.document; - -import com.yahoo.document.DataType; - -/** - * Common interface for various typed key (or field definitions). - * Used by code which wants to use common algorithms for dealing with typed keys, like the logical mapping - * - * @author bratseth - */ -public interface TypedKey { - - String getName(); - - void setDataType(DataType type); - - DataType getDataType(); - -} -- cgit v1.2.3 From c606238e6f094e12bd224bef2adce544dd34638a Mon Sep 17 00:00:00 2001 From: Tor Egge Date: Mon, 13 May 2024 16:05:43 +0200 Subject: Don't implicitly set word match for tensor attribute fields. --- .../schema/processing/AttributesImplicitWord.java | 3 ++ .../src/test/derived/attributes/index-info.cfg | 2 -- .../nearestneighbor_streaming/vsmfields.cfg | 8 ++--- .../src/test/derived/tensor/index-info.cfg | 10 ------ .../change/search/ContentClusterFixture.java | 15 +++++--- .../search/IndexingScriptChangeValidatorTest.java | 40 ++++++++++++++++++++++ 6 files changed, 58 insertions(+), 20 deletions(-) (limited to 'config-model/src') diff --git a/config-model/src/main/java/com/yahoo/schema/processing/AttributesImplicitWord.java b/config-model/src/main/java/com/yahoo/schema/processing/AttributesImplicitWord.java index 767593b82d0..769f0c9de92 100644 --- a/config-model/src/main/java/com/yahoo/schema/processing/AttributesImplicitWord.java +++ b/config-model/src/main/java/com/yahoo/schema/processing/AttributesImplicitWord.java @@ -2,6 +2,7 @@ package com.yahoo.schema.processing; import com.yahoo.config.application.api.DeployLogger; +import com.yahoo.document.TensorDataType; import com.yahoo.schema.RankProfileRegistry; import com.yahoo.document.DataType; import com.yahoo.schema.Schema; @@ -45,6 +46,8 @@ public class AttributesImplicitWord extends Processor { private boolean fieldImplicitlyWordMatch(ImmutableSDField field) { // numeric types should not trigger exact-match query parsing if (field.getDataType().getPrimitiveType() instanceof NumericDataType) return false; + // Tensor type should not trigger exact-match query parsing + if (field.getDataType() instanceof TensorDataType) return false; return (! field.hasIndex() && !field.getAttributes().isEmpty() diff --git a/config-model/src/test/derived/attributes/index-info.cfg b/config-model/src/test/derived/attributes/index-info.cfg index 1d4e8f485b3..245cff48d15 100644 --- a/config-model/src/test/derived/attributes/index-info.cfg +++ b/config-model/src/test/derived/attributes/index-info.cfg @@ -175,8 +175,6 @@ indexinfo[].command[].indexname "a13" indexinfo[].command[].command "attribute" indexinfo[].command[].indexname "a13" indexinfo[].command[].command "type tensor(x{})" -indexinfo[].command[].indexname "a13" -indexinfo[].command[].command "word" indexinfo[].command[].indexname "a7_arr" indexinfo[].command[].command "lowercase" indexinfo[].command[].indexname "a7_arr" diff --git a/config-model/src/test/derived/nearestneighbor_streaming/vsmfields.cfg b/config-model/src/test/derived/nearestneighbor_streaming/vsmfields.cfg index ab9a96f819b..ec06d01f05a 100644 --- a/config-model/src/test/derived/nearestneighbor_streaming/vsmfields.cfg +++ b/config-model/src/test/derived/nearestneighbor_streaming/vsmfields.cfg @@ -3,25 +3,25 @@ searchall 1 fieldspec[].name "vec_a" fieldspec[].searchmethod NEAREST_NEIGHBOR fieldspec[].arg1 "EUCLIDEAN" -fieldspec[].normalize LOWERCASE +fieldspec[].normalize LOWERCASE_AND_FOLD fieldspec[].maxlength 1048576 fieldspec[].fieldtype ATTRIBUTE fieldspec[].name "vec_b" fieldspec[].searchmethod NEAREST_NEIGHBOR fieldspec[].arg1 "ANGULAR" -fieldspec[].normalize LOWERCASE +fieldspec[].normalize LOWERCASE_AND_FOLD fieldspec[].maxlength 1048576 fieldspec[].fieldtype ATTRIBUTE fieldspec[].name "vec_c" fieldspec[].searchmethod NEAREST_NEIGHBOR fieldspec[].arg1 "INNERPRODUCT" -fieldspec[].normalize LOWERCASE +fieldspec[].normalize LOWERCASE_AND_FOLD fieldspec[].maxlength 1048576 fieldspec[].fieldtype ATTRIBUTE fieldspec[].name "vec_d" fieldspec[].searchmethod NONE fieldspec[].arg1 "" -fieldspec[].normalize LOWERCASE +fieldspec[].normalize LOWERCASE_AND_FOLD fieldspec[].maxlength 1048576 fieldspec[].fieldtype ATTRIBUTE documenttype[].name "test" diff --git a/config-model/src/test/derived/tensor/index-info.cfg b/config-model/src/test/derived/tensor/index-info.cfg index c9ce2433e17..2402f074837 100644 --- a/config-model/src/test/derived/tensor/index-info.cfg +++ b/config-model/src/test/derived/tensor/index-info.cfg @@ -9,26 +9,18 @@ indexinfo[].command[].indexname "f2" indexinfo[].command[].command "attribute" indexinfo[].command[].indexname "f2" indexinfo[].command[].command "type tensor(x[2],y[1])" -indexinfo[].command[].indexname "f2" -indexinfo[].command[].command "word" indexinfo[].command[].indexname "f3" indexinfo[].command[].command "attribute" indexinfo[].command[].indexname "f3" indexinfo[].command[].command "type tensor(x{})" -indexinfo[].command[].indexname "f3" -indexinfo[].command[].command "word" indexinfo[].command[].indexname "f4" indexinfo[].command[].command "attribute" indexinfo[].command[].indexname "f4" indexinfo[].command[].command "type tensor(x[10],y[10])" -indexinfo[].command[].indexname "f4" -indexinfo[].command[].command "word" indexinfo[].command[].indexname "f5" indexinfo[].command[].command "attribute" indexinfo[].command[].indexname "f5" indexinfo[].command[].command "type tensor(x[10])" -indexinfo[].command[].indexname "f5" -indexinfo[].command[].command "word" indexinfo[].command[].indexname "f6" indexinfo[].command[].command "attribute" indexinfo[].command[].indexname "f6" @@ -39,5 +31,3 @@ indexinfo[].command[].indexname "f7" indexinfo[].command[].command "attribute" indexinfo[].command[].indexname "f7" indexinfo[].command[].command "type tensor(p{},x[5])" -indexinfo[].command[].indexname "f7" -indexinfo[].command[].command "word" diff --git a/config-model/src/test/java/com/yahoo/vespa/model/application/validation/change/search/ContentClusterFixture.java b/config-model/src/test/java/com/yahoo/vespa/model/application/validation/change/search/ContentClusterFixture.java index 8778f0c26c0..0677cabafb0 100644 --- a/config-model/src/test/java/com/yahoo/vespa/model/application/validation/change/search/ContentClusterFixture.java +++ b/config-model/src/test/java/com/yahoo/vespa/model/application/validation/change/search/ContentClusterFixture.java @@ -29,11 +29,18 @@ public abstract class ContentClusterFixture { nextCluster = createCluster(nextSd); } + protected ContentClusterFixture(ContentCluster currentCluster, ContentCluster nextCluster) { + this.currentCluster = currentCluster; + this.nextCluster = nextCluster; + } + public ContentClusterFixture(String entireSd) throws Exception { - currentCluster = new ContentClusterBuilder().build( - ContentClusterUtils.createMockRoot(List.of(entireSd))); - nextCluster = new ContentClusterBuilder().build( - ContentClusterUtils.createMockRoot(List.of(entireSd))); + currentCluster = createClusterFromEntireSd(entireSd); + nextCluster = createClusterFromEntireSd(entireSd); + } + + protected static ContentCluster createClusterFromEntireSd(String sdContent) throws Exception { + return new ContentClusterBuilder().build(ContentClusterUtils.createMockRoot(List.of(sdContent))); } private static ContentCluster createCluster(String sdContent) throws Exception { diff --git a/config-model/src/test/java/com/yahoo/vespa/model/application/validation/change/search/IndexingScriptChangeValidatorTest.java b/config-model/src/test/java/com/yahoo/vespa/model/application/validation/change/search/IndexingScriptChangeValidatorTest.java index cd54a20523f..247f01068fa 100644 --- a/config-model/src/test/java/com/yahoo/vespa/model/application/validation/change/search/IndexingScriptChangeValidatorTest.java +++ b/config-model/src/test/java/com/yahoo/vespa/model/application/validation/change/search/IndexingScriptChangeValidatorTest.java @@ -39,6 +39,21 @@ public class IndexingScriptChangeValidatorTest { } } + private static class ComplexFixture extends ContentClusterFixture { + IndexingScriptChangeValidator validator; + public ComplexFixture(String currentSd, String nextSd) throws Exception { + super(createClusterFromEntireSd(currentSd), createClusterFromEntireSd(nextSd)); + validator = new IndexingScriptChangeValidator(ClusterSpec.Id.from("test"), + currentDb().getDerivedConfiguration().getSchema(), + nextDb().getDerivedConfiguration().getSchema()); + } + + @Override + public List validate() { + return validator.validate(); + } + } + private static class ScriptFixture { private final ScriptExpression currentScript; @@ -56,6 +71,9 @@ public class IndexingScriptChangeValidatorTest { private static final String FIELD = "field f1 type string"; private static final String FIELD_F2 = "field f2 type string"; + private static final String TENSOR_FIELD_F1 = "field f1 type tensor(x[2])"; + private static final String TENSOR_FIELD_F2 = "field f2 type tensor(x[2])"; + private static final String TENSOR_FIELD_F3 = "field f3 type tensor(x[2])"; private static VespaConfigChangeAction expectedReindexingAction(String changedMsg, String fromScript, String toScript) { return expectedReindexingAction("f1", changedMsg, fromScript, toScript); @@ -114,6 +132,28 @@ public class IndexingScriptChangeValidatorTest { "{ input f1 | ngram 3 | index f1; }")); } + @Test + void requireThatAddingIndexAspectForExtraTensorFieldWithChangedInputRequireReindexing() throws Exception { + new ComplexFixture(joinLines("schema test {", + " document test {", + " " + TENSOR_FIELD_F1 + " { }", + " " + TENSOR_FIELD_F2 + " { }", + " }", + " " + TENSOR_FIELD_F3 + " { indexing: input f1 | attribute }", + "}"), + joinLines("schema test {", + " document test {", + " " + TENSOR_FIELD_F1 + " { }", + " " + TENSOR_FIELD_F2 + " { }", + " }", + " " + TENSOR_FIELD_F3 + " { indexing: input f2 | index | attribute }", + "}")). + assertValidation(List.of(expectedReindexingAction("f3", "add index aspect", + "{ input f1 | attribute f3; }", + "{ input f2 | index f3 | attribute f3; }"))); + } + + @Test void requireThatSettingDynamicSummaryIsOk() throws Exception { new Fixture(FIELD + " { indexing: summary }", -- cgit v1.2.3