diff options
author | Tor Egge <Tor.Egge@online.no> | 2024-05-07 11:01:33 +0200 |
---|---|---|
committer | Tor Egge <Tor.Egge@online.no> | 2024-05-07 11:01:33 +0200 |
commit | 548ee1a5950a972ec5ef6a3be3d18f68035322a5 (patch) | |
tree | 55aff147a6c7453a2d0bd991b4f69c5b19452cd8 /config-model/src/main | |
parent | bd054f47f0b72deb604c77fb62a888695574eb31 (diff) |
Enable setting max-token-length in field match.
Diffstat (limited to 'config-model/src/main')
6 files changed, 37 insertions, 4 deletions
diff --git a/config-model/src/main/java/com/yahoo/schema/document/Matching.java b/config-model/src/main/java/com/yahoo/schema/document/Matching.java index 9d68553fa80..33256fa8586 100644 --- a/config-model/src/main/java/com/yahoo/schema/document/Matching.java +++ b/config-model/src/main/java/com/yahoo/schema/document/Matching.java @@ -33,6 +33,8 @@ public class Matching implements Cloneable, Serializable { private Integer maxLength; /** Maximum number of occurrences for each term */ private Integer maxTermOccurrences; + /** Maximum number of characters in a token. */ + private Integer maxTokenLength; private String exactMatchTerminator = null; @@ -61,6 +63,8 @@ public class Matching implements Cloneable, Serializable { public Matching maxLength(int maxLength) { this.maxLength = maxLength; return this; } public Integer maxTermOccurrences() { return maxTermOccurrences; } public Matching maxTermOccurrences(int maxTermOccurrences) { this.maxTermOccurrences = maxTermOccurrences; return this; } + public Integer maxTokenLength() { return maxTokenLength; } + public Matching maxTokenLength(int maxTokenLength) { this.maxTokenLength = maxTokenLength; return this; } public boolean isTypeUserSet() { return typeUserSet; } public MatchAlgorithm getAlgorithm() { return algorithm; } diff --git a/config-model/src/main/java/com/yahoo/schema/parser/ConvertParsedFields.java b/config-model/src/main/java/com/yahoo/schema/parser/ConvertParsedFields.java index 7659a1e6562..173eebe2a94 100644 --- a/config-model/src/main/java/com/yahoo/schema/parser/ConvertParsedFields.java +++ b/config-model/src/main/java/com/yahoo/schema/parser/ConvertParsedFields.java @@ -44,6 +44,7 @@ public class ConvertParsedFields { parsed.getGramSize().ifPresent(gramSize -> field.getMatching().setGramSize(gramSize)); parsed.getMaxLength().ifPresent(maxLength -> field.getMatching().maxLength(maxLength)); parsed.getMaxTermOccurrences().ifPresent(maxTermOccurrences -> field.getMatching().maxTermOccurrences(maxTermOccurrences)); + parsed.getMaxTokenLength().ifPresent(maxTokenLength -> field.getMatching().maxTokenLength(maxTokenLength)); parsed.getMatchAlgorithm().ifPresent (matchingAlgorithm -> field.setMatchingAlgorithm(matchingAlgorithm)); parsed.getExactTerminator().ifPresent diff --git a/config-model/src/main/java/com/yahoo/schema/parser/ParsedMatchSettings.java b/config-model/src/main/java/com/yahoo/schema/parser/ParsedMatchSettings.java index c7d1a215ce3..bac2c894283 100644 --- a/config-model/src/main/java/com/yahoo/schema/parser/ParsedMatchSettings.java +++ b/config-model/src/main/java/com/yahoo/schema/parser/ParsedMatchSettings.java @@ -23,6 +23,7 @@ public class ParsedMatchSettings { private Integer gramSize = null; private Integer maxLength = null; private Integer maxTermOccurrences = null; + private Integer maxTokenLength = null; Optional<MatchType> getMatchType() { return Optional.ofNullable(matchType); } Optional<Case> getMatchCase() { return Optional.ofNullable(matchCase); } @@ -31,6 +32,7 @@ public class ParsedMatchSettings { Optional<Integer> getGramSize() { return Optional.ofNullable(gramSize); } Optional<Integer> getMaxLength() { return Optional.ofNullable(maxLength); } Optional<Integer> getMaxTermOccurrences() { return Optional.ofNullable(maxTermOccurrences); } + Optional<Integer> getMaxTokenLength() { return Optional.ofNullable(maxTokenLength); } // TODO - consider allowing each set only once: void setType(MatchType value) { this.matchType = value; } @@ -40,5 +42,6 @@ public class ParsedMatchSettings { void setGramSize(int value) { this.gramSize = value; } void setMaxLength(int value) { this.maxLength = value; } void setMaxTermOccurrences(int value) { this.maxTermOccurrences = value; } + void setMaxTokenLength(int value) { this.maxTokenLength = value; } } diff --git a/config-model/src/main/java/com/yahoo/schema/processing/ExactMatch.java b/config-model/src/main/java/com/yahoo/schema/processing/ExactMatch.java index 056c37a9830..4313ceb4be1 100644 --- a/config-model/src/main/java/com/yahoo/schema/processing/ExactMatch.java +++ b/config-model/src/main/java/com/yahoo/schema/processing/ExactMatch.java @@ -16,6 +16,7 @@ import com.yahoo.vespa.indexinglanguage.expressions.ForEachExpression; import com.yahoo.vespa.indexinglanguage.expressions.IndexExpression; import com.yahoo.vespa.indexinglanguage.expressions.OutputExpression; import com.yahoo.vespa.indexinglanguage.expressions.ScriptExpression; +import com.yahoo.vespa.indexinglanguage.linguistics.AnnotatorConfig; import com.yahoo.vespa.model.container.search.QueryProfiles; /** @@ -75,7 +76,11 @@ public class ExactMatch extends Processor { } ScriptExpression script = field.getIndexingScript(); if (new ExpressionSearcher<>(IndexExpression.class).containedIn(script)) { - field.setIndexingScript(schema.getName(), (ScriptExpression)new MyProvider(schema).convert(field.getIndexingScript())); + var maxTokenLength = field.getMatching().maxTokenLength(); + if (maxTokenLength == null) { + maxTokenLength = AnnotatorConfig.getDefaultMaxTokenLength(); + } + field.setIndexingScript(schema.getName(), (ScriptExpression)new MyProvider(schema, maxTokenLength).convert(field.getIndexingScript())); } } @@ -85,8 +90,12 @@ public class ExactMatch extends Processor { private static class MyProvider extends TypedTransformProvider { - MyProvider(Schema schema) { + private int maxTokenLength; + + MyProvider(Schema schema, int maxTokenLength) + { super(ExactExpression.class, schema); + this.maxTokenLength = maxTokenLength; } @Override @@ -96,7 +105,7 @@ public class ExactMatch extends Processor { @Override protected Expression newTransform(DataType fieldType) { - Expression exp = new ExactExpression(); + Expression exp = new ExactExpression(maxTokenLength); if (fieldType instanceof CollectionDataType) { exp = new ForEachExpression(exp); } diff --git a/config-model/src/main/java/com/yahoo/schema/processing/TextMatch.java b/config-model/src/main/java/com/yahoo/schema/processing/TextMatch.java index 1ff019038fc..3f23cbc9b2d 100644 --- a/config-model/src/main/java/com/yahoo/schema/processing/TextMatch.java +++ b/config-model/src/main/java/com/yahoo/schema/processing/TextMatch.java @@ -70,6 +70,10 @@ public class TextMatch extends Processor { if (maxTermOccurrences != null) { ret.setMaxTermOccurrences(maxTermOccurrences); } + var maxTokenLength = fieldMatching.maxTokenLength(); + if (maxTokenLength != null) { + ret.setMaxTokenLength(maxTokenLength); + } } return ret; } diff --git a/config-model/src/main/javacc/SchemaParser.jj b/config-model/src/main/javacc/SchemaParser.jj index b40f2d0796d..1365c133932 100644 --- a/config-model/src/main/javacc/SchemaParser.jj +++ b/config-model/src/main/javacc/SchemaParser.jj @@ -183,6 +183,7 @@ TOKEN : | < GRAM_SIZE: "gram-size" > | < MAX_LENGTH: "max-length" > | < MAX_OCCURRENCES: "max-occurrences" > +| < MAX_TOKEN_LENGTH: "max-token-length" > | < PREFIX: "prefix" > | < SUBSTRING: "substring" > | < SUFFIX: "suffix" > @@ -1368,7 +1369,8 @@ void matchType(ParsedMatchSettings matchInfo) : { } */ void matchItem(ParsedMatchSettings matchInfo) : { } { - ( matchType(matchInfo) | exactTerminator(matchInfo) | gramSize(matchInfo) | matchSize(matchInfo) | maxTermOccurrences(matchInfo)) + ( matchType(matchInfo) | exactTerminator(matchInfo) | gramSize(matchInfo) | matchSize(matchInfo) | + maxTermOccurrences(matchInfo) | maxTokenLength(matchInfo) ) } void exactTerminator(ParsedMatchSettings matchInfo) : @@ -1413,6 +1415,16 @@ void maxTermOccurrences(ParsedMatchSettings matchInfo) : } } +void maxTokenLength(ParsedMatchSettings matchInfo) : +{ + int maxTokenLength; +} +{ + <MAX_TOKEN_LENGTH> <COLON> maxTokenLength = integer() { + matchInfo.setMaxTokenLength(maxTokenLength); + } +} + /** * Consumes a rank statement of a field element. * |