diff options
author | Tor Egge <Tor.Egge@online.no> | 2024-01-04 11:58:01 +0100 |
---|---|---|
committer | Tor Egge <Tor.Egge@online.no> | 2024-01-04 13:37:52 +0100 |
commit | abf6ddb95051163f7c383b989d9148cbef19ab24 (patch) | |
tree | 98b87a86dc0cf849db87ed9f57c30685da9373bd /indexinglanguage | |
parent | 1f7bfaf60d05e4b55a926b46e10623448ce31308 (diff) |
Enable setting max-occurrences in field match.
Diffstat (limited to 'indexinglanguage')
4 files changed, 20 insertions, 0 deletions
diff --git a/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/expressions/TokenizeExpression.java b/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/expressions/TokenizeExpression.java index 169b79a62af..b807ad4cb65 100644 --- a/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/expressions/TokenizeExpression.java +++ b/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/expressions/TokenizeExpression.java @@ -69,6 +69,9 @@ public final class TokenizeExpression extends Expression { if (config.hasNonDefaultMaxTokenLength()) { ret.append(" max-length:" + config.getMaxTokenizeLength()); } + if (config.hasNonDefaultMaxTermOccurrences()) { + ret.append(" max-occurrences:" + config.getMaxTermOccurrences()); + } return ret.toString(); } diff --git a/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/linguistics/AnnotatorConfig.java b/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/linguistics/AnnotatorConfig.java index 5c1bf0813c4..7b6f350d831 100644 --- a/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/linguistics/AnnotatorConfig.java +++ b/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/linguistics/AnnotatorConfig.java @@ -95,6 +95,10 @@ public class AnnotatorConfig implements Cloneable { return maxTokenizeLength != DEFAULT_MAX_TOKENIZE_LENGTH; } + public boolean hasNonDefaultMaxTermOccurrences() { + return maxTermOccurrences != DEFAULT_MAX_TERM_OCCURRENCES; + } + @Override public boolean equals(Object obj) { if (!(obj instanceof AnnotatorConfig rhs)) { diff --git a/indexinglanguage/src/main/javacc/IndexingParser.jj b/indexinglanguage/src/main/javacc/IndexingParser.jj index ea05f33d745..42bbd26cee6 100644 --- a/indexinglanguage/src/main/javacc/IndexingParser.jj +++ b/indexinglanguage/src/main/javacc/IndexingParser.jj @@ -173,6 +173,7 @@ TOKEN : <JOIN: "join"> | <LOWER_CASE: "lowercase"> | <MAX_LENGTH: "max-length"> | + <MAX_OCCURRENCES: "max-occurrences"> | <NGRAM: "ngram"> | <NORMALIZE: "normalize"> | <NOW: "now"> | @@ -664,10 +665,12 @@ AnnotatorConfig tokenizeCfg() : AnnotatorConfig val = new AnnotatorConfig(annotatorCfg); String str = "SHORTEST"; Integer maxLength; + Integer maxTermOccurrences; } { ( <STEM> ( <COLON> str = string() ) ? { val.setStemMode(str); } | <MAX_LENGTH> <COLON> maxLength = integer() { val.setMaxTokenLength(maxLength); } | + <MAX_OCCURRENCES> <COLON> maxTermOccurrences = integer() { val.setMaxTermOccurrences(maxTermOccurrences); } | <NORMALIZE> { val.setRemoveAccents(true); } )+ { return val; } } diff --git a/indexinglanguage/src/test/java/com/yahoo/vespa/indexinglanguage/parser/ExpressionTestCase.java b/indexinglanguage/src/test/java/com/yahoo/vespa/indexinglanguage/parser/ExpressionTestCase.java index 6acc2bf32f3..a7ed7ae3e72 100644 --- a/indexinglanguage/src/test/java/com/yahoo/vespa/indexinglanguage/parser/ExpressionTestCase.java +++ b/indexinglanguage/src/test/java/com/yahoo/vespa/indexinglanguage/parser/ExpressionTestCase.java @@ -7,6 +7,8 @@ import com.yahoo.language.simple.SimpleLinguistics; import com.yahoo.vespa.indexinglanguage.expressions.*; import org.junit.Test; +import java.util.Optional; + import static org.junit.Assert.assertEquals; /** @@ -70,6 +72,7 @@ public class ExpressionTestCase { assertExpression(TokenizeExpression.class, "tokenize stem:\"ALL\" normalize"); assertExpression(TokenizeExpression.class, "tokenize stem:\"ALL\""); assertExpression(TokenizeExpression.class, "tokenize normalize"); + assertExpression(TokenizeExpression.class, "tokenize max-occurrences: 15", Optional.of("tokenize max-occurrences:15")); assertExpression(ToLongExpression.class, "to_long"); assertExpression(ToPositionExpression.class, "to_pos"); assertExpression(ToStringExpression.class, "to_string"); @@ -85,9 +88,16 @@ public class ExpressionTestCase { } private static void assertExpression(Class expectedClass, String str) throws ParseException { + assertExpression(expectedClass, str, Optional.empty()); + } + + private static void assertExpression(Class expectedClass, String str, Optional<String> expStr) throws ParseException { Linguistics linguistics = new SimpleLinguistics(); Expression foo = Expression.fromString(str, linguistics, Embedder.throwsOnUse.asMap()); assertEquals(expectedClass, foo.getClass()); + if (expStr.isPresent()) { + assertEquals(expStr.get(), foo.toString()); + } Expression bar = Expression.fromString(foo.toString(), linguistics, Embedder.throwsOnUse.asMap()); assertEquals(foo.hashCode(), bar.hashCode()); assertEquals(foo, bar); |