diff options
author | Tor Egge <Tor.Egge@online.no> | 2024-01-04 11:58:01 +0100 |
---|---|---|
committer | Tor Egge <Tor.Egge@online.no> | 2024-01-04 13:37:52 +0100 |
commit | abf6ddb95051163f7c383b989d9148cbef19ab24 (patch) | |
tree | 98b87a86dc0cf849db87ed9f57c30685da9373bd /indexinglanguage/src/main | |
parent | 1f7bfaf60d05e4b55a926b46e10623448ce31308 (diff) |
Enable setting max-occurrences in field match.
Diffstat (limited to 'indexinglanguage/src/main')
3 files changed, 10 insertions, 0 deletions
```diff
diff --git a/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/expressions/TokenizeExpression.java b/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/expressions/TokenizeExpression.java
index 169b79a62af..b807ad4cb65 100644
--- a/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/expressions/TokenizeExpression.java
+++ b/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/expressions/TokenizeExpression.java
@@ -69,6 +69,9 @@ public final class TokenizeExpression extends Expression {
         if (config.hasNonDefaultMaxTokenLength()) {
             ret.append(" max-length:" + config.getMaxTokenizeLength());
         }
+        if (config.hasNonDefaultMaxTermOccurrences()) {
+            ret.append(" max-occurrences:" + config.getMaxTermOccurrences());
+        }
         return ret.toString();
     }
 
diff --git a/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/linguistics/AnnotatorConfig.java b/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/linguistics/AnnotatorConfig.java
index 5c1bf0813c4..7b6f350d831 100644
--- a/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/linguistics/AnnotatorConfig.java
+++ b/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/linguistics/AnnotatorConfig.java
@@ -95,6 +95,10 @@ public class AnnotatorConfig implements Cloneable {
         return maxTokenizeLength != DEFAULT_MAX_TOKENIZE_LENGTH;
     }
 
+    public boolean hasNonDefaultMaxTermOccurrences() {
+        return maxTermOccurrences != DEFAULT_MAX_TERM_OCCURRENCES;
+    }
+
     @Override
     public boolean equals(Object obj) {
         if (!(obj instanceof AnnotatorConfig rhs)) {
diff --git a/indexinglanguage/src/main/javacc/IndexingParser.jj b/indexinglanguage/src/main/javacc/IndexingParser.jj
index ea05f33d745..42bbd26cee6 100644
--- a/indexinglanguage/src/main/javacc/IndexingParser.jj
+++ b/indexinglanguage/src/main/javacc/IndexingParser.jj
@@ -173,6 +173,7 @@ TOKEN :
     <JOIN: "join"> |
     <LOWER_CASE: "lowercase"> |
     <MAX_LENGTH: "max-length"> |
+    <MAX_OCCURRENCES: "max-occurrences"> |
     <NGRAM: "ngram"> |
     <NORMALIZE: "normalize"> |
     <NOW: "now"> |
@@ -664,10 +665,12 @@ AnnotatorConfig tokenizeCfg() :
     AnnotatorConfig val = new AnnotatorConfig(annotatorCfg);
     String str = "SHORTEST";
     Integer maxLength;
+    Integer maxTermOccurrences;
 }
 {
     ( <STEM> ( <COLON> str = string() ) ? { val.setStemMode(str); } |
       <MAX_LENGTH> <COLON> maxLength = integer() { val.setMaxTokenLength(maxLength); } |
+      <MAX_OCCURRENCES> <COLON> maxTermOccurrences = integer() { val.setMaxTermOccurrences(maxTermOccurrences); } |
       <NORMALIZE> { val.setRemoveAccents(true); } )+
     { return val; }
 }
```