summary | refs | log | tree | commit | diff | stats
path: root/indexinglanguage/src/main
diff options
context:
space:
mode:
author: Tor Egge <Tor.Egge@online.no> 2024-01-04 11:58:01 +0100
committer: Tor Egge <Tor.Egge@online.no> 2024-01-04 13:37:52 +0100
commit: abf6ddb95051163f7c383b989d9148cbef19ab24 (patch)
tree: 98b87a86dc0cf849db87ed9f57c30685da9373bd /indexinglanguage/src/main
parent: 1f7bfaf60d05e4b55a926b46e10623448ce31308 (diff)
Enable setting max-occurrences in field match.
Diffstat (limited to 'indexinglanguage/src/main')
-rw-r--r-- indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/expressions/TokenizeExpression.java | 3
-rw-r--r-- indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/linguistics/AnnotatorConfig.java | 4
-rw-r--r-- indexinglanguage/src/main/javacc/IndexingParser.jj | 3
3 files changed, 10 insertions, 0 deletions
diff --git a/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/expressions/TokenizeExpression.java b/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/expressions/TokenizeExpression.java
index 169b79a62af..b807ad4cb65 100644
--- a/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/expressions/TokenizeExpression.java
+++ b/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/expressions/TokenizeExpression.java
@@ -69,6 +69,9 @@ public final class TokenizeExpression extends Expression {
if (config.hasNonDefaultMaxTokenLength()) {
ret.append(" max-length:" + config.getMaxTokenizeLength());
}
+ if (config.hasNonDefaultMaxTermOccurrences()) {
+ ret.append(" max-occurrences:" + config.getMaxTermOccurrences());
+ }
return ret.toString();
}
diff --git a/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/linguistics/AnnotatorConfig.java b/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/linguistics/AnnotatorConfig.java
index 5c1bf0813c4..7b6f350d831 100644
--- a/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/linguistics/AnnotatorConfig.java
+++ b/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/linguistics/AnnotatorConfig.java
@@ -95,6 +95,10 @@ public class AnnotatorConfig implements Cloneable {
return maxTokenizeLength != DEFAULT_MAX_TOKENIZE_LENGTH;
}
+ public boolean hasNonDefaultMaxTermOccurrences() {
+ return maxTermOccurrences != DEFAULT_MAX_TERM_OCCURRENCES;
+ }
+
@Override
public boolean equals(Object obj) {
if (!(obj instanceof AnnotatorConfig rhs)) {
diff --git a/indexinglanguage/src/main/javacc/IndexingParser.jj b/indexinglanguage/src/main/javacc/IndexingParser.jj
index ea05f33d745..42bbd26cee6 100644
--- a/indexinglanguage/src/main/javacc/IndexingParser.jj
+++ b/indexinglanguage/src/main/javacc/IndexingParser.jj
@@ -173,6 +173,7 @@ TOKEN :
<JOIN: "join"> |
<LOWER_CASE: "lowercase"> |
<MAX_LENGTH: "max-length"> |
+ <MAX_OCCURRENCES: "max-occurrences"> |
<NGRAM: "ngram"> |
<NORMALIZE: "normalize"> |
<NOW: "now"> |
@@ -664,10 +665,12 @@ AnnotatorConfig tokenizeCfg() :
AnnotatorConfig val = new AnnotatorConfig(annotatorCfg);
String str = "SHORTEST";
Integer maxLength;
+ Integer maxTermOccurrences;
}
{
( <STEM> ( <COLON> str = string() ) ? { val.setStemMode(str); } |
<MAX_LENGTH> <COLON> maxLength = integer() { val.setMaxTokenLength(maxLength); } |
+ <MAX_OCCURRENCES> <COLON> maxTermOccurrences = integer() { val.setMaxTermOccurrences(maxTermOccurrences); } |
<NORMALIZE> { val.setRemoveAccents(true); } )+
{ return val; }
}