aboutsummaryrefslogtreecommitdiffstats
path: root/config-model/src/main
diff options
context:
space:
mode:
authorTor Egge <Tor.Egge@online.no>2024-05-07 11:01:33 +0200
committerTor Egge <Tor.Egge@online.no>2024-05-07 11:01:33 +0200
commit548ee1a5950a972ec5ef6a3be3d18f68035322a5 (patch)
tree55aff147a6c7453a2d0bd991b4f69c5b19452cd8 /config-model/src/main
parentbd054f47f0b72deb604c77fb62a888695574eb31 (diff)
Enable setting max-token-length in field match.
Diffstat (limited to 'config-model/src/main')
-rw-r--r--config-model/src/main/java/com/yahoo/schema/document/Matching.java4
-rw-r--r--config-model/src/main/java/com/yahoo/schema/parser/ConvertParsedFields.java1
-rw-r--r--config-model/src/main/java/com/yahoo/schema/parser/ParsedMatchSettings.java3
-rw-r--r--config-model/src/main/java/com/yahoo/schema/processing/ExactMatch.java15
-rw-r--r--config-model/src/main/java/com/yahoo/schema/processing/TextMatch.java4
-rw-r--r--config-model/src/main/javacc/SchemaParser.jj14
6 files changed, 37 insertions, 4 deletions
diff --git a/config-model/src/main/java/com/yahoo/schema/document/Matching.java b/config-model/src/main/java/com/yahoo/schema/document/Matching.java
index 9d68553fa80..33256fa8586 100644
--- a/config-model/src/main/java/com/yahoo/schema/document/Matching.java
+++ b/config-model/src/main/java/com/yahoo/schema/document/Matching.java
@@ -33,6 +33,8 @@ public class Matching implements Cloneable, Serializable {
private Integer maxLength;
/** Maximum number of occurrences for each term */
private Integer maxTermOccurrences;
+ /** Maximum number of characters in a token. */
+ private Integer maxTokenLength;
private String exactMatchTerminator = null;
@@ -61,6 +63,8 @@ public class Matching implements Cloneable, Serializable {
public Matching maxLength(int maxLength) { this.maxLength = maxLength; return this; }
public Integer maxTermOccurrences() { return maxTermOccurrences; }
public Matching maxTermOccurrences(int maxTermOccurrences) { this.maxTermOccurrences = maxTermOccurrences; return this; }
+ public Integer maxTokenLength() { return maxTokenLength; }
+ public Matching maxTokenLength(int maxTokenLength) { this.maxTokenLength = maxTokenLength; return this; }
public boolean isTypeUserSet() { return typeUserSet; }
public MatchAlgorithm getAlgorithm() { return algorithm; }
diff --git a/config-model/src/main/java/com/yahoo/schema/parser/ConvertParsedFields.java b/config-model/src/main/java/com/yahoo/schema/parser/ConvertParsedFields.java
index 7659a1e6562..173eebe2a94 100644
--- a/config-model/src/main/java/com/yahoo/schema/parser/ConvertParsedFields.java
+++ b/config-model/src/main/java/com/yahoo/schema/parser/ConvertParsedFields.java
@@ -44,6 +44,7 @@ public class ConvertParsedFields {
parsed.getGramSize().ifPresent(gramSize -> field.getMatching().setGramSize(gramSize));
parsed.getMaxLength().ifPresent(maxLength -> field.getMatching().maxLength(maxLength));
parsed.getMaxTermOccurrences().ifPresent(maxTermOccurrences -> field.getMatching().maxTermOccurrences(maxTermOccurrences));
+ parsed.getMaxTokenLength().ifPresent(maxTokenLength -> field.getMatching().maxTokenLength(maxTokenLength));
parsed.getMatchAlgorithm().ifPresent
(matchingAlgorithm -> field.setMatchingAlgorithm(matchingAlgorithm));
parsed.getExactTerminator().ifPresent
diff --git a/config-model/src/main/java/com/yahoo/schema/parser/ParsedMatchSettings.java b/config-model/src/main/java/com/yahoo/schema/parser/ParsedMatchSettings.java
index c7d1a215ce3..bac2c894283 100644
--- a/config-model/src/main/java/com/yahoo/schema/parser/ParsedMatchSettings.java
+++ b/config-model/src/main/java/com/yahoo/schema/parser/ParsedMatchSettings.java
@@ -23,6 +23,7 @@ public class ParsedMatchSettings {
private Integer gramSize = null;
private Integer maxLength = null;
private Integer maxTermOccurrences = null;
+ private Integer maxTokenLength = null;
Optional<MatchType> getMatchType() { return Optional.ofNullable(matchType); }
Optional<Case> getMatchCase() { return Optional.ofNullable(matchCase); }
@@ -31,6 +32,7 @@ public class ParsedMatchSettings {
Optional<Integer> getGramSize() { return Optional.ofNullable(gramSize); }
Optional<Integer> getMaxLength() { return Optional.ofNullable(maxLength); }
Optional<Integer> getMaxTermOccurrences() { return Optional.ofNullable(maxTermOccurrences); }
+ Optional<Integer> getMaxTokenLength() { return Optional.ofNullable(maxTokenLength); }
// TODO - consider allowing each set only once:
void setType(MatchType value) { this.matchType = value; }
@@ -40,5 +42,6 @@ public class ParsedMatchSettings {
void setGramSize(int value) { this.gramSize = value; }
void setMaxLength(int value) { this.maxLength = value; }
void setMaxTermOccurrences(int value) { this.maxTermOccurrences = value; }
+ void setMaxTokenLength(int value) { this.maxTokenLength = value; }
}
diff --git a/config-model/src/main/java/com/yahoo/schema/processing/ExactMatch.java b/config-model/src/main/java/com/yahoo/schema/processing/ExactMatch.java
index 056c37a9830..4313ceb4be1 100644
--- a/config-model/src/main/java/com/yahoo/schema/processing/ExactMatch.java
+++ b/config-model/src/main/java/com/yahoo/schema/processing/ExactMatch.java
@@ -16,6 +16,7 @@ import com.yahoo.vespa.indexinglanguage.expressions.ForEachExpression;
import com.yahoo.vespa.indexinglanguage.expressions.IndexExpression;
import com.yahoo.vespa.indexinglanguage.expressions.OutputExpression;
import com.yahoo.vespa.indexinglanguage.expressions.ScriptExpression;
+import com.yahoo.vespa.indexinglanguage.linguistics.AnnotatorConfig;
import com.yahoo.vespa.model.container.search.QueryProfiles;
/**
@@ -75,7 +76,11 @@ public class ExactMatch extends Processor {
}
ScriptExpression script = field.getIndexingScript();
if (new ExpressionSearcher<>(IndexExpression.class).containedIn(script)) {
- field.setIndexingScript(schema.getName(), (ScriptExpression)new MyProvider(schema).convert(field.getIndexingScript()));
+ var maxTokenLength = field.getMatching().maxTokenLength();
+ if (maxTokenLength == null) {
+ maxTokenLength = AnnotatorConfig.getDefaultMaxTokenLength();
+ }
+ field.setIndexingScript(schema.getName(), (ScriptExpression)new MyProvider(schema, maxTokenLength).convert(field.getIndexingScript()));
}
}
@@ -85,8 +90,12 @@ public class ExactMatch extends Processor {
private static class MyProvider extends TypedTransformProvider {
- MyProvider(Schema schema) {
+ private int maxTokenLength;
+
+ MyProvider(Schema schema, int maxTokenLength)
+ {
super(ExactExpression.class, schema);
+ this.maxTokenLength = maxTokenLength;
}
@Override
@@ -96,7 +105,7 @@ public class ExactMatch extends Processor {
@Override
protected Expression newTransform(DataType fieldType) {
- Expression exp = new ExactExpression();
+ Expression exp = new ExactExpression(maxTokenLength);
if (fieldType instanceof CollectionDataType) {
exp = new ForEachExpression(exp);
}
diff --git a/config-model/src/main/java/com/yahoo/schema/processing/TextMatch.java b/config-model/src/main/java/com/yahoo/schema/processing/TextMatch.java
index 1ff019038fc..3f23cbc9b2d 100644
--- a/config-model/src/main/java/com/yahoo/schema/processing/TextMatch.java
+++ b/config-model/src/main/java/com/yahoo/schema/processing/TextMatch.java
@@ -70,6 +70,10 @@ public class TextMatch extends Processor {
if (maxTermOccurrences != null) {
ret.setMaxTermOccurrences(maxTermOccurrences);
}
+ var maxTokenLength = fieldMatching.maxTokenLength();
+ if (maxTokenLength != null) {
+ ret.setMaxTokenLength(maxTokenLength);
+ }
}
return ret;
}
diff --git a/config-model/src/main/javacc/SchemaParser.jj b/config-model/src/main/javacc/SchemaParser.jj
index b40f2d0796d..1365c133932 100644
--- a/config-model/src/main/javacc/SchemaParser.jj
+++ b/config-model/src/main/javacc/SchemaParser.jj
@@ -183,6 +183,7 @@ TOKEN :
| < GRAM_SIZE: "gram-size" >
| < MAX_LENGTH: "max-length" >
| < MAX_OCCURRENCES: "max-occurrences" >
+| < MAX_TOKEN_LENGTH: "max-token-length" >
| < PREFIX: "prefix" >
| < SUBSTRING: "substring" >
| < SUFFIX: "suffix" >
@@ -1368,7 +1369,8 @@ void matchType(ParsedMatchSettings matchInfo) : { }
*/
void matchItem(ParsedMatchSettings matchInfo) : { }
{
- ( matchType(matchInfo) | exactTerminator(matchInfo) | gramSize(matchInfo) | matchSize(matchInfo) | maxTermOccurrences(matchInfo))
+ ( matchType(matchInfo) | exactTerminator(matchInfo) | gramSize(matchInfo) | matchSize(matchInfo) |
+ maxTermOccurrences(matchInfo) | maxTokenLength(matchInfo) )
}
void exactTerminator(ParsedMatchSettings matchInfo) :
@@ -1413,6 +1415,16 @@ void maxTermOccurrences(ParsedMatchSettings matchInfo) :
}
}
+void maxTokenLength(ParsedMatchSettings matchInfo) :
+{
+ int maxTokenLength;
+}
+{
+ <MAX_TOKEN_LENGTH> <COLON> maxTokenLength = integer() {
+ matchInfo.setMaxTokenLength(maxTokenLength);
+ }
+}
+
/**
* Consumes a rank statement of a field element.
*