aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--config-model/src/main/java/com/yahoo/schema/document/Matching.java4
-rw-r--r--config-model/src/main/java/com/yahoo/schema/parser/ConvertParsedFields.java1
-rw-r--r--config-model/src/main/java/com/yahoo/schema/parser/ParsedMatchSettings.java3
-rw-r--r--config-model/src/main/java/com/yahoo/schema/processing/ExactMatch.java15
-rw-r--r--config-model/src/main/java/com/yahoo/schema/processing/TextMatch.java4
-rw-r--r--config-model/src/main/javacc/SchemaParser.jj14
-rw-r--r--config-model/src/test/java/com/yahoo/schema/parser/SchemaParserTestCase.java17
-rw-r--r--config-model/src/test/java/com/yahoo/schema/processing/IndexingScriptRewriterTestCase.java19
-rw-r--r--container-search/src/test/java/com/yahoo/vespa/streamingvisitors/StreamingSearcherTestCase.java1
-rw-r--r--dependency-versions/pom.xml4
-rw-r--r--indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/expressions/ExactExpression.java32
-rw-r--r--indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/expressions/TokenizeExpression.java3
-rw-r--r--indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/linguistics/AnnotatorConfig.java25
-rw-r--r--indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/linguistics/LinguisticsAnnotator.java23
-rw-r--r--indexinglanguage/src/main/javacc/IndexingParser.jj12
-rw-r--r--indexinglanguage/src/test/java/com/yahoo/vespa/indexinglanguage/expressions/ExactTestCase.java9
-rw-r--r--indexinglanguage/src/test/java/com/yahoo/vespa/indexinglanguage/expressions/TokenizeTestCase.java11
-rw-r--r--indexinglanguage/src/test/java/com/yahoo/vespa/indexinglanguage/linguistics/AnnotatorConfigTestCase.java5
-rw-r--r--indexinglanguage/src/test/java/com/yahoo/vespa/indexinglanguage/parser/ExpressionTestCase.java2
-rw-r--r--vespa-athenz/src/main/java/com/yahoo/vespa/athenz/utils/SiaUtils.java6
-rw-r--r--vespa-athenz/src/test/java/com/yahoo/vespa/athenz/utils/SiaUtilsTest.java1
-rw-r--r--vespalog/src/logger/runserver.cpp35
22 files changed, 205 insertions, 41 deletions
diff --git a/config-model/src/main/java/com/yahoo/schema/document/Matching.java b/config-model/src/main/java/com/yahoo/schema/document/Matching.java
index 9d68553fa80..33256fa8586 100644
--- a/config-model/src/main/java/com/yahoo/schema/document/Matching.java
+++ b/config-model/src/main/java/com/yahoo/schema/document/Matching.java
@@ -33,6 +33,8 @@ public class Matching implements Cloneable, Serializable {
private Integer maxLength;
/** Maximum number of occurrences for each term */
private Integer maxTermOccurrences;
+ /** Maximum number of characters in a token. */
+ private Integer maxTokenLength;
private String exactMatchTerminator = null;
@@ -61,6 +63,8 @@ public class Matching implements Cloneable, Serializable {
public Matching maxLength(int maxLength) { this.maxLength = maxLength; return this; }
public Integer maxTermOccurrences() { return maxTermOccurrences; }
public Matching maxTermOccurrences(int maxTermOccurrences) { this.maxTermOccurrences = maxTermOccurrences; return this; }
+ /** Returns the configured maximum token length, or null if it has not been set. */
+ public Integer maxTokenLength() { return maxTokenLength; }
+ /** Sets the maximum token length and returns this for chaining. */
+ public Matching maxTokenLength(int maxTokenLength) { this.maxTokenLength = maxTokenLength; return this; }
public boolean isTypeUserSet() { return typeUserSet; }
public MatchAlgorithm getAlgorithm() { return algorithm; }
diff --git a/config-model/src/main/java/com/yahoo/schema/parser/ConvertParsedFields.java b/config-model/src/main/java/com/yahoo/schema/parser/ConvertParsedFields.java
index 7659a1e6562..173eebe2a94 100644
--- a/config-model/src/main/java/com/yahoo/schema/parser/ConvertParsedFields.java
+++ b/config-model/src/main/java/com/yahoo/schema/parser/ConvertParsedFields.java
@@ -44,6 +44,7 @@ public class ConvertParsedFields {
parsed.getGramSize().ifPresent(gramSize -> field.getMatching().setGramSize(gramSize));
parsed.getMaxLength().ifPresent(maxLength -> field.getMatching().maxLength(maxLength));
parsed.getMaxTermOccurrences().ifPresent(maxTermOccurrences -> field.getMatching().maxTermOccurrences(maxTermOccurrences));
+ parsed.getMaxTokenLength().ifPresent(maxTokenLength -> field.getMatching().maxTokenLength(maxTokenLength));
parsed.getMatchAlgorithm().ifPresent
(matchingAlgorithm -> field.setMatchingAlgorithm(matchingAlgorithm));
parsed.getExactTerminator().ifPresent
diff --git a/config-model/src/main/java/com/yahoo/schema/parser/ParsedMatchSettings.java b/config-model/src/main/java/com/yahoo/schema/parser/ParsedMatchSettings.java
index c7d1a215ce3..bac2c894283 100644
--- a/config-model/src/main/java/com/yahoo/schema/parser/ParsedMatchSettings.java
+++ b/config-model/src/main/java/com/yahoo/schema/parser/ParsedMatchSettings.java
@@ -23,6 +23,7 @@ public class ParsedMatchSettings {
private Integer gramSize = null;
private Integer maxLength = null;
private Integer maxTermOccurrences = null;
+ private Integer maxTokenLength = null;
Optional<MatchType> getMatchType() { return Optional.ofNullable(matchType); }
Optional<Case> getMatchCase() { return Optional.ofNullable(matchCase); }
@@ -31,6 +32,7 @@ public class ParsedMatchSettings {
Optional<Integer> getGramSize() { return Optional.ofNullable(gramSize); }
Optional<Integer> getMaxLength() { return Optional.ofNullable(maxLength); }
Optional<Integer> getMaxTermOccurrences() { return Optional.ofNullable(maxTermOccurrences); }
+ Optional<Integer> getMaxTokenLength() { return Optional.ofNullable(maxTokenLength); }
// TODO - consider allowing each set only once:
void setType(MatchType value) { this.matchType = value; }
@@ -40,5 +42,6 @@ public class ParsedMatchSettings {
void setGramSize(int value) { this.gramSize = value; }
void setMaxLength(int value) { this.maxLength = value; }
void setMaxTermOccurrences(int value) { this.maxTermOccurrences = value; }
+ void setMaxTokenLength(int value) { this.maxTokenLength = value; }
}
diff --git a/config-model/src/main/java/com/yahoo/schema/processing/ExactMatch.java b/config-model/src/main/java/com/yahoo/schema/processing/ExactMatch.java
index 056c37a9830..4313ceb4be1 100644
--- a/config-model/src/main/java/com/yahoo/schema/processing/ExactMatch.java
+++ b/config-model/src/main/java/com/yahoo/schema/processing/ExactMatch.java
@@ -16,6 +16,7 @@ import com.yahoo.vespa.indexinglanguage.expressions.ForEachExpression;
import com.yahoo.vespa.indexinglanguage.expressions.IndexExpression;
import com.yahoo.vespa.indexinglanguage.expressions.OutputExpression;
import com.yahoo.vespa.indexinglanguage.expressions.ScriptExpression;
+import com.yahoo.vespa.indexinglanguage.linguistics.AnnotatorConfig;
import com.yahoo.vespa.model.container.search.QueryProfiles;
/**
@@ -75,7 +76,11 @@ public class ExactMatch extends Processor {
}
ScriptExpression script = field.getIndexingScript();
if (new ExpressionSearcher<>(IndexExpression.class).containedIn(script)) {
- field.setIndexingScript(schema.getName(), (ScriptExpression)new MyProvider(schema).convert(field.getIndexingScript()));
+ var maxTokenLength = field.getMatching().maxTokenLength();
+ if (maxTokenLength == null) {
+ maxTokenLength = AnnotatorConfig.getDefaultMaxTokenLength();
+ }
+ field.setIndexingScript(schema.getName(), (ScriptExpression)new MyProvider(schema, maxTokenLength).convert(field.getIndexingScript()));
}
}
@@ -85,8 +90,12 @@ public class ExactMatch extends Processor {
private static class MyProvider extends TypedTransformProvider {
- MyProvider(Schema schema) {
+ /** Maximum token length handed to every ExactExpression created by this provider. */
+ private final int maxTokenLength;
+
+ MyProvider(Schema schema, int maxTokenLength) {
super(ExactExpression.class, schema);
+ this.maxTokenLength = maxTokenLength;
}
@Override
@@ -96,7 +105,7 @@ public class ExactMatch extends Processor {
@Override
protected Expression newTransform(DataType fieldType) {
- Expression exp = new ExactExpression();
+ Expression exp = new ExactExpression(maxTokenLength);
if (fieldType instanceof CollectionDataType) {
exp = new ForEachExpression(exp);
}
diff --git a/config-model/src/main/java/com/yahoo/schema/processing/TextMatch.java b/config-model/src/main/java/com/yahoo/schema/processing/TextMatch.java
index 1ff019038fc..3f23cbc9b2d 100644
--- a/config-model/src/main/java/com/yahoo/schema/processing/TextMatch.java
+++ b/config-model/src/main/java/com/yahoo/schema/processing/TextMatch.java
@@ -70,6 +70,10 @@ public class TextMatch extends Processor {
if (maxTermOccurrences != null) {
ret.setMaxTermOccurrences(maxTermOccurrences);
}
+ var maxTokenLength = fieldMatching.maxTokenLength();
+ if (maxTokenLength != null) {
+ ret.setMaxTokenLength(maxTokenLength);
+ }
}
return ret;
}
diff --git a/config-model/src/main/javacc/SchemaParser.jj b/config-model/src/main/javacc/SchemaParser.jj
index b40f2d0796d..1365c133932 100644
--- a/config-model/src/main/javacc/SchemaParser.jj
+++ b/config-model/src/main/javacc/SchemaParser.jj
@@ -183,6 +183,7 @@ TOKEN :
| < GRAM_SIZE: "gram-size" >
| < MAX_LENGTH: "max-length" >
| < MAX_OCCURRENCES: "max-occurrences" >
+| < MAX_TOKEN_LENGTH: "max-token-length" >
| < PREFIX: "prefix" >
| < SUBSTRING: "substring" >
| < SUFFIX: "suffix" >
@@ -1368,7 +1369,8 @@ void matchType(ParsedMatchSettings matchInfo) : { }
*/
void matchItem(ParsedMatchSettings matchInfo) : { }
{
- ( matchType(matchInfo) | exactTerminator(matchInfo) | gramSize(matchInfo) | matchSize(matchInfo) | maxTermOccurrences(matchInfo))
+ ( matchType(matchInfo) | exactTerminator(matchInfo) | gramSize(matchInfo) | matchSize(matchInfo) |
+ maxTermOccurrences(matchInfo) | maxTokenLength(matchInfo) )
}
void exactTerminator(ParsedMatchSettings matchInfo) :
@@ -1413,6 +1415,16 @@ void maxTermOccurrences(ParsedMatchSettings matchInfo) :
}
}
+/**
+ * Consumes a max-token-length setting of a match element and stores the parsed
+ * integer in the given match settings.
+ *
+ * @param matchInfo The match settings to modify.
+ */
+void maxTokenLength(ParsedMatchSettings matchInfo) :
+{
+ int maxTokenLength;
+}
+{
+ <MAX_TOKEN_LENGTH> <COLON> maxTokenLength = integer() {
+ matchInfo.setMaxTokenLength(maxTokenLength);
+ }
+}
+
/**
* Consumes a rank statement of a field element.
*
diff --git a/config-model/src/test/java/com/yahoo/schema/parser/SchemaParserTestCase.java b/config-model/src/test/java/com/yahoo/schema/parser/SchemaParserTestCase.java
index 34ca6c30a61..4186e352388 100644
--- a/config-model/src/test/java/com/yahoo/schema/parser/SchemaParserTestCase.java
+++ b/config-model/src/test/java/com/yahoo/schema/parser/SchemaParserTestCase.java
@@ -170,6 +170,23 @@ public class SchemaParserTestCase {
assertEquals(11, field.matchSettings().getMaxTermOccurrences().get());
}
+ // Verifies that "max-token-length" inside a match block ends up in the parsed field's match settings.
+ @Test
+ void maxTokenLengthCanBeParsed() throws Exception {
+ String input = joinLines
+ ("schema foo {",
+ " document foo {",
+ " field bar type string {",
+ " indexing: summary | index",
+ " match { max-token-length: 11 }",
+ " }",
+ " }",
+ "}");
+ ParsedSchema schema = parseString(input);
+ var field = schema.getDocument().getFields().get(0);
+ assertEquals("bar", field.name());
+ assertEquals(11, field.matchSettings().getMaxTokenLength().get());
+ }
+
void checkFileParses(String fileName) throws Exception {
var schema = parseFile(fileName);
assertNotNull(schema);
diff --git a/config-model/src/test/java/com/yahoo/schema/processing/IndexingScriptRewriterTestCase.java b/config-model/src/test/java/com/yahoo/schema/processing/IndexingScriptRewriterTestCase.java
index de99d46b9ca..355a810f5ff 100644
--- a/config-model/src/test/java/com/yahoo/schema/processing/IndexingScriptRewriterTestCase.java
+++ b/config-model/src/test/java/com/yahoo/schema/processing/IndexingScriptRewriterTestCase.java
@@ -10,6 +10,7 @@ import com.yahoo.schema.Schema;
import com.yahoo.schema.ApplicationBuilder;
import com.yahoo.schema.AbstractSchemaTestCase;
import com.yahoo.schema.document.BooleanIndexDefinition;
+import com.yahoo.schema.document.MatchType;
import com.yahoo.schema.document.SDDocumentType;
import com.yahoo.schema.document.SDField;
import com.yahoo.vespa.documentmodel.SummaryField;
@@ -155,6 +156,24 @@ public class IndexingScriptRewriterTestCase extends AbstractSchemaTestCase {
field);
}
+ // A max token length set on the field's matching must appear on the generated tokenize expression.
+ @Test
+ void requireThatMaxTokenLengthIsPropagated() {
+ var field = new SDField("test", DataType.STRING);
+ field.getMatching().maxTokenLength(10);
+ field.parseIndexingScript("test", "{ summary | index }");
+ assertIndexingScript("{ input test | tokenize normalize stem:\"BEST\" max-token-length:10 | summary test | index test; }",
+ field);
+ }
+
+ // With word matching the field is indexed through "exact", which must carry the same limit.
+ @Test
+ void requireThatMaxTokenLengthIsPropagatedForWordMatch() {
+ var field = new SDField("test", DataType.STRING);
+ field.getMatching().maxTokenLength(10).setType(MatchType.WORD);
+ field.parseIndexingScript("test", "{ summary | index }");
+ assertIndexingScript("{ input test | exact max-token-length:10 | summary test | index test; }",
+ field);
+ }
+
private static void assertIndexingScript(String expectedScript, SDField unprocessedField) {
assertEquals(expectedScript,
processField(unprocessedField).toString());
diff --git a/container-search/src/test/java/com/yahoo/vespa/streamingvisitors/StreamingSearcherTestCase.java b/container-search/src/test/java/com/yahoo/vespa/streamingvisitors/StreamingSearcherTestCase.java
index cd9ef708920..25b54267242 100644
--- a/container-search/src/test/java/com/yahoo/vespa/streamingvisitors/StreamingSearcherTestCase.java
+++ b/container-search/src/test/java/com/yahoo/vespa/streamingvisitors/StreamingSearcherTestCase.java
@@ -167,6 +167,7 @@ public class StreamingSearcherTestCase {
Query[] queries = new Query[4]; // Increase coverage
for (int i = 0; i<queries.length; i++) {
Query query = new Query(queryString);
+ query.setTimeout(1000);
if (i == 0) {
} else if (i == 1) {
query.getPresentation().setSummary("summary");
diff --git a/dependency-versions/pom.xml b/dependency-versions/pom.xml
index f9041eb3ccc..13c01a42f06 100644
--- a/dependency-versions/pom.xml
+++ b/dependency-versions/pom.xml
@@ -68,7 +68,7 @@
<assertj.vespa.version>3.25.3</assertj.vespa.version>
<!-- Athenz dependencies. Make sure these dependencies match those in Vespa's internal repositories -->
- <aws-sdk.vespa.version>1.12.716</aws-sdk.vespa.version>
+ <aws-sdk.vespa.version>1.12.717</aws-sdk.vespa.version>
<athenz.vespa.version>1.11.57</athenz.vespa.version>
<!-- Athenz END -->
@@ -117,7 +117,7 @@
<junit.vespa.version>5.10.2</junit.vespa.version>
<junit.platform.vespa.version>1.10.2</junit.platform.vespa.version>
<junit4.vespa.version>4.13.2</junit4.vespa.version>
- <kherud.llama.vespa.version>3.0.1</kherud.llama.vespa.version>
+ <kherud.llama.vespa.version>3.0.2</kherud.llama.vespa.version>
<luben.zstd.vespa.version>1.5.6-3</luben.zstd.vespa.version>
<lucene.vespa.version>9.10.0</lucene.vespa.version>
<maven-archiver.vespa.version>3.6.2</maven-archiver.vespa.version>
diff --git a/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/expressions/ExactExpression.java b/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/expressions/ExactExpression.java
index 855430f45fc..7481363b737 100644
--- a/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/expressions/ExactExpression.java
+++ b/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/expressions/ExactExpression.java
@@ -12,6 +12,9 @@ import com.yahoo.document.annotation.SpanTrees;
import com.yahoo.document.datatypes.IntegerFieldValue;
import com.yahoo.document.datatypes.StringFieldValue;
import com.yahoo.language.process.TokenType;
+import com.yahoo.vespa.indexinglanguage.linguistics.AnnotatorConfig;
+
+import java.util.OptionalInt;
import static com.yahoo.language.LinguisticsCase.toLowerCase;
@@ -20,8 +23,19 @@ import static com.yahoo.language.LinguisticsCase.toLowerCase;
*/
public final class ExactExpression extends Expression {
- public ExactExpression() {
+ /** Inputs whose lowercased length exceeds this limit are not annotated. */
+ private final int maxTokenLength;
+
+ private ExactExpression(OptionalInt maxTokenLength) {
super(DataType.STRING);
+ // Fall back to the default limit from AnnotatorConfig when no explicit limit is given.
+ this.maxTokenLength = maxTokenLength.orElse(AnnotatorConfig.getDefaultMaxTokenLength());
+ }
+
+ /** Creates an exact expression that uses the default maximum token length. */
+ public ExactExpression() {
+ this(OptionalInt.empty());
+ }
+
+ /** Creates an exact expression that uses the given maximum token length. */
+ public ExactExpression(int maxTokenLength) {
+ this(OptionalInt.of(maxTokenLength));
}
@Override
@@ -36,6 +50,12 @@ public final class ExactExpression extends Expression {
String next = toLowerCase(prev);
SpanTree tree = output.getSpanTree(SpanTrees.LINGUISTICS);
+ if (next.length() > maxTokenLength) {
+ if (tree != null) {
+ output.removeSpanTree(SpanTrees.LINGUISTICS);
+ }
+ return;
+ }
SpanList root;
if (tree == null) {
root = new SpanList();
@@ -64,8 +84,14 @@ public final class ExactExpression extends Expression {
}
@Override
- public String toString() {
- return "exact";
+ public String toString() {
+ // The limit is only printed when it differs from the default.
+ if (maxTokenLength == AnnotatorConfig.getDefaultMaxTokenLength()) {
+ return "exact";
+ }
+ return "exact" + " max-token-length:" + maxTokenLength;
}
@Override
diff --git a/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/expressions/TokenizeExpression.java b/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/expressions/TokenizeExpression.java
index 849bc075a64..a3c404e50c3 100644
--- a/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/expressions/TokenizeExpression.java
+++ b/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/expressions/TokenizeExpression.java
@@ -69,6 +69,9 @@ public final class TokenizeExpression extends Expression {
if (config.hasNonDefaultMaxTokenizeLength()) {
ret.append(" max-length:" + config.getMaxTokenizeLength());
}
+ if (config.hasNonDefaultMaxTokenLength()) {
+ ret.append(" max-token-length:" + config.getMaxTokenLength());
+ }
if (config.hasNonDefaultMaxTermOccurrences()) {
ret.append(" max-occurrences:" + config.getMaxTermOccurrences());
}
diff --git a/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/linguistics/AnnotatorConfig.java b/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/linguistics/AnnotatorConfig.java
index 4e5ef0d90df..6522e284fc8 100644
--- a/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/linguistics/AnnotatorConfig.java
+++ b/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/linguistics/AnnotatorConfig.java
@@ -14,14 +14,17 @@ public class AnnotatorConfig implements Cloneable {
private StemMode stemMode;
private boolean removeAccents;
private int maxTermOccurrences;
+ private int maxTokenLength;
private int maxTokenizeLength;
public static final int DEFAULT_MAX_TERM_OCCURRENCES;
+ private static final int DEFAULT_MAX_TOKEN_LENGTH;
private static final int DEFAULT_MAX_TOKENIZE_LENGTH;
static {
IlscriptsConfig defaults = new IlscriptsConfig(new IlscriptsConfig.Builder());
DEFAULT_MAX_TERM_OCCURRENCES = defaults.maxtermoccurrences();
+ DEFAULT_MAX_TOKEN_LENGTH = defaults.maxtokenlength();
DEFAULT_MAX_TOKENIZE_LENGTH = defaults.fieldmatchmaxlength();
}
@@ -30,6 +33,7 @@ public class AnnotatorConfig implements Cloneable {
stemMode = StemMode.NONE;
removeAccents = false;
maxTermOccurrences = DEFAULT_MAX_TERM_OCCURRENCES;
+ maxTokenLength = DEFAULT_MAX_TOKEN_LENGTH;
maxTokenizeLength = DEFAULT_MAX_TOKENIZE_LENGTH;
}
@@ -38,6 +42,7 @@ public class AnnotatorConfig implements Cloneable {
stemMode = rhs.stemMode;
removeAccents = rhs.removeAccents;
maxTermOccurrences = rhs.maxTermOccurrences;
+ maxTokenLength = rhs.maxTokenLength;
maxTokenizeLength = rhs.maxTokenizeLength;
}
@@ -82,6 +87,17 @@ public class AnnotatorConfig implements Cloneable {
return this;
}
+ /** Sets the maximum token length and returns this config for chaining. */
+ public AnnotatorConfig setMaxTokenLength(int maxTokenLength) {
+ this.maxTokenLength = maxTokenLength;
+ return this;
+ }
+
+ /** Returns the maximum token length of this config. */
+ public int getMaxTokenLength() {
+ return maxTokenLength;
+ }
+
+ /** Returns the default maximum token length, read from the defaults of IlscriptsConfig. */
+ public static int getDefaultMaxTokenLength() { return DEFAULT_MAX_TOKEN_LENGTH; }
+
public AnnotatorConfig setMaxTokenizeLength(int maxTokenizeLength) {
this.maxTokenizeLength = maxTokenizeLength;
return this;
@@ -91,6 +107,10 @@ public class AnnotatorConfig implements Cloneable {
return maxTokenizeLength;
}
+ /** Returns whether the maximum token length of this config differs from the default. */
+ public boolean hasNonDefaultMaxTokenLength() {
+ return maxTokenLength != DEFAULT_MAX_TOKEN_LENGTH;
+ }
+
public boolean hasNonDefaultMaxTokenizeLength() {
return maxTokenizeLength != DEFAULT_MAX_TOKENIZE_LENGTH;
}
@@ -116,6 +136,9 @@ public class AnnotatorConfig implements Cloneable {
if (maxTermOccurrences != rhs.maxTermOccurrences) {
return false;
}
+ if (maxTokenLength != rhs.maxTokenLength) {
+ return false;
+ }
if (maxTokenizeLength != rhs.maxTokenizeLength) {
return false;
}
@@ -125,7 +148,7 @@ public class AnnotatorConfig implements Cloneable {
@Override
public int hashCode() {
return getClass().hashCode() + language.hashCode() + stemMode.hashCode() +
- Boolean.valueOf(removeAccents).hashCode() + maxTermOccurrences + maxTokenizeLength;
+ Boolean.valueOf(removeAccents).hashCode() + maxTermOccurrences + maxTokenLength + maxTokenizeLength;
}
}
diff --git a/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/linguistics/LinguisticsAnnotator.java b/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/linguistics/LinguisticsAnnotator.java
index 86d4e91a567..913b874c6f6 100644
--- a/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/linguistics/LinguisticsAnnotator.java
+++ b/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/linguistics/LinguisticsAnnotator.java
@@ -78,7 +78,8 @@ public class LinguisticsAnnotator {
TermOccurrences termOccurrences = new TermOccurrences(config.getMaxTermOccurrences());
SpanTree tree = new SpanTree(SpanTrees.LINGUISTICS);
for (Token token : tokens)
- addAnnotationSpan(text.getString(), tree.spanList(), token, config.getStemMode(), termOccurrences);
+ addAnnotationSpan(text.getString(), tree.spanList(), token, config.getStemMode(), termOccurrences,
+ config.getMaxTokenLength());
if (tree.numAnnotations() == 0) return false;
text.setSpanTree(tree);
@@ -100,17 +101,22 @@ public class LinguisticsAnnotator {
return new Annotation(AnnotationTypes.TERM, new StringFieldValue(term));
}
- private static void addAnnotation(Span here, String term, String orig, TermOccurrences termOccurrences) {
+ /**
+ * Annotates the given span with a term annotation, unless the term is longer than
+ * maxTokenLength or the term occurrence check rejects it.
+ */
+ private static void addAnnotation(Span here, String term, String orig, TermOccurrences termOccurrences,
+ int maxTokenLength) {
+ // Too long terms are dropped before the term occurrence check is consulted.
+ if (term.length() > maxTokenLength) {
+ return;
+ }
if (termOccurrences.termCountBelowLimit(term)) {
here.annotate(termAnnotation(term, orig));
}
}
- private static void addAnnotationSpan(String input, SpanList parent, Token token, StemMode mode, TermOccurrences termOccurrences) {
+ private static void addAnnotationSpan(String input, SpanList parent, Token token, StemMode mode,
+ TermOccurrences termOccurrences, int maxTokenLength) {
if ( ! token.isSpecialToken()) {
if (token.getNumComponents() > 0) {
for (int i = 0; i < token.getNumComponents(); ++i) {
- addAnnotationSpan(input, parent, token.getComponent(i), mode, termOccurrences);
+ addAnnotationSpan(input, parent, token.getComponent(i), mode, termOccurrences, maxTokenLength);
}
return;
}
@@ -130,18 +136,21 @@ public class LinguisticsAnnotator {
String lowercasedOrig = toLowerCase(token.getOrig());
String term = token.getTokenString();
if (term != null) {
- addAnnotation(where, term, token.getOrig(), termOccurrences);
+ addAnnotation(where, term, token.getOrig(), termOccurrences, maxTokenLength);
if ( ! term.equals(lowercasedOrig))
- addAnnotation(where, lowercasedOrig, token.getOrig(), termOccurrences);
+ addAnnotation(where, lowercasedOrig, token.getOrig(), termOccurrences, maxTokenLength);
}
for (int i = 0; i < token.getNumStems(); i++) {
String stem = token.getStem(i);
if (! (stem.equals(lowercasedOrig) || stem.equals(term)))
- addAnnotation(where, stem, token.getOrig(), termOccurrences);
+ addAnnotation(where, stem, token.getOrig(), termOccurrences, maxTokenLength);
}
} else {
String term = token.getTokenString();
if (term == null || term.trim().isEmpty()) return;
+ if (term.length() > maxTokenLength) {
+ return;
+ }
if (termOccurrences.termCountBelowLimit(term)) {
parent.span((int)token.getOffset(), token.getOrig().length()).annotate(termAnnotation(term, token.getOrig()));
}
diff --git a/indexinglanguage/src/main/javacc/IndexingParser.jj b/indexinglanguage/src/main/javacc/IndexingParser.jj
index 77591d3e54e..29ca5270db8 100644
--- a/indexinglanguage/src/main/javacc/IndexingParser.jj
+++ b/indexinglanguage/src/main/javacc/IndexingParser.jj
@@ -174,6 +174,7 @@ TOKEN :
<LOWER_CASE: "lowercase"> |
<MAX_LENGTH: "max-length"> |
<MAX_OCCURRENCES: "max-occurrences"> |
+ <MAX_TOKEN_LENGTH: "max-token-length"> |
<NGRAM: "ngram"> |
<NORMALIZE: "normalize"> |
<NOW: "now"> |
@@ -407,10 +408,13 @@ Expression embedExp() :
{ return new EmbedExpression(embedders, embedderId, embedderArguments); }
}
-Expression exactExp() : { }
+Expression exactExp() :
{
- ( <EXACT> )
- { return new ExactExpression(); }
+ int maxTokenLength = annotatorCfg.getMaxTokenLength();
+}
+{
+ ( <EXACT> [ <MAX_TOKEN_LENGTH> <COLON> maxTokenLength = integer() ] )
+ { return new ExactExpression(maxTokenLength); }
}
Expression flattenExp() : { }
@@ -686,11 +690,13 @@ AnnotatorConfig tokenizeCfg() :
String str = "SHORTEST";
Integer maxLength;
Integer maxTermOccurrences;
+ Integer maxTokenLength;
}
{
( <STEM> ( <COLON> str = string() ) ? { val.setStemMode(str); } |
<MAX_LENGTH> <COLON> maxLength = integer() { val.setMaxTokenizeLength(maxLength); } |
<MAX_OCCURRENCES> <COLON> maxTermOccurrences = integer() { val.setMaxTermOccurrences(maxTermOccurrences); } |
+ <MAX_TOKEN_LENGTH> <COLON> maxTokenLength = integer() { val.setMaxTokenLength(maxTokenLength); } |
<NORMALIZE> { val.setRemoveAccents(true); } )+
{ return val; }
}
diff --git a/indexinglanguage/src/test/java/com/yahoo/vespa/indexinglanguage/expressions/ExactTestCase.java b/indexinglanguage/src/test/java/com/yahoo/vespa/indexinglanguage/expressions/ExactTestCase.java
index 403d1820f70..b338c45f7a4 100644
--- a/indexinglanguage/src/test/java/com/yahoo/vespa/indexinglanguage/expressions/ExactTestCase.java
+++ b/indexinglanguage/src/test/java/com/yahoo/vespa/indexinglanguage/expressions/ExactTestCase.java
@@ -63,6 +63,15 @@ public class ExactTestCase {
}
@Test
+ public void requireThatLongStringsAreNotAnnotated() {
+ // "foo" has three characters, which exceeds the limit of 2, so no linguistics span tree is expected.
+ ExecutionContext ctx = new ExecutionContext(new SimpleTestAdapter());
+ ctx.setValue(new StringFieldValue("foo"));
+ new ExactExpression(2).execute(ctx);
+
+ assertNull(((StringFieldValue)ctx.getValue()).getSpanTree(SpanTrees.LINGUISTICS));
+ }
+
+ @Test
public void requireThatEmptyStringsAreNotAnnotated() {
ExecutionContext ctx = new ExecutionContext(new SimpleTestAdapter());
ctx.setValue(new StringFieldValue(""));
diff --git a/indexinglanguage/src/test/java/com/yahoo/vespa/indexinglanguage/expressions/TokenizeTestCase.java b/indexinglanguage/src/test/java/com/yahoo/vespa/indexinglanguage/expressions/TokenizeTestCase.java
index 01ffbe359f3..7ed3ab410a3 100644
--- a/indexinglanguage/src/test/java/com/yahoo/vespa/indexinglanguage/expressions/TokenizeTestCase.java
+++ b/indexinglanguage/src/test/java/com/yahoo/vespa/indexinglanguage/expressions/TokenizeTestCase.java
@@ -62,4 +62,15 @@ public class TokenizeTestCase {
assertTrue(val instanceof StringFieldValue);
assertNotNull(((StringFieldValue)val).getSpanTree(SpanTrees.LINGUISTICS));
}
+
+ // The only word, "foo", exceeds the token length limit of 2, so no linguistics span tree is expected.
+ @Test
+ public void requireThatLongWordIsDropped() {
+ ExecutionContext ctx = new ExecutionContext(new SimpleTestAdapter());
+ ctx.setValue(new StringFieldValue("foo"));
+ new TokenizeExpression(new SimpleLinguistics(), new AnnotatorConfig().setMaxTokenLength(2)).execute(ctx);
+
+ FieldValue val = ctx.getValue();
+ assertTrue(val instanceof StringFieldValue);
+ assertNull(((StringFieldValue)val).getSpanTree(SpanTrees.LINGUISTICS));
+ }
}
diff --git a/indexinglanguage/src/test/java/com/yahoo/vespa/indexinglanguage/linguistics/AnnotatorConfigTestCase.java b/indexinglanguage/src/test/java/com/yahoo/vespa/indexinglanguage/linguistics/AnnotatorConfigTestCase.java
index 0d34d2841fd..c3131e28906 100644
--- a/indexinglanguage/src/test/java/com/yahoo/vespa/indexinglanguage/linguistics/AnnotatorConfigTestCase.java
+++ b/indexinglanguage/src/test/java/com/yahoo/vespa/indexinglanguage/linguistics/AnnotatorConfigTestCase.java
@@ -27,6 +27,8 @@ public class AnnotatorConfigTestCase {
assertTrue(config.getRemoveAccents());
config.setRemoveAccents(false);
assertFalse(config.getRemoveAccents());
+ config.setMaxTokenLength(10);
+ assertEquals(10, config.getMaxTokenLength());
}
@Test
@@ -35,11 +37,13 @@ public class AnnotatorConfigTestCase {
config.setLanguage(Language.ARABIC);
config.setStemMode(StemMode.SHORTEST);
config.setRemoveAccents(!config.getRemoveAccents());
+ config.setMaxTokenLength(11);
AnnotatorConfig other = new AnnotatorConfig(config);
assertEquals(config.getLanguage(), other.getLanguage());
assertEquals(config.getStemMode(), other.getStemMode());
assertEquals(config.getRemoveAccents(), other.getRemoveAccents());
+ assertEquals(config.getMaxTokenLength(), other.getMaxTokenLength());
}
@Test
@@ -49,6 +53,7 @@ public class AnnotatorConfigTestCase {
assertFalse(config.equals(newConfig(Language.SPANISH, StemMode.SHORTEST, false)));
assertFalse(config.equals(newConfig(Language.DUTCH, StemMode.SHORTEST, false)));
assertFalse(config.equals(newConfig(Language.DUTCH, StemMode.NONE, false)));
+ assertNotEquals(config, newConfig(Language.DUTCH, StemMode.NONE, true).setMaxTokenLength(10));
assertEquals(config, newConfig(Language.DUTCH, StemMode.NONE, true));
assertEquals(config.hashCode(), newConfig(Language.DUTCH, StemMode.NONE, true).hashCode());
}
diff --git a/indexinglanguage/src/test/java/com/yahoo/vespa/indexinglanguage/parser/ExpressionTestCase.java b/indexinglanguage/src/test/java/com/yahoo/vespa/indexinglanguage/parser/ExpressionTestCase.java
index a7ed7ae3e72..1b7c6973f1e 100644
--- a/indexinglanguage/src/test/java/com/yahoo/vespa/indexinglanguage/parser/ExpressionTestCase.java
+++ b/indexinglanguage/src/test/java/com/yahoo/vespa/indexinglanguage/parser/ExpressionTestCase.java
@@ -27,6 +27,7 @@ public class ExpressionTestCase {
assertExpression(ClearStateExpression.class, "clear_state");
assertExpression(EchoExpression.class, "echo");
assertExpression(ExactExpression.class, "exact");
+ assertExpression(ExactExpression.class, "exact max-token-length: 10", Optional.of("exact max-token-length:10"));
assertExpression(FlattenExpression.class, "flatten");
assertExpression(ForEachExpression.class, "for_each { 1 }");
assertExpression(GetFieldExpression.class, "get_field field1");
@@ -73,6 +74,7 @@ public class ExpressionTestCase {
assertExpression(TokenizeExpression.class, "tokenize stem:\"ALL\"");
assertExpression(TokenizeExpression.class, "tokenize normalize");
assertExpression(TokenizeExpression.class, "tokenize max-occurrences: 15", Optional.of("tokenize max-occurrences:15"));
+ assertExpression(TokenizeExpression.class, "tokenize max-token-length: 15", Optional.of("tokenize max-token-length:15"));
assertExpression(ToLongExpression.class, "to_long");
assertExpression(ToPositionExpression.class, "to_pos");
assertExpression(ToStringExpression.class, "to_string");
diff --git a/vespa-athenz/src/main/java/com/yahoo/vespa/athenz/utils/SiaUtils.java b/vespa-athenz/src/main/java/com/yahoo/vespa/athenz/utils/SiaUtils.java
index af0da93edc3..56e64b2261d 100644
--- a/vespa-athenz/src/main/java/com/yahoo/vespa/athenz/utils/SiaUtils.java
+++ b/vespa-athenz/src/main/java/com/yahoo/vespa/athenz/utils/SiaUtils.java
@@ -1,10 +1,10 @@
// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.vespa.athenz.utils;
-import com.yahoo.vespa.athenz.api.AthenzIdentity;
-import com.yahoo.vespa.athenz.api.AthenzService;
import com.yahoo.security.KeyUtils;
import com.yahoo.security.X509CertificateUtils;
+import com.yahoo.vespa.athenz.api.AthenzIdentity;
+import com.yahoo.vespa.athenz.api.AthenzService;
import java.io.IOException;
import java.io.UncheckedIOException;
@@ -132,7 +132,7 @@ public class SiaUtils {
try (DirectoryStream<Path> directoryStream = Files.newDirectoryStream(keysDirectory)) {
return StreamSupport.stream(directoryStream.spliterator(), false)
.map(path -> path.getFileName().toString())
- .filter(fileName -> fileName.endsWith(keyFileSuffix))
+ .filter(fileName -> fileName.endsWith(keyFileSuffix) && ! fileName.contains(":role."))
.map(fileName -> fileName.substring(0, fileName.length() - keyFileSuffix.length()))
.map(AthenzService::new)
.collect(toList());
diff --git a/vespa-athenz/src/test/java/com/yahoo/vespa/athenz/utils/SiaUtilsTest.java b/vespa-athenz/src/test/java/com/yahoo/vespa/athenz/utils/SiaUtilsTest.java
index 9ff59236c0c..8274fe7f7a6 100644
--- a/vespa-athenz/src/test/java/com/yahoo/vespa/athenz/utils/SiaUtilsTest.java
+++ b/vespa-athenz/src/test/java/com/yahoo/vespa/athenz/utils/SiaUtilsTest.java
@@ -32,6 +32,7 @@ public class SiaUtilsTest {
Files.createFile(SiaUtils.getPrivateKeyFile(siaRoot, fooService));
AthenzService barService = new AthenzService("my.domain.bar");
Files.createFile(SiaUtils.getPrivateKeyFile(siaRoot, barService));
+ Files.createFile(siaRoot.resolve("keys/my.domain.foo:role.my-role.key.pem"));
List<AthenzIdentity> siaIdentities = SiaUtils.findSiaServices(siaRoot);
assertEquals(2, siaIdentities.size());
diff --git a/vespalog/src/logger/runserver.cpp b/vespalog/src/logger/runserver.cpp
index 9a0a499cd54..4e0141f88dc 100644
--- a/vespalog/src/logger/runserver.cpp
+++ b/vespalog/src/logger/runserver.cpp
@@ -6,7 +6,7 @@
#include <cerrno>
#include <unistd.h>
#include <csignal>
-
+#include <poll.h>
#include <sys/select.h>
#include <sys/types.h>
#include <sys/wait.h>
@@ -18,6 +18,7 @@
#include "llreader.h"
#include <vespa/log/log.h>
#include <chrono>
+#include <array>
LOG_SETUP("runserver");
@@ -179,8 +180,6 @@ int loop(const char *svc, char * const * run)
pstdout[0], pstdout[1],
pstderr[0], pstderr[1]);
- int high = 1 + pstdout[0] + pstderr[0];
-
pid_t child = fork();
if (child == 0) {
@@ -237,24 +236,24 @@ int loop(const char *svc, char * const * run)
bool outeof = false;
bool erreof = false;
-
+ constexpr int stdout_idx = 0, stderr_idx = 1;
+ std::array<pollfd, 2> fds{};
int wstat = 0;
while (child || !outeof || !erreof) {
- struct timeval timeout;
-
- timeout.tv_sec = 0;
- timeout.tv_usec = 100000; // == 100 ms == 1/10 s
-
- fd_set pipes;
-
- FD_ZERO(&pipes);
- if (!outeof) FD_SET(pstdout[0], &pipes);
- if (!erreof) FD_SET(pstderr[0], &pipes);
-
- int n = select(high, &pipes, NULL, NULL, &timeout);
+ // Entries with negative fds are entirely ignored by the kernel, so a
+ // stream that has reached EOF is disabled by setting its fd to -1.
+ fds[stdout_idx].fd = !outeof ? pstdout[0] : -1;
+ fds[stdout_idx].events = POLLIN;
+ fds[stdout_idx].revents = 0;
+ fds[stderr_idx].fd = !erreof ? pstderr[0] : -1;
+ fds[stderr_idx].events = POLLIN;
+ fds[stderr_idx].revents = 0;
+
+ constexpr int poll_timeout_ms = 100;
+ int n = poll(fds.data(), fds.size(), poll_timeout_ms);
if (n > 0) {
- if (FD_ISSET(pstdout[0], &pipes)) {
+ constexpr short ev_mask = POLLIN | POLLERR | POLLHUP;
+ if ((fds[stdout_idx].revents & ev_mask) != 0) {
LOG(debug, "out reader has input");
if (outReader.blockRead()) {
while (outReader.hasInput()) {
@@ -267,7 +266,7 @@ int loop(const char *svc, char * const * run)
close(pstdout[0]);
}
}
- if (FD_ISSET(pstderr[0], &pipes)) {
+ if ((fds[stderr_idx].revents & ev_mask) != 0) {
LOG(debug, "err reader has input");
if (errReader.blockRead()) {
while (errReader.hasInput()) {