about | summary | refs | log | tree | commit | diff | stats
path: root/indexinglanguage/src/test/java/com/yahoo/vespa
diff options
context:
space:
mode:
Diffstat (limited to 'indexinglanguage/src/test/java/com/yahoo/vespa')
-rw-r--r-- indexinglanguage/src/test/java/com/yahoo/vespa/indexinglanguage/ScriptTestCase.java | 8
-rw-r--r-- indexinglanguage/src/test/java/com/yahoo/vespa/indexinglanguage/expressions/ExactTestCase.java | 9
-rw-r--r-- indexinglanguage/src/test/java/com/yahoo/vespa/indexinglanguage/expressions/TokenizeTestCase.java | 11
-rw-r--r-- indexinglanguage/src/test/java/com/yahoo/vespa/indexinglanguage/linguistics/AnnotatorConfigTestCase.java | 5
-rw-r--r-- indexinglanguage/src/test/java/com/yahoo/vespa/indexinglanguage/linguistics/LinguisticsAnnotatorTestCase.java | 2
-rw-r--r-- indexinglanguage/src/test/java/com/yahoo/vespa/indexinglanguage/parser/ExpressionTestCase.java | 2
6 files changed, 32 insertions, 5 deletions
diff --git a/indexinglanguage/src/test/java/com/yahoo/vespa/indexinglanguage/ScriptTestCase.java b/indexinglanguage/src/test/java/com/yahoo/vespa/indexinglanguage/ScriptTestCase.java
index f6995ac5a72..dd0ec255c35 100644
--- a/indexinglanguage/src/test/java/com/yahoo/vespa/indexinglanguage/ScriptTestCase.java
+++ b/indexinglanguage/src/test/java/com/yahoo/vespa/indexinglanguage/ScriptTestCase.java
@@ -202,9 +202,9 @@ public class ScriptTestCase {
"my input", "[110.0, 122.0, 33.0, 106.0]");
assertThrows(() -> testEmbedStatement("input myText | embed | attribute 'myTensor'", embedders, "input text", "[105, 110, 112, 117]"),
- "Multiple embedders are provided but no embedder id is given. Valid embedders are emb1,emb2");
+ "Multiple embedders are provided but no embedder id is given. Valid embedders are emb1, emb2");
assertThrows(() -> testEmbedStatement("input myText | embed emb3 | attribute 'myTensor'", embedders, "input text", "[105, 110, 112, 117]"),
- "Can't find embedder 'emb3'. Valid embedders are emb1,emb2");
+ "Can't find embedder 'emb3'. Valid embedders are emb1, emb2");
}
private void testEmbedStatement(String expressionString, Map<String, Embedder> embedders, String input, String expected) {
@@ -562,12 +562,12 @@ public class ScriptTestCase {
}
- private void assertThrows(Runnable r, String msg) {
+ private void assertThrows(Runnable r, String expectedMessage) {
try {
r.run();
fail();
} catch (IllegalStateException e) {
- assertEquals(e.getMessage(), msg);
+ assertEquals(expectedMessage, e.getMessage());
}
}
diff --git a/indexinglanguage/src/test/java/com/yahoo/vespa/indexinglanguage/expressions/ExactTestCase.java b/indexinglanguage/src/test/java/com/yahoo/vespa/indexinglanguage/expressions/ExactTestCase.java
index 403d1820f70..b338c45f7a4 100644
--- a/indexinglanguage/src/test/java/com/yahoo/vespa/indexinglanguage/expressions/ExactTestCase.java
+++ b/indexinglanguage/src/test/java/com/yahoo/vespa/indexinglanguage/expressions/ExactTestCase.java
@@ -63,6 +63,15 @@ public class ExactTestCase {
}
@Test
+ public void requireThatLongStringsAreNotAnnotated() {
+ ExecutionContext ctx = new ExecutionContext(new SimpleTestAdapter());
+ ctx.setValue(new StringFieldValue("foo"));
+ new ExactExpression(2).execute(ctx);
+
+ assertNull(((StringFieldValue)ctx.getValue()).getSpanTree(SpanTrees.LINGUISTICS));
+ }
+
+ @Test
public void requireThatEmptyStringsAreNotAnnotated() {
ExecutionContext ctx = new ExecutionContext(new SimpleTestAdapter());
ctx.setValue(new StringFieldValue(""));
diff --git a/indexinglanguage/src/test/java/com/yahoo/vespa/indexinglanguage/expressions/TokenizeTestCase.java b/indexinglanguage/src/test/java/com/yahoo/vespa/indexinglanguage/expressions/TokenizeTestCase.java
index 01ffbe359f3..7ed3ab410a3 100644
--- a/indexinglanguage/src/test/java/com/yahoo/vespa/indexinglanguage/expressions/TokenizeTestCase.java
+++ b/indexinglanguage/src/test/java/com/yahoo/vespa/indexinglanguage/expressions/TokenizeTestCase.java
@@ -62,4 +62,15 @@ public class TokenizeTestCase {
assertTrue(val instanceof StringFieldValue);
assertNotNull(((StringFieldValue)val).getSpanTree(SpanTrees.LINGUISTICS));
}
+
+ @Test
+ public void requireThatLongWordIsDropped() {
+ ExecutionContext ctx = new ExecutionContext(new SimpleTestAdapter());
+ ctx.setValue(new StringFieldValue("foo"));
+ new TokenizeExpression(new SimpleLinguistics(), new AnnotatorConfig().setMaxTokenLength(2)).execute(ctx);
+
+ FieldValue val = ctx.getValue();
+ assertTrue(val instanceof StringFieldValue);
+ assertNull(((StringFieldValue)val).getSpanTree(SpanTrees.LINGUISTICS));
+ }
}
diff --git a/indexinglanguage/src/test/java/com/yahoo/vespa/indexinglanguage/linguistics/AnnotatorConfigTestCase.java b/indexinglanguage/src/test/java/com/yahoo/vespa/indexinglanguage/linguistics/AnnotatorConfigTestCase.java
index 0d34d2841fd..c3131e28906 100644
--- a/indexinglanguage/src/test/java/com/yahoo/vespa/indexinglanguage/linguistics/AnnotatorConfigTestCase.java
+++ b/indexinglanguage/src/test/java/com/yahoo/vespa/indexinglanguage/linguistics/AnnotatorConfigTestCase.java
@@ -27,6 +27,8 @@ public class AnnotatorConfigTestCase {
assertTrue(config.getRemoveAccents());
config.setRemoveAccents(false);
assertFalse(config.getRemoveAccents());
+ config.setMaxTokenLength(10);
+ assertEquals(10, config.getMaxTokenLength());
}
@Test
@@ -35,11 +37,13 @@ public class AnnotatorConfigTestCase {
config.setLanguage(Language.ARABIC);
config.setStemMode(StemMode.SHORTEST);
config.setRemoveAccents(!config.getRemoveAccents());
+ config.setMaxTokenLength(11);
AnnotatorConfig other = new AnnotatorConfig(config);
assertEquals(config.getLanguage(), other.getLanguage());
assertEquals(config.getStemMode(), other.getStemMode());
assertEquals(config.getRemoveAccents(), other.getRemoveAccents());
+ assertEquals(config.getMaxTokenLength(), other.getMaxTokenLength());
}
@Test
@@ -49,6 +53,7 @@ public class AnnotatorConfigTestCase {
assertFalse(config.equals(newConfig(Language.SPANISH, StemMode.SHORTEST, false)));
assertFalse(config.equals(newConfig(Language.DUTCH, StemMode.SHORTEST, false)));
assertFalse(config.equals(newConfig(Language.DUTCH, StemMode.NONE, false)));
+ assertNotEquals(config, newConfig(Language.DUTCH, StemMode.NONE, true).setMaxTokenLength(10));
assertEquals(config, newConfig(Language.DUTCH, StemMode.NONE, true));
assertEquals(config.hashCode(), newConfig(Language.DUTCH, StemMode.NONE, true).hashCode());
}
diff --git a/indexinglanguage/src/test/java/com/yahoo/vespa/indexinglanguage/linguistics/LinguisticsAnnotatorTestCase.java b/indexinglanguage/src/test/java/com/yahoo/vespa/indexinglanguage/linguistics/LinguisticsAnnotatorTestCase.java
index 136e71564d8..461c915acef 100644
--- a/indexinglanguage/src/test/java/com/yahoo/vespa/indexinglanguage/linguistics/LinguisticsAnnotatorTestCase.java
+++ b/indexinglanguage/src/test/java/com/yahoo/vespa/indexinglanguage/linguistics/LinguisticsAnnotatorTestCase.java
@@ -194,7 +194,7 @@ public class LinguisticsAnnotatorTestCase {
Linguistics linguistics = new SimpleLinguistics();
- LinguisticsAnnotator annotator = new LinguisticsAnnotator(linguistics, new AnnotatorConfig().setMaxTokenLength(12));
+ LinguisticsAnnotator annotator = new LinguisticsAnnotator(linguistics, new AnnotatorConfig().setMaxTokenizeLength(12));
assertTrue(annotator.annotate(shortValue));
assertEquals(spanTree, shortValue.getSpanTree(SpanTrees.LINGUISTICS));
diff --git a/indexinglanguage/src/test/java/com/yahoo/vespa/indexinglanguage/parser/ExpressionTestCase.java b/indexinglanguage/src/test/java/com/yahoo/vespa/indexinglanguage/parser/ExpressionTestCase.java
index a7ed7ae3e72..1b7c6973f1e 100644
--- a/indexinglanguage/src/test/java/com/yahoo/vespa/indexinglanguage/parser/ExpressionTestCase.java
+++ b/indexinglanguage/src/test/java/com/yahoo/vespa/indexinglanguage/parser/ExpressionTestCase.java
@@ -27,6 +27,7 @@ public class ExpressionTestCase {
assertExpression(ClearStateExpression.class, "clear_state");
assertExpression(EchoExpression.class, "echo");
assertExpression(ExactExpression.class, "exact");
+ assertExpression(ExactExpression.class, "exact max-token-length: 10", Optional.of("exact max-token-length:10"));
assertExpression(FlattenExpression.class, "flatten");
assertExpression(ForEachExpression.class, "for_each { 1 }");
assertExpression(GetFieldExpression.class, "get_field field1");
@@ -73,6 +74,7 @@ public class ExpressionTestCase {
assertExpression(TokenizeExpression.class, "tokenize stem:\"ALL\"");
assertExpression(TokenizeExpression.class, "tokenize normalize");
assertExpression(TokenizeExpression.class, "tokenize max-occurrences: 15", Optional.of("tokenize max-occurrences:15"));
+ assertExpression(TokenizeExpression.class, "tokenize max-token-length: 15", Optional.of("tokenize max-token-length:15"));
assertExpression(ToLongExpression.class, "to_long");
assertExpression(ToPositionExpression.class, "to_pos");
assertExpression(ToStringExpression.class, "to_string");