aboutsummaryrefslogtreecommitdiffstats
path: root/indexinglanguage/src/test/java/com
diff options
context:
space:
mode:
Diffstat (limited to 'indexinglanguage/src/test/java/com')
-rw-r--r--indexinglanguage/src/test/java/com/yahoo/vespa/indexinglanguage/expressions/ExactTestCase.java9
-rw-r--r--indexinglanguage/src/test/java/com/yahoo/vespa/indexinglanguage/expressions/TokenizeTestCase.java11
-rw-r--r--indexinglanguage/src/test/java/com/yahoo/vespa/indexinglanguage/linguistics/AnnotatorConfigTestCase.java5
-rw-r--r--indexinglanguage/src/test/java/com/yahoo/vespa/indexinglanguage/linguistics/LinguisticsAnnotatorTestCase.java2
-rw-r--r--indexinglanguage/src/test/java/com/yahoo/vespa/indexinglanguage/parser/ExpressionTestCase.java2
5 files changed, 28 insertions, 1 deletions
diff --git a/indexinglanguage/src/test/java/com/yahoo/vespa/indexinglanguage/expressions/ExactTestCase.java b/indexinglanguage/src/test/java/com/yahoo/vespa/indexinglanguage/expressions/ExactTestCase.java
index 403d1820f70..b338c45f7a4 100644
--- a/indexinglanguage/src/test/java/com/yahoo/vespa/indexinglanguage/expressions/ExactTestCase.java
+++ b/indexinglanguage/src/test/java/com/yahoo/vespa/indexinglanguage/expressions/ExactTestCase.java
@@ -63,6 +63,15 @@ public class ExactTestCase {
}
@Test
+ public void requireThatLongStringsAreNotAnnotated() {
+ ExecutionContext ctx = new ExecutionContext(new SimpleTestAdapter());
+ ctx.setValue(new StringFieldValue("foo"));
+ new ExactExpression(2).execute(ctx);
+
+ assertNull(((StringFieldValue)ctx.getValue()).getSpanTree(SpanTrees.LINGUISTICS));
+ }
+
+ @Test
public void requireThatEmptyStringsAreNotAnnotated() {
ExecutionContext ctx = new ExecutionContext(new SimpleTestAdapter());
ctx.setValue(new StringFieldValue(""));
diff --git a/indexinglanguage/src/test/java/com/yahoo/vespa/indexinglanguage/expressions/TokenizeTestCase.java b/indexinglanguage/src/test/java/com/yahoo/vespa/indexinglanguage/expressions/TokenizeTestCase.java
index 01ffbe359f3..7ed3ab410a3 100644
--- a/indexinglanguage/src/test/java/com/yahoo/vespa/indexinglanguage/expressions/TokenizeTestCase.java
+++ b/indexinglanguage/src/test/java/com/yahoo/vespa/indexinglanguage/expressions/TokenizeTestCase.java
@@ -62,4 +62,15 @@ public class TokenizeTestCase {
assertTrue(val instanceof StringFieldValue);
assertNotNull(((StringFieldValue)val).getSpanTree(SpanTrees.LINGUISTICS));
}
+
+ @Test
+ public void requireThatLongWordIsDropped() {
+ ExecutionContext ctx = new ExecutionContext(new SimpleTestAdapter());
+ ctx.setValue(new StringFieldValue("foo"));
+ new TokenizeExpression(new SimpleLinguistics(), new AnnotatorConfig().setMaxTokenLength(2)).execute(ctx);
+
+ FieldValue val = ctx.getValue();
+ assertTrue(val instanceof StringFieldValue);
+ assertNull(((StringFieldValue)val).getSpanTree(SpanTrees.LINGUISTICS));
+ }
}
diff --git a/indexinglanguage/src/test/java/com/yahoo/vespa/indexinglanguage/linguistics/AnnotatorConfigTestCase.java b/indexinglanguage/src/test/java/com/yahoo/vespa/indexinglanguage/linguistics/AnnotatorConfigTestCase.java
index 0d34d2841fd..c3131e28906 100644
--- a/indexinglanguage/src/test/java/com/yahoo/vespa/indexinglanguage/linguistics/AnnotatorConfigTestCase.java
+++ b/indexinglanguage/src/test/java/com/yahoo/vespa/indexinglanguage/linguistics/AnnotatorConfigTestCase.java
@@ -27,6 +27,8 @@ public class AnnotatorConfigTestCase {
assertTrue(config.getRemoveAccents());
config.setRemoveAccents(false);
assertFalse(config.getRemoveAccents());
+ config.setMaxTokenLength(10);
+ assertEquals(10, config.getMaxTokenLength());
}
@Test
@@ -35,11 +37,13 @@ public class AnnotatorConfigTestCase {
config.setLanguage(Language.ARABIC);
config.setStemMode(StemMode.SHORTEST);
config.setRemoveAccents(!config.getRemoveAccents());
+ config.setMaxTokenLength(11);
AnnotatorConfig other = new AnnotatorConfig(config);
assertEquals(config.getLanguage(), other.getLanguage());
assertEquals(config.getStemMode(), other.getStemMode());
assertEquals(config.getRemoveAccents(), other.getRemoveAccents());
+ assertEquals(config.getMaxTokenLength(), other.getMaxTokenLength());
}
@Test
@@ -49,6 +53,7 @@ public class AnnotatorConfigTestCase {
assertFalse(config.equals(newConfig(Language.SPANISH, StemMode.SHORTEST, false)));
assertFalse(config.equals(newConfig(Language.DUTCH, StemMode.SHORTEST, false)));
assertFalse(config.equals(newConfig(Language.DUTCH, StemMode.NONE, false)));
+ assertNotEquals(config, newConfig(Language.DUTCH, StemMode.NONE, true).setMaxTokenLength(10));
assertEquals(config, newConfig(Language.DUTCH, StemMode.NONE, true));
assertEquals(config.hashCode(), newConfig(Language.DUTCH, StemMode.NONE, true).hashCode());
}
diff --git a/indexinglanguage/src/test/java/com/yahoo/vespa/indexinglanguage/linguistics/LinguisticsAnnotatorTestCase.java b/indexinglanguage/src/test/java/com/yahoo/vespa/indexinglanguage/linguistics/LinguisticsAnnotatorTestCase.java
index 136e71564d8..461c915acef 100644
--- a/indexinglanguage/src/test/java/com/yahoo/vespa/indexinglanguage/linguistics/LinguisticsAnnotatorTestCase.java
+++ b/indexinglanguage/src/test/java/com/yahoo/vespa/indexinglanguage/linguistics/LinguisticsAnnotatorTestCase.java
@@ -194,7 +194,7 @@ public class LinguisticsAnnotatorTestCase {
Linguistics linguistics = new SimpleLinguistics();
- LinguisticsAnnotator annotator = new LinguisticsAnnotator(linguistics, new AnnotatorConfig().setMaxTokenLength(12));
+ LinguisticsAnnotator annotator = new LinguisticsAnnotator(linguistics, new AnnotatorConfig().setMaxTokenizeLength(12));
assertTrue(annotator.annotate(shortValue));
assertEquals(spanTree, shortValue.getSpanTree(SpanTrees.LINGUISTICS));
diff --git a/indexinglanguage/src/test/java/com/yahoo/vespa/indexinglanguage/parser/ExpressionTestCase.java b/indexinglanguage/src/test/java/com/yahoo/vespa/indexinglanguage/parser/ExpressionTestCase.java
index a7ed7ae3e72..1b7c6973f1e 100644
--- a/indexinglanguage/src/test/java/com/yahoo/vespa/indexinglanguage/parser/ExpressionTestCase.java
+++ b/indexinglanguage/src/test/java/com/yahoo/vespa/indexinglanguage/parser/ExpressionTestCase.java
@@ -27,6 +27,7 @@ public class ExpressionTestCase {
assertExpression(ClearStateExpression.class, "clear_state");
assertExpression(EchoExpression.class, "echo");
assertExpression(ExactExpression.class, "exact");
+ assertExpression(ExactExpression.class, "exact max-token-length: 10", Optional.of("exact max-token-length:10"));
assertExpression(FlattenExpression.class, "flatten");
assertExpression(ForEachExpression.class, "for_each { 1 }");
assertExpression(GetFieldExpression.class, "get_field field1");
@@ -73,6 +74,7 @@ public class ExpressionTestCase {
assertExpression(TokenizeExpression.class, "tokenize stem:\"ALL\"");
assertExpression(TokenizeExpression.class, "tokenize normalize");
assertExpression(TokenizeExpression.class, "tokenize max-occurrences: 15", Optional.of("tokenize max-occurrences:15"));
+ assertExpression(TokenizeExpression.class, "tokenize max-token-length: 15", Optional.of("tokenize max-token-length:15"));
assertExpression(ToLongExpression.class, "to_long");
assertExpression(ToPositionExpression.class, "to_pos");
assertExpression(ToStringExpression.class, "to_string");