Revert "Bratseth/casing take 2"

author: Harald Musum <musum@vespa.ai> 2023-11-13 21:34:45 +0100
committer: GitHub <noreply@github.com> 2023-11-13 21:34:45 +0100
commit: ef5be496bc4857c5923f566251dd527873b248bf (patch)
tree: 657d51a4166d3f7cf40e04f0a5972f11d0261afd /indexinglanguage/src/test/java
parent: 944d635d00e165166508ef23399e9ed65a87a9c8 (diff)
2 files changed, 38 insertions, 55 deletions
diff --git a/indexinglanguage/src/test/java/com/yahoo/vespa/indexinglanguage/expressions/NGramTestCase.java b/indexinglanguage/src/test/java/com/yahoo/vespa/indexinglanguage/expressions/NGramTestCase.java
index b4e266ab3eb..bcde8751de8 100644
--- a/indexinglanguage/src/test/java/com/yahoo/vespa/indexinglanguage/expressions/NGramTestCase.java
+++ b/indexinglanguage/src/test/java/com/yahoo/vespa/indexinglanguage/expressions/NGramTestCase.java
@@ -57,8 +57,8 @@ public class NGramTestCase {
         new NGramExpression(new SimpleLinguistics(), 3).execute(context);
 
         StringFieldValue value = (StringFieldValue)context.getValue();
-        assertEquals("Grams are pure annotations - field value is unchanged",
-                     "en gul Bille sang... ", value.getString());
+        assertEquals("Grams are pure annotations - field value is unchanged", "en gul Bille sang... ",
+                     value.getString());
         SpanTree gramTree = value.getSpanTree(SpanTrees.LINGUISTICS);
         assertNotNull(gramTree);
         SpanList grams = (SpanList)gramTree.getRoot();
diff --git a/indexinglanguage/src/test/java/com/yahoo/vespa/indexinglanguage/linguistics/LinguisticsAnnotatorTestCase.java b/indexinglanguage/src/test/java/com/yahoo/vespa/indexinglanguage/linguistics/LinguisticsAnnotatorTestCase.java
index a4dbe1fe826..67bff3843ee 100644
--- a/indexinglanguage/src/test/java/com/yahoo/vespa/indexinglanguage/linguistics/LinguisticsAnnotatorTestCase.java
+++ b/indexinglanguage/src/test/java/com/yahoo/vespa/indexinglanguage/linguistics/LinguisticsAnnotatorTestCase.java
@@ -19,6 +19,7 @@ import org.junit.Test;
 import org.mockito.Mockito;
 
 import java.util.*;
+import java.util.stream.Collectors;
 
 import static org.junit.Assert.assertEquals;
 import static org.junit.Assert.assertTrue;
@@ -28,6 +29,8 @@ import static org.junit.Assert.assertTrue;
  */
 public class LinguisticsAnnotatorTestCase {
 
+    private static final AnnotatorConfig CONFIG = new AnnotatorConfig();
+
     @Test
     public void requireThatAnnotateFailsWithZeroTokens() {
         assertAnnotations(null, "foo");
@@ -39,7 +42,7 @@ public class LinguisticsAnnotatorTestCase {
             if (type.isIndexable()) {
                 continue;
             }
-            assertAnnotations(null, "foo", token("foo", "bar", type));
+            assertAnnotations(null, "foo", newToken("foo", "bar", type));
         }
     }
 
@@ -51,27 +54,7 @@ public class LinguisticsAnnotatorTestCase {
             if (!type.isIndexable()) {
                 continue;
             }
-            assertAnnotations(expected, "foo", token("foo", "bar", type));
-        }
-    }
-
-    @Test
-    public void requireThatIndexableTokenStringsAreAnnotatedWithModeALL() {
-        SpanTree expected = new SpanTree(SpanTrees.LINGUISTICS);
-        expected.spanList().span(0, 5).annotate(new Annotation(AnnotationTypes.TERM, new StringFieldValue("tesla")));
-        var span2 = expected.spanList().span(0, 4);
-        span2.annotate(new Annotation(AnnotationTypes.TERM));
-        span2.annotate(new Annotation(AnnotationTypes.TERM, new StringFieldValue("car")));
-        var span3 = expected.spanList().span(0, 8);
-        span3.annotate(new Annotation(AnnotationTypes.TERM, new StringFieldValue("modelxes")));
-        span3.annotate(new Annotation(AnnotationTypes.TERM, new StringFieldValue("modelx")));
-        span3.annotate(new Annotation(AnnotationTypes.TERM, new StringFieldValue("mex")));
-        for (TokenType type : TokenType.values()) {
-            if (!type.isIndexable()) continue;
-            assertAnnotations(expected, "Tesla cars", new AnnotatorConfig().setStemMode("ALL"),
-                              token("Tesla", "tesla", type),
-                              token("cars", "car", type),
-                              SimpleToken.fromStems("ModelXes", List.of("modelxes", "modelx", "mex")));
+            assertAnnotations(expected, "foo", newToken("foo", "bar", type));
         }
     }
 
@@ -80,7 +63,7 @@ public class LinguisticsAnnotatorTestCase {
         SpanTree expected = new SpanTree(SpanTrees.LINGUISTICS);
         expected.spanList().span(0, 3).annotate(new Annotation(AnnotationTypes.TERM, new StringFieldValue("bar")));
         for (TokenType type : TokenType.values()) {
-            assertAnnotations(expected, "foo", token("foo", "bar", type, true));
+            assertAnnotations(expected, "foo", newToken("foo", "bar", type, true));
         }
     }
 
@@ -93,21 +76,21 @@ public class LinguisticsAnnotatorTestCase {
                 if (!specialToken && !type.isIndexable()) {
                     continue;
                 }
-                assertAnnotations(expected, "foo", token("foo", "foo", type, specialToken));
+                assertAnnotations(expected, "foo", newToken("foo", "foo", type, specialToken));
             }
         }
     }
 
     @Test
-    public void requireThatTermAnnotationsPreserveCasing() {
+    public void requireThatTermAnnotationsAreLowerCased() {
         SpanTree expected = new SpanTree(SpanTrees.LINGUISTICS);
-        expected.spanList().span(0, 3).annotate(new Annotation(AnnotationTypes.TERM, new StringFieldValue("BaR")));
+        expected.spanList().span(0, 3).annotate(new Annotation(AnnotationTypes.TERM, new StringFieldValue("bar")));
         for (boolean specialToken : Arrays.asList(true, false)) {
             for (TokenType type : TokenType.values()) {
                 if (!specialToken && !type.isIndexable()) {
                     continue;
                 }
-                assertAnnotations(expected, "foo", token("foo", "BaR", type, specialToken));
+                assertAnnotations(expected, "foo", newToken("foo", "BAR", type, specialToken));
             }
         }
     }
@@ -119,11 +102,11 @@ public class LinguisticsAnnotatorTestCase {
         expected.spanList().span(3, 3).annotate(new Annotation(AnnotationTypes.TERM, new StringFieldValue("bar")));
         expected.spanList().span(6, 3).annotate(new Annotation(AnnotationTypes.TERM, new StringFieldValue("baz")));
 
-        SimpleToken token = token("FOOBARBAZ", "foobarbaz", TokenType.ALPHABETIC)
-                .addComponent(token("FOO", "foo", TokenType.ALPHABETIC).setOffset(0))
-                .addComponent(token("BARBAZ", "barbaz", TokenType.ALPHABETIC).setOffset(3)
-                                                                             .addComponent(token("BAR", "bar", TokenType.ALPHABETIC).setOffset(3))
-                                                                             .addComponent(token("BAZ", "baz", TokenType.ALPHABETIC).setOffset(6)));
+        SimpleToken token = newToken("FOOBARBAZ", "foobarbaz", TokenType.ALPHABETIC)
+                .addComponent(newToken("FOO", "foo", TokenType.ALPHABETIC).setOffset(0))
+                .addComponent(newToken("BARBAZ", "barbaz", TokenType.ALPHABETIC).setOffset(3)
+                                      .addComponent(newToken("BAR", "bar", TokenType.ALPHABETIC).setOffset(3))
+                                      .addComponent(newToken("BAZ", "baz", TokenType.ALPHABETIC).setOffset(6)));
         assertAnnotations(expected, "foobarbaz", token);
     }
 
@@ -133,11 +116,11 @@ public class LinguisticsAnnotatorTestCase {
         expected.spanList().span(0, 9).annotate(new Annotation(AnnotationTypes.TERM,
                                                                new StringFieldValue("foobarbaz")));
 
-        SimpleToken token = token("FOOBARBAZ", "foobarbaz", TokenType.ALPHABETIC).setSpecialToken(true)
-                                                                                 .addComponent(token("FOO", "foo", TokenType.ALPHABETIC).setOffset(0))
-                                                                                 .addComponent(token("BARBAZ", "barbaz", TokenType.ALPHABETIC).setOffset(3)
-                                                                                                                                              .addComponent(token("BAR", "bar", TokenType.ALPHABETIC).setOffset(3))
-                                                                                                                                              .addComponent(token("BAZ", "baz", TokenType.ALPHABETIC).setOffset(6)));
+        SimpleToken token = newToken("FOOBARBAZ", "foobarbaz", TokenType.ALPHABETIC).setSpecialToken(true)
+                .addComponent(newToken("FOO", "foo", TokenType.ALPHABETIC).setOffset(0))
+                .addComponent(newToken("BARBAZ", "barbaz", TokenType.ALPHABETIC).setOffset(3)
+                                      .addComponent(newToken("BAR", "bar", TokenType.ALPHABETIC).setOffset(3))
+                                      .addComponent(newToken("BAZ", "baz", TokenType.ALPHABETIC).setOffset(6)));
         assertAnnotations(expected, "foobarbaz", token);
     }
 
@@ -157,8 +140,7 @@ public class LinguisticsAnnotatorTestCase {
                     continue;
                 }
                 assertAnnotations(expected, "foo",
-                                  new AnnotatorConfig(),
-                                  newLinguistics(List.of(token("foo", "foo", type, specialToken)),
+                                  newLinguistics(List.of(newToken("foo", "foo", type, specialToken)),
                                                  Collections.singletonMap("foo", "bar")));
             }
         }
@@ -172,9 +154,11 @@ public class LinguisticsAnnotatorTestCase {
         StringFieldValue val = new StringFieldValue("foo");
         val.setSpanTree(spanTree);
 
-        Linguistics linguistics = newLinguistics(List.of(token("foo", "bar", TokenType.ALPHABETIC, false)),
+        Linguistics linguistics = newLinguistics(List.of(newToken("foo", "bar", TokenType.ALPHABETIC, false)),
                                                  Collections.<String, String>emptyMap());
-        assertTrue(new LinguisticsAnnotator(linguistics, new AnnotatorConfig()).annotate(val));
+        new LinguisticsAnnotator(linguistics, CONFIG).annotate(val);
+
+        assertTrue(new LinguisticsAnnotator(linguistics, CONFIG).annotate(val));
         assertEquals(spanTree, val.getSpanTree(SpanTrees.LINGUISTICS));
     }
 
@@ -202,7 +186,7 @@ public class LinguisticsAnnotatorTestCase {
     }
 
     @Test
-    public void requireThatMaxTermOccurrencesIsHonored() {
+    public void requireThatMaxTermOccurencesIsHonored() {
         final String inputTerm = "foo";
         final String stemmedInputTerm = "bar"; // completely different from
                                                // inputTerm for safer test
@@ -220,7 +204,7 @@ public class LinguisticsAnnotatorTestCase {
             StringBuilder input = new StringBuilder();
             Token[] tokens = new Token[inputTermOccurence];
             for (int i = 0; i < inputTermOccurence; ++i) {
-                SimpleToken t = token(inputTerm, stemmedInputTerm, type);
+                SimpleToken t = newToken(inputTerm, stemmedInputTerm, type);
                 t.setOffset(i * paddedInputTerm.length());
                 tokens[i] = t;
                 input.append(paddedInputTerm);
@@ -230,29 +214,28 @@ public class LinguisticsAnnotatorTestCase {
     }
 
     // --------------------------------------------------------------------------------
+    //
     // Utilities
+    //
+    // --------------------------------------------------------------------------------
 
-    private static SimpleToken token(String orig, String stem, TokenType type) {
-        return token(orig, stem, type, false);
+    private static SimpleToken newToken(String orig, String stem, TokenType type) {
+        return newToken(orig, stem, type, false);
     }
 
-    private static SimpleToken token(String orig, String stem, TokenType type, boolean specialToken) {
+    private static SimpleToken newToken(String orig, String stem, TokenType type, boolean specialToken) {
         return new SimpleToken(orig).setTokenString(stem)
                                     .setType(type)
                                     .setSpecialToken(specialToken);
     }
 
     private static void assertAnnotations(SpanTree expected, String value, Token... tokens) {
-        assertAnnotations(expected, value, new AnnotatorConfig(), newLinguistics(Arrays.asList(tokens), Collections.emptyMap()));
-    }
-
-    private static void assertAnnotations(SpanTree expected, String value, AnnotatorConfig config, Token... tokens) {
-        assertAnnotations(expected, value, config, newLinguistics(Arrays.asList(tokens), Collections.emptyMap()));
+        assertAnnotations(expected, value, newLinguistics(Arrays.asList(tokens), Collections.<String, String>emptyMap()));
     }
 
-    private static void assertAnnotations(SpanTree expected, String str, AnnotatorConfig config, Linguistics linguistics) {
+    private static void assertAnnotations(SpanTree expected, String str, Linguistics linguistics) {
         StringFieldValue val = new StringFieldValue(str);
-        assertEquals(expected != null, new LinguisticsAnnotator(linguistics, config).annotate(val));
+        assertEquals(expected != null, new LinguisticsAnnotator(linguistics, CONFIG).annotate(val));
         assertEquals(expected, val.getSpanTree(SpanTrees.LINGUISTICS));
     }
author	Harald Musum <musum@vespa.ai>	2023-11-13 21:34:45 +0100
committer	GitHub <noreply@github.com>	2023-11-13 21:34:45 +0100
commit	ef5be496bc4857c5923f566251dd527873b248bf (patch)
tree	657d51a4166d3f7cf40e04f0a5972f11d0261afd /indexinglanguage/src/test/java
parent	944d635d00e165166508ef23399e9ed65a87a9c8 (diff)