From a2fa15b1ab6d6a9930381e981694ce5b39a1160c Mon Sep 17 00:00:00 2001
From: Jon Bratseth <bratseth@gmail.com>
Date: Sun, 11 Sep 2022 22:19:47 +0200
Subject: No functional changes (except SimpleToken.toString() output and exception message text)

---
 .../linguistics/LinguisticsAnnotator.java          | 27 ++++++++++------------
 .../com/yahoo/language/simple/SimpleToken.java     | 20 ++++++++--------
 .../yahoo/language/simple/SimpleTokenTestCase.java |  4 ++--
 3 files changed, 23 insertions(+), 28 deletions(-)

diff --git a/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/linguistics/LinguisticsAnnotator.java b/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/linguistics/LinguisticsAnnotator.java
index 5986ab44426..173df65a47e 100644
--- a/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/linguistics/LinguisticsAnnotator.java
+++ b/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/linguistics/LinguisticsAnnotator.java
@@ -63,7 +63,7 @@ public class LinguisticsAnnotator {
      * Annotates the given string with the appropriate linguistics annotations.
      *
      * @param text the text to annotate
-     * @return whether or not anything was annotated
+     * @return whether anything was annotated
      */
     public boolean annotate(StringFieldValue text) {
         if (text.getSpanTree(SpanTrees.LINGUISTICS) != null) return true;  // Already annotated with LINGUISTICS.
@@ -116,21 +116,18 @@ public class LinguisticsAnnotator {
             }
             if ( ! token.isIndexable()) return;
         }
-        String orig = token.getOrig();
-        int pos = (int)token.getOffset();
-        if (pos >= input.length()) {
-            throw new IllegalArgumentException("Token '" + orig + "' has offset " + pos + ", which is outside the " +
-                                               "bounds of the input string; " + input);
+        if (token.getOffset() >= input.length()) {
+            throw new IllegalArgumentException(token + " has offset " + token.getOffset() + ", which is outside the " +
+                                               "bounds of the input string '" + input + "'");
         }
-        int len = orig.length();
-        if (pos + len > input.length()) {
-            throw new IllegalArgumentException("Token '" + orig + "' has offset " + pos + ", which makes it overflow " +
+        if (token.getOffset() + token.getOrig().length() > input.length()) { // the token's end must not pass the end of the input
+            throw new IllegalArgumentException(token + " has offset " + token.getOffset() + ", which makes it overflow " +
-                                               "the bounds of the input string; " + input);
+                                               "the bounds of the input string '" + input + "'");
         }
         if (mode == StemMode.ALL) {
-            Span where = parent.span(pos, len);
-            String lowercasedOrig = toLowerCase(orig);
-            addAnnotation(where, orig, orig, termOccurrences);
+            Span where = parent.span((int)token.getOffset(), token.getOrig().length());
+            String lowercasedOrig = toLowerCase(token.getOrig());
+            addAnnotation(where, token.getOrig(), token.getOrig(), termOccurrences);
 
             String lowercasedTerm = lowercasedOrig;
             String term = token.getTokenString();
@@ -138,20 +135,20 @@ public class LinguisticsAnnotator {
                 lowercasedTerm = toLowerCase(term);
             }
             if (! lowercasedOrig.equals(lowercasedTerm)) {
-                addAnnotation(where, term, orig, termOccurrences);
+                addAnnotation(where, term, token.getOrig(), termOccurrences);
             }
             for (int i = 0; i < token.getNumStems(); i++) {
                 String stem = token.getStem(i);
                 String lowercasedStem = toLowerCase(stem);
                 if (! (lowercasedOrig.equals(lowercasedStem) || lowercasedTerm.equals(lowercasedStem))) {
-                    addAnnotation(where, stem, orig, termOccurrences);
+                    addAnnotation(where, stem, token.getOrig(), termOccurrences);
                 }
             }
         } else {
             String term = token.getTokenString();
             if (term == null || term.trim().isEmpty()) return;
             if (termOccurrences.termCountBelowLimit(term))  {
-                parent.span(pos, len).annotate(lowerCaseTermAnnotation(term, token.getOrig()));
+                parent.span((int)token.getOffset(), token.getOrig().length()).annotate(lowerCaseTermAnnotation(term, token.getOrig()));
             }
         }
     }
diff --git a/linguistics/src/main/java/com/yahoo/language/simple/SimpleToken.java b/linguistics/src/main/java/com/yahoo/language/simple/SimpleToken.java
index b6ca219afc8..7ed9e1a2f03 100644
--- a/linguistics/src/main/java/com/yahoo/language/simple/SimpleToken.java
+++ b/linguistics/src/main/java/com/yahoo/language/simple/SimpleToken.java
@@ -7,6 +7,7 @@ import com.yahoo.language.process.TokenType;
 
 import java.util.ArrayList;
 import java.util.List;
+import java.util.Objects;
 
 /**
  * @author Mathias Mølster Lidal
@@ -115,14 +116,14 @@ public class SimpleToken implements Token {
         if (!(o instanceof Token other)) return false;
 
         if (getType() != other.getType()) return false;
-        if (!equalsOpt(getOrig(), other.getOrig())) return false;
+        if (!Objects.equals(getOrig(), other.getOrig())) return false;
         if (getOffset() != other.getOffset()) return false;
-        if (!equalsOpt(getScript(), other.getScript())) return false;
-        if (!equalsOpt(getTokenString(), other.getTokenString())) return false;
+        if (!Objects.equals(getScript(), other.getScript())) return false;
+        if (!Objects.equals(getTokenString(), other.getTokenString())) return false;
         if (isSpecialToken() != other.isSpecialToken()) return false;
         if (getNumComponents() != other.getNumComponents()) return false;
         for (int i = 0, len = getNumComponents(); i < len; ++i) {
-            if (!equalsOpt(getComponent(i), other.getComponent(i)))
+            if (!Objects.equals(getComponent(i), other.getComponent(i)))
                 return false;
         }
         return true;
@@ -133,15 +134,12 @@ public class SimpleToken implements Token {
         return orig.hashCode();
     }
 
-    private static boolean equalsOpt(Object lhs, Object rhs) {
-        if (lhs == null || rhs == null) {
-            return lhs == rhs;
-        }
-        return lhs.equals(rhs);
-    }
-
     @Override
     public String toString() {
+        return "token '" + orig + "'"; // compact form: only the orig field, single-quoted
     }
+
+    public String toDetailString() { // multi-line form: class name, then the output of toString(this, indent) in braces
         return "token : " + getClass().getSimpleName() + " {\n" + toString(this, "    ") + "}";
     }
 
diff --git a/linguistics/src/test/java/com/yahoo/language/simple/SimpleTokenTestCase.java b/linguistics/src/test/java/com/yahoo/language/simple/SimpleTokenTestCase.java
index c699f9d314b..67d787d8587 100644
--- a/linguistics/src/test/java/com/yahoo/language/simple/SimpleTokenTestCase.java
+++ b/linguistics/src/test/java/com/yahoo/language/simple/SimpleTokenTestCase.java
@@ -138,7 +138,7 @@ public class SimpleTokenTestCase {
     }
 
     @Test
-    public void requireThatToStringIsExpressive() {
+    public void testDetailString() {
         SimpleToken token = new SimpleToken("my_orig");
         token.addComponent(new SimpleToken("my_component_1"));
         token.addComponent(new SimpleToken("my_component_2"));
@@ -177,7 +177,7 @@ public class SimpleTokenTestCase {
                           "    token string : 'my_token_string'\n" +
                           "    type : ALPHABETIC\n" +
                           "}";
-        assertEquals(expected, token.toString());
+        assertEquals(expected, token.toDetailString());
     }
 
     @Test
-- 
cgit v1.2.3