summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/linguistics/LinguisticsAnnotator.java 27
-rw-r--r-- linguistics/src/main/java/com/yahoo/language/simple/SimpleToken.java 20
-rw-r--r-- linguistics/src/test/java/com/yahoo/language/simple/SimpleTokenTestCase.java 4
3 files changed, 23 insertions, 28 deletions
diff --git a/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/linguistics/LinguisticsAnnotator.java b/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/linguistics/LinguisticsAnnotator.java
index 5986ab44426..173df65a47e 100644
--- a/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/linguistics/LinguisticsAnnotator.java
+++ b/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/linguistics/LinguisticsAnnotator.java
@@ -63,7 +63,7 @@ public class LinguisticsAnnotator {
* Annotates the given string with the appropriate linguistics annotations.
*
* @param text the text to annotate
- * @return whether or not anything was annotated
+ * @return whether anything was annotated
*/
public boolean annotate(StringFieldValue text) {
if (text.getSpanTree(SpanTrees.LINGUISTICS) != null) return true; // Already annotated with LINGUISTICS.
@@ -116,21 +116,18 @@ public class LinguisticsAnnotator {
}
if ( ! token.isIndexable()) return;
}
- String orig = token.getOrig();
- int pos = (int)token.getOffset();
- if (pos >= input.length()) {
- throw new IllegalArgumentException("Token '" + orig + "' has offset " + pos + ", which is outside the " +
- "bounds of the input string; " + input);
+ if (token.getOffset() >= input.length()) {
+ throw new IllegalArgumentException(token + " has offset " + token.getOffset() + ", which is outside the " +
+ "bounds of the input string '" + input + "'");
}
- int len = orig.length();
- if (pos + len > input.length()) {
- throw new IllegalArgumentException("Token '" + orig + "' has offset " + pos + ", which makes it overflow " +
+ if (token.getOffset() + token.getOrig().length() > input.length()) {
+ throw new IllegalArgumentException(token + " has offset " + token.getOffset() + ", which makes it overflow " +
"the bounds of the input string; " + input);
}
if (mode == StemMode.ALL) {
- Span where = parent.span(pos, len);
- String lowercasedOrig = toLowerCase(orig);
- addAnnotation(where, orig, orig, termOccurrences);
+ Span where = parent.span((int)token.getOffset(), token.getOrig().length());
+ String lowercasedOrig = toLowerCase(token.getOrig());
+ addAnnotation(where, token.getOrig(), token.getOrig(), termOccurrences);
String lowercasedTerm = lowercasedOrig;
String term = token.getTokenString();
@@ -138,20 +135,20 @@ public class LinguisticsAnnotator {
lowercasedTerm = toLowerCase(term);
}
if (! lowercasedOrig.equals(lowercasedTerm)) {
- addAnnotation(where, term, orig, termOccurrences);
+ addAnnotation(where, term, token.getOrig(), termOccurrences);
}
for (int i = 0; i < token.getNumStems(); i++) {
String stem = token.getStem(i);
String lowercasedStem = toLowerCase(stem);
if (! (lowercasedOrig.equals(lowercasedStem) || lowercasedTerm.equals(lowercasedStem))) {
- addAnnotation(where, stem, orig, termOccurrences);
+ addAnnotation(where, stem, token.getOrig(), termOccurrences);
}
}
} else {
String term = token.getTokenString();
if (term == null || term.trim().isEmpty()) return;
if (termOccurrences.termCountBelowLimit(term)) {
- parent.span(pos, len).annotate(lowerCaseTermAnnotation(term, token.getOrig()));
+ parent.span((int)token.getOffset(), token.getOrig().length()).annotate(lowerCaseTermAnnotation(term, token.getOrig()));
}
}
}
diff --git a/linguistics/src/main/java/com/yahoo/language/simple/SimpleToken.java b/linguistics/src/main/java/com/yahoo/language/simple/SimpleToken.java
index b6ca219afc8..7ed9e1a2f03 100644
--- a/linguistics/src/main/java/com/yahoo/language/simple/SimpleToken.java
+++ b/linguistics/src/main/java/com/yahoo/language/simple/SimpleToken.java
@@ -7,6 +7,7 @@ import com.yahoo.language.process.TokenType;
import java.util.ArrayList;
import java.util.List;
+import java.util.Objects;
/**
* @author Mathias Mølster Lidal
@@ -115,14 +116,14 @@ public class SimpleToken implements Token {
if (!(o instanceof Token other)) return false;
if (getType() != other.getType()) return false;
- if (!equalsOpt(getOrig(), other.getOrig())) return false;
+ if (!Objects.equals(getOrig(), other.getOrig())) return false;
if (getOffset() != other.getOffset()) return false;
- if (!equalsOpt(getScript(), other.getScript())) return false;
- if (!equalsOpt(getTokenString(), other.getTokenString())) return false;
+ if (!Objects.equals(getScript(), other.getScript())) return false;
+ if (!Objects.equals(getTokenString(), other.getTokenString())) return false;
if (isSpecialToken() != other.isSpecialToken()) return false;
if (getNumComponents() != other.getNumComponents()) return false;
for (int i = 0, len = getNumComponents(); i < len; ++i) {
- if (!equalsOpt(getComponent(i), other.getComponent(i)))
+ if (!Objects.equals(getComponent(i), other.getComponent(i)))
return false;
}
return true;
@@ -133,15 +134,12 @@ public class SimpleToken implements Token {
return orig.hashCode();
}
- private static boolean equalsOpt(Object lhs, Object rhs) {
- if (lhs == null || rhs == null) {
- return lhs == rhs;
- }
- return lhs.equals(rhs);
- }
-
@Override
public String toString() {
+ return "token '" + orig + "'";
+ }
+
+ public String toDetailString() {
return "token : " + getClass().getSimpleName() + " {\n" + toString(this, " ") + "}";
}
diff --git a/linguistics/src/test/java/com/yahoo/language/simple/SimpleTokenTestCase.java b/linguistics/src/test/java/com/yahoo/language/simple/SimpleTokenTestCase.java
index c699f9d314b..67d787d8587 100644
--- a/linguistics/src/test/java/com/yahoo/language/simple/SimpleTokenTestCase.java
+++ b/linguistics/src/test/java/com/yahoo/language/simple/SimpleTokenTestCase.java
@@ -138,7 +138,7 @@ public class SimpleTokenTestCase {
}
@Test
- public void requireThatToStringIsExpressive() {
+ public void testDetailString() {
SimpleToken token = new SimpleToken("my_orig");
token.addComponent(new SimpleToken("my_component_1"));
token.addComponent(new SimpleToken("my_component_2"));
@@ -177,7 +177,7 @@ public class SimpleTokenTestCase {
" token string : 'my_token_string'\n" +
" type : ALPHABETIC\n" +
"}";
- assertEquals(expected, token.toString());
+ assertEquals(expected, token.toDetailString());
}
@Test