diff options
author | Jon Bratseth <bratseth@gmail.com> | 2022-09-11 22:19:47 +0200 |
---|---|---|
committer | Jon Bratseth <bratseth@gmail.com> | 2022-09-11 22:19:47 +0200 |
commit | a2fa15b1ab6d6a9930381e981694ce5b39a1160c (patch) | |
tree | aa40dd981f7a396c7b60862843c5dfe78f90bfa6 /indexinglanguage | |
parent | dda637d7693328ad2f246e88d13d1d2293c59ae8 (diff) |
No functional changes
Diffstat (limited to 'indexinglanguage')
-rw-r--r-- | indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/linguistics/LinguisticsAnnotator.java | 27 |
1 files changed, 12 insertions, 15 deletions
```diff
diff --git a/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/linguistics/LinguisticsAnnotator.java b/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/linguistics/LinguisticsAnnotator.java
index 5986ab44426..173df65a47e 100644
--- a/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/linguistics/LinguisticsAnnotator.java
+++ b/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/linguistics/LinguisticsAnnotator.java
@@ -63,7 +63,7 @@ public class LinguisticsAnnotator {
      * Annotates the given string with the appropriate linguistics annotations.
      *
      * @param text the text to annotate
-     * @return whether or not anything was annotated
+     * @return whether anything was annotated
      */
     public boolean annotate(StringFieldValue text) {
         if (text.getSpanTree(SpanTrees.LINGUISTICS) != null) return true; // Already annotated with LINGUISTICS.
@@ -116,21 +116,18 @@ public class LinguisticsAnnotator {
             }
             if ( ! token.isIndexable()) return;
         }
-        String orig = token.getOrig();
-        int pos = (int)token.getOffset();
-        if (pos >= input.length()) {
-            throw new IllegalArgumentException("Token '" + orig + "' has offset " + pos + ", which is outside the " +
-                                               "bounds of the input string; " + input);
+        if (token.getOffset() >= input.length()) {
+            throw new IllegalArgumentException(token + " has offset " + token.getOffset() + ", which is outside the " +
+                                               "bounds of the input string '" + input + "'");
         }
-        int len = orig.length();
-        if (pos + len > input.length()) {
-            throw new IllegalArgumentException("Token '" + orig + "' has offset " + pos + ", which makes it overflow " +
+        if (token.getOffset() + token.getOrig().length() > input.length()) {
+            throw new IllegalArgumentException(token + " has offset " + token.getOffset() + ", which makes it overflow " +
                                                "the bounds of the input string; " + input);
         }
         if (mode == StemMode.ALL) {
-            Span where = parent.span(pos, len);
-            String lowercasedOrig = toLowerCase(orig);
-            addAnnotation(where, orig, orig, termOccurrences);
+            Span where = parent.span((int)token.getOffset(), token.getOrig().length());
+            String lowercasedOrig = toLowerCase(token.getOrig());
+            addAnnotation(where, token.getOrig(), token.getOrig(), termOccurrences);
 
             String lowercasedTerm = lowercasedOrig;
             String term = token.getTokenString();
@@ -138,20 +135,20 @@ public class LinguisticsAnnotator {
                 lowercasedTerm = toLowerCase(term);
             }
             if (! lowercasedOrig.equals(lowercasedTerm)) {
-                addAnnotation(where, term, orig, termOccurrences);
+                addAnnotation(where, term, token.getOrig(), termOccurrences);
             }
             for (int i = 0; i < token.getNumStems(); i++) {
                 String stem = token.getStem(i);
                 String lowercasedStem = toLowerCase(stem);
                 if (! (lowercasedOrig.equals(lowercasedStem) || lowercasedTerm.equals(lowercasedStem))) {
-                    addAnnotation(where, stem, orig, termOccurrences);
+                    addAnnotation(where, stem, token.getOrig(), termOccurrences);
                 }
             }
         } else {
             String term = token.getTokenString();
             if (term == null || term.trim().isEmpty()) return;
             if (termOccurrences.termCountBelowLimit(term)) {
-                parent.span(pos, len).annotate(lowerCaseTermAnnotation(term, token.getOrig()));
+                parent.span((int)token.getOffset(), token.getOrig().length()).annotate(lowerCaseTermAnnotation(term, token.getOrig()));
             }
         }
     }
```