summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/linguistics/LinguisticsAnnotator.java2
1 files changed, 1 insertions, 1 deletions
diff --git a/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/linguistics/LinguisticsAnnotator.java b/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/linguistics/LinguisticsAnnotator.java
index 40bdaa9db5d..191d067effe 100644
--- a/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/linguistics/LinguisticsAnnotator.java
+++ b/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/linguistics/LinguisticsAnnotator.java
@@ -72,7 +72,7 @@ public class LinguisticsAnnotator {
Tokenizer tokenizer = factory.getTokenizer();
String input = (text.getString().length() <= config.getMaxTokenizeLength())
? text.getString()
- : Text.safeSubstring(text.getString(), config.getMaxTokenizeLength());
+ : Text.substringByCodepoints(text.getString(), 0, config.getMaxTokenizeLength());
Iterable<Token> tokens = tokenizer.tokenize(input, config.getLanguage(), config.getStemMode(),
config.getRemoveAccents());
TermOccurrences termOccurrences = new TermOccurrences(config.getMaxTermOccurrences());