aboutsummaryrefslogtreecommitdiffstats
path: root/indexinglanguage
diff options
context:
space:
mode:
author Jon Bratseth <bratseth@gmail.com> 2020-06-25 14:09:24 +0200
committer Jon Bratseth <bratseth@gmail.com> 2020-06-25 14:09:24 +0200
commit 74bffb810050342bd32065a818e4f74b8cd7ce51 (patch)
tree f4e50acb6aee944f0176d049ee94ca4a3a0614c6 /indexinglanguage
parent 0680bf96a4bf17aec0b9fde98ac5369c0991f0fb (diff)
Surrogate aware gram splitting
Diffstat (limited to 'indexinglanguage')
-rw-r--r-- indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/expressions/NGramExpression.java 4
1 file changed, 2 insertions, 2 deletions
diff --git a/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/expressions/NGramExpression.java b/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/expressions/NGramExpression.java
index d91338e3d3f..adf3e4ecaaa 100644
--- a/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/expressions/NGramExpression.java
+++ b/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/expressions/NGramExpression.java
@@ -65,10 +65,10 @@ public final class NGramExpression extends Expression {
// annotate gram as a word term
String gramString = gram.extractFrom(input.getString());
- typedSpan(gram.getStart(), gram.getLength(), TokenType.ALPHABETIC, spanList).
+ typedSpan(gram.getStart(), gram.getCodePointCount(), TokenType.ALPHABETIC, spanList).
annotate(LinguisticsAnnotator.lowerCaseTermAnnotation(gramString, gramString));
- lastPosition = gram.getStart() + gram.getLength();
+ lastPosition = gram.getStart() + gram.getCodePointCount();
}
// handle punctuation at the end
if (lastPosition < input.toString().length()) {