aboutsummaryrefslogtreecommitdiffstats
path: root/indexinglanguage
diff options
context:
space:
mode:
author: Jon Bratseth <bratseth@oath.com> 2020-06-26 11:31:08 +0200
committer: GitHub <noreply@github.com> 2020-06-26 11:31:08 +0200
commit: bb367946be112361611f62fb6803c5060cfe9dde (patch)
tree: 99bbe3f58303f87e31cfeab897a6352c4690254e /indexinglanguage
parent: 472bea6c64a4c18be5097dd0cbb1078579ba580e (diff)
parent: c565914839d22b6d469c8626c8d5197d71588ad8 (diff)
Merge pull request #13709 from vespa-engine/bratseth/surrogate-aware-gram-splitting
Surrogate aware gram splitting
Diffstat (limited to 'indexinglanguage')
-rw-r--r-- indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/expressions/NGramExpression.java | 4
1 files changed, 2 insertions, 2 deletions
diff --git a/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/expressions/NGramExpression.java b/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/expressions/NGramExpression.java
index d91338e3d3f..adf3e4ecaaa 100644
--- a/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/expressions/NGramExpression.java
+++ b/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/expressions/NGramExpression.java
@@ -65,10 +65,10 @@ public final class NGramExpression extends Expression {
// annotate gram as a word term
String gramString = gram.extractFrom(input.getString());
- typedSpan(gram.getStart(), gram.getLength(), TokenType.ALPHABETIC, spanList).
+ typedSpan(gram.getStart(), gram.getCodePointCount(), TokenType.ALPHABETIC, spanList).
annotate(LinguisticsAnnotator.lowerCaseTermAnnotation(gramString, gramString));
- lastPosition = gram.getStart() + gram.getLength();
+ lastPosition = gram.getStart() + gram.getCodePointCount();
}
// handle punctuation at the end
if (lastPosition < input.toString().length()) {