diff options
author | Jon Bratseth <bratseth@oath.com> | 2020-06-26 11:31:08 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2020-06-26 11:31:08 +0200 |
commit | bb367946be112361611f62fb6803c5060cfe9dde (patch) | |
tree | 99bbe3f58303f87e31cfeab897a6352c4690254e /indexinglanguage/src | |
parent | 472bea6c64a4c18be5097dd0cbb1078579ba580e (diff) | |
parent | c565914839d22b6d469c8626c8d5197d71588ad8 (diff) |
Merge pull request #13709 from vespa-engine/bratseth/surrogate-aware-gram-splitting
Surrogate aware gram splitting
Diffstat (limited to 'indexinglanguage/src')
-rw-r--r-- | indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/expressions/NGramExpression.java | 4 |
1 file changed, 2 insertions, 2 deletions
diff --git a/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/expressions/NGramExpression.java b/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/expressions/NGramExpression.java
index d91338e3d3f..adf3e4ecaaa 100644
--- a/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/expressions/NGramExpression.java
+++ b/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/expressions/NGramExpression.java
@@ -65,10 +65,10 @@ public final class NGramExpression extends Expression {
             // annotate gram as a word term
             String gramString = gram.extractFrom(input.getString());
-            typedSpan(gram.getStart(), gram.getLength(), TokenType.ALPHABETIC, spanList).
+            typedSpan(gram.getStart(), gram.getCodePointCount(), TokenType.ALPHABETIC, spanList).
                     annotate(LinguisticsAnnotator.lowerCaseTermAnnotation(gramString, gramString));
-            lastPosition = gram.getStart() + gram.getLength();
+            lastPosition = gram.getStart() + gram.getCodePointCount();
         }
         // handle punctuation at the end
         if (lastPosition < input.toString().length()) {