diff options
Diffstat (limited to 'indexinglanguage')
-rw-r--r-- | indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/expressions/NGramExpression.java | 13 |
1 files changed, 8 insertions, 5 deletions
diff --git a/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/expressions/NGramExpression.java b/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/expressions/NGramExpression.java index adf3e4ecaaa..2c56f0e356b 100644 --- a/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/expressions/NGramExpression.java +++ b/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/expressions/NGramExpression.java @@ -52,9 +52,12 @@ public final class NGramExpression extends Expression { // This expression is already executed for this input instance return; } - SpanList spanList = input.setSpanTree(new SpanTree(SpanTrees.LINGUISTICS)).spanList(); + StringFieldValue output = input.clone(); + ctx.setValue(output); + + SpanList spanList = output.setSpanTree(new SpanTree(SpanTrees.LINGUISTICS)).spanList(); int lastPosition = 0; - for (Iterator<GramSplitter.Gram> it = linguistics.getGramSplitter().split(input.getString(), gramSize); it.hasNext();) { + for (Iterator<GramSplitter.Gram> it = linguistics.getGramSplitter().split(output.getString(), gramSize); it.hasNext();) { GramSplitter.Gram gram = it.next(); // if there is a gap before this gram, then annotate the gram as punctuation // (technically it may be of various types, but it does not matter - we just @@ -64,15 +67,15 @@ public final class NGramExpression extends Expression { } // annotate gram as a word term - String gramString = gram.extractFrom(input.getString()); + String gramString = gram.extractFrom(output.getString()); typedSpan(gram.getStart(), gram.getCodePointCount(), TokenType.ALPHABETIC, spanList). annotate(LinguisticsAnnotator.lowerCaseTermAnnotation(gramString, gramString)); lastPosition = gram.getStart() + gram.getCodePointCount(); } // handle punctuation at the end - if (lastPosition < input.toString().length()) { - typedSpan(lastPosition, input.toString().length() - lastPosition, TokenType.PUNCTUATION, spanList); + if (lastPosition < output.toString().length()) { + typedSpan(lastPosition, output.toString().length() - lastPosition, TokenType.PUNCTUATION, spanList); } } |