summary | refs | log | tree | commit | diff | stats
path: root/indexinglanguage
diff options
context:
space:
mode:
Diffstat (limited to 'indexinglanguage')
-rw-r--r-- indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/expressions/NGramExpression.java 13
1 files changed, 8 insertions, 5 deletions
diff --git a/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/expressions/NGramExpression.java b/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/expressions/NGramExpression.java
index adf3e4ecaaa..2c56f0e356b 100644
--- a/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/expressions/NGramExpression.java
+++ b/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/expressions/NGramExpression.java
@@ -52,9 +52,12 @@ public final class NGramExpression extends Expression {
// This expression is already executed for this input instance
return;
}
- SpanList spanList = input.setSpanTree(new SpanTree(SpanTrees.LINGUISTICS)).spanList();
+ StringFieldValue output = input.clone();
+ ctx.setValue(output);
+
+ SpanList spanList = output.setSpanTree(new SpanTree(SpanTrees.LINGUISTICS)).spanList();
int lastPosition = 0;
- for (Iterator<GramSplitter.Gram> it = linguistics.getGramSplitter().split(input.getString(), gramSize); it.hasNext();) {
+ for (Iterator<GramSplitter.Gram> it = linguistics.getGramSplitter().split(output.getString(), gramSize); it.hasNext();) {
GramSplitter.Gram gram = it.next();
// if there is a gap before this gram, then annotate the gram as punctuation
// (technically it may be of various types, but it does not matter - we just
@@ -64,15 +67,15 @@ public final class NGramExpression extends Expression {
}
// annotate gram as a word term
- String gramString = gram.extractFrom(input.getString());
+ String gramString = gram.extractFrom(output.getString());
typedSpan(gram.getStart(), gram.getCodePointCount(), TokenType.ALPHABETIC, spanList).
annotate(LinguisticsAnnotator.lowerCaseTermAnnotation(gramString, gramString));
lastPosition = gram.getStart() + gram.getCodePointCount();
}
// handle punctuation at the end
- if (lastPosition < input.toString().length()) {
- typedSpan(lastPosition, input.toString().length() - lastPosition, TokenType.PUNCTUATION, spanList);
+ if (lastPosition < output.toString().length()) {
+ typedSpan(lastPosition, output.toString().length() - lastPosition, TokenType.PUNCTUATION, spanList);
}
}