aboutsummaryrefslogtreecommitdiffstats
path: root/indexinglanguage
diff options
context:
space:
mode:
author Geir Storli <geirst@verizonmedia.com> 2021-01-07 15:19:24 +0000
committer Geir Storli <geirst@verizonmedia.com> 2021-01-07 15:19:24 +0000
commit 3d2ae8e2398de5b88674caac462fb8ddf1639a50 (patch)
tree e240fa676fbf34d3e866a64a05491fb63e6dcc53 /indexinglanguage
parent 55042393392a94095d5c821e8118d06a7067e048 (diff)
Avoid mutating the input string field value: clone it and apply the changes to the clone.
This is needed because the same input may also be used by other expressions. ExactExpression and TokenizeExpression already clone their input in the same way.
Diffstat (limited to 'indexinglanguage')
-rw-r--r-- indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/expressions/NGramExpression.java 13
1 files changed, 8 insertions, 5 deletions
diff --git a/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/expressions/NGramExpression.java b/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/expressions/NGramExpression.java
index adf3e4ecaaa..2c56f0e356b 100644
--- a/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/expressions/NGramExpression.java
+++ b/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/expressions/NGramExpression.java
@@ -52,9 +52,12 @@ public final class NGramExpression extends Expression {
// This expression is already executed for this input instance
return;
}
- SpanList spanList = input.setSpanTree(new SpanTree(SpanTrees.LINGUISTICS)).spanList();
+ StringFieldValue output = input.clone();
+ ctx.setValue(output);
+
+ SpanList spanList = output.setSpanTree(new SpanTree(SpanTrees.LINGUISTICS)).spanList();
int lastPosition = 0;
- for (Iterator<GramSplitter.Gram> it = linguistics.getGramSplitter().split(input.getString(), gramSize); it.hasNext();) {
+ for (Iterator<GramSplitter.Gram> it = linguistics.getGramSplitter().split(output.getString(), gramSize); it.hasNext();) {
GramSplitter.Gram gram = it.next();
// if there is a gap before this gram, then annotate the gram as punctuation
// (technically it may be of various types, but it does not matter - we just
@@ -64,15 +67,15 @@ public final class NGramExpression extends Expression {
}
// annotate gram as a word term
- String gramString = gram.extractFrom(input.getString());
+ String gramString = gram.extractFrom(output.getString());
typedSpan(gram.getStart(), gram.getCodePointCount(), TokenType.ALPHABETIC, spanList).
annotate(LinguisticsAnnotator.lowerCaseTermAnnotation(gramString, gramString));
lastPosition = gram.getStart() + gram.getCodePointCount();
}
// handle punctuation at the end
- if (lastPosition < input.toString().length()) {
- typedSpan(lastPosition, input.toString().length() - lastPosition, TokenType.PUNCTUATION, spanList);
+ if (lastPosition < output.toString().length()) {
+ typedSpan(lastPosition, output.toString().length() - lastPosition, TokenType.PUNCTUATION, spanList);
}
}