diff options
author | Geir Storli <geirst@verizonmedia.com> | 2021-01-07 15:19:24 +0000 |
---|---|---|
committer | Geir Storli <geirst@verizonmedia.com> | 2021-01-07 15:19:24 +0000 |
commit | 3d2ae8e2398de5b88674caac462fb8ddf1639a50 (patch) | |
tree | e240fa676fbf34d3e866a64a05491fb63e6dcc53 /indexinglanguage | |
parent | 55042393392a94095d5c821e8118d06a7067e048 (diff) |
Avoid changing the input string field value by cloning it before modifying it.
This is needed as the input can be used by other expressions as well.
The same cloning is done in ExactExpression and TokenizeExpression.
Diffstat (limited to 'indexinglanguage')
-rw-r--r-- | indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/expressions/NGramExpression.java | 13 |
1 files changed, 8 insertions, 5 deletions
diff --git a/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/expressions/NGramExpression.java b/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/expressions/NGramExpression.java index adf3e4ecaaa..2c56f0e356b 100644 --- a/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/expressions/NGramExpression.java +++ b/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/expressions/NGramExpression.java @@ -52,9 +52,12 @@ public final class NGramExpression extends Expression { // This expression is already executed for this input instance return; } - SpanList spanList = input.setSpanTree(new SpanTree(SpanTrees.LINGUISTICS)).spanList(); + StringFieldValue output = input.clone(); + ctx.setValue(output); + + SpanList spanList = output.setSpanTree(new SpanTree(SpanTrees.LINGUISTICS)).spanList(); int lastPosition = 0; - for (Iterator<GramSplitter.Gram> it = linguistics.getGramSplitter().split(input.getString(), gramSize); it.hasNext();) { + for (Iterator<GramSplitter.Gram> it = linguistics.getGramSplitter().split(output.getString(), gramSize); it.hasNext();) { GramSplitter.Gram gram = it.next(); // if there is a gap before this gram, then annotate the gram as punctuation // (technically it may be of various types, but it does not matter - we just @@ -64,15 +67,15 @@ public final class NGramExpression extends Expression { } // annotate gram as a word term - String gramString = gram.extractFrom(input.getString()); + String gramString = gram.extractFrom(output.getString()); typedSpan(gram.getStart(), gram.getCodePointCount(), TokenType.ALPHABETIC, spanList). 
annotate(LinguisticsAnnotator.lowerCaseTermAnnotation(gramString, gramString)); lastPosition = gram.getStart() + gram.getCodePointCount(); } // handle punctuation at the end - if (lastPosition < input.toString().length()) { - typedSpan(lastPosition, input.toString().length() - lastPosition, TokenType.PUNCTUATION, spanList); + if (lastPosition < output.toString().length()) { + typedSpan(lastPosition, output.toString().length() - lastPosition, TokenType.PUNCTUATION, spanList); } } |