Merge pull request #15953 from vespa-engine/geirst/clone-input-to-ngram-expression

Avoid changing the input string field value by cloning it before doing n-gram annotation.
author: Jon Bratseth <bratseth@oath.com> 2021-01-07 20:36:47 +0100
committer: GitHub <noreply@github.com> 2021-01-07 20:36:47 +0100
commit: 16f521f73e64a7e54b80c296b933d136f45986f6 (patch)
tree: a8c1bddde5d6d3679c3c1c41311c7e36f45ea275
parent: 35d6e7789f508e415f3bd50e1a88974cacab3419 (diff)
parent: 3d2ae8e2398de5b88674caac462fb8ddf1639a50 (diff)
1 files changed, 8 insertions, 5 deletions
diff --git a/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/expressions/NGramExpression.java b/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/expressions/NGramExpression.java
index adf3e4ecaaa..2c56f0e356b 100644
--- a/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/expressions/NGramExpression.java
+++ b/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/expressions/NGramExpression.java
@@ -52,9 +52,12 @@ public final class NGramExpression extends Expression {
             // This expression is already executed for this input instance
             return;
         }
-        SpanList spanList = input.setSpanTree(new SpanTree(SpanTrees.LINGUISTICS)).spanList();
+        StringFieldValue output = input.clone();
+        ctx.setValue(output);
+
+        SpanList spanList = output.setSpanTree(new SpanTree(SpanTrees.LINGUISTICS)).spanList();
         int lastPosition = 0;
-        for (Iterator<GramSplitter.Gram> it = linguistics.getGramSplitter().split(input.getString(), gramSize); it.hasNext();) {
+        for (Iterator<GramSplitter.Gram> it = linguistics.getGramSplitter().split(output.getString(), gramSize); it.hasNext();) {
             GramSplitter.Gram gram = it.next();
             // if there is a gap before this gram, then annotate the gram as punctuation
             // (technically it may be of various types, but it does not matter - we just
@@ -64,15 +67,15 @@ public final class NGramExpression extends Expression {
             }
 
             // annotate gram as a word term
-            String gramString = gram.extractFrom(input.getString());
+            String gramString = gram.extractFrom(output.getString());
             typedSpan(gram.getStart(), gram.getCodePointCount(), TokenType.ALPHABETIC, spanList).
                     annotate(LinguisticsAnnotator.lowerCaseTermAnnotation(gramString, gramString));
 
             lastPosition = gram.getStart() + gram.getCodePointCount();
         }
         // handle punctuation at the end
-        if (lastPosition < input.toString().length()) {
-            typedSpan(lastPosition, input.toString().length() - lastPosition, TokenType.PUNCTUATION, spanList);
+        if (lastPosition < output.toString().length()) {
+            typedSpan(lastPosition, output.toString().length() - lastPosition, TokenType.PUNCTUATION, spanList);
         }
     }
author	Jon Bratseth <bratseth@oath.com>	2021-01-07 20:36:47 +0100
committer	GitHub <noreply@github.com>	2021-01-07 20:36:47 +0100
commit	16f521f73e64a7e54b80c296b933d136f45986f6 (patch)
tree	a8c1bddde5d6d3679c3c1c41311c7e36f45ea275
parent	35d6e7789f508e415f3bd50e1a88974cacab3419 (diff)
parent	3d2ae8e2398de5b88674caac462fb8ddf1639a50 (diff)