diff options
author | Jon Bratseth <bratseth@oath.com> | 2021-01-07 20:36:47 +0100 |
---|---|---|
committer | GitHub <noreply@github.com> | 2021-01-07 20:36:47 +0100 |
commit | 16f521f73e64a7e54b80c296b933d136f45986f6 (patch) | |
tree | a8c1bddde5d6d3679c3c1c41311c7e36f45ea275 | |
parent | 35d6e7789f508e415f3bd50e1a88974cacab3419 (diff) | |
parent | 3d2ae8e2398de5b88674caac462fb8ddf1639a50 (diff) |
Merge pull request #15953 from vespa-engine/geirst/clone-input-to-ngram-expression
Avoid changing the input string field value by cloning it before doin…
-rw-r--r-- | indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/expressions/NGramExpression.java | 13 |
1 file changed, 8 insertions, 5 deletions
diff --git a/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/expressions/NGramExpression.java b/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/expressions/NGramExpression.java index adf3e4ecaaa..2c56f0e356b 100644 --- a/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/expressions/NGramExpression.java +++ b/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/expressions/NGramExpression.java @@ -52,9 +52,12 @@ public final class NGramExpression extends Expression { // This expression is already executed for this input instance return; } - SpanList spanList = input.setSpanTree(new SpanTree(SpanTrees.LINGUISTICS)).spanList(); + StringFieldValue output = input.clone(); + ctx.setValue(output); + + SpanList spanList = output.setSpanTree(new SpanTree(SpanTrees.LINGUISTICS)).spanList(); int lastPosition = 0; - for (Iterator<GramSplitter.Gram> it = linguistics.getGramSplitter().split(input.getString(), gramSize); it.hasNext();) { + for (Iterator<GramSplitter.Gram> it = linguistics.getGramSplitter().split(output.getString(), gramSize); it.hasNext();) { GramSplitter.Gram gram = it.next(); // if there is a gap before this gram, then annotate the gram as punctuation // (technically it may be of various types, but it does not matter - we just @@ -64,15 +67,15 @@ public final class NGramExpression extends Expression { } // annotate gram as a word term - String gramString = gram.extractFrom(input.getString()); + String gramString = gram.extractFrom(output.getString()); typedSpan(gram.getStart(), gram.getCodePointCount(), TokenType.ALPHABETIC, spanList). 
annotate(LinguisticsAnnotator.lowerCaseTermAnnotation(gramString, gramString)); lastPosition = gram.getStart() + gram.getCodePointCount(); } // handle punctuation at the end - if (lastPosition < input.toString().length()) { - typedSpan(lastPosition, input.toString().length() - lastPosition, TokenType.PUNCTUATION, spanList); + if (lastPosition < output.toString().length()) { + typedSpan(lastPosition, output.toString().length() - lastPosition, TokenType.PUNCTUATION, spanList); } } |