From 3d2ae8e2398de5b88674caac462fb8ddf1639a50 Mon Sep 17 00:00:00 2001 From: Geir Storli Date: Thu, 7 Jan 2021 15:19:24 +0000 Subject: Avoid changing the input string field value by cloning it before doing changes. This is needed as the input can be used by other expressions as well. The same cloning is done in ExactExpression and TokenizeExpression. --- .../vespa/indexinglanguage/expressions/NGramExpression.java | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/expressions/NGramExpression.java b/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/expressions/NGramExpression.java index adf3e4ecaaa..2c56f0e356b 100644 --- a/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/expressions/NGramExpression.java +++ b/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/expressions/NGramExpression.java @@ -52,9 +52,12 @@ public final class NGramExpression extends Expression { // This expression is already executed for this input instance return; } - SpanList spanList = input.setSpanTree(new SpanTree(SpanTrees.LINGUISTICS)).spanList(); + StringFieldValue output = input.clone(); + ctx.setValue(output); + + SpanList spanList = output.setSpanTree(new SpanTree(SpanTrees.LINGUISTICS)).spanList(); int lastPosition = 0; - for (Iterator<GramSplitter.Gram> it = linguistics.getGramSplitter().split(input.getString(), gramSize); it.hasNext();) { + for (Iterator<GramSplitter.Gram> it = linguistics.getGramSplitter().split(output.getString(), gramSize); it.hasNext();) { GramSplitter.Gram gram = it.next(); // if there is a gap before this gram, then annotate the gram as punctuation // (technically it may be of various types, but it does not matter - we just @@ -64,15 +67,15 @@ public final class NGramExpression extends Expression { } // annotate gram as a word term - String gramString = gram.extractFrom(input.getString()); + String gramString = gram.extractFrom(output.getString()); 
typedSpan(gram.getStart(), gram.getCodePointCount(), TokenType.ALPHABETIC, spanList). annotate(LinguisticsAnnotator.lowerCaseTermAnnotation(gramString, gramString)); lastPosition = gram.getStart() + gram.getCodePointCount(); } // handle punctuation at the end - if (lastPosition < input.toString().length()) { - typedSpan(lastPosition, input.toString().length() - lastPosition, TokenType.PUNCTUATION, spanList); + if (lastPosition < output.toString().length()) { + typedSpan(lastPosition, output.toString().length() - lastPosition, TokenType.PUNCTUATION, spanList); } } -- cgit v1.2.3