summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Jon Bratseth <bratseth@oath.com> 2021-01-07 20:36:47 +0100
committer: GitHub <noreply@github.com> 2021-01-07 20:36:47 +0100
commit: 16f521f73e64a7e54b80c296b933d136f45986f6 (patch)
tree: a8c1bddde5d6d3679c3c1c41311c7e36f45ea275
parent: 35d6e7789f508e415f3bd50e1a88974cacab3419 (diff)
parent: 3d2ae8e2398de5b88674caac462fb8ddf1639a50 (diff)
Merge pull request #15953 from vespa-engine/geirst/clone-input-to-ngram-expression
Avoid changing the input string field value by cloning it before doin…
-rw-r--r-- indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/expressions/NGramExpression.java 13
1 file changed, 8 insertions, 5 deletions
diff --git a/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/expressions/NGramExpression.java b/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/expressions/NGramExpression.java
index adf3e4ecaaa..2c56f0e356b 100644
--- a/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/expressions/NGramExpression.java
+++ b/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/expressions/NGramExpression.java
@@ -52,9 +52,12 @@ public final class NGramExpression extends Expression {
// This expression is already executed for this input instance
return;
}
- SpanList spanList = input.setSpanTree(new SpanTree(SpanTrees.LINGUISTICS)).spanList();
+ StringFieldValue output = input.clone();
+ ctx.setValue(output);
+
+ SpanList spanList = output.setSpanTree(new SpanTree(SpanTrees.LINGUISTICS)).spanList();
int lastPosition = 0;
- for (Iterator<GramSplitter.Gram> it = linguistics.getGramSplitter().split(input.getString(), gramSize); it.hasNext();) {
+ for (Iterator<GramSplitter.Gram> it = linguistics.getGramSplitter().split(output.getString(), gramSize); it.hasNext();) {
GramSplitter.Gram gram = it.next();
// if there is a gap before this gram, then annotate the gram as punctuation
// (technically it may be of various types, but it does not matter - we just
@@ -64,15 +67,15 @@ public final class NGramExpression extends Expression {
}
// annotate gram as a word term
- String gramString = gram.extractFrom(input.getString());
+ String gramString = gram.extractFrom(output.getString());
typedSpan(gram.getStart(), gram.getCodePointCount(), TokenType.ALPHABETIC, spanList).
annotate(LinguisticsAnnotator.lowerCaseTermAnnotation(gramString, gramString));
lastPosition = gram.getStart() + gram.getCodePointCount();
}
// handle punctuation at the end
- if (lastPosition < input.toString().length()) {
- typedSpan(lastPosition, input.toString().length() - lastPosition, TokenType.PUNCTUATION, spanList);
+ if (lastPosition < output.toString().length()) {
+ typedSpan(lastPosition, output.toString().length() - lastPosition, TokenType.PUNCTUATION, spanList);
}
}