about | summary | refs | log | tree | commit | diff | stats
path: root/indexinglanguage/src/main
diff options
context:
space:
mode:
author: Jon Bratseth <bratseth@gmail.com> 2021-06-15 19:43:59 +0200
committer: Jon Bratseth <bratseth@gmail.com> 2021-06-15 19:43:59 +0200
commit: 0c3afd47b0e2f43bc9aa9e4e60d33c104c37b81b (patch)
tree: b419c9be56d0fd049b49ba6f4258bb5d2f931c00 /indexinglanguage/src/main
parent: 8ef499e16e9fb5daede071d36cb523f4d30538c0 (diff)
Require replacements to be applied during tokenization
Diffstat (limited to 'indexinglanguage/src/main')
-rw-r--r-- indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/linguistics/LinguisticsAnnotator.java 14
1 files changed, 2 insertions, 12 deletions
diff --git a/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/linguistics/LinguisticsAnnotator.java b/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/linguistics/LinguisticsAnnotator.java
index 8b6ef83f05e..81a5305a778 100644
--- a/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/linguistics/LinguisticsAnnotator.java
+++ b/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/linguistics/LinguisticsAnnotator.java
@@ -115,9 +115,7 @@ public class LinguisticsAnnotator {
}
return;
}
- if ( ! token.isIndexable()) {
- return;
- }
+ if ( ! token.isIndexable()) return;
}
String orig = token.getOrig();
int pos = (int)token.getOffset();
@@ -138,9 +136,6 @@ public class LinguisticsAnnotator {
String lowercasedTerm = lowercasedOrig;
String term = token.getTokenString();
if (term != null) {
- term = tokenizer.getReplacementTerm(term);
- }
- if (term != null) {
lowercasedTerm = toLowerCase(term);
}
if (! lowercasedOrig.equals(lowercasedTerm)) {
@@ -155,12 +150,7 @@ public class LinguisticsAnnotator {
}
} else {
String term = token.getTokenString();
- if (term != null) {
- term = tokenizer.getReplacementTerm(term);
- }
- if (term == null || term.trim().isEmpty()) {
- return;
- }
+ if (term == null || term.trim().isEmpty()) return;
if (termOccurrences.termCountBelowLimit(term)) {
parent.span(pos, len).annotate(lowerCaseTermAnnotation(term, token.getOrig()));
}