summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
-rw-r--r--  linguistics/src/main/java/com/yahoo/language/opennlp/OpenNlpTokenizer.java  13
-rw-r--r--  linguistics/src/main/java/com/yahoo/language/simple/SimpleTokenizer.java  13
2 files changed, 16 insertions, 10 deletions
diff --git a/linguistics/src/main/java/com/yahoo/language/opennlp/OpenNlpTokenizer.java b/linguistics/src/main/java/com/yahoo/language/opennlp/OpenNlpTokenizer.java
index 9a1e6da7629..d3f6fcf2ee3 100644
--- a/linguistics/src/main/java/com/yahoo/language/opennlp/OpenNlpTokenizer.java
+++ b/linguistics/src/main/java/com/yahoo/language/opennlp/OpenNlpTokenizer.java
@@ -124,18 +124,21 @@ public class OpenNlpTokenizer implements Tokenizer {
private String processToken(String token, Language language, StemMode stemMode, boolean removeAccents,
Stemmer stemmer) {
- log.log(Level.FINEST, () -> "processToken '"+token+"'");
+ final String original = token;
+ log.log(Level.FINEST, () -> "processToken '"+original+"'");
token = normalizer.normalize(token);
token = LinguisticsCase.toLowerCase(token);
if (removeAccents)
token = transformer.accentDrop(token, language);
if (stemMode != StemMode.NONE) {
- String oldToken = token;
+ final String oldToken = token;
token = doStemming(token, stemmer);
- log.log(Level.FINEST, () -> "stem '"+oldToken+"' to '"+token+"'");
+ final String newToken = token;
+ log.log(Level.FINEST, () -> "stem '"+oldToken+"' to '"+newToken+"'");
}
- log.log(Level.FINEST, () -> "processed token is: "+token);
- return token;
+ final String result = token;
+ log.log(Level.FINEST, () -> "processed token is: "+result);
+ return result;
}
private String doStemming(String token, Stemmer stemmer) {
diff --git a/linguistics/src/main/java/com/yahoo/language/simple/SimpleTokenizer.java b/linguistics/src/main/java/com/yahoo/language/simple/SimpleTokenizer.java
index aa24e359b53..7df432f496d 100644
--- a/linguistics/src/main/java/com/yahoo/language/simple/SimpleTokenizer.java
+++ b/linguistics/src/main/java/com/yahoo/language/simple/SimpleTokenizer.java
@@ -67,18 +67,21 @@ public class SimpleTokenizer implements Tokenizer {
}
private String processToken(String token, Language language, StemMode stemMode, boolean removeAccents) {
- log.log(Level.FINEST, "processToken '"+token+"'");
+ final String original = token;
+ log.log(Level.FINEST, () -> "processToken '"+original+"'");
token = normalizer.normalize(token);
token = LinguisticsCase.toLowerCase(token);
if (removeAccents)
token = transformer.accentDrop(token, language);
if (stemMode != StemMode.NONE) {
- String oldToken = token;
+ final String oldToken = token;
token = stemmer.stem(token);
- log.log(Level.FINEST, () -> "stem '"+oldToken+"' to '"+token+"'");
+ final String newToken = token;
+ log.log(Level.FINEST, () -> "stem '"+oldToken+"' to '"+newToken+"'");
}
- log.log(Level.FINEST, () -> "processed token is: "+token);
- return token;
+ final String result = token;
+ log.log(Level.FINEST, () -> "processed token is: "+result);
+ return result;
}
}