From 976f28680df424bf028eb12a3f413049c17bf098 Mon Sep 17 00:00:00 2001 From: Arne Juul Date: Fri, 24 Apr 2020 08:34:55 +0000 Subject: add more tracing and debug logging of stemming --- .../com/yahoo/prelude/querytransform/StemmingSearcher.java | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'container-search') diff --git a/container-search/src/main/java/com/yahoo/prelude/querytransform/StemmingSearcher.java b/container-search/src/main/java/com/yahoo/prelude/querytransform/StemmingSearcher.java index 9a9044def2d..9b846d9f0ae 100644 --- a/container-search/src/main/java/com/yahoo/prelude/querytransform/StemmingSearcher.java +++ b/container-search/src/main/java/com/yahoo/prelude/querytransform/StemmingSearcher.java @@ -98,6 +98,7 @@ public class StemmingSearcher extends Searcher { context.language = language; context.indexFacts = indexFacts; context.reverseConnectivity = createReverseConnectivities(q.getModel().getQueryTree().getRoot()); + q.trace("Stemming with language="+language, 3); return scan(q.getModel().getQueryTree().getRoot(), context); } @@ -183,9 +184,20 @@ public class StemmingSearcher extends Searcher { Substring substring = getOffsets(current); if (segments.size() == 1) { + getLogger().log(LogLevel.DEBUG, "Stem '"+current.stringValue()+"' mode "+index.getStemMode() + +" and language '"+context.language+"' -> '"+segments.get(0)+"'"); TaggableItem w = singleWordSegment(current, segments.get(0), index, substring, context.insidePhrase); setMetaData(current, context.reverseConnectivity, w); return (Item) w; + } else if (getLogger().isLoggable(LogLevel.DEBUG)) { + var buf = new StringBuilder(); + buf.append("Stem '").append(current.stringValue()); + buf.append("' mode ").append(index.getStemMode()); + buf.append(" and language '").append(context.language).append("' ->"); + for (StemList segment : segments) { + buf.append(" '").append(segment).append("'"); + } + getLogger().log(LogLevel.DEBUG, buf.toString()); } if (context.isCJK) @@ -194,6 +206,7 @@ public class StemmingSearcher extends Searcher { composite = chooseComposite(current, ((Item) current).getParent(), indexName); for (StemList segment : segments) { + getLogger().log(LogLevel.DEBUG, "Stem to multiple segments '"+segment+"'"); TaggableItem w = singleWordSegment(current, segment, index, substring, context.insidePhrase); if (composite instanceof AndSegmentItem) { -- cgit v1.2.3 From be1fb0d49eead759e69ed224c403cd8ecf5ed84e Mon Sep 17 00:00:00 2001 From: Arne H Juul Date: Fri, 24 Apr 2020 11:18:18 +0200 Subject: Apply suggestions from code review Co-Authored-By: Jon Bratseth --- .../java/com/yahoo/prelude/querytransform/StemmingSearcher.java | 4 ++-- .../main/java/com/yahoo/language/opennlp/OpenNlpTokenizer.java | 8 ++++---- .../main/java/com/yahoo/language/opennlp/OptimaizeDetector.java | 2 +- .../src/main/java/com/yahoo/language/simple/SimpleTokenizer.java | 4 ++-- 4 files changed, 9 insertions(+), 9 deletions(-) (limited to 'container-search') diff --git a/container-search/src/main/java/com/yahoo/prelude/querytransform/StemmingSearcher.java b/container-search/src/main/java/com/yahoo/prelude/querytransform/StemmingSearcher.java index 9b846d9f0ae..7279e1d6a3d 100644 --- a/container-search/src/main/java/com/yahoo/prelude/querytransform/StemmingSearcher.java +++ b/container-search/src/main/java/com/yahoo/prelude/querytransform/StemmingSearcher.java @@ -184,7 +184,7 @@ public class StemmingSearcher extends Searcher { Substring substring = getOffsets(current); if (segments.size() == 1) { - getLogger().log(LogLevel.DEBUG, "Stem '"+current.stringValue()+"' mode "+index.getStemMode() + getLogger().log(LogLevel.DEBUG, () -> "Stem '"+current.stringValue()+"' mode "+index.getStemMode() +" and language '"+context.language+"' -> '"+segments.get(0)+"'"); TaggableItem w = singleWordSegment(current, segments.get(0), index, substring, context.insidePhrase); setMetaData(current, context.reverseConnectivity, w); @@ -206,7 +206,7 @@ public class StemmingSearcher extends Searcher { composite = chooseComposite(current, ((Item) current).getParent(), indexName); for (StemList segment : segments) { - getLogger().log(LogLevel.DEBUG, "Stem to multiple segments '"+segment+"'"); + getLogger().log(LogLevel.DEBUG, () -> "Stem to multiple segments '"+segment+"'"); TaggableItem w = singleWordSegment(current, segment, index, substring, context.insidePhrase); if (composite instanceof AndSegmentItem) { diff --git a/linguistics/src/main/java/com/yahoo/language/opennlp/OpenNlpTokenizer.java b/linguistics/src/main/java/com/yahoo/language/opennlp/OpenNlpTokenizer.java index 0ebd4e0f638..9a1e6da7629 100644 --- a/linguistics/src/main/java/com/yahoo/language/opennlp/OpenNlpTokenizer.java +++ b/linguistics/src/main/java/com/yahoo/language/opennlp/OpenNlpTokenizer.java @@ -60,7 +60,7 @@ public class OpenNlpTokenizer implements Tokenizer { } private Stemmer getStemmerForLanguage(Language language, StemMode stemMode) { - log.log(Level.FINEST, "getStemmerForLanguage '"+language+"' mode: "+stemMode); + log.log(Level.FINEST, () -> "getStemmerForLanguage '"+language+"' mode: "+stemMode); if (language == null || Language.ENGLISH.equals(language) || StemMode.NONE.equals(stemMode)) { return null; } @@ -124,7 +124,7 @@ public class OpenNlpTokenizer implements Tokenizer { private String processToken(String token, Language language, StemMode stemMode, boolean removeAccents, Stemmer stemmer) { - log.log(Level.FINEST, "processToken '"+token+"'"); + log.log(Level.FINEST, () -> "processToken '"+token+"'"); token = normalizer.normalize(token); token = LinguisticsCase.toLowerCase(token); if (removeAccents) @@ -132,9 +132,9 @@ public class OpenNlpTokenizer implements Tokenizer { if (stemMode != StemMode.NONE) { String oldToken = token; token = doStemming(token, stemmer); - log.log(Level.FINEST, "stem '"+oldToken+"' to '"+token+"'"); + log.log(Level.FINEST, () -> "stem '"+oldToken+"' to '"+token+"'"); } - log.log(Level.FINEST, "processed token is: "+token); + log.log(Level.FINEST, () -> "processed token is: "+token); return token; } diff --git a/linguistics/src/main/java/com/yahoo/language/opennlp/OptimaizeDetector.java b/linguistics/src/main/java/com/yahoo/language/opennlp/OptimaizeDetector.java index a42c9f0504e..bf07c91ba44 100644 --- a/linguistics/src/main/java/com/yahoo/language/opennlp/OptimaizeDetector.java +++ b/linguistics/src/main/java/com/yahoo/language/opennlp/OptimaizeDetector.java @@ -99,7 +99,7 @@ public class OptimaizeDetector implements Detector { private static Language guessLanguageUsingOptimaize(String input) { Optional result = languageDetector.detect(textObjectFactory.forText(input)); if ( ! result.isPresent()) return Language.UNKNOWN; - log.log(Level.FINE, "guessing language "+result.get()+" from input: "+input); + log.log(Level.FINE, () -> "guessing language "+result.get()+" from input: "+input); return Language.fromLocale(new Locale(result.get().getLanguage())); } diff --git a/linguistics/src/main/java/com/yahoo/language/simple/SimpleTokenizer.java b/linguistics/src/main/java/com/yahoo/language/simple/SimpleTokenizer.java index a8470d86869..aa24e359b53 100644 --- a/linguistics/src/main/java/com/yahoo/language/simple/SimpleTokenizer.java +++ b/linguistics/src/main/java/com/yahoo/language/simple/SimpleTokenizer.java @@ -75,9 +75,9 @@ public class SimpleTokenizer implements Tokenizer { if (stemMode != StemMode.NONE) { String oldToken = token; token = stemmer.stem(token); - log.log(Level.FINEST, "stem '"+oldToken+"' to '"+token+"'"); + log.log(Level.FINEST, () -> "stem '"+oldToken+"' to '"+token+"'"); } - log.log(Level.FINEST, "processed token is: "+token); + log.log(Level.FINEST, () -> "processed token is: "+token); return token; } -- cgit v1.2.3