diff options
author | Bjørn Christian Seime <bjorn.christian@seime.no> | 2024-06-25 17:05:11 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2024-06-25 17:05:11 +0200 |
commit | 7c0ac144b1c3ea28bb03843f364a718f63cdabbc (patch) | |
tree | 819cec1fabb6dd87654088a32779e0fde8e61996 | |
parent | 65652b530320785081a83f9688ac57a29cf43485 (diff) | |
parent | 9604674b054642501d7b74ba7b51b393e7904496 (diff) |
Merge pull request #31721 from vespa-engine/bjorncs/significance-searcher
Add debug logging in `SignificanceSearcher`
2 files changed, 23 insertions, 15 deletions
diff --git a/container-search/src/main/java/com/yahoo/search/significance/SignificanceSearcher.java b/container-search/src/main/java/com/yahoo/search/significance/SignificanceSearcher.java index 9e6e2f785fd..3f72e98f18a 100644 --- a/container-search/src/main/java/com/yahoo/search/significance/SignificanceSearcher.java +++ b/container-search/src/main/java/com/yahoo/search/significance/SignificanceSearcher.java @@ -22,6 +22,7 @@ import com.yahoo.search.searchchain.Execution; import java.util.HashSet; import java.util.Optional; +import java.util.logging.Level; import java.util.logging.Logger; import java.util.stream.Collectors; @@ -69,6 +70,7 @@ public class SignificanceSearcher extends Searcher { // This will result in a failure later (in a "backend searcher") anyway. Optional.ofNullable(schema.rankProfiles().get(rankProfileName)) .map(RankProfile::useSignificanceModel).orElse(false))); + log.log(Level.FINE, () -> "Significance setup per schema: " + perSchemaSetup); var uniqueSetups = new HashSet<>(perSchemaSetup.values()); // Fail if the significance setup for the selected schemas are conflicting @@ -95,6 +97,8 @@ public class SignificanceSearcher extends Searcher { private Result calculateAndSetSignificance(Query query, Execution execution) { Language language = query.getModel().getParsingLanguage(); Optional<SignificanceModel> model = significanceModelRegistry.getModel(language); + log.log(Level.FINE, () -> "Got model for language %s: %s" + .formatted(language, model.map(SignificanceModel::getId).orElse("<none>"))); if (model.isEmpty()) return execution.search(query); @@ -106,17 +110,17 @@ public class SignificanceSearcher extends Searcher { private void setIDF(Item root, SignificanceModel significanceModel) { if (root == null || root instanceof NullItem) return; - if (root instanceof WordItem) { - - var documentFrequency = significanceModel.documentFrequency(((WordItem) root).getWord()); + if (root instanceof WordItem wi) { + var word = wi.getWord(); + var documentFrequency = significanceModel.documentFrequency(word); long N = documentFrequency.corpusSize(); long nq_i = documentFrequency.frequency(); double idf = calculateIDF(N, nq_i); - - ((WordItem) root).setSignificance(idf); - } else if (root instanceof CompositeItem) { - for (int i = 0; i < ((CompositeItem) root).getItemCount(); i++) { - setIDF(((CompositeItem) root).getItem(i), significanceModel); + log.log(Level.FINE, () -> "Setting IDF for " + word + " to " + idf); + wi.setSignificance(idf); + } else if (root instanceof CompositeItem ci) { + for (int i = 0; i < ci.getItemCount(); i++) { + setIDF(ci.getItem(i), significanceModel); } } } diff --git a/linguistics/src/main/java/com/yahoo/language/significance/impl/DefaultSignificanceModelRegistry.java b/linguistics/src/main/java/com/yahoo/language/significance/impl/DefaultSignificanceModelRegistry.java index a88d8165624..45c6054f748 100644 --- a/linguistics/src/main/java/com/yahoo/language/significance/impl/DefaultSignificanceModelRegistry.java +++ b/linguistics/src/main/java/com/yahoo/language/significance/impl/DefaultSignificanceModelRegistry.java @@ -20,6 +20,7 @@ import java.util.Map; import java.util.Optional; import java.util.ArrayList; import java.util.Objects; +import java.util.logging.Logger; /** * Default implementation of {@link SignificanceModelRegistry}. @@ -29,6 +30,8 @@ import java.util.Objects; */ public class DefaultSignificanceModelRegistry implements SignificanceModelRegistry { + private static final Logger log = Logger.getLogger(DefaultSignificanceModelRegistry.class.getName()); + private final Map<Language, SignificanceModel> models; @Inject @@ -47,6 +50,7 @@ public class DefaultSignificanceModelRegistry implements SignificanceModelRegist } public void addModel(Path path) { + log.fine(() -> "Loading model from " + path); ObjectMapper objectMapper = new ObjectMapper(); try { InputStream in = path.toString().endsWith(".zst") ? @@ -56,16 +60,16 @@ public class DefaultSignificanceModelRegistry implements SignificanceModelRegist SignificanceModelFile file = objectMapper.readValue(in, SignificanceModelFile.class); for (var pair : file.languages().entrySet()) { - String[] languageTags = pair.getKey().split(","); + var languagesStr = pair.getKey(); + log.fine(() -> "Found model for languages '%s'".formatted(languagesStr)); + String[] languageTags = languagesStr.split(","); for (var languageTag : languageTags) { - this.models.put( - Language.fromLanguageTag(languageTag), - new DefaultSignificanceModel(pair.getValue(), file.id())); - } + var language = Language.fromLanguageTag(languageTag); + log.fine(() -> "Adding model for language %s with id %s".formatted(language, file.id())); + this.models.put(language, new DefaultSignificanceModel(pair.getValue(), file.id())); } - - + } } catch (IOException e) { throw new UncheckedIOException("Failed to load model from " + path, e); } |