summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author Bjørn Christian Seime <bjorn.christian@seime.no> 2024-06-25 17:05:11 +0200
committer GitHub <noreply@github.com> 2024-06-25 17:05:11 +0200
commit 7c0ac144b1c3ea28bb03843f364a718f63cdabbc (patch)
tree 819cec1fabb6dd87654088a32779e0fde8e61996
parent 65652b530320785081a83f9688ac57a29cf43485 (diff)
parent 9604674b054642501d7b74ba7b51b393e7904496 (diff)
Merge pull request #31721 from vespa-engine/bjorncs/significance-searcher
Add debug logging in `SignificanceSearcher`
-rw-r--r-- container-search/src/main/java/com/yahoo/search/significance/SignificanceSearcher.java | 20
-rw-r--r-- linguistics/src/main/java/com/yahoo/language/significance/impl/DefaultSignificanceModelRegistry.java | 18
2 files changed, 23 insertions, 15 deletions
diff --git a/container-search/src/main/java/com/yahoo/search/significance/SignificanceSearcher.java b/container-search/src/main/java/com/yahoo/search/significance/SignificanceSearcher.java
index 9e6e2f785fd..3f72e98f18a 100644
--- a/container-search/src/main/java/com/yahoo/search/significance/SignificanceSearcher.java
+++ b/container-search/src/main/java/com/yahoo/search/significance/SignificanceSearcher.java
@@ -22,6 +22,7 @@ import com.yahoo.search.searchchain.Execution;
import java.util.HashSet;
import java.util.Optional;
+import java.util.logging.Level;
import java.util.logging.Logger;
import java.util.stream.Collectors;
@@ -69,6 +70,7 @@ public class SignificanceSearcher extends Searcher {
// This will result in a failure later (in a "backend searcher") anyway.
Optional.ofNullable(schema.rankProfiles().get(rankProfileName))
.map(RankProfile::useSignificanceModel).orElse(false)));
+ log.log(Level.FINE, () -> "Significance setup per schema: " + perSchemaSetup);
var uniqueSetups = new HashSet<>(perSchemaSetup.values());
// Fail if the significance setup for the selected schemas are conflicting
@@ -95,6 +97,8 @@ public class SignificanceSearcher extends Searcher {
private Result calculateAndSetSignificance(Query query, Execution execution) {
Language language = query.getModel().getParsingLanguage();
Optional<SignificanceModel> model = significanceModelRegistry.getModel(language);
+ log.log(Level.FINE, () -> "Got model for language %s: %s"
+ .formatted(language, model.map(SignificanceModel::getId).orElse("<none>")));
if (model.isEmpty()) return execution.search(query);
@@ -106,17 +110,17 @@ public class SignificanceSearcher extends Searcher {
private void setIDF(Item root, SignificanceModel significanceModel) {
if (root == null || root instanceof NullItem) return;
- if (root instanceof WordItem) {
-
- var documentFrequency = significanceModel.documentFrequency(((WordItem) root).getWord());
+ if (root instanceof WordItem wi) {
+ var word = wi.getWord();
+ var documentFrequency = significanceModel.documentFrequency(word);
long N = documentFrequency.corpusSize();
long nq_i = documentFrequency.frequency();
double idf = calculateIDF(N, nq_i);
-
- ((WordItem) root).setSignificance(idf);
- } else if (root instanceof CompositeItem) {
- for (int i = 0; i < ((CompositeItem) root).getItemCount(); i++) {
- setIDF(((CompositeItem) root).getItem(i), significanceModel);
+ log.log(Level.FINE, () -> "Setting IDF for " + word + " to " + idf);
+ wi.setSignificance(idf);
+ } else if (root instanceof CompositeItem ci) {
+ for (int i = 0; i < ci.getItemCount(); i++) {
+ setIDF(ci.getItem(i), significanceModel);
}
}
}
diff --git a/linguistics/src/main/java/com/yahoo/language/significance/impl/DefaultSignificanceModelRegistry.java b/linguistics/src/main/java/com/yahoo/language/significance/impl/DefaultSignificanceModelRegistry.java
index a88d8165624..45c6054f748 100644
--- a/linguistics/src/main/java/com/yahoo/language/significance/impl/DefaultSignificanceModelRegistry.java
+++ b/linguistics/src/main/java/com/yahoo/language/significance/impl/DefaultSignificanceModelRegistry.java
@@ -20,6 +20,7 @@ import java.util.Map;
import java.util.Optional;
import java.util.ArrayList;
import java.util.Objects;
+import java.util.logging.Logger;
/**
* Default implementation of {@link SignificanceModelRegistry}.
@@ -29,6 +30,8 @@ import java.util.Objects;
*/
public class DefaultSignificanceModelRegistry implements SignificanceModelRegistry {
+ private static final Logger log = Logger.getLogger(DefaultSignificanceModelRegistry.class.getName());
+
private final Map<Language, SignificanceModel> models;
@Inject
@@ -47,6 +50,7 @@ public class DefaultSignificanceModelRegistry implements SignificanceModelRegist
}
public void addModel(Path path) {
+ log.fine(() -> "Loading model from " + path);
ObjectMapper objectMapper = new ObjectMapper();
try {
InputStream in = path.toString().endsWith(".zst") ?
@@ -56,16 +60,16 @@ public class DefaultSignificanceModelRegistry implements SignificanceModelRegist
SignificanceModelFile file = objectMapper.readValue(in, SignificanceModelFile.class);
for (var pair : file.languages().entrySet()) {
- String[] languageTags = pair.getKey().split(",");
+ var languagesStr = pair.getKey();
+ log.fine(() -> "Found model for languages '%s'".formatted(languagesStr));
+ String[] languageTags = languagesStr.split(",");
for (var languageTag : languageTags) {
- this.models.put(
- Language.fromLanguageTag(languageTag),
- new DefaultSignificanceModel(pair.getValue(), file.id()));
- }
+ var language = Language.fromLanguageTag(languageTag);
+ log.fine(() -> "Adding model for language %s with id %s".formatted(language, file.id()));
+ this.models.put(language, new DefaultSignificanceModel(pair.getValue(), file.id()));
}
-
-
+ }
} catch (IOException e) {
throw new UncheckedIOException("Failed to load model from " + path, e);
}