about | summary | refs | log | tree | commit | diff | stats
path: root/linguistics/src/main/java/com/yahoo/language/significance/impl
diff options
context:
space:
mode:
Diffstat (limited to 'linguistics/src/main/java/com/yahoo/language/significance/impl')
-rw-r--r-- linguistics/src/main/java/com/yahoo/language/significance/impl/DefaultSignificanceModel.java 80
-rw-r--r-- linguistics/src/main/java/com/yahoo/language/significance/impl/DefaultSignificanceModelRegistry.java 56
-rw-r--r-- linguistics/src/main/java/com/yahoo/language/significance/impl/DocumentFrequencyFile.java 44
-rw-r--r-- linguistics/src/main/java/com/yahoo/language/significance/impl/SignificanceModelFile.java 52
4 files changed, 141 insertions, 91 deletions
diff --git a/linguistics/src/main/java/com/yahoo/language/significance/impl/DefaultSignificanceModel.java b/linguistics/src/main/java/com/yahoo/language/significance/impl/DefaultSignificanceModel.java
index 7ed6f442610..6e024c3025e 100644
--- a/linguistics/src/main/java/com/yahoo/language/significance/impl/DefaultSignificanceModel.java
+++ b/linguistics/src/main/java/com/yahoo/language/significance/impl/DefaultSignificanceModel.java
@@ -1,15 +1,15 @@
// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.language.significance.impl;
-import com.fasterxml.jackson.annotation.JsonCreator;
-import com.fasterxml.jackson.annotation.JsonIgnoreProperties;
-import com.fasterxml.jackson.annotation.JsonProperty;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.yahoo.language.significance.DocumentFrequency;
import com.yahoo.language.significance.SignificanceModel;
+import java.io.IOException;
import java.nio.file.Path;
import java.util.HashMap;
+import java.util.Map;
+import java.util.TreeMap;
/**
*
@@ -17,71 +17,23 @@ import java.util.HashMap;
*/
public class DefaultSignificanceModel implements SignificanceModel {
private final long corpusSize;
- private final HashMap<String, Long> frequencies;
- private final Path path;
+ private final Map<String, Long> frequencies;
- @JsonIgnoreProperties(ignoreUnknown = true)
- public static class SignificanceModelFile {
- private final String version;
- private final String id;
- private final String description;
- private final long corpusSize;
- private final String language;
-
- private final long wordCount;
- private final HashMap<String, Long> frequencies;
-
- @JsonCreator
- public SignificanceModelFile(
- @JsonProperty("version") String version,
- @JsonProperty("id") String id,
- @JsonProperty("description") String description,
- @JsonProperty("corpus-size") long corpusSize,
- @JsonProperty("language") String language,
- @JsonProperty("word-count") long wordCount,
- @JsonProperty("frequencies") HashMap<String, Long> frequencies) {
- this.version = version;
- this.id = id;
- this.description = description;
- this.corpusSize = corpusSize;
- this.language = language;
- this.wordCount = wordCount;
- this.frequencies = frequencies;
- }
-
- @JsonProperty("version")
- public String version() { return version; }
-
- @JsonProperty("id")
- public String id() { return id; }
-
- @JsonProperty("description")
- public String description() { return description; }
-
- @JsonProperty("corpus-size")
- public long corpusSize() { return corpusSize; }
-
- @JsonProperty("language")
- public String language() { return language; }
-
- @JsonProperty("frequencies")
- public HashMap<String, Long> frequencies() { return frequencies; }
-
- @JsonProperty("word-count")
- public long wordCount() { return wordCount; }
+ private String id; // model id returned by getId(); not final, and only the constructor directly below assigns it
+ public DefaultSignificanceModel(DocumentFrequencyFile file, String id) { // builds a model from an already-parsed frequency file and the id of the model file it came from
+ this.frequencies = file.frequencies(); // keeps the file's map as-is, without copying
+ this.corpusSize = file.documentCount(); // the file's "document-count" value becomes the corpus size
+ this.id = id;
}
public DefaultSignificanceModel(Path path) {
- this.path = path;
-
ObjectMapper objectMapper = new ObjectMapper();
-
try {
- SignificanceModelFile model = objectMapper.readValue(this.path.toFile(), SignificanceModelFile.class);
- this.corpusSize = model.corpusSize;
- this.frequencies = model.frequencies;
- } catch (Exception e) {
+ var file = objectMapper.readValue(path.toFile(), DocumentFrequencyFile.class); // the file at path is parsed as one DocumentFrequencyFile JSON document
+ this.frequencies = file.frequencies();
+ this.corpusSize = file.documentCount(); // NOTE(review): id is never assigned in this constructor, so getId() returns null for models built this way
+ } catch (IOException e) { // only IOException is wrapped below; the removed handler caught every Exception
throw new RuntimeException("Failed to load model from " + path, e);
}
}
@@ -93,4 +45,10 @@ public class DefaultSignificanceModel implements SignificanceModel {
}
return new DocumentFrequency(1, corpusSize);
}
+
+ @Override
+ public String getId() { // returns the id handed to the (DocumentFrequencyFile, String) constructor
+ return this.id; // null when the instance was created through the Path constructor, which does not assign id
+ }
+
}
diff --git a/linguistics/src/main/java/com/yahoo/language/significance/impl/DefaultSignificanceModelRegistry.java b/linguistics/src/main/java/com/yahoo/language/significance/impl/DefaultSignificanceModelRegistry.java
index 1be1d3f13b5..72874c15d9e 100644
--- a/linguistics/src/main/java/com/yahoo/language/significance/impl/DefaultSignificanceModelRegistry.java
+++ b/linguistics/src/main/java/com/yahoo/language/significance/impl/DefaultSignificanceModelRegistry.java
@@ -1,20 +1,21 @@
// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.language.significance.impl;
+import com.fasterxml.jackson.databind.ObjectMapper;
import com.yahoo.component.annotation.Inject;
import com.yahoo.language.Language;
import com.yahoo.language.significance.SignificanceModel;
import com.yahoo.language.significance.SignificanceModelRegistry;
import com.yahoo.search.significance.config.SignificanceConfig;
+import java.io.IOException;
+import java.io.UncheckedIOException;
import java.nio.file.Path;
import java.util.EnumMap;
-import java.util.HashMap;
+import java.util.List;
import java.util.Map;
import java.util.Optional;
-import java.util.function.Supplier;
-import static com.yahoo.yolean.Exceptions.uncheck;
/**
* Default implementation of {@link SignificanceModelRegistry}.
* This implementation loads models lazily and caches them.
@@ -24,24 +25,35 @@ import static com.yahoo.yolean.Exceptions.uncheck;
public class DefaultSignificanceModelRegistry implements SignificanceModelRegistry {
private final Map<Language, SignificanceModel> models;
+
@Inject
- public DefaultSignificanceModelRegistry(SignificanceConfig cfg) { this(new Builder(cfg)); }
- private DefaultSignificanceModelRegistry(Builder b) {
+ public DefaultSignificanceModelRegistry(SignificanceConfig cfg) { // reads every model file listed in the config while constructing the registry
this.models = new EnumMap<>(Language.class);
- b.models.forEach((language, path) -> {
- models.put(language,
- uncheck(() -> new DefaultSignificanceModel(path)));
- });
+ for (var model : cfg.model()) { // only the configured path is used here; languages are taken from the file content by addModel
+ addModel(model.path()); // NOTE(review): addModel is public and non-final, so a subclass override would run before this constructor has finished
+ }
}
- public DefaultSignificanceModelRegistry(HashMap<Language, Path> map) {
+ public DefaultSignificanceModelRegistry(List<Path> models) { // takes model file paths only; the parameter shadows the field, which is reached through this.models
this.models = new EnumMap<>(Language.class);
- map.forEach((language, path) -> {
- models.put(language,
- uncheck(() -> new DefaultSignificanceModel(path)));
- });
+ for (var path : models) { // files are loaded in list order
+ addModel(path); // a later file replaces the model of any language an earlier file already registered
+ }
}
+ public void addModel(Path path) { // parses one model file and registers a model for every language entry it contains
+ ObjectMapper objectMapper = new ObjectMapper(); // a fresh mapper is created on every call
+ try {
+ SignificanceModelFile file = objectMapper.readValue(path.toFile(), SignificanceModelFile.class);
+ for (var pair : file.languages().entrySet()) { // NOTE(review): presumably languages() is null when the JSON has no "languages" property, which would throw NullPointerException here - confirm
+ this.models.put( // put() replaces any model previously registered for the same Language
+ Language.fromLanguageTag(pair.getKey()), // the map key is interpreted as a language tag
+ new DefaultSignificanceModel(pair.getValue(), file.id())); // every language in the file shares the file-level id
+ }
+ } catch (IOException e) { // read and parse failures surface to callers as an unchecked exception
+ throw new UncheckedIOException("Failed to load model from " + path, e);
+ }
+ }
@Override
public Optional<SignificanceModel> getModel(Language language) {
@@ -51,20 +63,4 @@ public class DefaultSignificanceModelRegistry implements SignificanceModelRegist
}
return Optional.of(models.get(language));
}
-
-
- public static final class Builder {
- private final Map<Language, Path> models = new EnumMap<>(Language.class);
-
- public Builder() {}
- public Builder(SignificanceConfig cfg) {
- for (var model : cfg.model()) {
- addModel(Language.fromLanguageTag(model.language()), model.path());
- }
- }
-
- public Builder addModel(Language lang, Path path) { models.put(lang, path); return this; }
- public DefaultSignificanceModelRegistry build() { return new DefaultSignificanceModelRegistry(this); }
- }
-
}
diff --git a/linguistics/src/main/java/com/yahoo/language/significance/impl/DocumentFrequencyFile.java b/linguistics/src/main/java/com/yahoo/language/significance/impl/DocumentFrequencyFile.java
new file mode 100644
index 00000000000..34e73e1b547
--- /dev/null
+++ b/linguistics/src/main/java/com/yahoo/language/significance/impl/DocumentFrequencyFile.java
@@ -0,0 +1,44 @@
+// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.language.significance.impl;
+
+import com.fasterxml.jackson.annotation.JsonCreator;
+import com.fasterxml.jackson.annotation.JsonIgnoreProperties;
+import com.fasterxml.jackson.annotation.JsonInclude;
+import com.fasterxml.jackson.annotation.JsonProperty;
+
+import java.util.HashMap;
+import java.util.Map;
+import java.util.TreeMap;
+
+/**
+ * Jackson-mapped JSON object with a description, a document count and a map of document frequencies; used as the per-language value in SignificanceModelFile.
+ * @author MariusArhaug
+ */
+@JsonIgnoreProperties(ignoreUnknown = true)
+@JsonInclude(JsonInclude.Include.NON_NULL)
+public class DocumentFrequencyFile {
+ private final String description; // JSON property "description"
+
+ private final long documentCount; // JSON property "document-count"; DefaultSignificanceModel uses it as its corpus size
+
+ private final Map<String, Long> frequencies; // JSON property "document-frequencies"; stored and returned without copying
+
+ @JsonCreator
+ public DocumentFrequencyFile(
+ @JsonProperty("description") String description,
+ @JsonProperty("document-count") long documentCount,
+ @JsonProperty("document-frequencies") Map<String, Long> frequencies) {
+ this.description = description;
+ this.documentCount = documentCount;
+ this.frequencies = frequencies; // NOTE(review): no null check; presumably null when the property is absent from the JSON - confirm
+ }
+
+ @JsonProperty("description")
+ public String description() { return description; }
+
+ @JsonProperty("document-count")
+ public long documentCount() { return documentCount; }
+
+ @JsonProperty("document-frequencies")
+ public Map<String, Long> frequencies() { return frequencies; }
+}
diff --git a/linguistics/src/main/java/com/yahoo/language/significance/impl/SignificanceModelFile.java b/linguistics/src/main/java/com/yahoo/language/significance/impl/SignificanceModelFile.java
new file mode 100644
index 00000000000..94030108671
--- /dev/null
+++ b/linguistics/src/main/java/com/yahoo/language/significance/impl/SignificanceModelFile.java
@@ -0,0 +1,52 @@
+// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.language.significance.impl;
+
+import com.fasterxml.jackson.annotation.JsonCreator;
+import com.fasterxml.jackson.annotation.JsonIgnoreProperties;
+import com.fasterxml.jackson.annotation.JsonInclude;
+import com.fasterxml.jackson.annotation.JsonProperty;
+
+import java.util.HashMap;
+import java.util.List;
+
+/**
+ * Jackson-mapped top-level model file: version, id and description plus one DocumentFrequencyFile per key of the "languages" map.
+ * @author MariusArhaug
+ */
+
+@JsonIgnoreProperties(ignoreUnknown = true)
+public class SignificanceModelFile {
+ private final String version; // JSON property "version"
+ private final String id; // JSON property "id"; DefaultSignificanceModelRegistry passes it to every model it builds from this file
+ private final String description; // JSON property "description"
+
+ private final HashMap<String, DocumentFrequencyFile> languages; // JSON property "languages"; the registry reads each key as a language tag
+
+ @JsonCreator
+ public SignificanceModelFile(
+ @JsonProperty("version") String version,
+ @JsonProperty("id") String id,
+ @JsonProperty("description") String description,
+ @JsonProperty("languages") HashMap<String, DocumentFrequencyFile> languages) {
+ this.version = version;
+ this.id = id;
+ this.description = description;
+ this.languages = languages; // stored as passed in: no copy and no null check
+ }
+
+ @JsonProperty("version")
+ public String version() { return version; }
+
+ @JsonProperty("id")
+ public String id() { return id; }
+
+ @JsonProperty("description")
+ public String description() { return description; }
+
+ @JsonProperty("languages")
+ public HashMap<String, DocumentFrequencyFile> languages() { return languages; } // hands out the internal mutable map itself
+
+ public void addLanguage(String language, DocumentFrequencyFile documentFrequencyFile) { // adds or replaces the entry for the given language key
+ languages.put(language, documentFrequencyFile); // NOTE(review): throws NullPointerException if the instance was created with a null languages map - confirm whether that can happen
+ }
+}