diff options
author | MariusArhaug <mariusarhaug@hotmail.com> | 2024-04-09 10:56:47 +0200 |
---|---|---|
committer | MariusArhaug <mariusarhaug@hotmail.com> | 2024-04-09 14:48:10 +0200 |
commit | c3e1f6aac29beba81ee877277d3bf4bb64ab4574 (patch) | |
tree | 08e5ba3b07c06c45998606cfc62de15f6ec681e6 | |
parent | 501f69bef60ebe61beb52ef369c158c38b976c8b (diff) |
fix cr failures
12 files changed, 104 insertions, 52 deletions
diff --git a/linguistics/abi-spec.json b/linguistics/abi-spec.json index 0bd4638bb05..9f91c32cf62 100644 --- a/linguistics/abi-spec.json +++ b/linguistics/abi-spec.json @@ -774,5 +774,49 @@ "public abstract java.lang.String accentDrop(java.lang.String, com.yahoo.language.Language)" ], "fields" : [ ] + }, + "com.yahoo.language.significance.DocumentFrequency" : { + "superClass" : "java.lang.Record", + "interfaces" : [ ], + "attributes" : [ + "public", + "final", + "record" + ], + "methods" : [ + "public void <init>(long, long)", + "public final java.lang.String toString()", + "public final int hashCode()", + "public final boolean equals(java.lang.Object)", + "public long frequency()", + "public long corpusSize()" + ], + "fields" : [ ] + }, + "com.yahoo.language.significance.SignificanceModel" : { + "superClass" : "java.lang.Object", + "interfaces" : [ ], + "attributes" : [ + "public", + "interface", + "abstract" + ], + "methods" : [ + "public abstract com.yahoo.language.significance.DocumentFrequency documentFrequency(java.lang.String)" + ], + "fields" : [ ] + }, + "com.yahoo.language.significance.SignificanceModelRegistry" : { + "superClass" : "java.lang.Object", + "interfaces" : [ ], + "attributes" : [ + "public", + "interface", + "abstract" + ], + "methods" : [ + "public abstract com.yahoo.language.significance.SignificanceModel getModel(com.yahoo.language.Language)" + ], + "fields" : [ ] } }
\ No newline at end of file diff --git a/linguistics/pom.xml b/linguistics/pom.xml index a358141af21..d07ff5d9fdb 100644 --- a/linguistics/pom.xml +++ b/linguistics/pom.xml @@ -68,10 +68,14 @@ <scope>provided</scope> </dependency> <dependency> - <groupId>com.yahoo.vespa</groupId> - <artifactId>flags</artifactId> - <version>8-SNAPSHOT</version> - <scope>compile</scope> + <groupId>org.junit.vintage</groupId> + <artifactId>junit-vintage-engine</artifactId> + <scope>test</scope> + </dependency> + <dependency> + <groupId>org.junit.jupiter</groupId> + <artifactId>junit-jupiter</artifactId> + <scope>test</scope> </dependency> </dependencies> <build> diff --git a/linguistics/src/main/java/com/yahoo/language/significance/DocumentFrequency.java b/linguistics/src/main/java/com/yahoo/language/significance/DocumentFrequency.java index a94beacfd64..ecfcd61eb71 100644 --- a/linguistics/src/main/java/com/yahoo/language/significance/DocumentFrequency.java +++ b/linguistics/src/main/java/com/yahoo/language/significance/DocumentFrequency.java @@ -7,8 +7,4 @@ package com.yahoo.language.significance; */ public record DocumentFrequency(long frequency, long corpusSize) { - public DocumentFrequency(long frequency, long corpusSize) { - this.frequency = frequency; - this.corpusSize = corpusSize; - } } diff --git a/linguistics/src/main/java/com/yahoo/language/significance/SignificanceModel.java b/linguistics/src/main/java/com/yahoo/language/significance/SignificanceModel.java index 415eccac93b..a9f1e48af62 100644 --- a/linguistics/src/main/java/com/yahoo/language/significance/SignificanceModel.java +++ b/linguistics/src/main/java/com/yahoo/language/significance/SignificanceModel.java @@ -1,9 +1,12 @@ // Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. package com.yahoo.language.significance; +import com.yahoo.api.annotations.Beta; + /** * @author MariusArhaug */ +@Beta public interface SignificanceModel { DocumentFrequency documentFrequency(String word); } diff --git a/linguistics/src/main/java/com/yahoo/language/significance/SignificanceModelRegistry.java b/linguistics/src/main/java/com/yahoo/language/significance/SignificanceModelRegistry.java index d7f0aac9949..6d8dcc00e0a 100644 --- a/linguistics/src/main/java/com/yahoo/language/significance/SignificanceModelRegistry.java +++ b/linguistics/src/main/java/com/yahoo/language/significance/SignificanceModelRegistry.java @@ -1,11 +1,13 @@ // Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. package com.yahoo.language.significance; +import com.yahoo.api.annotations.Beta; import com.yahoo.language.Language; /** * @author MariusArhaug */ +@Beta public interface SignificanceModelRegistry { SignificanceModel getModel(Language language); } diff --git a/linguistics/src/main/java/com/yahoo/language/significance/DefaultSignificanceModel.java b/linguistics/src/main/java/com/yahoo/language/significance/impl/DefaultSignificanceModel.java index 5cc82264b2b..7ed6f442610 100644 --- a/linguistics/src/main/java/com/yahoo/language/significance/DefaultSignificanceModel.java +++ b/linguistics/src/main/java/com/yahoo/language/significance/impl/DefaultSignificanceModel.java @@ -1,10 +1,12 @@ // Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -package com.yahoo.language.significance; +package com.yahoo.language.significance.impl; import com.fasterxml.jackson.annotation.JsonCreator; import com.fasterxml.jackson.annotation.JsonIgnoreProperties; import com.fasterxml.jackson.annotation.JsonProperty; import com.fasterxml.jackson.databind.ObjectMapper; +import com.yahoo.language.significance.DocumentFrequency; +import com.yahoo.language.significance.SignificanceModel; import java.nio.file.Path; import java.util.HashMap; @@ -25,25 +27,26 @@ public class DefaultSignificanceModel implements SignificanceModel { private final String description; private final long corpusSize; private final String language; + + private final long wordCount; private final HashMap<String, Long> frequencies; - private final long tokenCount; @JsonCreator public SignificanceModelFile( @JsonProperty("version") String version, @JsonProperty("id") String id, @JsonProperty("description") String description, - @JsonProperty("corpus_size") long corpusSize, + @JsonProperty("corpus-size") long corpusSize, @JsonProperty("language") String language, - @JsonProperty("frequencies") HashMap<String, Long> frequencies, - @JsonProperty("token_count") long tokenCount) { + @JsonProperty("word-count") long wordCount, + @JsonProperty("frequencies") HashMap<String, Long> frequencies) { this.version = version; this.id = id; this.description = description; this.corpusSize = corpusSize; this.language = language; + this.wordCount = wordCount; this.frequencies = frequencies; - this.tokenCount = tokenCount; } @JsonProperty("version") @@ -55,7 +58,7 @@ public class DefaultSignificanceModel implements SignificanceModel { @JsonProperty("description") public String description() { return description; } - @JsonProperty("corpus_size") + @JsonProperty("corpus-size") public long corpusSize() { return corpusSize; } @JsonProperty("language") @@ -64,8 +67,8 @@ public class DefaultSignificanceModel implements SignificanceModel { @JsonProperty("frequencies") public HashMap<String, Long> frequencies() { return frequencies; } - @JsonProperty("token_count") - public long tokenCount() { return tokenCount; } + @JsonProperty("word-count") + public long wordCount() { return wordCount; } } diff --git a/linguistics/src/main/java/com/yahoo/language/significance/DefaultSignificanceModelRegistry.java b/linguistics/src/main/java/com/yahoo/language/significance/impl/DefaultSignificanceModelRegistry.java index 59a50e2c36a..56bab17c958 100644 --- a/linguistics/src/main/java/com/yahoo/language/significance/DefaultSignificanceModelRegistry.java +++ b/linguistics/src/main/java/com/yahoo/language/significance/impl/DefaultSignificanceModelRegistry.java @@ -1,8 +1,10 @@ // Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -package com.yahoo.language.significance; +package com.yahoo.language.significance.impl; import com.yahoo.component.annotation.Inject; import com.yahoo.language.Language; +import com.yahoo.language.significance.SignificanceModel; +import com.yahoo.language.significance.SignificanceModelRegistry; import com.yahoo.search.significance.config.SignificanceConfig; import java.nio.file.Path; @@ -24,25 +26,18 @@ public class DefaultSignificanceModelRegistry implements SignificanceModelRegist @Inject public DefaultSignificanceModelRegistry(SignificanceConfig cfg) { this(new Builder(cfg)); } private DefaultSignificanceModelRegistry(Builder b) { - - this.models = withContextClassloader(() -> { - var models = new EnumMap<Language, SignificanceModel>(Language.class); - b.models.forEach((language, path) -> { - models.put(language, - uncheck(() -> new DefaultSignificanceModel(path))); - }); - return models; + this.models = new EnumMap<>(Language.class); + b.models.forEach((language, path) -> { + models.put(language, + uncheck(() -> new DefaultSignificanceModel(path))); }); } public DefaultSignificanceModelRegistry(HashMap<Language, Path> map) { - this.models = withContextClassloader(() -> { - var models = new EnumMap<Language, SignificanceModel>(Language.class); - map.forEach((language, path) -> { - models.put(language, - uncheck(() -> new DefaultSignificanceModel(path))); - }); - return models; + this.models = new EnumMap<>(Language.class); + map.forEach((language, path) -> { + models.put(language, + uncheck(() -> new DefaultSignificanceModel(path))); }); } @@ -52,15 +47,6 @@ public class DefaultSignificanceModelRegistry implements SignificanceModelRegist return models.get(language); } - private static <R> R withContextClassloader(Supplier<R> r) { - var original = Thread.currentThread().getContextClassLoader(); - Thread.currentThread().setContextClassLoader(SignificanceModel.class.getClassLoader()); - try { - return r.get(); - } finally { - Thread.currentThread().setContextClassLoader(original); - } - } public static final class Builder { private final Map<Language, Path> models = new EnumMap<>(Language.class); diff --git a/linguistics/src/main/java/com/yahoo/language/significance/package-info.java b/linguistics/src/main/java/com/yahoo/language/significance/package-info.java new file mode 100644 index 00000000000..5c2f773452f --- /dev/null +++ b/linguistics/src/main/java/com/yahoo/language/significance/package-info.java @@ -0,0 +1,7 @@ +// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +@ExportPackage +@PublicApi +package com.yahoo.language.significance; + +import com.yahoo.api.annotations.PublicApi; +import com.yahoo.osgi.annotation.ExportPackage; diff --git a/linguistics/src/test/java/com/yahoo/language/significance/DefaultSignificanceModelRegistryTest.java b/linguistics/src/test/java/com/yahoo/language/significance/DefaultSignificanceModelRegistryTest.java index 698d507c7e8..0ff2eacfd37 100644 --- a/linguistics/src/test/java/com/yahoo/language/significance/DefaultSignificanceModelRegistryTest.java +++ b/linguistics/src/test/java/com/yahoo/language/significance/DefaultSignificanceModelRegistryTest.java @@ -2,13 +2,15 @@ package com.yahoo.language.significance; import com.yahoo.language.Language; +import com.yahoo.language.significance.impl.DefaultSignificanceModelRegistry; import org.junit.Test; import java.nio.file.Path; import java.util.HashMap; -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertNotNull; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertNotNull; + /** * @author MariusArhaug @@ -36,5 +38,8 @@ public class DefaultSignificanceModelRegistryTest { assertEquals(3, norwegianModel.documentFrequency("nei").frequency()); assertEquals(20, norwegianModel.documentFrequency("nei").corpusSize()); + assertEquals(1, norwegianModel.documentFrequency("non-existent-word").frequency()); + assertEquals(20, norwegianModel.documentFrequency("non-existent-word").corpusSize()); + } } diff --git a/linguistics/src/test/java/com/yahoo/language/significance/DefaultSignificanceModelTest.java b/linguistics/src/test/java/com/yahoo/language/significance/DefaultSignificanceModelTest.java index 38ca24855f8..137f8d4513a 100644 --- a/linguistics/src/test/java/com/yahoo/language/significance/DefaultSignificanceModelTest.java +++ b/linguistics/src/test/java/com/yahoo/language/significance/DefaultSignificanceModelTest.java @@ -1,11 +1,13 @@ // Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. package com.yahoo.language.significance; -import org.junit.Test; +import com.yahoo.language.significance.impl.DefaultSignificanceModel; +import org.junit.jupiter.api.Test; import java.nio.file.Path; -import static org.junit.Assert.assertEquals; +import static org.junit.jupiter.api.Assertions.assertEquals; + /** * @author MariusArhaug diff --git a/linguistics/src/test/models/en.json b/linguistics/src/test/models/en.json index 3fb1f58e8df..50bae5e3451 100644 --- a/linguistics/src/test/models/en.json +++ b/linguistics/src/test/models/en.json @@ -2,13 +2,13 @@ "version" : "1.0", "id" : "test::1", "description" : "desc", - "corpus_size" : 10, + "corpus-size" : 10, "language" : "en", + "word-count" : 4, "frequencies" : { "usa" : 2, "hello": 3, "world": 5, "test": 2 - }, - "token_count" : 4 + } } diff --git a/linguistics/src/test/models/no.json b/linguistics/src/test/models/no.json index 6e5cd6cd7d5..5fca8929e74 100644 --- a/linguistics/src/test/models/no.json +++ b/linguistics/src/test/models/no.json @@ -2,8 +2,9 @@ "version" : "1.0", "id" : "test::2", "description" : "norsk beskrivelse", - "corpus_size" : 20, + "corpus-size" : 20, "language" : "nb", + "word-count" : 7, "frequencies" : { "usa" : 2, "hello": 10, @@ -12,6 +13,5 @@ "norge": 11, "ja": 12, "nei": 3 - }, - "token_count" : 4 + } } |