summary refs log tree commit diff stats
diff options
context:
space:
mode:
author MariusArhaug <mariusarhaug@hotmail.com> 2024-04-09 10:56:47 +0200
committer MariusArhaug <mariusarhaug@hotmail.com> 2024-04-09 14:48:10 +0200
commit c3e1f6aac29beba81ee877277d3bf4bb64ab4574 (patch)
tree 08e5ba3b07c06c45998606cfc62de15f6ec681e6
parent 501f69bef60ebe61beb52ef369c158c38b976c8b (diff)
fix cr failures
-rw-r--r-- linguistics/abi-spec.json 44
-rw-r--r-- linguistics/pom.xml 12
-rw-r--r-- linguistics/src/main/java/com/yahoo/language/significance/DocumentFrequency.java 4
-rw-r--r-- linguistics/src/main/java/com/yahoo/language/significance/SignificanceModel.java 3
-rw-r--r-- linguistics/src/main/java/com/yahoo/language/significance/SignificanceModelRegistry.java 2
-rw-r--r-- linguistics/src/main/java/com/yahoo/language/significance/impl/DefaultSignificanceModel.java (renamed from linguistics/src/main/java/com/yahoo/language/significance/DefaultSignificanceModel.java) 21
-rw-r--r-- linguistics/src/main/java/com/yahoo/language/significance/impl/DefaultSignificanceModelRegistry.java (renamed from linguistics/src/main/java/com/yahoo/language/significance/DefaultSignificanceModelRegistry.java) 36
-rw-r--r-- linguistics/src/main/java/com/yahoo/language/significance/package-info.java 7
-rw-r--r-- linguistics/src/test/java/com/yahoo/language/significance/DefaultSignificanceModelRegistryTest.java 9
-rw-r--r-- linguistics/src/test/java/com/yahoo/language/significance/DefaultSignificanceModelTest.java 6
-rw-r--r-- linguistics/src/test/models/en.json 6
-rw-r--r-- linguistics/src/test/models/no.json 6
12 files changed, 104 insertions, 52 deletions
diff --git a/linguistics/abi-spec.json b/linguistics/abi-spec.json
index 0bd4638bb05..9f91c32cf62 100644
--- a/linguistics/abi-spec.json
+++ b/linguistics/abi-spec.json
@@ -774,5 +774,49 @@
"public abstract java.lang.String accentDrop(java.lang.String, com.yahoo.language.Language)"
],
"fields" : [ ]
+ },
+ "com.yahoo.language.significance.DocumentFrequency" : {
+ "superClass" : "java.lang.Record",
+ "interfaces" : [ ],
+ "attributes" : [
+ "public",
+ "final",
+ "record"
+ ],
+ "methods" : [
+ "public void <init>(long, long)",
+ "public final java.lang.String toString()",
+ "public final int hashCode()",
+ "public final boolean equals(java.lang.Object)",
+ "public long frequency()",
+ "public long corpusSize()"
+ ],
+ "fields" : [ ]
+ },
+ "com.yahoo.language.significance.SignificanceModel" : {
+ "superClass" : "java.lang.Object",
+ "interfaces" : [ ],
+ "attributes" : [
+ "public",
+ "interface",
+ "abstract"
+ ],
+ "methods" : [
+ "public abstract com.yahoo.language.significance.DocumentFrequency documentFrequency(java.lang.String)"
+ ],
+ "fields" : [ ]
+ },
+ "com.yahoo.language.significance.SignificanceModelRegistry" : {
+ "superClass" : "java.lang.Object",
+ "interfaces" : [ ],
+ "attributes" : [
+ "public",
+ "interface",
+ "abstract"
+ ],
+ "methods" : [
+ "public abstract com.yahoo.language.significance.SignificanceModel getModel(com.yahoo.language.Language)"
+ ],
+ "fields" : [ ]
}
} \ No newline at end of file
diff --git a/linguistics/pom.xml b/linguistics/pom.xml
index a358141af21..d07ff5d9fdb 100644
--- a/linguistics/pom.xml
+++ b/linguistics/pom.xml
@@ -68,10 +68,14 @@
<scope>provided</scope>
</dependency>
<dependency>
- <groupId>com.yahoo.vespa</groupId>
- <artifactId>flags</artifactId>
- <version>8-SNAPSHOT</version>
- <scope>compile</scope>
+ <groupId>org.junit.vintage</groupId>
+ <artifactId>junit-vintage-engine</artifactId>
+ <scope>test</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.junit.jupiter</groupId>
+ <artifactId>junit-jupiter</artifactId>
+ <scope>test</scope>
</dependency>
</dependencies>
<build>
diff --git a/linguistics/src/main/java/com/yahoo/language/significance/DocumentFrequency.java b/linguistics/src/main/java/com/yahoo/language/significance/DocumentFrequency.java
index a94beacfd64..ecfcd61eb71 100644
--- a/linguistics/src/main/java/com/yahoo/language/significance/DocumentFrequency.java
+++ b/linguistics/src/main/java/com/yahoo/language/significance/DocumentFrequency.java
@@ -7,8 +7,4 @@ package com.yahoo.language.significance;
*/
public record DocumentFrequency(long frequency, long corpusSize) {
- public DocumentFrequency(long frequency, long corpusSize) {
- this.frequency = frequency;
- this.corpusSize = corpusSize;
- }
}
diff --git a/linguistics/src/main/java/com/yahoo/language/significance/SignificanceModel.java b/linguistics/src/main/java/com/yahoo/language/significance/SignificanceModel.java
index 415eccac93b..a9f1e48af62 100644
--- a/linguistics/src/main/java/com/yahoo/language/significance/SignificanceModel.java
+++ b/linguistics/src/main/java/com/yahoo/language/significance/SignificanceModel.java
@@ -1,9 +1,12 @@
// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.language.significance;
+import com.yahoo.api.annotations.Beta;
+
/**
* @author MariusArhaug
*/
+@Beta
public interface SignificanceModel {
DocumentFrequency documentFrequency(String word);
}
diff --git a/linguistics/src/main/java/com/yahoo/language/significance/SignificanceModelRegistry.java b/linguistics/src/main/java/com/yahoo/language/significance/SignificanceModelRegistry.java
index d7f0aac9949..6d8dcc00e0a 100644
--- a/linguistics/src/main/java/com/yahoo/language/significance/SignificanceModelRegistry.java
+++ b/linguistics/src/main/java/com/yahoo/language/significance/SignificanceModelRegistry.java
@@ -1,11 +1,13 @@
// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.language.significance;
+import com.yahoo.api.annotations.Beta;
import com.yahoo.language.Language;
/**
* @author MariusArhaug
*/
+@Beta
public interface SignificanceModelRegistry {
SignificanceModel getModel(Language language);
}
diff --git a/linguistics/src/main/java/com/yahoo/language/significance/DefaultSignificanceModel.java b/linguistics/src/main/java/com/yahoo/language/significance/impl/DefaultSignificanceModel.java
index 5cc82264b2b..7ed6f442610 100644
--- a/linguistics/src/main/java/com/yahoo/language/significance/DefaultSignificanceModel.java
+++ b/linguistics/src/main/java/com/yahoo/language/significance/impl/DefaultSignificanceModel.java
@@ -1,10 +1,12 @@
// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
-package com.yahoo.language.significance;
+package com.yahoo.language.significance.impl;
import com.fasterxml.jackson.annotation.JsonCreator;
import com.fasterxml.jackson.annotation.JsonIgnoreProperties;
import com.fasterxml.jackson.annotation.JsonProperty;
import com.fasterxml.jackson.databind.ObjectMapper;
+import com.yahoo.language.significance.DocumentFrequency;
+import com.yahoo.language.significance.SignificanceModel;
import java.nio.file.Path;
import java.util.HashMap;
@@ -25,25 +27,26 @@ public class DefaultSignificanceModel implements SignificanceModel {
private final String description;
private final long corpusSize;
private final String language;
+
+ private final long wordCount;
private final HashMap<String, Long> frequencies;
- private final long tokenCount;
@JsonCreator
public SignificanceModelFile(
@JsonProperty("version") String version,
@JsonProperty("id") String id,
@JsonProperty("description") String description,
- @JsonProperty("corpus_size") long corpusSize,
+ @JsonProperty("corpus-size") long corpusSize,
@JsonProperty("language") String language,
- @JsonProperty("frequencies") HashMap<String, Long> frequencies,
- @JsonProperty("token_count") long tokenCount) {
+ @JsonProperty("word-count") long wordCount,
+ @JsonProperty("frequencies") HashMap<String, Long> frequencies) {
this.version = version;
this.id = id;
this.description = description;
this.corpusSize = corpusSize;
this.language = language;
+ this.wordCount = wordCount;
this.frequencies = frequencies;
- this.tokenCount = tokenCount;
}
@JsonProperty("version")
@@ -55,7 +58,7 @@ public class DefaultSignificanceModel implements SignificanceModel {
@JsonProperty("description")
public String description() { return description; }
- @JsonProperty("corpus_size")
+ @JsonProperty("corpus-size")
public long corpusSize() { return corpusSize; }
@JsonProperty("language")
@@ -64,8 +67,8 @@ public class DefaultSignificanceModel implements SignificanceModel {
@JsonProperty("frequencies")
public HashMap<String, Long> frequencies() { return frequencies; }
- @JsonProperty("token_count")
- public long tokenCount() { return tokenCount; }
+ @JsonProperty("word-count")
+ public long wordCount() { return wordCount; }
}
diff --git a/linguistics/src/main/java/com/yahoo/language/significance/DefaultSignificanceModelRegistry.java b/linguistics/src/main/java/com/yahoo/language/significance/impl/DefaultSignificanceModelRegistry.java
index 59a50e2c36a..56bab17c958 100644
--- a/linguistics/src/main/java/com/yahoo/language/significance/DefaultSignificanceModelRegistry.java
+++ b/linguistics/src/main/java/com/yahoo/language/significance/impl/DefaultSignificanceModelRegistry.java
@@ -1,8 +1,10 @@
// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
-package com.yahoo.language.significance;
+package com.yahoo.language.significance.impl;
import com.yahoo.component.annotation.Inject;
import com.yahoo.language.Language;
+import com.yahoo.language.significance.SignificanceModel;
+import com.yahoo.language.significance.SignificanceModelRegistry;
import com.yahoo.search.significance.config.SignificanceConfig;
import java.nio.file.Path;
@@ -24,25 +26,18 @@ public class DefaultSignificanceModelRegistry implements SignificanceModelRegist
@Inject
public DefaultSignificanceModelRegistry(SignificanceConfig cfg) { this(new Builder(cfg)); }
private DefaultSignificanceModelRegistry(Builder b) {
-
- this.models = withContextClassloader(() -> {
- var models = new EnumMap<Language, SignificanceModel>(Language.class);
- b.models.forEach((language, path) -> {
- models.put(language,
- uncheck(() -> new DefaultSignificanceModel(path)));
- });
- return models;
+ this.models = new EnumMap<>(Language.class);
+ b.models.forEach((language, path) -> {
+ models.put(language,
+ uncheck(() -> new DefaultSignificanceModel(path)));
});
}
public DefaultSignificanceModelRegistry(HashMap<Language, Path> map) {
- this.models = withContextClassloader(() -> {
- var models = new EnumMap<Language, SignificanceModel>(Language.class);
- map.forEach((language, path) -> {
- models.put(language,
- uncheck(() -> new DefaultSignificanceModel(path)));
- });
- return models;
+ this.models = new EnumMap<>(Language.class);
+ map.forEach((language, path) -> {
+ models.put(language,
+ uncheck(() -> new DefaultSignificanceModel(path)));
});
}
@@ -52,15 +47,6 @@ public class DefaultSignificanceModelRegistry implements SignificanceModelRegist
return models.get(language);
}
- private static <R> R withContextClassloader(Supplier<R> r) {
- var original = Thread.currentThread().getContextClassLoader();
- Thread.currentThread().setContextClassLoader(SignificanceModel.class.getClassLoader());
- try {
- return r.get();
- } finally {
- Thread.currentThread().setContextClassLoader(original);
- }
- }
public static final class Builder {
private final Map<Language, Path> models = new EnumMap<>(Language.class);
diff --git a/linguistics/src/main/java/com/yahoo/language/significance/package-info.java b/linguistics/src/main/java/com/yahoo/language/significance/package-info.java
new file mode 100644
index 00000000000..5c2f773452f
--- /dev/null
+++ b/linguistics/src/main/java/com/yahoo/language/significance/package-info.java
@@ -0,0 +1,7 @@
+// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+@ExportPackage
+@PublicApi
+package com.yahoo.language.significance;
+
+import com.yahoo.api.annotations.PublicApi;
+import com.yahoo.osgi.annotation.ExportPackage;
diff --git a/linguistics/src/test/java/com/yahoo/language/significance/DefaultSignificanceModelRegistryTest.java b/linguistics/src/test/java/com/yahoo/language/significance/DefaultSignificanceModelRegistryTest.java
index 698d507c7e8..0ff2eacfd37 100644
--- a/linguistics/src/test/java/com/yahoo/language/significance/DefaultSignificanceModelRegistryTest.java
+++ b/linguistics/src/test/java/com/yahoo/language/significance/DefaultSignificanceModelRegistryTest.java
@@ -2,13 +2,15 @@
package com.yahoo.language.significance;
import com.yahoo.language.Language;
+import com.yahoo.language.significance.impl.DefaultSignificanceModelRegistry;
import org.junit.Test;
import java.nio.file.Path;
import java.util.HashMap;
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertNotNull;
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertNotNull;
+
/**
* @author MariusArhaug
@@ -36,5 +38,8 @@ public class DefaultSignificanceModelRegistryTest {
assertEquals(3, norwegianModel.documentFrequency("nei").frequency());
assertEquals(20, norwegianModel.documentFrequency("nei").corpusSize());
+ assertEquals(1, norwegianModel.documentFrequency("non-existent-word").frequency());
+ assertEquals(20, norwegianModel.documentFrequency("non-existent-word").corpusSize());
+
}
}
diff --git a/linguistics/src/test/java/com/yahoo/language/significance/DefaultSignificanceModelTest.java b/linguistics/src/test/java/com/yahoo/language/significance/DefaultSignificanceModelTest.java
index 38ca24855f8..137f8d4513a 100644
--- a/linguistics/src/test/java/com/yahoo/language/significance/DefaultSignificanceModelTest.java
+++ b/linguistics/src/test/java/com/yahoo/language/significance/DefaultSignificanceModelTest.java
@@ -1,11 +1,13 @@
// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.language.significance;
-import org.junit.Test;
+import com.yahoo.language.significance.impl.DefaultSignificanceModel;
+import org.junit.jupiter.api.Test;
import java.nio.file.Path;
-import static org.junit.Assert.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertEquals;
+
/**
* @author MariusArhaug
diff --git a/linguistics/src/test/models/en.json b/linguistics/src/test/models/en.json
index 3fb1f58e8df..50bae5e3451 100644
--- a/linguistics/src/test/models/en.json
+++ b/linguistics/src/test/models/en.json
@@ -2,13 +2,13 @@
"version" : "1.0",
"id" : "test::1",
"description" : "desc",
- "corpus_size" : 10,
+ "corpus-size" : 10,
"language" : "en",
+ "word-count" : 4,
"frequencies" : {
"usa" : 2,
"hello": 3,
"world": 5,
"test": 2
- },
- "token_count" : 4
+ }
}
diff --git a/linguistics/src/test/models/no.json b/linguistics/src/test/models/no.json
index 6e5cd6cd7d5..5fca8929e74 100644
--- a/linguistics/src/test/models/no.json
+++ b/linguistics/src/test/models/no.json
@@ -2,8 +2,9 @@
"version" : "1.0",
"id" : "test::2",
"description" : "norsk beskrivelse",
- "corpus_size" : 20,
+ "corpus-size" : 20,
"language" : "nb",
+ "word-count" : 7,
"frequencies" : {
"usa" : 2,
"hello": 10,
@@ -12,6 +13,5 @@
"norge": 11,
"ja": 12,
"nei": 3
- },
- "token_count" : 4
+ }
}