aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorMariusArhaug <mariusarhaug@hotmail.com>2024-04-24 11:29:23 +0200
committerMariusArhaug <mariusarhaug@hotmail.com>2024-04-24 15:33:21 +0200
commitaaae1863d50586fb86685d3cbb6c73f589d3370c (patch)
treec0b0f9d8c4126da56aacadea264d66d2110b4e5c
parentff1d604e77b1943a72fc6b585b09db82a5ee791d (diff)
Update significance model field and logic from architect meeting
-rw-r--r--config-model/src/main/java/com/yahoo/vespa/model/container/component/SignificanceModelRegistry.java16
-rw-r--r--config-model/src/main/resources/schema/containercluster.rnc2
-rw-r--r--config-model/src/test/cfg/significance/services.xml6
-rw-r--r--config-model/src/test/java/com/yahoo/vespa/model/significance/test/SignificanceModelTestCase.java3
-rw-r--r--config-model/src/test/schema-test-files/services.xml2
-rw-r--r--configdefinitions/src/vespa/significance.def2
-rw-r--r--linguistics/abi-spec.json3
-rw-r--r--linguistics/src/main/java/com/yahoo/language/significance/SignificanceModel.java2
-rw-r--r--linguistics/src/main/java/com/yahoo/language/significance/impl/DefaultSignificanceModel.java76
-rw-r--r--linguistics/src/main/java/com/yahoo/language/significance/impl/DefaultSignificanceModelRegistry.java67
-rw-r--r--linguistics/src/main/java/com/yahoo/language/significance/impl/DocumentFrequencyFile.java51
-rw-r--r--linguistics/src/main/java/com/yahoo/language/significance/impl/SignificanceModelFile.java43
-rw-r--r--linguistics/src/test/java/com/yahoo/language/significance/DefaultSignificanceModelRegistryTest.java43
-rw-r--r--linguistics/src/test/models/docv1.json19
-rw-r--r--linguistics/src/test/models/docv2.json35
-rw-r--r--linguistics/src/test/models/en.json7
-rw-r--r--linguistics/src/test/models/no.json17
17 files changed, 256 insertions, 138 deletions
diff --git a/config-model/src/main/java/com/yahoo/vespa/model/container/component/SignificanceModelRegistry.java b/config-model/src/main/java/com/yahoo/vespa/model/container/component/SignificanceModelRegistry.java
index c210c2621a6..ce9dc2bce0f 100644
--- a/config-model/src/main/java/com/yahoo/vespa/model/container/component/SignificanceModelRegistry.java
+++ b/config-model/src/main/java/com/yahoo/vespa/model/container/component/SignificanceModelRegistry.java
@@ -32,18 +32,15 @@ public class SignificanceModelRegistry extends SimpleComponent implements Signif
super(new ComponentModel(BundleInstantiationSpecification.fromStrings(CLASS, CLASS, BUNDLE)));
if (spec != null) {
- for (Element modelElement : XML.getChildren(spec, "model")) {
- addConfig(
- modelElement.getAttribute("language"),
- Model.fromXml(deployState, modelElement, Set.of(SIGNIFICANCE_MODEL)).modelReference());
- }
+ for (Element modelElement : XML.getChildren(spec, "model")) {
+ addConfig(Model.fromXml(deployState, modelElement, Set.of(SIGNIFICANCE_MODEL)).modelReference());
}
}
- public void addConfig(String language, ModelReference path) {
+ public void addConfig(ModelReference path) {
configList.add(
- new SignificanceModelConfig(language, path)
+ new SignificanceModelConfig(path)
);
}
@@ -53,7 +50,6 @@ public class SignificanceModelRegistry extends SimpleComponent implements Signif
builder.model(
configList.stream()
.map(config -> new SignificanceConfig.Model.Builder()
- .language(config.language)
.path(config.path)
).toList()
);
@@ -61,11 +57,9 @@ public class SignificanceModelRegistry extends SimpleComponent implements Signif
class SignificanceModelConfig {
- private final String language;
private final ModelReference path;
- public SignificanceModelConfig(String language, ModelReference path) {
- this.language = language;
+ public SignificanceModelConfig(ModelReference path) {
this.path = path;
}
diff --git a/config-model/src/main/resources/schema/containercluster.rnc b/config-model/src/main/resources/schema/containercluster.rnc
index 08092f10020..c79a7b38d09 100644
--- a/config-model/src/main/resources/schema/containercluster.rnc
+++ b/config-model/src/main/resources/schema/containercluster.rnc
@@ -138,7 +138,7 @@ Threadpool = element threadpool {
}
Significance = element significance {
- element model { attribute language { xsd:string } & ModelReference }*
+ element model { ModelReference }*
}
Clients = element clients {
diff --git a/config-model/src/test/cfg/significance/services.xml b/config-model/src/test/cfg/significance/services.xml
index 6991f5498fb..ffdb73bfc2e 100644
--- a/config-model/src/test/cfg/significance/services.xml
+++ b/config-model/src/test/cfg/significance/services.xml
@@ -8,9 +8,9 @@
<container version="1.0">
<search>
<significance>
- <model language="en" model-id="idf-wiki-english" path="models/idf-english-wiki.json.zst"/>
- <model language="no" path="models/idf-norwegian-wiki.json.zst" />
- <model language="ru" url="https://some/uri/blob.json" />
+ <model model-id="idf-wiki-english" path="models/idf-english-wiki.json.zst"/>
+ <model path="models/idf-norwegian-wiki.json.zst" />
+ <model url="https://some/uri/blob.json" />
</significance>
</search>
</container>
diff --git a/config-model/src/test/java/com/yahoo/vespa/model/significance/test/SignificanceModelTestCase.java b/config-model/src/test/java/com/yahoo/vespa/model/significance/test/SignificanceModelTestCase.java
index 00e95a34287..26e8c67a226 100644
--- a/config-model/src/test/java/com/yahoo/vespa/model/significance/test/SignificanceModelTestCase.java
+++ b/config-model/src/test/java/com/yahoo/vespa/model/significance/test/SignificanceModelTestCase.java
@@ -37,9 +37,6 @@ public class SignificanceModelTestCase {
ApplicationContainerCluster containerCluster = vespaModel.getContainerClusters().get("container");
var significanceConfig = assertSignificancePresent(containerCluster);
assertEquals(3, significanceConfig.model().size());
- assertEquals("en", significanceConfig.model().get(0).language());
- assertEquals("no", significanceConfig.model().get(1).language());
- assertEquals("ru", significanceConfig.model().get(2).language());
assertEquals("models/idf-norwegian-wiki.json.zst", modelReference(significanceConfig.model().get(1), "path").path().orElseThrow().value());
assertEquals("https://some/uri/blob.json", modelReference(significanceConfig.model().get(2), "path").url().orElseThrow().value());
diff --git a/config-model/src/test/schema-test-files/services.xml b/config-model/src/test/schema-test-files/services.xml
index 7333ef5a87b..a413ec7753b 100644
--- a/config-model/src/test/schema-test-files/services.xml
+++ b/config-model/src/test/schema-test-files/services.xml
@@ -168,7 +168,7 @@
</threadpool>
<significance>
- <model language="en" model-id="idf-wiki-simple-english" path="models/idf-simple-english-wiki.json.zst" />
+ <model model-id="idf-wiki-simple-english" path="models/idf-simple-english-wiki.json.zst" />
</significance>
</search>
diff --git a/configdefinitions/src/vespa/significance.def b/configdefinitions/src/vespa/significance.def
index e0cc5b4c611..8d40381a0c9 100644
--- a/configdefinitions/src/vespa/significance.def
+++ b/configdefinitions/src/vespa/significance.def
@@ -1,6 +1,4 @@
# Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
namespace=search.significance.config
-model[].language string
model[].path model
-
diff --git a/linguistics/abi-spec.json b/linguistics/abi-spec.json
index 1ca32a2dd37..ceab5025760 100644
--- a/linguistics/abi-spec.json
+++ b/linguistics/abi-spec.json
@@ -803,7 +803,8 @@
"abstract"
],
"methods" : [
- "public abstract com.yahoo.language.significance.DocumentFrequency documentFrequency(java.lang.String)"
+ "public abstract com.yahoo.language.significance.DocumentFrequency documentFrequency(java.lang.String)",
+ "public abstract java.lang.String getId()"
],
"fields" : [ ]
},
diff --git a/linguistics/src/main/java/com/yahoo/language/significance/SignificanceModel.java b/linguistics/src/main/java/com/yahoo/language/significance/SignificanceModel.java
index a9f1e48af62..c8a31e1892c 100644
--- a/linguistics/src/main/java/com/yahoo/language/significance/SignificanceModel.java
+++ b/linguistics/src/main/java/com/yahoo/language/significance/SignificanceModel.java
@@ -9,4 +9,6 @@ import com.yahoo.api.annotations.Beta;
@Beta
public interface SignificanceModel {
DocumentFrequency documentFrequency(String word);
+
+ String getId();
}
diff --git a/linguistics/src/main/java/com/yahoo/language/significance/impl/DefaultSignificanceModel.java b/linguistics/src/main/java/com/yahoo/language/significance/impl/DefaultSignificanceModel.java
index 7ed6f442610..3244b8373ad 100644
--- a/linguistics/src/main/java/com/yahoo/language/significance/impl/DefaultSignificanceModel.java
+++ b/linguistics/src/main/java/com/yahoo/language/significance/impl/DefaultSignificanceModel.java
@@ -1,13 +1,11 @@
// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.language.significance.impl;
-import com.fasterxml.jackson.annotation.JsonCreator;
-import com.fasterxml.jackson.annotation.JsonIgnoreProperties;
-import com.fasterxml.jackson.annotation.JsonProperty;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.yahoo.language.significance.DocumentFrequency;
import com.yahoo.language.significance.SignificanceModel;
+import java.io.IOException;
import java.nio.file.Path;
import java.util.HashMap;
@@ -18,70 +16,22 @@ import java.util.HashMap;
public class DefaultSignificanceModel implements SignificanceModel {
private final long corpusSize;
private final HashMap<String, Long> frequencies;
- private final Path path;
- @JsonIgnoreProperties(ignoreUnknown = true)
- public static class SignificanceModelFile {
- private final String version;
- private final String id;
- private final String description;
- private final long corpusSize;
- private final String language;
-
- private final long wordCount;
- private final HashMap<String, Long> frequencies;
-
- @JsonCreator
- public SignificanceModelFile(
- @JsonProperty("version") String version,
- @JsonProperty("id") String id,
- @JsonProperty("description") String description,
- @JsonProperty("corpus-size") long corpusSize,
- @JsonProperty("language") String language,
- @JsonProperty("word-count") long wordCount,
- @JsonProperty("frequencies") HashMap<String, Long> frequencies) {
- this.version = version;
- this.id = id;
- this.description = description;
- this.corpusSize = corpusSize;
- this.language = language;
- this.wordCount = wordCount;
- this.frequencies = frequencies;
- }
-
- @JsonProperty("version")
- public String version() { return version; }
-
- @JsonProperty("id")
- public String id() { return id; }
-
- @JsonProperty("description")
- public String description() { return description; }
-
- @JsonProperty("corpus-size")
- public long corpusSize() { return corpusSize; }
-
- @JsonProperty("language")
- public String language() { return language; }
-
- @JsonProperty("frequencies")
- public HashMap<String, Long> frequencies() { return frequencies; }
-
- @JsonProperty("word-count")
- public long wordCount() { return wordCount; }
+ private String id;
+ public DefaultSignificanceModel(DocumentFrequencyFile file, String id) {
+ this.frequencies = file.frequencies();
+ this.corpusSize = file.documentCount();
+ this.id = id;
}
public DefaultSignificanceModel(Path path) {
- this.path = path;
-
ObjectMapper objectMapper = new ObjectMapper();
-
try {
- SignificanceModelFile model = objectMapper.readValue(this.path.toFile(), SignificanceModelFile.class);
- this.corpusSize = model.corpusSize;
- this.frequencies = model.frequencies;
- } catch (Exception e) {
+ var file = objectMapper.readValue(path.toFile(), DocumentFrequencyFile.class);
+ this.frequencies = file.frequencies();
+ this.corpusSize = file.documentCount();
+ } catch (IOException e) {
throw new RuntimeException("Failed to load model from " + path, e);
}
}
@@ -93,4 +43,10 @@ public class DefaultSignificanceModel implements SignificanceModel {
}
return new DocumentFrequency(1, corpusSize);
}
+
+ @Override
+ public String getId() {
+ return this.id;
+ }
+
}
diff --git a/linguistics/src/main/java/com/yahoo/language/significance/impl/DefaultSignificanceModelRegistry.java b/linguistics/src/main/java/com/yahoo/language/significance/impl/DefaultSignificanceModelRegistry.java
index 1be1d3f13b5..7bf05c71342 100644
--- a/linguistics/src/main/java/com/yahoo/language/significance/impl/DefaultSignificanceModelRegistry.java
+++ b/linguistics/src/main/java/com/yahoo/language/significance/impl/DefaultSignificanceModelRegistry.java
@@ -1,20 +1,20 @@
// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.language.significance.impl;
+import com.fasterxml.jackson.databind.ObjectMapper;
+import com.fasterxml.jackson.databind.SerializationConfig;
import com.yahoo.component.annotation.Inject;
import com.yahoo.language.Language;
import com.yahoo.language.significance.SignificanceModel;
import com.yahoo.language.significance.SignificanceModelRegistry;
import com.yahoo.search.significance.config.SignificanceConfig;
+import java.io.IOException;
import java.nio.file.Path;
import java.util.EnumMap;
-import java.util.HashMap;
+import java.util.List;
import java.util.Map;
-import java.util.Optional;
-import java.util.function.Supplier;
-import static com.yahoo.yolean.Exceptions.uncheck;
/**
* Default implementation of {@link SignificanceModelRegistry}.
* This implementation loads models lazily and caches them.
@@ -24,24 +24,45 @@ import static com.yahoo.yolean.Exceptions.uncheck;
public class DefaultSignificanceModelRegistry implements SignificanceModelRegistry {
private final Map<Language, SignificanceModel> models;
+
@Inject
- public DefaultSignificanceModelRegistry(SignificanceConfig cfg) { this(new Builder(cfg)); }
- private DefaultSignificanceModelRegistry(Builder b) {
+ public DefaultSignificanceModelRegistry(SignificanceConfig cfg) {
this.models = new EnumMap<>(Language.class);
- b.models.forEach((language, path) -> {
- models.put(language,
- uncheck(() -> new DefaultSignificanceModel(path)));
- });
+ ObjectMapper objectMapper = new ObjectMapper();
+ for (var model : cfg.model()) {
+ try {
+ SignificanceModelFile file = objectMapper.readValue(model.path().toFile(), SignificanceModelFile.class);
+
+ for (var pair : file.languages().entrySet()) {
+ addModel(
+ Language.fromLanguageTag(pair.getKey()), new DefaultSignificanceModel(pair.getValue(), file.id()));
+ }
+ } catch (IOException e) {
+ throw new RuntimeException("Failed to load model from " + model.path(), e);
+ }
+ }
}
- public DefaultSignificanceModelRegistry(HashMap<Language, Path> map) {
+ public DefaultSignificanceModelRegistry(List<Path> models) {
this.models = new EnumMap<>(Language.class);
- map.forEach((language, path) -> {
- models.put(language,
- uncheck(() -> new DefaultSignificanceModel(path)));
- });
+
+ ObjectMapper objectMapper = new ObjectMapper();
+ for (var model : models) {
+ try {
+ SignificanceModelFile file = objectMapper.readValue(model.toFile(), SignificanceModelFile.class);
+ for (var pair : file.languages().entrySet()) {
+ addModel(
+ Language.fromLanguageTag(pair.getKey()), new DefaultSignificanceModel(pair.getValue(), file.id()));
+ }
+ } catch (IOException e) {
+ throw new RuntimeException("Failed to load model from " + model, e);
+ }
+ }
}
+ public void addModel(Language lang, SignificanceModel model) {
+ this.models.put(lang, model);
+ }
@Override
public Optional<SignificanceModel> getModel(Language language) {
@@ -51,20 +72,4 @@ public class DefaultSignificanceModelRegistry implements SignificanceModelRegist
}
return Optional.of(models.get(language));
}
-
-
- public static final class Builder {
- private final Map<Language, Path> models = new EnumMap<>(Language.class);
-
- public Builder() {}
- public Builder(SignificanceConfig cfg) {
- for (var model : cfg.model()) {
- addModel(Language.fromLanguageTag(model.language()), model.path());
- }
- }
-
- public Builder addModel(Language lang, Path path) { models.put(lang, path); return this; }
- public DefaultSignificanceModelRegistry build() { return new DefaultSignificanceModelRegistry(this); }
- }
-
}
diff --git a/linguistics/src/main/java/com/yahoo/language/significance/impl/DocumentFrequencyFile.java b/linguistics/src/main/java/com/yahoo/language/significance/impl/DocumentFrequencyFile.java
new file mode 100644
index 00000000000..c2ad1de8fd8
--- /dev/null
+++ b/linguistics/src/main/java/com/yahoo/language/significance/impl/DocumentFrequencyFile.java
@@ -0,0 +1,51 @@
+package com.yahoo.language.significance.impl;
+
+import com.fasterxml.jackson.annotation.JsonCreator;
+import com.fasterxml.jackson.annotation.JsonIgnoreProperties;
+import com.fasterxml.jackson.annotation.JsonInclude;
+import com.fasterxml.jackson.annotation.JsonProperty;
+
+import java.util.HashMap;
+
+@JsonIgnoreProperties(ignoreUnknown = true)
+@JsonInclude(JsonInclude.Include.NON_NULL)
+public class DocumentFrequencyFile {
+ private final String description;
+
+ private final String language;
+
+ private final int documentCount;
+
+ private final int wordCount;
+
+ private final HashMap<String, Long> frequencies;
+
+ @JsonCreator
+ public DocumentFrequencyFile(
+ @JsonProperty("description") String description,
+ @JsonProperty("document-count") int documentCount,
+ @JsonProperty("word-count") int wordCount,
+ @JsonProperty("language") String language,
+ @JsonProperty("document-frequencies") HashMap<String, Long> frequencies) {
+ this.description = description;
+ this.documentCount = documentCount;
+ this.wordCount = wordCount;
+ this.language = language;
+ this.frequencies = frequencies;
+ }
+
+ @JsonProperty("description")
+ public String description() { return description; }
+
+ @JsonProperty("document-count")
+ public int documentCount() { return documentCount; }
+
+ @JsonProperty("language")
+ public String language() { return language; }
+
+ @JsonProperty("word-count")
+ public int wordCount() { return wordCount; }
+
+ @JsonProperty("document-frequencies")
+ public HashMap<String, Long> frequencies() { return frequencies; }
+}
diff --git a/linguistics/src/main/java/com/yahoo/language/significance/impl/SignificanceModelFile.java b/linguistics/src/main/java/com/yahoo/language/significance/impl/SignificanceModelFile.java
new file mode 100644
index 00000000000..cb923fe8861
--- /dev/null
+++ b/linguistics/src/main/java/com/yahoo/language/significance/impl/SignificanceModelFile.java
@@ -0,0 +1,43 @@
+package com.yahoo.language.significance.impl;
+
+import com.fasterxml.jackson.annotation.JsonCreator;
+import com.fasterxml.jackson.annotation.JsonIgnoreProperties;
+import com.fasterxml.jackson.annotation.JsonInclude;
+import com.fasterxml.jackson.annotation.JsonProperty;
+
+import java.util.HashMap;
+import java.util.List;
+
+
+@JsonIgnoreProperties(ignoreUnknown = true)
+public class SignificanceModelFile {
+ private final String version;
+ private final String id;
+ private final String description;
+
+ private final HashMap<String, DocumentFrequencyFile> languages;
+
+ @JsonCreator
+ public SignificanceModelFile(
+ @JsonProperty("version") String version,
+ @JsonProperty("id") String id,
+ @JsonProperty("description") String description,
+ @JsonProperty("languages") HashMap<String, DocumentFrequencyFile> languages) {
+ this.version = version;
+ this.id = id;
+ this.description = description;
+ this.languages = languages;
+ }
+
+ @JsonProperty("version")
+ public String version() { return version; }
+
+ @JsonProperty("id")
+ public String id() { return id; }
+
+ @JsonProperty("description")
+ public String description() { return description; }
+
+ @JsonProperty("languages")
+ public HashMap<String, DocumentFrequencyFile> languages() { return languages; }
+}
diff --git a/linguistics/src/test/java/com/yahoo/language/significance/DefaultSignificanceModelRegistryTest.java b/linguistics/src/test/java/com/yahoo/language/significance/DefaultSignificanceModelRegistryTest.java
index d4849571b5e..833caca3b24 100644
--- a/linguistics/src/test/java/com/yahoo/language/significance/DefaultSignificanceModelRegistryTest.java
+++ b/linguistics/src/test/java/com/yahoo/language/significance/DefaultSignificanceModelRegistryTest.java
@@ -6,7 +6,8 @@ import com.yahoo.language.significance.impl.DefaultSignificanceModelRegistry;
import org.junit.Test;
import java.nio.file.Path;
-import java.util.HashMap;
+import java.util.ArrayList;
+import java.util.List;
import static org.junit.jupiter.api.Assertions.*;
@@ -18,10 +19,10 @@ public class DefaultSignificanceModelRegistryTest {
@Test
public void testDefaultSignificanceModelRegistry() {
- HashMap<Language, Path> models = new HashMap<>();
+ List<Path> models = new ArrayList<>();
- models.put(Language.ENGLISH, Path.of("src/test/models/en.json"));
- models.put(Language.NORWEGIAN_BOKMAL, Path.of("src/test/models/no.json"));
+ models.add(Path.of("src/test/models/docv1.json"));
+ models.add(Path.of("src/test/models/docv2.json"));
DefaultSignificanceModelRegistry defaultSignificanceModelRegistry = new DefaultSignificanceModelRegistry(models);
@@ -39,6 +40,39 @@ public class DefaultSignificanceModelRegistryTest {
assertNotNull(englishModel);
assertNotNull(norwegianModel);
+ assertEquals("test::2", englishModel.getId());
+ assertEquals("test::2", norwegianModel.getId());
+
+ assertEquals(4, englishModel.documentFrequency("test").frequency());
+ assertEquals(14, englishModel.documentFrequency("test").corpusSize());
+
+ assertEquals(3, norwegianModel.documentFrequency("nei").frequency());
+ assertEquals(20, norwegianModel.documentFrequency("nei").corpusSize());
+
+ assertEquals(1, norwegianModel.documentFrequency("non-existent-word").frequency());
+ assertEquals(20, norwegianModel.documentFrequency("non-existent-word").corpusSize());
+
+ }
+
+ @Test
+ public void testDefaultSignificanceModelRegistryInOppsiteOrder() {
+
+ List<Path> models = new ArrayList<>();
+
+ models.add(Path.of("src/test/models/docv2.json"));
+ models.add(Path.of("src/test/models/docv1.json"));
+
+ DefaultSignificanceModelRegistry defaultSignificanceModelRegistry = new DefaultSignificanceModelRegistry(models);
+
+ var englishModel = defaultSignificanceModelRegistry.getModel(Language.ENGLISH);
+ var norwegianModel = defaultSignificanceModelRegistry.getModel(Language.NORWEGIAN_BOKMAL);
+
+ assertNotNull(englishModel);
+ assertNotNull(norwegianModel);
+
+ assertEquals("test::1", englishModel.getId());
+ assertEquals("test::2", norwegianModel.getId());
+
assertEquals(2, englishModel.documentFrequency("test").frequency());
assertEquals(10, englishModel.documentFrequency("test").corpusSize());
@@ -47,6 +81,5 @@ public class DefaultSignificanceModelRegistryTest {
assertEquals(1, norwegianModel.documentFrequency("non-existent-word").frequency());
assertEquals(20, norwegianModel.documentFrequency("non-existent-word").corpusSize());
-
}
}
diff --git a/linguistics/src/test/models/docv1.json b/linguistics/src/test/models/docv1.json
new file mode 100644
index 00000000000..dd211840ede
--- /dev/null
+++ b/linguistics/src/test/models/docv1.json
@@ -0,0 +1,19 @@
+{
+ "version" : "1.0",
+ "id" : "test::1",
+ "description" : "desc",
+ "languages" : {
+ "en": {
+ "description" : "english model",
+ "document-count" : 10,
+ "language" : "en",
+ "word-count" : 4,
+ "document-frequencies" : {
+ "usa" : 2,
+ "hello": 3,
+ "world": 5,
+ "test": 2
+ }
+ }
+ }
+}
diff --git a/linguistics/src/test/models/docv2.json b/linguistics/src/test/models/docv2.json
new file mode 100644
index 00000000000..c7f621b9acb
--- /dev/null
+++ b/linguistics/src/test/models/docv2.json
@@ -0,0 +1,35 @@
+{
+ "version" : "2.0",
+ "id" : "test::2",
+ "description" : "desc",
+ "languages" : {
+ "en": {
+ "description" : "english model",
+ "document-count" : 14,
+ "language" : "en",
+ "word-count" : 5,
+ "document-frequencies" : {
+ "usa" : 2,
+ "hello": 3,
+ "world": 5,
+ "test": 4,
+ "additional": 2
+ }
+ },
+ "nb": {
+ "description" : "norwegian model",
+ "document-count" : 20,
+ "language" : "en",
+ "word-count" : 7,
+ "document-frequencies" : {
+ "usa" : 2,
+ "hello": 10,
+ "verden": 5,
+ "test": 2,
+ "norge": 11,
+ "ja": 12,
+ "nei": 3
+ }
+ }
+ }
+}
diff --git a/linguistics/src/test/models/en.json b/linguistics/src/test/models/en.json
index 50bae5e3451..a46b6a5b51f 100644
--- a/linguistics/src/test/models/en.json
+++ b/linguistics/src/test/models/en.json
@@ -1,14 +1,15 @@
{
"version" : "1.0",
"id" : "test::1",
- "description" : "desc",
- "corpus-size" : 10,
+ "description" : "english model",
+ "document-count" : 10,
"language" : "en",
"word-count" : 4,
- "frequencies" : {
+ "document-frequencies" : {
"usa" : 2,
"hello": 3,
"world": 5,
"test": 2
}
}
+} \ No newline at end of file
diff --git a/linguistics/src/test/models/no.json b/linguistics/src/test/models/no.json
deleted file mode 100644
index 5fca8929e74..00000000000
--- a/linguistics/src/test/models/no.json
+++ /dev/null
@@ -1,17 +0,0 @@
-{
- "version" : "1.0",
- "id" : "test::2",
- "description" : "norsk beskrivelse",
- "corpus-size" : 20,
- "language" : "nb",
- "word-count" : 7,
- "frequencies" : {
- "usa" : 2,
- "hello": 10,
- "verden": 5,
- "test": 2,
- "norge": 11,
- "ja": 12,
- "nei": 3
- }
-}