aboutsummaryrefslogtreecommitdiffstats
path: root/vespaclient-java/src/test/java/com/yahoo
diff options
context:
space:
mode:
authorMariusArhaug <mariusarhaug@hotmail.com>2024-06-21 10:29:30 +0200
committerBjørn Christian Seime <bjorncs@vespa.ai>2024-06-21 13:39:11 +0200
commit0b28acb8655862f1f57693c9e4ccf36fe5f78dbd (patch)
tree9935856f2fa1349180058cf33aa084283892f8d2 /vespaclient-java/src/test/java/com/yahoo
parent7304b3bd208ed5c8997106dcee90e8d00d9e05dc (diff)
Add zst support for significance cli and registry
Diffstat (limited to 'vespaclient-java/src/test/java/com/yahoo')
-rw-r--r--vespaclient-java/src/test/java/com/yahoo/vespasignificance/SignificanceModelGeneratorTest.java55
1 files changed, 48 insertions, 7 deletions
diff --git a/vespaclient-java/src/test/java/com/yahoo/vespasignificance/SignificanceModelGeneratorTest.java b/vespaclient-java/src/test/java/com/yahoo/vespasignificance/SignificanceModelGeneratorTest.java
index 8e1e8fd5627..916fe05ef7b 100644
--- a/vespaclient-java/src/test/java/com/yahoo/vespasignificance/SignificanceModelGeneratorTest.java
+++ b/vespaclient-java/src/test/java/com/yahoo/vespasignificance/SignificanceModelGeneratorTest.java
@@ -5,11 +5,14 @@ package com.yahoo.vespasignificance;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.yahoo.language.significance.impl.DocumentFrequencyFile;
import com.yahoo.language.significance.impl.SignificanceModelFile;
+import io.airlift.compress.zstd.ZstdInputStream;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.io.TempDir;
import java.io.File;
+import java.io.FileInputStream;
import java.io.IOException;
+import java.io.InputStream;
import java.nio.file.Path;
import java.util.HashMap;
@@ -25,7 +28,7 @@ public class SignificanceModelGeneratorTest {
@TempDir
private Path tempDir;
- private ClientParameters.Builder createParameters(String inputPath, String outputPath, String field, String language, String docType) {
+ private ClientParameters.Builder createParameters(String inputPath, String outputPath, String field, String language, String docType, String zstCompression) {
tempDir.toFile().mkdirs();
return new ClientParameters.Builder()
@@ -33,7 +36,8 @@ public class SignificanceModelGeneratorTest {
.setOutputFile(tempDir.resolve(outputPath).toString())
.setField(field)
.setLanguage(language)
- .setDocType(docType);
+ .setDocType(docType)
+ .setZstCompression(zstCompression);
}
private SignificanceModelGenerator createSignificanceModelGenerator(ClientParameters params) {
@@ -44,7 +48,7 @@ public class SignificanceModelGeneratorTest {
void testGenerateSimpleFile() throws IOException {
String inputPath = "no.jsonl";
String outputPath = "output.json";
- ClientParameters params = createParameters(inputPath, outputPath, "text", "NB", "nb").build();
+ ClientParameters params = createParameters(inputPath, outputPath, "text", "NB", "nb", "false").build();
SignificanceModelGenerator generator = createSignificanceModelGenerator(params);
generator.generate();
@@ -68,10 +72,47 @@ public class SignificanceModelGeneratorTest {
}
@Test
+ void testGenerateSimpleFileWithZST() throws IOException {
+ String inputPath = "no.jsonl";
+ ClientParameters params1 = createParameters(inputPath, "output.json", "text", "NB", "nb", "true").build();
+
+ // Throws exception when outputfile does not have .zst extension when using zst compression
+ assertThrows(IllegalArgumentException.class, () -> createSignificanceModelGenerator(params1));
+
+ String outputPath = "output.json.zst";
+ ClientParameters params = createParameters(inputPath, outputPath, "text", "NB", "nb", "true").build();
+
+ SignificanceModelGenerator generator = createSignificanceModelGenerator(params);
+ generator.generate();
+
+
+
+ File outputFile = new File(tempDir.resolve(outputPath ).toString());
+ assertTrue(outputFile.exists());
+
+ InputStream in = new ZstdInputStream(new FileInputStream(outputFile));
+
+ SignificanceModelFile modelFile = objectMapper.readValue(in, SignificanceModelFile.class);
+
+ HashMap<String, DocumentFrequencyFile> languages = modelFile.languages();
+ assertEquals(1, languages.size());
+
+ assertTrue(languages.containsKey("NB"));
+
+ DocumentFrequencyFile documentFrequencyFile = languages.get("NB");
+
+ assertEquals(3, documentFrequencyFile.frequencies().get("fra"));
+ assertEquals(3, documentFrequencyFile.frequencies().get("skriveform"));
+ assertEquals(3, documentFrequencyFile.frequencies().get("kategori"));
+ assertEquals(3, documentFrequencyFile.frequencies().get("eldr"));
+
+ }
+
+ @Test
void testGenerateFileWithMultipleLanguages() throws IOException {
String inputPath = "no.jsonl";
String outputPath = "output.json";
- ClientParameters params1 = createParameters(inputPath, outputPath, "text", "NB", "nb").build();
+ ClientParameters params1 = createParameters(inputPath, outputPath, "text", "NB", "nb", "false").build();
SignificanceModelGenerator generator = createSignificanceModelGenerator(params1);
generator.generate();
@@ -79,7 +120,7 @@ public class SignificanceModelGeneratorTest {
assertTrue(outputFile.exists());
String inputPath2 = "en.jsonl";
- ClientParameters params2 = createParameters(inputPath2, outputPath, "text", "EN", "en").build();
+ ClientParameters params2 = createParameters(inputPath2, outputPath, "text", "EN", "en", "false").build();
generator = createSignificanceModelGenerator(params2);
generator.generate();
@@ -113,7 +154,7 @@ public class SignificanceModelGeneratorTest {
void testOverwriteExistingDocumentFrequencyLanguage() throws IOException {
String inputPath = "no.jsonl";
String outputPath = "output.json";
- ClientParameters params1 = createParameters(inputPath, outputPath, "text", "NB", "nb").build();
+ ClientParameters params1 = createParameters(inputPath, outputPath, "text", "NB", "nb", "false").build();
SignificanceModelGenerator generator = createSignificanceModelGenerator(params1);
generator.generate();
@@ -134,7 +175,7 @@ public class SignificanceModelGeneratorTest {
assertFalse(oldDf.frequencies().containsKey("nytt"));
String inputPath2 = "no_2.jsonl";
- ClientParameters params2 = createParameters(inputPath2, outputPath, "text", "NB", "nb").build();
+ ClientParameters params2 = createParameters(inputPath2, outputPath, "text", "NB", "nb", "false").build();
SignificanceModelGenerator generator2 = createSignificanceModelGenerator(params2);
generator2.generate();