about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author MariusArhaug <mariusarhaug@hotmail.com> 2024-05-23 14:55:24 +0200
committer MariusArhaug <mariusarhaug@hotmail.com> 2024-05-23 14:59:01 +0200
commit 3c280b7b800653b422ae5fb9049aa2eabaa4998f (patch)
tree ab0bdb6a5f0d9607c4f4bbb98b0ff7e0ab691f38
parent 2b08f380d52c7c5b5e5678f5f582fb93647c2529 (diff)
Update document frequency from hashmap to treemap
-rw-r--r-- linguistics/src/main/java/com/yahoo/language/significance/impl/DefaultSignificanceModel.java 4
-rw-r--r-- linguistics/src/main/java/com/yahoo/language/significance/impl/DocumentFrequencyFile.java 8
-rw-r--r-- vespaclient-java/src/main/java/com/yahoo/vespasignificance/SignificanceModelGenerator.java 17
3 files changed, 20 insertions, 9 deletions
diff --git a/linguistics/src/main/java/com/yahoo/language/significance/impl/DefaultSignificanceModel.java b/linguistics/src/main/java/com/yahoo/language/significance/impl/DefaultSignificanceModel.java
index 3244b8373ad..6e024c3025e 100644
--- a/linguistics/src/main/java/com/yahoo/language/significance/impl/DefaultSignificanceModel.java
+++ b/linguistics/src/main/java/com/yahoo/language/significance/impl/DefaultSignificanceModel.java
@@ -8,6 +8,8 @@ import com.yahoo.language.significance.SignificanceModel;
import java.io.IOException;
import java.nio.file.Path;
import java.util.HashMap;
+import java.util.Map;
+import java.util.TreeMap;
/**
*
@@ -15,7 +17,7 @@ import java.util.HashMap;
*/
public class DefaultSignificanceModel implements SignificanceModel {
private final long corpusSize;
- private final HashMap<String, Long> frequencies;
+ private final Map<String, Long> frequencies;
private String id;
diff --git a/linguistics/src/main/java/com/yahoo/language/significance/impl/DocumentFrequencyFile.java b/linguistics/src/main/java/com/yahoo/language/significance/impl/DocumentFrequencyFile.java
index 9b7cbae834a..34e73e1b547 100644
--- a/linguistics/src/main/java/com/yahoo/language/significance/impl/DocumentFrequencyFile.java
+++ b/linguistics/src/main/java/com/yahoo/language/significance/impl/DocumentFrequencyFile.java
@@ -7,6 +7,8 @@ import com.fasterxml.jackson.annotation.JsonInclude;
import com.fasterxml.jackson.annotation.JsonProperty;
import java.util.HashMap;
+import java.util.Map;
+import java.util.TreeMap;
/**
*
@@ -19,13 +21,13 @@ public class DocumentFrequencyFile {
private final long documentCount;
- private final HashMap<String, Long> frequencies;
+ private final Map<String, Long> frequencies;
@JsonCreator
public DocumentFrequencyFile(
@JsonProperty("description") String description,
@JsonProperty("document-count") long documentCount,
- @JsonProperty("document-frequencies") HashMap<String, Long> frequencies) {
+ @JsonProperty("document-frequencies") Map<String, Long> frequencies) {
this.description = description;
this.documentCount = documentCount;
this.frequencies = frequencies;
@@ -38,5 +40,5 @@ public class DocumentFrequencyFile {
public long documentCount() { return documentCount; }
@JsonProperty("document-frequencies")
- public HashMap<String, Long> frequencies() { return frequencies; }
+ public Map<String, Long> frequencies() { return frequencies; }
}
diff --git a/vespaclient-java/src/main/java/com/yahoo/vespasignificance/SignificanceModelGenerator.java b/vespaclient-java/src/main/java/com/yahoo/vespasignificance/SignificanceModelGenerator.java
index e27158da3cb..7972a70cd10 100644
--- a/vespaclient-java/src/main/java/com/yahoo/vespasignificance/SignificanceModelGenerator.java
+++ b/vespaclient-java/src/main/java/com/yahoo/vespasignificance/SignificanceModelGenerator.java
@@ -38,7 +38,9 @@ import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.HashMap;
+import java.util.Map;
import java.util.Set;
+import java.util.TreeMap;
import java.util.stream.Collectors;
import java.util.stream.StreamSupport;
@@ -49,7 +51,8 @@ public class SignificanceModelGenerator {
private final ClientParameters clientParameters;
private final Tokenizer tokenizer;
- private final HashMap<String, Long> documentFrequency = new HashMap<>();
+ private final TreeMap<String, Long> documentFrequency = new TreeMap<>();
+
private final Language language;
private final ObjectMapper objectMapper;
private final static JsonFactory parserFactory = new JsonFactory();
@@ -110,10 +113,9 @@ public class SignificanceModelGenerator {
put(clientParameters.language, new DocumentFrequencyFile(DOC_FREQ_DESCRIPTION, pageCount, getFinalDocumentFrequency()));
}};
- modelFile = new SignificanceModelFile(VERSION, ID, SIGNIFICANCE_DESCRIPTION, languages);
+ modelFile = new SignificanceModelFile(VERSION, ID, SIGNIFICANCE_DESCRIPTION + clientParameters.inputFile, languages);
}
try {
- //objectMapper.setVisibility(PropertyAccessor.FIELD, JsonAutoDetect.Visibility.ANY);
ObjectWriter writer = objectMapper.writerWithDefaultPrettyPrinter();
writer.writeValue(new File(clientParameters.outputFile), modelFile);
} catch (IOException e) {
@@ -139,9 +141,14 @@ public class SignificanceModelGenerator {
}
}
- public HashMap<String, Long> getFinalDocumentFrequency() {
+ public Map<String, Long> getFinalDocumentFrequency() {
return documentFrequency.entrySet().stream()
.filter(k -> k.getValue() > 1)
- .collect(HashMap::new, (m, v) -> m.put(v.getKey(), v.getValue()), HashMap::putAll);
+ .collect(Collectors.toMap(
+ Map.Entry::getKey,
+ Map.Entry::getValue,
+ (e1, e2) -> e1,
+ TreeMap::new
+ ));
}
}