1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
|
// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.language.significance.impl;
import com.fasterxml.jackson.annotation.JsonCreator;
import com.fasterxml.jackson.annotation.JsonIgnoreProperties;
import com.fasterxml.jackson.annotation.JsonProperty;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.yahoo.language.significance.DocumentFrequency;
import com.yahoo.language.significance.SignificanceModel;
import java.nio.file.Path;
import java.util.HashMap;
/**
 * {@link SignificanceModel} backed by a JSON model file that is read once, at construction time.
 *
 * <p>The file is deserialized with Jackson into a {@link SignificanceModelFile}. Only its
 * {@code corpus-size} and {@code frequencies} entries are used when answering lookups.
 *
 * @author MariusArhaug
 */
public class DefaultSignificanceModel implements SignificanceModel {

    /** Value of the {@code corpus-size} entry of the loaded model file. */
    private final long corpusSize;

    /** Per-word counts from the {@code frequencies} entry of the model file; never null. */
    private final HashMap<String, Long> frequencies;

    /** Location the model was loaded from. */
    private final Path path;

    /**
     * In-memory representation of the JSON model file.
     * Properties not listed here are ignored when the file is read.
     */
    @JsonIgnoreProperties(ignoreUnknown = true)
    public static class SignificanceModelFile {

        private final String version;
        private final String id;
        private final String description;
        private final long corpusSize;
        private final String language;
        private final long wordCount;
        private final HashMap<String, Long> frequencies;

        /**
         * Creates a model file representation; used by Jackson when deserializing.
         *
         * @param version     value of the {@code version} property
         * @param id          value of the {@code id} property
         * @param description value of the {@code description} property
         * @param corpusSize  value of the {@code corpus-size} property
         * @param language    value of the {@code language} property
         * @param wordCount   value of the {@code word-count} property
         * @param frequencies value of the {@code frequencies} property; null if the property is absent
         */
        @JsonCreator
        public SignificanceModelFile(
                @JsonProperty("version") String version,
                @JsonProperty("id") String id,
                @JsonProperty("description") String description,
                @JsonProperty("corpus-size") long corpusSize,
                @JsonProperty("language") String language,
                @JsonProperty("word-count") long wordCount,
                @JsonProperty("frequencies") HashMap<String, Long> frequencies) {
            this.version = version;
            this.id = id;
            this.description = description;
            this.corpusSize = corpusSize;
            this.language = language;
            this.wordCount = wordCount;
            this.frequencies = frequencies;
        }

        /** Returns the {@code version} property. */
        @JsonProperty("version")
        public String version() { return version; }

        /** Returns the {@code id} property. */
        @JsonProperty("id")
        public String id() { return id; }

        /** Returns the {@code description} property. */
        @JsonProperty("description")
        public String description() { return description; }

        /** Returns the {@code corpus-size} property. */
        @JsonProperty("corpus-size")
        public long corpusSize() { return corpusSize; }

        /** Returns the {@code language} property. */
        @JsonProperty("language")
        public String language() { return language; }

        /** Returns the {@code frequencies} property, or null if it was absent from the file. */
        @JsonProperty("frequencies")
        public HashMap<String, Long> frequencies() { return frequencies; }

        /** Returns the {@code word-count} property. */
        @JsonProperty("word-count")
        public long wordCount() { return wordCount; }
    }

    /**
     * Loads the model stored at the given path.
     *
     * @param path location of the JSON model file
     * @throws RuntimeException if the file cannot be read or parsed; the underlying failure is the cause
     */
    public DefaultSignificanceModel(Path path) {
        this.path = path;
        ObjectMapper objectMapper = new ObjectMapper();
        try {
            SignificanceModelFile model = objectMapper.readValue(path.toFile(), SignificanceModelFile.class);
            this.corpusSize = model.corpusSize();
            // A file without a "frequencies" entry deserializes to null; fall back to an empty
            // table so lookups return the default instead of failing with a NullPointerException.
            HashMap<String, Long> loaded = model.frequencies();
            this.frequencies = (loaded != null) ? loaded : new HashMap<>();
        } catch (Exception e) {
            throw new RuntimeException("Failed to load model from " + path, e);
        }
    }

    /**
     * Looks up the stored count for a word.
     *
     * @param word the word to look up
     * @return the stored count for {@code word} together with the corpus size; a count of 1 is
     *         used when the model has no (non-null) entry for the word
     */
    @Override
    public DocumentFrequency documentFrequency(String word) {
        // Single lookup: a missing key and a key mapped to null both yield the default.
        Long frequency = frequencies.get(word);
        if (frequency == null) {
            return new DocumentFrequency(1, corpusSize);
        }
        return new DocumentFrequency(frequency, corpusSize);
    }
}
|