blob: 6e024c3025e15a88c03101f204de85c38d8191cc (
plain) (
blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
|
// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.language.significance.impl;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.yahoo.language.significance.DocumentFrequency;
import com.yahoo.language.significance.SignificanceModel;
import java.io.IOException;
import java.nio.file.Path;
import java.util.HashMap;
import java.util.Map;
import java.util.TreeMap;
/**
 * A {@link SignificanceModel} backed by an in-memory map from word to document frequency,
 * together with the total number of documents in the corpus the frequencies were taken from.
 *
 * <p>The data comes from a {@link DocumentFrequencyFile}, either supplied directly or
 * deserialized from a file with Jackson.
 *
 * @author MariusArhaug
 */
public class DefaultSignificanceModel implements SignificanceModel {

    /** Document frequency reported for words the model has no entry for. */
    private static final long UNKNOWN_WORD_FREQUENCY = 1;

    private final long corpusSize;
    private final Map<String, Long> frequencies;
    private final String id;

    /**
     * Creates a model from an already loaded document frequency file.
     *
     * @param file source of the word frequencies and the document count
     * @param id   identifier returned by {@link #getId()}
     */
    public DefaultSignificanceModel(DocumentFrequencyFile file, String id) {
        this.frequencies = file.frequencies();
        this.corpusSize = file.documentCount();
        this.id = id;
    }

    /**
     * Creates a model by deserializing a {@link DocumentFrequencyFile} from the given path.
     * A model created this way has no id, so {@link #getId()} returns {@code null}.
     *
     * @param path location of the serialized document frequency file
     * @throws RuntimeException if the file cannot be read or parsed; the underlying
     *                          {@link IOException} is kept as the cause
     */
    public DefaultSignificanceModel(Path path) {
        // This constructor receives no id; assign null explicitly so the field can be final.
        this.id = null;
        ObjectMapper objectMapper = new ObjectMapper();
        try {
            var file = objectMapper.readValue(path.toFile(), DocumentFrequencyFile.class);
            this.frequencies = file.frequencies();
            this.corpusSize = file.documentCount();
        } catch (IOException e) {
            throw new RuntimeException("Failed to load model from " + path, e);
        }
    }

    /**
     * Returns the document frequency of the given word paired with the corpus size.
     * Words without a recorded frequency are reported with a frequency of 1.
     *
     * @param word the word to look up
     * @return the recorded frequency, or the unknown-word default, with the corpus size
     */
    @Override
    public DocumentFrequency documentFrequency(String word) {
        // One lookup instead of containsKey + get. A key mapped to null is treated like a
        // missing word rather than being passed on (and possibly failing on unboxing).
        Long frequency = frequencies.get(word);
        if (frequency != null) {
            return new DocumentFrequency(frequency, corpusSize);
        }
        return new DocumentFrequency(UNKNOWN_WORD_FREQUENCY, corpusSize);
    }

    /**
     * Returns the id given at construction, or {@code null} if the model was loaded
     * from a path.
     */
    @Override
    public String getId() {
        return this.id;
    }
}
|