blob: 684bae3bf975f952921a7992b38e339738b87268 (
plain) (
blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
|
// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.vespa.indexinglanguage.linguistics;
import com.yahoo.language.Language;
import com.yahoo.language.process.StemMode;
import com.yahoo.vespa.configdefinition.IlscriptsConfig;

import java.util.Objects;
/**
 * Mutable holder for the settings of the indexing-language linguistics package:
 * language, stem mode, accent removal, maximum term occurrences and maximum
 * tokenize length.
 *
 * <p>All setters return {@code this} so calls can be chained. Two configs are
 * equal when all five settings are equal.</p>
 *
 * <p>The class declares {@link Cloneable} but does not override {@code clone()};
 * use {@link #AnnotatorConfig(AnnotatorConfig)} to copy an instance.</p>
 *
 * @author Simon Thoresen Hult
 */
public class AnnotatorConfig implements Cloneable {

    private Language language;
    private StemMode stemMode;
    private boolean removeAccents;
    private int maxTermOccurrences;
    private int maxTokenizeLength;

    /** Default for max term occurrences, taken from a default-built {@link IlscriptsConfig}. */
    public static final int DEFAULT_MAX_TERM_OCCURRENCES;

    /** Default for max tokenize length, taken from a default-built {@link IlscriptsConfig}. */
    private static final int DEFAULT_MAX_TOKENIZE_LENGTH;

    static {
        // Build the config once with an empty builder so both defaults follow the config definition.
        IlscriptsConfig defaults = new IlscriptsConfig(new IlscriptsConfig.Builder());
        DEFAULT_MAX_TERM_OCCURRENCES = defaults.maxtermoccurrences();
        DEFAULT_MAX_TOKENIZE_LENGTH = defaults.fieldmatchmaxlength();
    }

    /**
     * Creates a config with language {@code ENGLISH}, stem mode {@code NONE},
     * accent removal off, and the default occurrence and length limits.
     */
    public AnnotatorConfig() {
        language = Language.ENGLISH;
        stemMode = StemMode.NONE;
        removeAccents = false;
        maxTermOccurrences = DEFAULT_MAX_TERM_OCCURRENCES;
        maxTokenizeLength = DEFAULT_MAX_TOKENIZE_LENGTH;
    }

    /**
     * Creates a copy of the given config.
     *
     * @param rhs the config to copy every setting from
     */
    public AnnotatorConfig(AnnotatorConfig rhs) {
        language = rhs.language;
        stemMode = rhs.stemMode;
        removeAccents = rhs.removeAccents;
        maxTermOccurrences = rhs.maxTermOccurrences;
        maxTokenizeLength = rhs.maxTokenizeLength;
    }

    /** Returns the configured language. */
    public Language getLanguage() {
        return language;
    }

    /**
     * Sets the language.
     *
     * @param language the new language
     * @return this, for chaining
     */
    public AnnotatorConfig setLanguage(Language language) {
        this.language = language;
        return this;
    }

    /** Returns the configured stem mode. */
    public StemMode getStemMode() {
        return stemMode;
    }

    /**
     * Sets the stem mode.
     *
     * @param stemMode the new stem mode
     * @return this, for chaining
     */
    public AnnotatorConfig setStemMode(StemMode stemMode) {
        this.stemMode = stemMode;
        return this;
    }

    /**
     * Sets the stem mode from its name, resolved with {@code StemMode.valueOf(name)}.
     * NOTE(review): presumably {@code StemMode} is an enum, in which case an unknown
     * name raises {@link IllegalArgumentException} and the match is case sensitive.
     *
     * @param name the name of the stem mode
     * @return this, for chaining
     */
    public AnnotatorConfig setStemMode(String name) {
        this.stemMode = StemMode.valueOf(name);
        return this;
    }

    /** Returns whether accent removal is enabled. */
    public boolean getRemoveAccents() {
        return removeAccents;
    }

    /**
     * Enables or disables accent removal.
     *
     * @param removeAccents the new value of the flag
     * @return this, for chaining
     */
    public AnnotatorConfig setRemoveAccents(boolean removeAccents) {
        this.removeAccents = removeAccents;
        return this;
    }

    /** Returns the configured maximum number of term occurrences. */
    public int getMaxTermOccurrences() {
        return maxTermOccurrences;
    }

    /**
     * Sets the maximum number of term occurrences.
     *
     * @param maxTermCount the new limit
     * @return this, for chaining
     */
    public AnnotatorConfig setMaxTermOccurrences(int maxTermCount) {
        this.maxTermOccurrences = maxTermCount;
        return this;
    }

    /**
     * Sets the maximum tokenize length. This is the setter matching
     * {@link #getMaxTokenizeLength()}, despite the slightly different name.
     *
     * @param maxTokenizeLength the new limit
     * @return this, for chaining
     */
    public AnnotatorConfig setMaxTokenLength(int maxTokenizeLength) {
        this.maxTokenizeLength = maxTokenizeLength;
        return this;
    }

    /** Returns the configured maximum tokenize length. */
    public int getMaxTokenizeLength() {
        return maxTokenizeLength;
    }

    /** Returns whether the maximum tokenize length differs from its default. */
    public boolean hasNonDefaultMaxTokenLength() {
        return maxTokenizeLength != DEFAULT_MAX_TOKENIZE_LENGTH;
    }

    /**
     * Compares all five settings. Null language or stem mode is tolerated and
     * only equals another null.
     *
     * @param obj the object to compare with
     * @return true if {@code obj} is an {@code AnnotatorConfig} with identical settings
     */
    @Override
    public boolean equals(Object obj) {
        if (this == obj) {
            return true;
        }
        if (!(obj instanceof AnnotatorConfig rhs)) {
            return false;
        }
        return Objects.equals(language, rhs.language)
                && Objects.equals(stemMode, rhs.stemMode)
                && removeAccents == rhs.removeAccents
                && maxTermOccurrences == rhs.maxTermOccurrences
                && maxTokenizeLength == rhs.maxTokenizeLength;
    }

    /**
     * Hashes the same five settings that {@link #equals(Object)} compares.
     *
     * @return the hash code of this config
     */
    @Override
    public int hashCode() {
        // Hash the fixed class rather than getClass(): equals() accepts subclasses via
        // instanceof, so equal objects of different runtime classes must hash alike.
        return AnnotatorConfig.class.hashCode() + Objects.hashCode(language) + Objects.hashCode(stemMode) +
               Boolean.hashCode(removeAccents) + maxTermOccurrences + maxTokenizeLength;
    }

}
|