about | summary | refs | log | tree | commit | diff | stats
path: root/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/linguistics/AnnotatorConfig.java
blob: 6522e284fc8479c7da919f502fe5916b9a72ac3c (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.vespa.indexinglanguage.linguistics;

import com.yahoo.language.Language;
import com.yahoo.language.process.StemMode;
import com.yahoo.vespa.configdefinition.IlscriptsConfig;

import java.util.Objects;

/**
 * Mutable holder for annotation settings: a {@link Language}, a {@link StemMode}, an accent-removal
 * flag and three integer limits (max term occurrences, max token length and max tokenize length).
 *
 * <p>The default values of the three limits are read once, during class initialization, from a
 * default-built {@link IlscriptsConfig}. All setters return {@code this} so calls can be chained.
 * No synchronization is performed by this class.</p>
 *
 * <p>NOTE(review): the class implements {@link Cloneable} but does not override {@code clone()};
 * the copy constructor is the copying mechanism visible here.</p>
 *
 * @author Simon Thoresen Hult
 */
public class AnnotatorConfig implements Cloneable {

    private Language language;
    private StemMode stemMode;
    private boolean removeAccents;
    private int maxTermOccurrences;
    private int maxTokenLength;
    private int maxTokenizeLength;

    /** Default for {@link #getMaxTermOccurrences()}, taken from {@code IlscriptsConfig.maxtermoccurrences()}. */
    public static final int DEFAULT_MAX_TERM_OCCURRENCES;
    /** Default for {@link #getMaxTokenLength()}, taken from {@code IlscriptsConfig.maxtokenlength()}. */
    private static final int DEFAULT_MAX_TOKEN_LENGTH;
    /** Default for {@link #getMaxTokenizeLength()}, taken from {@code IlscriptsConfig.fieldmatchmaxlength()}. */
    private static final int DEFAULT_MAX_TOKENIZE_LENGTH;

    static {
        // Build a config without overriding anything so that the config definition's own defaults are used.
        IlscriptsConfig defaults = new IlscriptsConfig(new IlscriptsConfig.Builder());
        DEFAULT_MAX_TERM_OCCURRENCES = defaults.maxtermoccurrences();
        DEFAULT_MAX_TOKEN_LENGTH = defaults.maxtokenlength();
        DEFAULT_MAX_TOKENIZE_LENGTH = defaults.fieldmatchmaxlength();
    }

    /**
     * Creates a config with English language, no stemming, accents kept, and the three limits set to
     * their defaults.
     */
    public AnnotatorConfig() {
        language = Language.ENGLISH;
        stemMode = StemMode.NONE;
        removeAccents = false;
        maxTermOccurrences = DEFAULT_MAX_TERM_OCCURRENCES;
        maxTokenLength = DEFAULT_MAX_TOKEN_LENGTH;
        maxTokenizeLength = DEFAULT_MAX_TOKENIZE_LENGTH;
    }

    /**
     * Creates a field-by-field copy of another config.
     *
     * @param rhs the config to copy; must not be null
     */
    public AnnotatorConfig(AnnotatorConfig rhs) {
        language = rhs.language;
        stemMode = rhs.stemMode;
        removeAccents = rhs.removeAccents;
        maxTermOccurrences = rhs.maxTermOccurrences;
        maxTokenLength = rhs.maxTokenLength;
        maxTokenizeLength = rhs.maxTokenizeLength;
    }

    /** Returns the configured language. */
    public Language getLanguage() {
        return language;
    }

    /**
     * Sets the language.
     *
     * @param language the new language; stored as given, without validation
     * @return this, for chaining
     */
    public AnnotatorConfig setLanguage(Language language) {
        this.language = language;
        return this;
    }

    /** Returns the configured stem mode. */
    public StemMode getStemMode() {
        return stemMode;
    }

    /**
     * Sets the stem mode.
     *
     * @param stemMode the new stem mode; stored as given, without validation
     * @return this, for chaining
     */
    public AnnotatorConfig setStemMode(StemMode stemMode) {
        this.stemMode = stemMode;
        return this;
    }

    /**
     * Sets the stem mode from the exact name of a {@link StemMode} constant.
     *
     * @param name the enum constant name, resolved with {@link StemMode#valueOf(String)}
     * @return this, for chaining
     * @throws IllegalArgumentException if no constant has the given name
     * @throws NullPointerException if name is null
     */
    public AnnotatorConfig setStemMode(String name) {
        this.stemMode = StemMode.valueOf(name);
        return this;
    }

    /** Returns the accent-removal flag. */
    public boolean getRemoveAccents() {
        return removeAccents;
    }

    /**
     * Sets the accent-removal flag.
     *
     * @param removeAccents the new flag value
     * @return this, for chaining
     */
    public AnnotatorConfig setRemoveAccents(boolean removeAccents) {
        this.removeAccents = removeAccents;
        return this;
    }

    /** Returns the configured max term occurrences. */
    public int getMaxTermOccurrences() {
        return maxTermOccurrences;
    }

    /**
     * Sets the max term occurrences.
     *
     * @param maxTermCount the new value; stored as given, without range checking
     * @return this, for chaining
     */
    public AnnotatorConfig setMaxTermOccurrences(int maxTermCount) {
        this.maxTermOccurrences = maxTermCount;
        return this;
    }

    /**
     * Sets the max token length.
     *
     * @param maxTokenLength the new value; stored as given, without range checking
     * @return this, for chaining
     */
    public AnnotatorConfig setMaxTokenLength(int maxTokenLength) {
        this.maxTokenLength = maxTokenLength;
        return this;
    }

    /** Returns the configured max token length. */
    public int getMaxTokenLength() {
        return maxTokenLength;
    }

    /** Returns the default max token length read from the default {@link IlscriptsConfig}. */
    public static int getDefaultMaxTokenLength() { return DEFAULT_MAX_TOKEN_LENGTH; }

    /**
     * Sets the max tokenize length.
     *
     * @param maxTokenizeLength the new value; stored as given, without range checking
     * @return this, for chaining
     */
    public AnnotatorConfig setMaxTokenizeLength(int maxTokenizeLength) {
        this.maxTokenizeLength = maxTokenizeLength;
        return this;
    }

    /** Returns the configured max tokenize length. */
    public int getMaxTokenizeLength() {
        return maxTokenizeLength;
    }

    /** Returns whether the max token length differs from its default. */
    public boolean hasNonDefaultMaxTokenLength() {
        return maxTokenLength != DEFAULT_MAX_TOKEN_LENGTH;
    }

    /** Returns whether the max tokenize length differs from its default. */
    public boolean hasNonDefaultMaxTokenizeLength() {
        return maxTokenizeLength != DEFAULT_MAX_TOKENIZE_LENGTH;
    }

    /** Returns whether the max term occurrences differs from its default. */
    public boolean hasNonDefaultMaxTermOccurrences() {
        return maxTermOccurrences != DEFAULT_MAX_TERM_OCCURRENCES;
    }

    /**
     * Compares all six settings for equality. Any {@code AnnotatorConfig} instance, including
     * instances of subclasses, is accepted as the other operand. Null language or stem mode values
     * are compared null-safely.
     */
    @Override
    public boolean equals(Object obj) {
        if (this == obj) {
            return true;
        }
        if (!(obj instanceof AnnotatorConfig rhs)) {
            return false;
        }
        return removeAccents == rhs.removeAccents
               && maxTermOccurrences == rhs.maxTermOccurrences
               && maxTokenLength == rhs.maxTokenLength
               && maxTokenizeLength == rhs.maxTokenizeLength
               && Objects.equals(language, rhs.language)
               && Objects.equals(stemMode, rhs.stemMode);
    }

    /**
     * Hashes exactly the fields compared by {@link #equals(Object)}. The runtime class is deliberately
     * left out: equals accepts subclass instances, so objects that compare equal must not get
     * different hash codes merely because their classes differ.
     */
    @Override
    public int hashCode() {
        return Objects.hash(language, stemMode, removeAccents,
                            maxTermOccurrences, maxTokenLength, maxTokenizeLength);
    }

    /** Returns a human-readable listing of all settings, intended for logging and debugging. */
    @Override
    public String toString() {
        return "AnnotatorConfig{" +
               "language=" + language +
               ", stemMode=" + stemMode +
               ", removeAccents=" + removeAccents +
               ", maxTermOccurrences=" + maxTermOccurrences +
               ", maxTokenLength=" + maxTokenLength +
               ", maxTokenizeLength=" + maxTokenizeLength +
               '}';
    }

}