aboutsummaryrefslogtreecommitdiffstats
path: root/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/linguistics/AnnotatorConfig.java
blob: 7b6f350d8310b2d35798a4e2c57d028ddcb0fde5 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.vespa.indexinglanguage.linguistics;

import com.yahoo.language.Language;
import com.yahoo.language.process.StemMode;
import com.yahoo.vespa.configdefinition.IlscriptsConfig;

import java.util.Objects;

/**
 * Mutable holder for the settings stored in this class: a {@link Language}, a {@link StemMode},
 * an accent-removal flag, a maximum number of term occurrences and a maximum tokenize length.
 *
 * <p>All setters return {@code this} so calls can be chained. The two numeric limits default to the
 * values of a default-built {@link IlscriptsConfig}.</p>
 *
 * @author Simon Thoresen Hult
 */
public class AnnotatorConfig implements Cloneable {

    private Language language;
    private StemMode stemMode;
    private boolean removeAccents;
    private int maxTermOccurrences;
    private int maxTokenizeLength;

    /** The {@code maxtermoccurrences} value of a default-built {@link IlscriptsConfig}. */
    public static final int DEFAULT_MAX_TERM_OCCURRENCES;
    /** The {@code fieldmatchmaxlength} value of a default-built {@link IlscriptsConfig}. */
    private static final int DEFAULT_MAX_TOKENIZE_LENGTH;

    static {
        // Read the defaults from the config definition itself rather than duplicating the numbers here.
        IlscriptsConfig defaults = new IlscriptsConfig(new IlscriptsConfig.Builder());
        DEFAULT_MAX_TERM_OCCURRENCES = defaults.maxtermoccurrences();
        DEFAULT_MAX_TOKENIZE_LENGTH = defaults.fieldmatchmaxlength();
    }

    /**
     * Creates a config with English language, no stemming, no accent removal and the default
     * numeric limits.
     */
    public AnnotatorConfig() {
        language = Language.ENGLISH;
        stemMode = StemMode.NONE;
        removeAccents = false;
        maxTermOccurrences = DEFAULT_MAX_TERM_OCCURRENCES;
        maxTokenizeLength = DEFAULT_MAX_TOKENIZE_LENGTH;
    }

    /**
     * Creates a field-by-field copy of the given config.
     *
     * @param rhs the config to copy the settings from
     */
    public AnnotatorConfig(AnnotatorConfig rhs) {
        language = rhs.language;
        stemMode = rhs.stemMode;
        removeAccents = rhs.removeAccents;
        maxTermOccurrences = rhs.maxTermOccurrences;
        maxTokenizeLength = rhs.maxTokenizeLength;
    }

    /** Returns the language currently set. */
    public Language getLanguage() {
        return language;
    }

    /**
     * Sets the language.
     *
     * @param language the new language
     * @return this, for chaining
     */
    public AnnotatorConfig setLanguage(Language language) {
        this.language = language;
        return this;
    }

    /** Returns the stem mode currently set. */
    public StemMode getStemMode() {
        return stemMode;
    }

    /**
     * Sets the stem mode.
     *
     * @param stemMode the new stem mode
     * @return this, for chaining
     */
    public AnnotatorConfig setStemMode(StemMode stemMode) {
        this.stemMode = stemMode;
        return this;
    }

    /**
     * Sets the stem mode from its name by delegating to {@link StemMode#valueOf(String)}; whatever
     * that call throws for an unrecognized name propagates to the caller.
     *
     * @param name the name of the stem mode to use
     * @return this, for chaining
     */
    public AnnotatorConfig setStemMode(String name) {
        this.stemMode = StemMode.valueOf(name);
        return this;
    }

    /** Returns the accent-removal flag. */
    public boolean getRemoveAccents() {
        return removeAccents;
    }

    /**
     * Sets the accent-removal flag.
     *
     * @param removeAccents the new flag value
     * @return this, for chaining
     */
    public AnnotatorConfig setRemoveAccents(boolean removeAccents) {
        this.removeAccents = removeAccents;
        return this;
    }

    /** Returns the maximum number of term occurrences currently set. */
    public int getMaxTermOccurrences() {
        return maxTermOccurrences;
    }

    /**
     * Sets the maximum number of term occurrences.
     *
     * @param maxTermCount the new limit
     * @return this, for chaining
     */
    public AnnotatorConfig setMaxTermOccurrences(int maxTermCount) {
        this.maxTermOccurrences = maxTermCount;
        return this;
    }

    /**
     * Sets the maximum tokenize length, i.e. the value returned by {@link #getMaxTokenizeLength()}.
     *
     * @param maxTokenizeLength the new limit
     * @return this, for chaining
     */
    public AnnotatorConfig setMaxTokenLength(int maxTokenizeLength) {
        this.maxTokenizeLength = maxTokenizeLength;
        return this;
    }

    /** Returns the maximum tokenize length currently set. */
    public int getMaxTokenizeLength() {
        return maxTokenizeLength;
    }

    /** Returns whether the maximum tokenize length differs from its default value. */
    public boolean hasNonDefaultMaxTokenLength() {
        return maxTokenizeLength != DEFAULT_MAX_TOKENIZE_LENGTH;
    }

    /** Returns whether the maximum term occurrences differs from {@link #DEFAULT_MAX_TERM_OCCURRENCES}. */
    public boolean hasNonDefaultMaxTermOccurrences() {
        return maxTermOccurrences != DEFAULT_MAX_TERM_OCCURRENCES;
    }

    /**
     * Compares all five settings. Any instance of this class, including subclasses, with the same
     * settings is considered equal.
     *
     * @param obj the object to compare with
     * @return true if {@code obj} is an {@code AnnotatorConfig} holding the same settings
     */
    @Override
    public boolean equals(Object obj) {
        if (this == obj) {
            return true;
        }
        if (!(obj instanceof AnnotatorConfig rhs)) {
            return false;
        }
        // Objects.equals: the setters accept null, so the reference fields must be compared null-safely.
        return Objects.equals(language, rhs.language)
               && Objects.equals(stemMode, rhs.stemMode)
               && removeAccents == rhs.removeAccents
               && maxTermOccurrences == rhs.maxTermOccurrences
               && maxTokenizeLength == rhs.maxTokenizeLength;
    }

    /**
     * Returns a hash of the same five settings that {@link #equals(Object)} compares.
     */
    @Override
    public int hashCode() {
        // AnnotatorConfig.class rather than getClass(): equals() accepts subclass instances, so the
        // runtime class must not contribute to the hash or equal objects could hash differently.
        // Objects.hashCode yields 0 for null, keeping this consistent with the null-safe equals().
        return AnnotatorConfig.class.hashCode() + Objects.hashCode(language) + Objects.hashCode(stemMode) +
               Boolean.hashCode(removeAccents) + maxTermOccurrences + maxTokenizeLength;
    }

}