aboutsummaryrefslogtreecommitdiffstats
path: root/container-search/src/main/java/com/yahoo/prelude/Index.java
blob: af8f63ab9f2393934384465db224ffc1a289f1ce (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.prelude;

import com.yahoo.language.process.StemMode;

import java.util.ArrayList;
import java.util.Collections;
import java.util.Iterator;
import java.util.List;
import java.util.Set;

/**
 * Information about configured settings of a field or field collection (an actual index or not) in a search definition.
 * There are two types of settings:
 * <ul>
 * <li><i>Typed commands</i> are checked using a particular is/get method
 * <li><i>Untyped commands</i> are checked using hasCommand and commandIterator
 * </ul>
 * addCommand handles both types: a command string it recognizes is converted into typed state,
 * while any other string is kept as an untyped command.
 *
 * @author Steinar Knutsen
 * @author bratseth
 */
public class Index {

    /** The null index - don't use this for name lookups */
    public static final Index nullIndex = new Index("(null)");

    private final String name;

    private final List<String> aliases = new ArrayList<>();

    // The state resulting from adding commands to this (using addCommand)
    private boolean tensor = false;
    private boolean uriIndex = false;
    private boolean hostIndex = false;
    private StemMode stemMode = StemMode.NONE;
    private boolean isAttribute = false;
    private boolean isIndex = false;
    private boolean isDefaultPosition = false;
    private boolean dynamicSummary = false;
    private boolean highlightSummary = false;
    private boolean lowercase = false;
    private boolean plainTokens = false;
    private boolean multivalue = false;
    private boolean fastSearch = false;
    private boolean normalize = false;
    private boolean literalBoost = false;
    private boolean numerical = false;
    private boolean string = false;
    private boolean predicate = false;

    private long predicateUpperBound = Long.MAX_VALUE;
    private long predicateLowerBound = Long.MIN_VALUE;

    /** True if this is an <i>exact</i> index - which should match tokens containing any characters */
    private boolean exact = false;

    private boolean isNGram = false;
    private int gramSize = 2;

    /** Whether implicit phrases should lead to a phrase item or an and item. */
    private boolean phraseSegmenting = false;

    /** The string terminating an exact token in this index, or null to use the default (space) */
    private String exactTerminator = null;

    /** Commands which are not converted into a field */
    private final Set<String> commands = new java.util.HashSet<>();

    /** All the commands added to this, including those converted to fields above */
    private final List<String> allCommands = new ArrayList<>();

    private static final String CMD_STRING = "string";

    /**
     * Creates an index with no aliases and no commands.
     *
     * @param name the canonical name of this index
     */
    public Index(String name) {
        this.name = name;
    }

    /** Adds an alias of this index */
    public void addAlias(String alias) { aliases.add(alias); }

    /** Returns an unmodifiable list of the aliases of this index (not including the index proper name) */
    public List<String> aliases() { return Collections.unmodifiableList(aliases); }

    /**
     * Returns the canonical name of this index, unless it
     * is the null index, which doesn't have a canonical name
     */
    public String getName() {
        return name;
    }

    /** Returns whether this is a uri index (set by the "fullurl" command) */
    public boolean isUriIndex() {
        return uriIndex;
    }

    /** Returns whether this is a default position (set by the "default-position" command) */
    public boolean isDefaultPosition() {
        return isDefaultPosition;
    }

    public void setDefaultPosition(boolean v) {
        isDefaultPosition = v;
    }

    public void setUriIndex(boolean uriIndex) {
        this.uriIndex = uriIndex;
    }

    /** Returns whether this is a host index (set by the "urlhost" command) */
    public boolean isHostIndex() {
        return hostIndex;
    }

    public void setHostIndex(boolean hostIndex) {
        this.hostIndex = hostIndex;
    }

    /** Returns the stem mode of this index (default: StemMode.NONE) */
    public StemMode getStemMode() {
        return stemMode;
    }

    public void setStemMode(StemMode stemMode) {
        this.stemMode = stemMode;
    }

    /**
     * Sets the stem mode from its name.
     *
     * @param name the stem mode name, resolved using StemMode.valueOf
     */
    public void setStemMode(String name) {
        this.stemMode = StemMode.valueOf(name);
    }

    /**
     * Adds a typed or untyped command string to this.
     * A recognized command updates the corresponding typed setting, any other command is stored
     * as an untyped command. The literal string is recorded in {@link #allCommands()} in both cases,
     * but only once it has been applied: If interpreting the command throws (for example on a
     * non-numeric ngram size) nothing is recorded.
     *
     * @param command the command string to add
     * @return this for chaining
     */
    public Index addCommand(String command) {
        if ( ! applyParameterizedCommand(command) && ! applySimpleCommand(command)) {
            commands.add(command);
        }
        allCommands.add(command); // Recorded last so that a command which fails to parse leaves no trace
        return this;
    }

    /**
     * Applies the command if it is one of the typed commands identified by a prefix.
     *
     * @return true if the command was recognized and applied, false if it is some other command
     */
    private boolean applyParameterizedCommand(String command) {
        if (command.startsWith("type tensor(") || command.startsWith("type tensor<")) { // TODO: Type info can replace numerical, predicate, multivalue
            setTensor(true);
        } else if (command.startsWith("stem ") || command.startsWith("stem:")) {
            setStemMode(command.substring(5)); // Both prefixes are 5 characters long
        } else if (command.startsWith("exact ")) {
            setExact(true, command.substring("exact ".length()));
        } else if (command.startsWith("ngram ")) {
            setNGram(true, Integer.parseInt(command.substring("ngram ".length())));
        } else if (command.startsWith("predicate-bounds ")) {
            setPredicateBounds(command.substring("predicate-bounds ".length()));
        } else if (command.startsWith("phrase-segmenting ")) {
            setPhraseSegmenting(Boolean.parseBoolean(command.substring("phrase-segmenting ".length())));
        } else {
            return false;
        }
        return true;
    }

    /**
     * Applies the command if it is one of the typed commands identified by an exact string.
     *
     * @return true if the command was recognized and applied, false if it is some other command
     */
    private boolean applySimpleCommand(String command) {
        switch (command) {
            case "fullurl": setUriIndex(true); return true;
            case "urlhost": setHostIndex(true); return true;
            case "stem": setStemMode(StemMode.SHORTEST); return true;
            case "word": setExact(true, null); return true;
            case "exact": setExact(true, " "); return true;
            case "dynteaser": setDynamicSummary(true); return true;
            case "highlight": setHighlightSummary(true); return true;
            case "lowercase": setLowercase(true); return true;
            case "attribute": setAttribute(true); return true;
            case "index": setIndex(true); return true;
            case "default-position": setDefaultPosition(true); return true;
            case "plain-tokens": setPlainTokens(true); return true;
            case "multivalue": setMultivalue(true); return true;
            case "fast-search": setFastSearch(true); return true;
            case "normalize": setNormalize(true); return true;
            case "literal-boost": setLiteralBoost(true); return true;
            case "numerical": setNumerical(true); return true;
            case "predicate": setPredicate(true); return true;
            case "phrase-segmenting": setPhraseSegmenting(true); return true;
            case CMD_STRING: setString(true); return true;
            default: return false;
        }
    }

    private void setTensor(boolean tensor) {
        this.tensor = tensor;
    }

    /** Returns whether this is a tensor field (set by a "type tensor(...)" or "type tensor&lt;...&gt;" command) */
    public boolean isTensor() { return tensor; }

    /**
     * Sets the predicate bounds from a string on the form "[lower..upper]", where either number may be
     * left out to leave that side unbounded (Long.MIN_VALUE and Long.MAX_VALUE respectively).
     * Both bounds are parsed before any of them is assigned, so a malformed string leaves
     * the current bounds untouched.
     */
    private void setPredicateBounds(String bounds) {
        int separator = bounds.indexOf("..");
        long lower = bounds.startsWith("[..")
                     ? Long.MIN_VALUE
                     : Long.parseLong(bounds.substring(1, separator));
        long upper = bounds.endsWith("..]")
                     ? Long.MAX_VALUE
                     : Long.parseLong(bounds.substring(separator + 2, bounds.length() - 1));
        predicateLowerBound = lower;
        predicateUpperBound = upper;
    }

    /** Sets whether terms in this field are lowercased when indexing. */
    public void setLowercase(boolean lowercase) {
        this.lowercase = lowercase;
    }

    /** Returns whether terms in this field are lowercased when indexing. */
    public boolean isLowercase() {
        return lowercase;
    }

    /** Returns an iterator of all the untyped commands of this */
    public Iterator<String> commandIterator() {
        return commands.iterator();
    }

    /** Checks whether this has the given (exact) <i>untyped</i> command string */
    public boolean hasCommand(String commandString) {
        return commands.contains(commandString);
    }

    /**
     * Set whether this index should match any kind of characters
     *
     * @param exact true to make this index match any kind of characters, not just word and digit ones
     * @param terminator the terminator of an exact sequence (one or more characters),
     *        or null to use the default (space)
     */
    public void setExact(boolean exact, String terminator) {
        this.exact = exact;
        this.exactTerminator = terminator;
    }

    /** Returns whether this is an exact index, which should match tokens containing any characters */
    public boolean isExact() { return exact; }

    /** Returns the string terminating an exact sequence in this index, or null to use the default (space) */
    public String getExactTerminator() { return exactTerminator; }

    /** Returns true if this is an ngram index (default: false) */
    public boolean isNGram() { return isNGram; }

    /** Returns the gram size. Only used if isNGram is true (default: 2) */
    public int getGramSize() { return gramSize; }

    /** Sets whether this is an ngram index, and the gram size to use */
    public void setNGram(boolean nGram, int gramSize) {
        this.isNGram = nGram;
        this.gramSize = gramSize;
    }

    public void setDynamicSummary(boolean dynamicSummary) { this.dynamicSummary = dynamicSummary; }

    /** Returns whether dynamic summary is turned on (set by the "dynteaser" command) */
    public boolean getDynamicSummary() { return dynamicSummary; }

    public void setHighlightSummary(boolean highlightSummary) { this.highlightSummary = highlightSummary; }

    /** Returns whether highlight summary is turned on (set by the "highlight" command) */
    public boolean getHighlightSummary() { return highlightSummary; }

    /** Returns true if this is the null index */
    // TODO: Replace by == Index.null
    public boolean isNull() {
        return "(null)".equals(name);
    }

    public boolean isAttribute() { return isAttribute; }

    public void setAttribute(boolean isAttribute) {
        this.isAttribute = isAttribute;
    }

    public boolean isIndex() { return isIndex; }

    public void setIndex(boolean isIndex) {
        this.isIndex = isIndex;
    }

    public boolean hasPlainTokens() { return plainTokens; }

    public void setPlainTokens(boolean plainTokens) {
        this.plainTokens = plainTokens;
    }

    public void setMultivalue(boolean multivalue) { this.multivalue = multivalue; }

    /** Returns true if this is a multivalue field */
    public boolean isMultivalue() { return multivalue; }

    public void setFastSearch(boolean fastSearch) { this.fastSearch = fastSearch; }

    /** Returns true if this is an attribute with fastsearch turned on */
    public boolean isFastSearch() { return fastSearch; }

    public void setNormalize(boolean normalize) { this.normalize = normalize; }

    /** Returns true if the content of this index is normalized */
    public boolean getNormalize() { return normalize; }

    public boolean getLiteralBoost() { return literalBoost; }

    public void setLiteralBoost(boolean literalBoost) { this.literalBoost = literalBoost; }

    public void setNumerical(boolean numerical) { this.numerical = numerical; }

    public boolean isNumerical() { return numerical; }

    public void setString(boolean string) { this.string = string; }

    public boolean isString() { return string; }

    public void setPredicate(boolean isPredicate) { this.predicate = isPredicate; }

    public boolean isPredicate() { return predicate; }

    /** Returns the upper predicate bound (default: Long.MAX_VALUE) */
    public long getPredicateUpperBound() { return predicateUpperBound; }

    /** Returns the lower predicate bound (default: Long.MIN_VALUE) */
    public long getPredicateLowerBound() { return predicateLowerBound; }

    /** Returns whether implicit phrases should lead to a phrase item (true) or an and item (default: false) */
    public boolean getPhraseSegmenting() { return phraseSegmenting; }

    /**
     * Sets whether implicit phrases should lead to a phrase item or an and item.
     *
     * @return the value which was set
     */
    public boolean setPhraseSegmenting(boolean phraseSegmenting) {
        this.phraseSegmenting = phraseSegmenting;
        return phraseSegmenting;
    }

    /**
     * Returns all the literal command strings given as arguments to addCommand in this instance.
     * This is the list instance held by this index, not a copy.
     */
    public List<String> allCommands() { return allCommands; }

    @Override
    public String toString() {
        return "index '" + getName() + "'";
    }

}