aboutsummaryrefslogtreecommitdiffstats
path: root/container-search/src/main/java/com/yahoo/prelude/query/TermItem.java
blob: 9d74fdbefe53c07673befcbee9b78d9b9dda4b11 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.prelude.query;

import com.yahoo.prelude.query.textualrepresentation.Discloser;

import java.nio.ByteBuffer;
import java.util.Objects;

/**
 * Base class for "leaf" conditions: items holding a single entity which either matches in a field or does not.
 * <p>
 * In addition to what it inherits, a term item tracks whether it came from the query, whether it is
 * normalizable, the substring it originated from (if any) and the segmenting rule to apply to it.
 *
 * @author bratseth
 * @author havardpe
 */
public abstract class TermItem extends SimpleIndexedItem implements BlockItem {

    /** True if this term comes from the raw query, false if it is synthetic. */
    private boolean isFromQuery;

    /** True (the default) if accent dropping should be performed on this term. */
    private boolean normalizable = true;

    /** The raw-form substring this token was created from, or null if there is none. */
    private Substring origin;

    /** The segmenting rule of this term; the language default unless changed. */
    private SegmentingRule segmentingRule = SegmentingRule.LANGUAGE_DEFAULT;

    /** Creates a term with an empty index name which is not from the query and has no origin. */
    public TermItem() {
        this("", false, null);
    }

    /** Creates a term in the given index which is not from the query and has no origin. */
    public TermItem(String indexName) {
        this(indexName, false, null);
    }

    /** Creates a term in the given index, with no origin. */
    public TermItem(String indexName, boolean isFromQuery) {
        this(indexName, isFromQuery, null);
    }

    /**
     * Creates a fully specified term.
     *
     * @param indexName the index name, passed on to {@code setIndexName}
     * @param isFromQuery whether this term is from the raw query
     * @param origin the raw-form substring this term stems from, or null if none
     */
    protected TermItem(String indexName, boolean isFromQuery, Substring origin) {
        setIndexName(indexName);
        this.origin = origin;
        this.isFromQuery = isFromQuery;
    }

    /**
     * Encodes this item into the given buffer by delegating to {@code encodeThis}.
     *
     * @return always 1 (presumably the number of items encoded - a term is a single item)
     */
    public final int encode(ByteBuffer buffer) {
        encodeThis(buffer);
        return 1;
    }

    /** Writes the index string, as produced by {@code appendIndexString}, followed by the string value of this */
    protected final void appendBodyString(StringBuilder buffer) {
        appendIndexString(buffer);
        String body = stringValue();
        buffer.append(body);
    }

    /**
     * Assigns the value of this item from its string form.
     *
     * @throws UnsupportedOperationException if this kind of item does not support it
     */
    public abstract void setValue(String value);

    /** Returns the raw text which lead to this term, exactly as it was received, with the original casing */
    public abstract String getRawWord();

    /**
     * Returns the substring holding the raw form of the text which lead to this token.
     * The substring also carries the superstring it was taken from, e.g the whole query string.
     * This is null if this term did not originate directly from a user string.
     */
    @Override
    public Substring getOrigin() {
        return origin;
    }

    /**
     * Returns whether this term is from the query, as opposed to having been added by a searcher.
     * Query rewriters which attempt to improve the precision or recall of the user's query
     * should only modify terms from the user.
     */
    @Override
    public boolean isFromQuery() {
        return isFromQuery;
    }

    /** Sets whether this term is from the query */
    public void setFromQuery(boolean isFromQuery) { this.isFromQuery = isFromQuery; }

    @Override
    public abstract boolean isWords();

    /** Assigns the origin substring of this term */
    public void setOrigin(Substring origin) { this.origin = origin; }

    /** Discloses what the superclass discloses, plus the origin as a property and the string value as the value */
    @Override
    public void disclose(Discloser discloser) {
        super.disclose(discloser);
        discloser.addProperty("origin", origin);
        discloser.setValue(stringValue());
    }

    /** Returns 1: this is a single term */
    @Override
    public int getTermCount() {
        return 1;
    }

    /** Returns true if accent removal is both meaningful and possible for this word. */
    public boolean isNormalizable() {
        return normalizable;
    }

    /**
     * Decides whether accent removal is both meaningful and possible for this word.
     *
     * @param normalizable true if accent removal can/should be performed
     */
    public void setNormalizable(boolean normalizable) {
        this.normalizable = normalizable;
    }

    @Override
    public SegmentingRule getSegmentingRule() {
        return segmentingRule;
    }

    /** Assigns the segmenting rule of this term */
    public void setSegmentingRule(SegmentingRule segmentingRule) {
        this.segmentingRule = segmentingRule;
    }

    /**
     * Two term items are equal if the superclass deems them equal and they agree on
     * the from-query flag, the normalizable flag and the segmenting rule.
     * The origin does not take part in the comparison.
     */
    @Override
    public boolean equals(Object o) {
        if ( ! super.equals(o)) return false;
        TermItem that = (TermItem)o;
        return isFromQuery == that.isFromQuery
               && normalizable == that.normalizable
               && segmentingRule == that.segmentingRule;
    }

    /** Hashes the superclass hash together with the fields compared in {@link #equals(Object)} */
    @Override
    public int hashCode() {
        return Objects.hash(super.hashCode(), isFromQuery, normalizable, segmentingRule);
    }

}