aboutsummaryrefslogtreecommitdiffstats
path: root/document/src/main/java/com/yahoo/document/idstring/IdString.java
blob: 763a03bfaea3ba1b6f964c214ae4ea1626395086 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.document.idstring;

import com.yahoo.api.annotations.Beta;
import com.yahoo.text.Text;
import com.yahoo.text.Utf8String;

import java.util.Locale;

/**
 * To be used with DocumentId constructor.
 *
 * @author Einar M R Rosenvinge
 */
public abstract class IdString {

    /** The id schemes that can be represented. */
    public enum Scheme { id }

    /**
     * Exclusive upper bound on the index of the colon that precedes the namespace specific part;
     * ids whose prefix reaches this index are rejected when parsed.
     * Kept below the max unsigned 16 bit integer, as the offset will be length + 1.
     * NOTE(review): the original comment described this value as "max unsigned 16 bit integer - 1",
     * which does not match 0xff00 - confirm the intended bound.
     */
    static final int MAX_LENGTH_EXCEPT_NAMESPACE_SPECIFIC = 0xff00;

    /**
     * Maximum id length, as reported by {@link String#length()}, accepted by
     * {@link #createIdString(String)}. The other factory methods do not enforce it.
     */
    public static final int MAX_LENGTH = 0x10000;

    private final Scheme scheme;
    private final String namespace;
    private final String namespaceSpecific;

    /** Lazily computed string form of this id, shared by {@link #toString()} and {@link #toUtf8()}. */
    private Utf8String cache;

    /** Returns whether this id carries a document type. This default implementation returns false. */
    public boolean hasDocType() {
        return false;
    }

    /** Returns the document type of this id. This default implementation returns the empty string. */
    public String getDocType() {
        return "";
    }

    /** Returns whether this id carries a group. This default implementation returns false. */
    public boolean hasGroup() {
        return false;
    }

    /** Returns whether this id carries a number. This default implementation returns false. */
    public boolean hasNumber() {
        return false;
    }

    /** Returns the number of this id. This default implementation returns 0. */
    public long getNumber() {
        return 0;
    }

    /** Returns the group of this id. This default implementation returns the empty string. */
    public String getGroup() {
        return "";
    }

    /**
     * Creates an IdString based on the given document id string.
     *
     * The document id string can only contain text characters.
     *
     * @param id the document id string to parse
     * @return the parsed id
     * @throws IllegalArgumentException if the id is longer than {@link #MAX_LENGTH}, contains an
     *                                  illegal code point, or cannot be parsed
     */
    public static IdString createIdString(String id) {
        if (id.length() > MAX_LENGTH) {
            throw new IllegalArgumentException("Document id length " + id.length() + " is longer than max length of " + MAX_LENGTH);
        }
        validateTextString(id);
        return parseAndCreate(id);
    }

    /**
     * Creates an IdString based on the given document id string. This is a less strict variant
     * for creating 'illegal' document ids for documents already fed. Only use when strictly needed.
     * Unlike {@link #createIdString(String)}, it does not enforce {@link #MAX_LENGTH}.
     *
     * @param id the document id string to parse
     * @return the parsed id
     * @throws IllegalArgumentException if the id contains an illegal code point or cannot be parsed
     */
    @Beta
    public static IdString createIdStringLessStrict(String id) {
        validateTextString(id);
        return parseAndCreate(id);
    }

    /**
     * Creates an IdString based on the given serialized document id string.
     *
     * The document id string can not contain 0x0 byte characters.
     *
     * @param id the serialized document id string to parse
     * @return the parsed id
     * @throws IllegalArgumentException if the id contains a zero character or cannot be parsed
     */
    public static IdString createFromSerialized(String id) {
        validateNoZeroBytes(id);
        return parseAndCreate(id);
    }

    /** Throws IllegalArgumentException, naming the offending code point in hex, unless the id is a valid text string. */
    private static void validateTextString(String id) {
        if ( ! Text.isValidTextString(id)) {
            // Only on the failure path is the string scanned again to find the offending code point.
            int illegalCodePoint = Text.validateTextString(id).getAsInt();
            throw new IllegalArgumentException("Unparseable id '" + id + "': Contains illegal code point 0x" +
                    Integer.toHexString(illegalCodePoint).toUpperCase(Locale.ROOT));
        }
    }

    /** Throws IllegalArgumentException if the id contains a 0x0 character. */
    private static void validateNoZeroBytes(String id) {
        // A surrogate pair never decodes to code point 0, so a plain char search is sufficient.
        if (id.indexOf('\0') >= 0) {
            throw new IllegalArgumentException("Unparseable id '" + id + "': Contains illegal zero byte code point");
        }
    }

    /**
     * Splits an id of the form {@code scheme:namespace:type:keyvalues:namespaceSpecific} on its
     * first four colons and builds the matching IdString. Only the {@code id} scheme is accepted.
     *
     * @throws IllegalArgumentException if a section is missing, the namespace is empty, the scheme
     *                                  is unknown, or the part before the namespace specific
     *                                  section is too long
     */
    private static IdString parseAndCreate(String id) {
        int schemeEnd = id.indexOf(':');
        if (schemeEnd < 0) {
            throw new IllegalArgumentException("Unparseable id '" + id + "': Scheme missing");
        }
        String schemeStr = id.substring(0, schemeEnd);

        int namespaceStart = schemeEnd + 1;
        int namespaceEnd = id.indexOf(':', namespaceStart);
        if (namespaceEnd < 0) {
            throw new IllegalArgumentException("Unparseable id '" + id + "': Namespace missing");
        }
        String namespace = id.substring(namespaceStart, namespaceEnd);
        if (namespace.isEmpty()) {
            throw new IllegalArgumentException("Unparseable id '" + id + "': Namespace must be non-empty");
        }

        // The scheme is validated only after the namespace, so namespace errors are reported first.
        if ( ! schemeStr.equals("id")) {
            throw new IllegalArgumentException("Unknown id scheme '" + schemeStr + "'");
        }

        int typeStart = namespaceEnd + 1;
        int typeEnd = id.indexOf(':', typeStart);
        if (typeEnd < 0) {
            throw new IllegalArgumentException("Unparseable id '" + id + "': Document type missing");
        }
        String type = id.substring(typeStart, typeEnd);

        int keyValuesStart = typeEnd + 1;
        int keyValuesEnd = id.indexOf(':', keyValuesStart);
        if (keyValuesEnd < 0) {
            throw new IllegalArgumentException("Unparseable id '" + id + "': Key/value section missing");
        }
        if (keyValuesEnd >= MAX_LENGTH_EXCEPT_NAMESPACE_SPECIFIC) {
            throw new IllegalArgumentException("Document id prior to the namespace specific part, " + keyValuesEnd + ", is longer than " + MAX_LENGTH_EXCEPT_NAMESPACE_SPECIFIC + " id: " + id);
        }
        String keyValues = id.substring(keyValuesStart, keyValuesEnd);

        // Everything after the fourth colon, further colons included, is the namespace specific part.
        return new IdIdString(namespace, type, keyValues, id.substring(keyValuesEnd + 1));
    }

    /**
     * Creates an id from its already parsed parts.
     *
     * @param scheme            the scheme of this id
     * @param namespace         the namespace of this id
     * @param namespaceSpecific the namespace specific (trailing) part of this id
     */
    protected IdString(Scheme scheme, String namespace, String namespaceSpecific) {
        this.scheme = scheme;
        this.namespace = namespace;
        this.namespaceSpecific = namespaceSpecific;
    }

    /** Returns the scheme of this id. */
    public Scheme getType() { return scheme; }

    /** Returns the namespace of this id. */
    public String getNamespace() { return namespace; }

    /** Returns the namespace specific part of this id. */
    public String getNamespaceSpecific() { return namespaceSpecific; }

    /** Returns the location of this id, as computed by the concrete subclass. */
    public abstract long getLocation();

    /**
     * Returns the text placed directly after the scheme name in the string form of this id.
     * This default implementation returns the empty string.
     */
    public String getSchemeParameters() { return ""; }

    /**
     * Returns the text placed between {@code namespace:} and the namespace specific part
     * in the string form of this id.
     */
    public abstract String getSchemeSpecific();

    /** Two IdStrings are equal when their string forms are equal. */
    @Override
    public boolean equals(Object o) {
        return (o instanceof IdString && o.toString().equals(toString()));
    }

    /** Returns the hash code of the string form, consistent with {@link #equals(Object)}. */
    @Override
    public int hashCode() {
        return toString().hashCode();
    }

    /** Builds the string form: scheme, scheme parameters, ':', namespace, ':', scheme specific, namespace specific. */
    private Utf8String createToString() {
        return new Utf8String(scheme.toString() + getSchemeParameters() + ':' + namespace + ':' + getSchemeSpecific() + namespaceSpecific);
    }

    /** Returns the cached string form, building it on first use. */
    private Utf8String cachedForm() {
        // Read the field once so that the reference which was null-checked is the one returned.
        Utf8String form = cache;
        if (form == null) {
            form = createToString();
            cache = form;
        }
        return form;
    }

    /** Returns the string form of this id. */
    @Override
    public String toString() {
        return cachedForm().toString();
    }

    /** Returns the string form of this id as a {@link Utf8String}. */
    public Utf8String toUtf8() {
        return cachedForm();
    }

}