aboutsummaryrefslogtreecommitdiffstats
path: root/container-search/src/main/java/com/yahoo/search/yql/StringUnescaper.java
blob: 5eed51c9cebc153ffc7cbd7b0c8db40006396ee6 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.search.yql;

/**
 * Static helpers for converting between quoted string tokens and their raw values:
 * {@link #unquote(String)} strips the surrounding quotes and resolves backslash escapes,
 * {@link #escape(String)} wraps a raw value in single quotes.
 */
class StringUnescaper {

    private static final String OCTAL_DIGITS = "01234567";

    /** Returns true if {@code point} is inside {@code v} and the character there is an octal digit. */
    private static boolean lookaheadOctal(String v, int point) {
        return point < v.length() && OCTAL_DIGITS.indexOf(v.charAt(point)) != -1;
    }

    /** Returns true if {@code token} has at least two characters and is delimited by a matching pair of ' or ". */
    private static boolean isQuoted(String token) {
        // The length check matters: a lone quote character both starts and ends with itself.
        if (token == null || token.length() < 2) {
            return false;
        }
        return (token.startsWith("'") && token.endsWith("'"))
                || (token.startsWith("\"") && token.endsWith("\""));
    }

    /**
     * Removes the surrounding quotes from a token and resolves the escape sequences inside it.
     *
     * Supported escapes: {@code \b \t \n \f \r \" \' \\ \/}, {@code \}{@code uXXXX} (exactly four hex digits)
     * and octal escapes of one to three digits.
     *
     * @param token the token to unquote, may be null
     * @return the unescaped content, or {@code token} itself (possibly null) if it is not
     *         enclosed in a matching pair of single or double quotes
     * @throws IllegalArgumentException if the token contains an unknown or incomplete escape sequence
     *         (a {@link NumberFormatException} if the digits of a unicode escape are not valid hex)
     */
    public static String unquote(String token) {
        if (!isQuoted(token)) {
            return token;
        }
        // remove quotes from around string and unescape it
        String value = token.substring(1, token.length() - 1);
        // first quickly check to see if \ is present -- if not then there's no escaping and we're done
        int idx = value.indexOf('\\');
        if (idx == -1) {
            return value;
        }
        // the output is never longer than the input, since every escape is at least as long as what it denotes
        StringBuilder result = new StringBuilder(value.length());
        int start = 0;
        while (idx != -1) {
            // copy the literal run preceding the backslash, then step onto the escape designator
            result.append(value, start, idx);
            start = idx + 1;
            if (start >= value.length()) {
                // a backslash with nothing after it
                throw new IllegalArgumentException("Incomplete escape sequence in token: " + token);
            }
            switch (value.charAt(start)) {
                case 'b':
                    result.append('\b');
                    ++start;
                    break;
                case 't':
                    result.append('\t');
                    ++start;
                    break;
                case 'n':
                    result.append('\n');
                    ++start;
                    break;
                case 'f':
                    result.append('\f');
                    ++start;
                    break;
                case 'r':
                    result.append('\r');
                    ++start;
                    break;
                case '"':
                    result.append('"');
                    ++start;
                    break;
                case '\'':
                    result.append('\'');
                    ++start;
                    break;
                case '\\':
                    result.append('\\');
                    ++start;
                    break;
                case '/':
                    result.append('/');
                    ++start;
                    break;
                case 'u':
                    // hex hex hex hex
                    ++start;
                    if (start + 4 > value.length()) {
                        throw new IllegalArgumentException("Incomplete unicode escape sequence in token: " + token);
                    }
                    result.append((char) Integer.parseInt(value.substring(start, start + 4), 16));
                    start += 4;
                    break;
                case '0':
                case '1':
                case '2':
                case '3':
                case '4':
                case '5':
                case '6':
                case '7':
                    // octal escape of 1, 2, or 3 digits: consume as many octal digits as follow, up to three
                    int digits = 1;
                    if (lookaheadOctal(value, start + 1)) {
                        digits = lookaheadOctal(value, start + 2) ? 3 : 2;
                    }
                    result.append((char) Integer.parseInt(value.substring(start, start + digits), 8));
                    start += digits;
                    break;
                default:
                    // the lexer should be ensuring there are no malformed escapes here, so we'll just blow up
                    throw new IllegalArgumentException("Unknown escape sequence in token: " + token);
            }
            idx = value.indexOf('\\', start);
        }
        result.append(value, start, value.length());
        return result.toString();
    }

    /**
     * Wraps a value in single quotes, escaping every single quote inside it as {@code \'}.
     *
     * Only single quotes are escaped; backslashes and other characters are emitted as-is, so a value
     * containing backslashes will not necessarily round-trip through {@link #unquote(String)}.
     *
     * @param value the raw value to quote, must not be null
     * @return the single-quoted value
     */
    public static String escape(String value) {
        int idx = value.indexOf('\'');
        if (idx == -1) {
            return "'" + value + "'";
        }
        StringBuilder result = new StringBuilder(value.length() + 5);
        result.append('\'');
        // right now we only escape ' on output
        int start = 0;
        while (idx != -1) {
            result.append(value, start, idx);
            result.append("\\'");
            start = idx + 1;
            // continue with the next single quote (not the next backslash)
            idx = value.indexOf('\'', start);
        }
        result.append(value, start, value.length());
        result.append('\'');
        return result.toString();
    }

}