1 files changed, 123 insertions, 0 deletions
diff --git a/container-search/src/main/java/com/yahoo/search/yql/StringUnescaper.java b/container-search/src/main/java/com/yahoo/search/yql/StringUnescaper.java
new file mode 100644
index 00000000000..76d81429ab3
--- /dev/null
+++ b/container-search/src/main/java/com/yahoo/search/yql/StringUnescaper.java
@@ -0,0 +1,123 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.search.yql;
+
+class StringUnescaper {
+
+    private static boolean lookaheadOctal(String v, int point) {
+        return point < v.length() && "01234567".indexOf(v.charAt(point)) != -1;
+    }
+
+    public static String unquote(String token) {
+        if (null == token || !(token.startsWith("'") && token.endsWith("'") || token.startsWith("\"") && token.endsWith("\""))) {
+            return token;
+        }
+        // remove quotes from around string and unescape it
+        String value = token.substring(1, token.length() - 1);
+        // first quickly check to see if \ is present -- if not then there's no escaping and we're done
+        int idx = value.indexOf('\\');
+        if (idx == -1) {
+            return value;
+        }
+        // the output string will be no bigger than the input string, since escapes add characters
+        StringBuilder result = new StringBuilder(value.length());
+        int start = 0;
+        while (idx != -1) {
+            result.append(value.subSequence(start, idx));
+            start = idx + 1;
+            switch (value.charAt(start)) {
+                case 'b':
+                    result.append('\b');
+                    ++start;
+                    break;
+                case 't':
+                    result.append('\t');
+                    ++start;
+                    break;
+                case 'n':
+                    result.append('\n');
+                    ++start;
+                    break;
+                case 'f':
+                    result.append('\f');
+                    ++start;
+                    break;
+                case 'r':
+                    result.append('\r');
+                    ++start;
+                    break;
+                case '"':
+                    result.append('"');
+                    ++start;
+                    break;
+                case '\'':
+                    result.append('\'');
+                    ++start;
+                    break;
+                case '\\':
+                    result.append('\\');
+                    ++start;
+                    break;
+                case '/':
+                    result.append('/');
+                    ++start;
+                    break;
+                case 'u':
+                    // hex hex hex hex
+                    ++start;
+                    result.append((char) Integer.parseInt(value.substring(start, start + 4), 16));
+                    start += 4;
+                    break;
+                case '0':
+                case '1':
+                case '2':
+                case '3':
+                case '4':
+                case '5':
+                case '6':
+                case '7':
+                    // octal escape
+                    // 1, 2, or 3 bytes
+                    // peek ahead
+                    if (lookaheadOctal(value, start + 1) && lookaheadOctal(value, start + 2)) {
+                        result.append((char) Integer.parseInt(value.substring(start, start + 3), 8));
+                        start += 3;
+                    } else if (lookaheadOctal(value, start + 1)) {
+                        result.append((char) Integer.parseInt(value.substring(start, start + 2), 8));
+                        start += 2;
+                    } else {
+                        result.append((char) Integer.parseInt(value.substring(start, start + 1), 8));
+                        start += 1;
+                    }
+                    break;
+                default:
+                    // the lexer should be ensuring there are no malformed escapes here, so we'll just blow up
+                    throw new IllegalArgumentException("Unknown escape sequence in token: " + token);
+            }
+            idx = value.indexOf('\\', start);
+        }
+        result.append(value.subSequence(start, value.length()));
+        return result.toString();
+    }
+
+    public static String escape(String value) {
+        int idx = value.indexOf('\'');
+        if (idx == -1) {
+            return "\'" + value + "\'";
+
+        }
+        StringBuilder result = new StringBuilder(value.length() + 5);
+        result.append("'");
+        // right now we only escape ' on output
+        int start = 0;
+        while (idx != -1) {
+            result.append(value.subSequence(start, idx));
+            start = idx + 1;
+            result.append("\\'");
+            idx = value.indexOf('\\', start);
+        }
+        result.append(value.subSequence(start, value.length()));
+        result.append("'");
+        return result.toString();
+    }
+
+}