summary refs log tree commit diff stats
path: root/container-search/src/main/java/com/yahoo/search/yql/StringUnescaper.java
diff options
context:
space:
mode:
Diffstat (limited to 'container-search/src/main/java/com/yahoo/search/yql/StringUnescaper.java')
-rw-r--r-- container-search/src/main/java/com/yahoo/search/yql/StringUnescaper.java 123
1 files changed, 123 insertions, 0 deletions
diff --git a/container-search/src/main/java/com/yahoo/search/yql/StringUnescaper.java b/container-search/src/main/java/com/yahoo/search/yql/StringUnescaper.java
new file mode 100644
index 00000000000..76d81429ab3
--- /dev/null
+++ b/container-search/src/main/java/com/yahoo/search/yql/StringUnescaper.java
@@ -0,0 +1,123 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.search.yql;
+
+/** Converts between quoted, backslash-escaped string tokens and their raw string values. */
+class StringUnescaper {
+
+    /** Returns true if point is a valid index in v and the character there is an octal digit. */
+    private static boolean lookaheadOctal(String v, int point) {
+        return point < v.length() && "01234567".indexOf(v.charAt(point)) != -1;
+    }
+
+    /**
+     * Decodes the four hex digits of a unicode escape (backslash, 'u', four hex digits).
+     * @param value the unquoted string being unescaped
+     * @param from index of the first hex digit
+     * @param token the original token, used only in error messages
+     * @return the UTF-16 code unit denoted by the digits
+     * @throws IllegalArgumentException if fewer than four characters remain or one is not a hex digit
+     */
+    private static char decodeUnicodeEscape(String value, int from, String token) {
+        if (from + 4 > value.length()) {
+            throw new IllegalArgumentException("Truncated unicode escape in token: " + token);
+        }
+        int codeUnit = 0;
+        for (int i = from; i < from + 4; i++) {
+            // Character.digit rejects a sign character, which Integer.parseInt would accept
+            int digit = Character.digit(value.charAt(i), 16);
+            if (digit < 0) {
+                throw new IllegalArgumentException("Malformed unicode escape in token: " + token);
+            }
+            codeUnit = (codeUnit << 4) | digit;
+        }
+        return (char) codeUnit;
+    }
+
+    /**
+     * Strips the surrounding quotes from a token and resolves its backslash escapes.
+     * Supported escapes: b, t, n, f, r, double quote, single quote, backslash, slash,
+     * a unicode escape with four hex digits, and an octal escape of one to three digits.
+     * @param token the token to unquote, may be null
+     * @return the unescaped content, or token itself if it is null or not enclosed in matching quotes
+     * @throws IllegalArgumentException if the token contains an unknown or incomplete escape sequence
+     */
+    public static String unquote(String token) {
+        // a lone quote character both starts and ends with a quote, but has no content to extract
+        if (null == token || token.length() < 2
+                || !(token.startsWith("'") && token.endsWith("'") || token.startsWith("\"") && token.endsWith("\""))) {
+            return token;
+        }
+        String value = token.substring(1, token.length() - 1);
+        // first quickly check to see if \ is present -- if not then there's no escaping and we're done
+        int idx = value.indexOf('\\');
+        if (idx == -1) {
+            return value;
+        }
+        // the output string will be no bigger than the input string, since escapes add characters
+        StringBuilder result = new StringBuilder(value.length());
+        int start = 0;
+        while (idx != -1) {
+            result.append(value, start, idx);
+            start = idx + 1;
+            if (start >= value.length()) {
+                // a backslash as the last character has nothing to escape
+                throw new IllegalArgumentException("Unknown escape sequence in token: " + token);
+            }
+            char code = value.charAt(start);
+            int consumed = 1; // number of characters after the backslash that belong to this escape
+            switch (code) {
+                case 'b': result.append('\b'); break;
+                case 't': result.append('\t'); break;
+                case 'n': result.append('\n'); break;
+                case 'f': result.append('\f'); break;
+                case 'r': result.append('\r'); break;
+                case '"': case '\'': case '\\': case '/':
+                    result.append(code); // these characters stand for themselves
+                    break;
+                case 'u':
+                    result.append(decodeUnicodeEscape(value, start + 1, token));
+                    consumed = 5;
+                    break;
+                case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7':
+                    // octal escape of 1, 2 or 3 digits: take as many as are present
+                    if (lookaheadOctal(value, start + 1)) {
+                        consumed = lookaheadOctal(value, start + 2) ? 3 : 2;
+                    }
+                    result.append((char) Integer.parseInt(value.substring(start, start + consumed), 8));
+                    break;
+                default:
+                    // the lexer should be ensuring there are no malformed escapes here, so we'll just blow up
+                    throw new IllegalArgumentException("Unknown escape sequence in token: " + token);
+            }
+            start += consumed;
+            idx = value.indexOf('\\', start);
+        }
+        result.append(value, start, value.length());
+        return result.toString();
+    }
+
+    /**
+     * Wraps a value in single quotes, escaping each single quote inside it with a backslash.
+     * No other character is escaped, backslash included.
+     * @param value the raw string to quote, must not be null
+     * @return the single-quoted string
+     */
+    public static String escape(String value) {
+        int idx = value.indexOf('\'');
+        if (idx == -1) {
+            return "'" + value + "'";
+        }
+        StringBuilder result = new StringBuilder(value.length() + 5);
+        result.append('\'');
+        // right now we only escape ' on output
+        int start = 0;
+        while (idx != -1) {
+            result.append(value, start, idx).append("\\'");
+            start = idx + 1;
+            // look for the next quote; searching for a backslash here would leave later quotes unescaped
+            idx = value.indexOf('\'', start);
+        }
+        result.append(value, start, value.length()).append('\'');
+        return result.toString();
+    }
+}