summaryrefslogtreecommitdiffstats
path: root/vespajlib/src/main/java/com/yahoo/text/Ascii.java
diff options
context:
space:
mode:
Diffstat (limited to 'vespajlib/src/main/java/com/yahoo/text/Ascii.java')
-rw-r--r--vespajlib/src/main/java/com/yahoo/text/Ascii.java226
1 files changed, 226 insertions, 0 deletions
diff --git a/vespajlib/src/main/java/com/yahoo/text/Ascii.java b/vespajlib/src/main/java/com/yahoo/text/Ascii.java
new file mode 100644
index 00000000000..552a46fd36f
--- /dev/null
+++ b/vespajlib/src/main/java/com/yahoo/text/Ascii.java
@@ -0,0 +1,226 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.text;
+
+import java.io.ByteArrayOutputStream;
+import java.nio.ByteBuffer;
+import java.nio.CharBuffer;
+import java.nio.charset.Charset;
+import java.util.Iterator;
+import java.util.Set;
+import java.util.TreeSet;
+
+/**
+ * @author <a href="mailto:simon@yahoo-inc.com">Simon Thoresen Hult</a>
+ */
+public class Ascii {
+
+ public final static char ESCAPE_CHAR = '\\';
+
+ public static String encode(String str, Charset charset, int... requiresEscape) {
+ return newEncoder(charset, requiresEscape).encode(str);
+ }
+
+ public static String decode(String str, Charset charset) {
+ return newDecoder(charset).decode(str);
+ }
+
+ public static Encoder newEncoder(Charset charset, int... requiresEscape) {
+ switch (requiresEscape.length) {
+ case 0:
+ return new Encoder(charset, new EmptyPredicate());
+ case 1:
+ return new Encoder(charset, new SingletonPredicate(requiresEscape[0]));
+ default:
+ return new Encoder(charset, new ArrayPredicate(requiresEscape));
+ }
+ }
+
+ public static Decoder newDecoder(Charset charset) {
+ return new Decoder(charset);
+ }
+
+ public static class Encoder {
+
+ private final Charset charset;
+ private final EncodePredicate predicate;
+
+ private Encoder(Charset charset, EncodePredicate predicate) {
+ this.charset = charset;
+ this.predicate = predicate;
+ }
+
+ public String encode(String str) {
+ StringBuilder out = new StringBuilder();
+ for (int c : new CodePointSequence(str)) {
+ if (c < 0x20 || c >= 0x7F || c == ESCAPE_CHAR || predicate.requiresEscape(c)) {
+ escape(c, out);
+ } else {
+ out.appendCodePoint(c);
+ }
+ }
+ return out.toString();
+ }
+
+ private void escape(int c, StringBuilder out) {
+ switch (c) {
+ case ESCAPE_CHAR:
+ out.append(ESCAPE_CHAR).append(ESCAPE_CHAR);
+ break;
+ case '\f':
+ out.append(ESCAPE_CHAR).append("f");
+ break;
+ case '\n':
+ out.append(ESCAPE_CHAR).append("n");
+ break;
+ case '\r':
+ out.append(ESCAPE_CHAR).append("r");
+ break;
+ case '\t':
+ out.append(ESCAPE_CHAR).append("t");
+ break;
+ default:
+ ByteBuffer buf = charset.encode(CharBuffer.wrap(Character.toChars(c)));
+ while (buf.hasRemaining()) {
+ out.append(ESCAPE_CHAR).append(String.format("x%02X", buf.get()));
+ }
+ break;
+ }
+ }
+ }
+
+ public static class Decoder {
+
+ private final Charset charset;
+
+ private Decoder(Charset charset) {
+ this.charset = charset;
+ }
+
+ public String decode(String str) {
+ ByteArrayOutputStream out = new ByteArrayOutputStream();
+ for (Iterator<Integer> it = new CodePointIterator(str); it.hasNext(); ) {
+ int c = it.next();
+ if (c == ESCAPE_CHAR) {
+ unescape(it, out);
+ } else {
+ ByteBuffer buf = charset.encode(CharBuffer.wrap(Character.toChars(c)));
+ while (buf.hasRemaining()) {
+ out.write(buf.get());
+ }
+ }
+ }
+ return new String(out.toByteArray(), charset);
+ }
+
+ private void unescape(Iterator<Integer> it, ByteArrayOutputStream out) {
+ int c = it.next();
+ switch (c) {
+ case 'f':
+ out.write('\f');
+ break;
+ case 'n':
+ out.write('\n');
+ break;
+ case 'r':
+ out.write('\r');
+ break;
+ case 't':
+ out.write('\t');
+ break;
+ case 'x':
+ int x1 = it.next();
+ int x2 = it.next();
+ out.write((Character.digit(x1, 16) << 4) +
+ (Character.digit(x2, 16)));
+ break;
+ default:
+ out.write(c);
+ break;
+ }
+ }
+ }
+
+ private static interface EncodePredicate {
+
+ boolean requiresEscape(int codePoint);
+ }
+
+ private static class EmptyPredicate implements EncodePredicate {
+
+ @Override
+ public boolean requiresEscape(int codePoint) {
+ return false;
+ }
+ }
+
+ private static class SingletonPredicate implements EncodePredicate {
+
+ final int requiresEscape;
+
+ private SingletonPredicate(int requiresEscape) {
+ this.requiresEscape = requiresEscape;
+ }
+
+ @Override
+ public boolean requiresEscape(int codePoint) {
+ return codePoint == requiresEscape;
+ }
+ }
+
+ private static class ArrayPredicate implements EncodePredicate {
+
+ final Set<Integer> requiresEscape = new TreeSet<>();
+
+ private ArrayPredicate(int[] requiresEscape) {
+ for (int codePoint : requiresEscape) {
+ this.requiresEscape.add(codePoint);
+ }
+ }
+
+ @Override
+ public boolean requiresEscape(int codePoint) {
+ return requiresEscape.contains(codePoint);
+ }
+ }
+
+ private static class CodePointSequence implements Iterable<Integer> {
+
+ final String str;
+
+ CodePointSequence(String str) {
+ this.str = str;
+ }
+
+ @Override
+ public Iterator<Integer> iterator() {
+ return new CodePointIterator(str);
+ }
+ }
+
+ private static class CodePointIterator implements Iterator<Integer> {
+
+ final String str;
+ int idx = 0;
+
+ CodePointIterator(String str) {
+ this.str = str;
+ }
+
+ @Override
+ public boolean hasNext() {
+ return idx < str.length();
+ }
+
+ @Override
+ public Integer next() {
+ int c = str.codePointAt(idx);
+ idx += Character.charCount(c);
+ return c;
+ }
+
+ @Override
+ public void remove() {
+ throw new UnsupportedOperationException();
+ }
+ }
+}