diff options
author | Martin Polden <mpolden@mpolden.no> | 2020-10-02 14:34:44 +0200 |
---|---|---|
committer | Martin Polden <mpolden@mpolden.no> | 2020-10-02 14:34:44 +0200 |
commit | 58c464422049066d6908b9bcba1ebdf20e9c76a2 (patch) | |
tree | dc0bc4a2d79865aca1e4aed2d4b4e68a7357739d /vespajlib/src/main/java/com/yahoo/text | |
parent | 9c1cda0715260283b6a21081dc286ef86e3ab7b9 (diff) |
Remove custom Utf8.toString implementation
`String::new` is now faster for both ASCII and Unicode strings:
```
Utf8::toString of ascii string took 132 ms
String::new of ascii string took 59 ms
Change = -55.30%
Utf8::toString of unicode string took 410 ms
String::new of unicode string took 280 ms
Change = -31.71%
```
There are at least two reasons for this:
* Java 9 introduced compact strings, which means that `String` is now backed by
a byte array to reduce the memory footprint of ASCII strings.
* Detection of Unicode strings may use HotSpot intrinsics.
Diffstat (limited to 'vespajlib/src/main/java/com/yahoo/text')
-rw-r--r-- | vespajlib/src/main/java/com/yahoo/text/Utf8.java | 25 |
1 files changed, 4 insertions, 21 deletions
diff --git a/vespajlib/src/main/java/com/yahoo/text/Utf8.java b/vespajlib/src/main/java/com/yahoo/text/Utf8.java index cb8ca244fe2..a8a0adf1a7d 100644 --- a/vespajlib/src/main/java/com/yahoo/text/Utf8.java +++ b/vespajlib/src/main/java/com/yahoo/text/Utf8.java @@ -48,8 +48,7 @@ public final class Utf8 { * @return String decoded from UTF-8 */ public static String toString(byte[] data, int offset, int length) { - String s = toStringAscii(data, offset, length); - return s != null ? s : toString(ByteBuffer.wrap(data, offset, length)); + return toString(ByteBuffer.wrap(data, offset, length)); } /** @@ -118,14 +117,14 @@ public final class Utf8 { return utf8 != null ? utf8 : string.getBytes(StandardCharsets.UTF_8); } /** - * Will try an optimistic approach to utf8 decoding. + * Decode a UTF-8 string. * * @param utf8 The string to encode. * @return Utf8 encoded array */ public static String toString(byte [] utf8) { - String s = toStringAscii(utf8, 0, utf8.length); - return s != null ? s : new String(utf8, StandardCharsets.UTF_8); + // This is just wrapper for String::new now. Pre-Java 9 this had an more efficient approach for ASCII strings. + return new String(utf8, StandardCharsets.UTF_8); } /** @@ -145,22 +144,6 @@ public final class Utf8 { return utf8; } - private static String toStringAscii(byte [] b, int offset, int length) { - if (length > 0) { - char [] s = new char[length]; - for (int i=0; i < length; i++) { - if (b[offset + i] >= 0) { - s[i] = (char)b[offset+i]; - } else { - return null; - } - } - return new String(s); - } else { - return ""; - } - } - /** * Utility method as toBytes(String). * |