summaryrefslogtreecommitdiffstats
path: root/vespajlib/src/main/java/com/yahoo/text
diff options
context:
space:
mode:
authorMartin Polden <mpolden@mpolden.no>2020-10-02 14:34:44 +0200
committerMartin Polden <mpolden@mpolden.no>2020-10-02 14:34:44 +0200
commit58c464422049066d6908b9bcba1ebdf20e9c76a2 (patch)
treedc0bc4a2d79865aca1e4aed2d4b4e68a7357739d /vespajlib/src/main/java/com/yahoo/text
parent9c1cda0715260283b6a21081dc286ef86e3ab7b9 (diff)
Remove custom Utf8.toString implementation
`String::new` is now faster for both ASCII and Unicode strings:

```
Utf8::toString of ascii string took 132 ms
String::new of ascii string took 59 ms
Change = -55.30%

Utf8::toString of unicode string took 410 ms
String::new of unicode string took 280 ms
Change = -31.71%
```

There are at least two reasons for this:

* Java 9 introduced compact strings, which means that `String` is now backed by a byte array to reduce the memory footprint of ASCII strings.
* Detection of Unicode strings may use HotSpot intrinsics.
Diffstat (limited to 'vespajlib/src/main/java/com/yahoo/text')
-rw-r--r--vespajlib/src/main/java/com/yahoo/text/Utf8.java25
1 files changed, 4 insertions, 21 deletions
diff --git a/vespajlib/src/main/java/com/yahoo/text/Utf8.java b/vespajlib/src/main/java/com/yahoo/text/Utf8.java
index cb8ca244fe2..a8a0adf1a7d 100644
--- a/vespajlib/src/main/java/com/yahoo/text/Utf8.java
+++ b/vespajlib/src/main/java/com/yahoo/text/Utf8.java
@@ -48,8 +48,7 @@ public final class Utf8 {
* @return String decoded from UTF-8
*/
public static String toString(byte[] data, int offset, int length) {
- String s = toStringAscii(data, offset, length);
- return s != null ? s : toString(ByteBuffer.wrap(data, offset, length));
+ return toString(ByteBuffer.wrap(data, offset, length));
}
/**
@@ -118,14 +117,14 @@ public final class Utf8 {
return utf8 != null ? utf8 : string.getBytes(StandardCharsets.UTF_8);
}
/**
- * Will try an optimistic approach to utf8 decoding.
+ * Decode a UTF-8 string.
*
* @param utf8 The string to encode.
* @return Utf8 encoded array
*/
public static String toString(byte [] utf8) {
- String s = toStringAscii(utf8, 0, utf8.length);
- return s != null ? s : new String(utf8, StandardCharsets.UTF_8);
+ // This is just a wrapper for String::new now. Pre-Java 9 this had a more efficient approach for ASCII strings.
+ return new String(utf8, StandardCharsets.UTF_8);
}
/**
@@ -145,22 +144,6 @@ public final class Utf8 {
return utf8;
}
- private static String toStringAscii(byte [] b, int offset, int length) {
- if (length > 0) {
- char [] s = new char[length];
- for (int i=0; i < length; i++) {
- if (b[offset + i] >= 0) {
- s[i] = (char)b[offset+i];
- } else {
- return null;
- }
- }
- return new String(s);
- } else {
- return "";
- }
- }
-
/**
* Utility method as toBytes(String).
*