summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorHenning Baldersheim <balder@yahoo-inc.com>2020-10-03 20:57:35 +0200
committerGitHub <noreply@github.com>2020-10-03 20:57:35 +0200
commit78ed46a404d8908fd0343dc8372b9b514f67d678 (patch)
treef5340683902c60e1684956de6af6cbdabe2b4da1
parent6c1766c93bc42e2d4129e73a85ac510498893b24 (diff)
parent58c464422049066d6908b9bcba1ebdf20e9c76a2 (diff)
Merge pull request #14682 from vespa-engine/mpolden/faster-tostring
Remove custom Utf8.toString implementation
-rw-r--r--vespajlib/src/main/java/com/yahoo/text/Utf8.java25
-rw-r--r--vespajlib/src/test/java/com/yahoo/text/Utf8TestCase.java36
2 files changed, 40 insertions, 21 deletions
diff --git a/vespajlib/src/main/java/com/yahoo/text/Utf8.java b/vespajlib/src/main/java/com/yahoo/text/Utf8.java
index cb8ca244fe2..a8a0adf1a7d 100644
--- a/vespajlib/src/main/java/com/yahoo/text/Utf8.java
+++ b/vespajlib/src/main/java/com/yahoo/text/Utf8.java
@@ -48,8 +48,7 @@ public final class Utf8 {
* @return String decoded from UTF-8
*/
public static String toString(byte[] data, int offset, int length) {
- String s = toStringAscii(data, offset, length);
- return s != null ? s : toString(ByteBuffer.wrap(data, offset, length));
+ return toString(ByteBuffer.wrap(data, offset, length));
}
/**
@@ -118,14 +117,14 @@ public final class Utf8 {
return utf8 != null ? utf8 : string.getBytes(StandardCharsets.UTF_8);
}
/**
- * Will try an optimistic approach to utf8 decoding.
+ * Decode a UTF-8 string.
*
* @param utf8 The UTF-8 encoded bytes to decode.
* @return The decoded string
*/
public static String toString(byte [] utf8) {
- String s = toStringAscii(utf8, 0, utf8.length);
- return s != null ? s : new String(utf8, StandardCharsets.UTF_8);
+ // This is just a wrapper for String::new now. Pre-Java 9 this had a more efficient approach for ASCII strings.
+ return new String(utf8, StandardCharsets.UTF_8);
}
/**
@@ -145,22 +144,6 @@ public final class Utf8 {
return utf8;
}
- private static String toStringAscii(byte [] b, int offset, int length) {
- if (length > 0) {
- char [] s = new char[length];
- for (int i=0; i < length; i++) {
- if (b[offset + i] >= 0) {
- s[i] = (char)b[offset+i];
- } else {
- return null;
- }
- }
- return new String(s);
- } else {
- return "";
- }
- }
-
/**
* Utility method as toBytes(String).
*
diff --git a/vespajlib/src/test/java/com/yahoo/text/Utf8TestCase.java b/vespajlib/src/test/java/com/yahoo/text/Utf8TestCase.java
index 79437af30b9..2ffedee6a17 100644
--- a/vespajlib/src/test/java/com/yahoo/text/Utf8TestCase.java
+++ b/vespajlib/src/test/java/com/yahoo/text/Utf8TestCase.java
@@ -1,6 +1,7 @@
// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.text;
+import com.google.common.collect.ImmutableMap;
import org.junit.Ignore;
import org.junit.Test;
@@ -8,7 +9,9 @@ import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.nio.charset.CharsetEncoder;
+import java.nio.charset.StandardCharsets;
import java.util.Arrays;
+import java.util.function.Function;
import static com.yahoo.text.Lowercase.toLowerCase;
import static com.yahoo.text.Utf8.calculateBytePositions;
@@ -551,4 +554,37 @@ public class Utf8TestCase {
assertArrayEquals(stringAsUtf8, handEncoded);
}
+ @Test
+ @Ignore
+ public void benchmarkDecoding() {
+ String ascii = "This is just sort of random mix.";
+ String unicode = "This is just sort of random mix. \u5370\u57df\u60c5\u5831\u53EF\u4EE5\u6709x\u00e9\u00e8";
+ int iterations = 100_000; // Use 100_000+ for benchmarking
+
+ ImmutableMap.of("ascii", ascii, "unicode", unicode).forEach((type, s) -> {
+ long time1 = benchmarkDecoding(Utf8::toString, s, iterations);
+ System.out.printf("Utf8::toString of %s string took %d ms\n", type, time1);
+ long time2 = benchmarkDecoding((b) -> new String(b, StandardCharsets.UTF_8), s, iterations);
+ System.out.printf("String::new of %s string took %d ms\n", type, time2);
+ double change = ((double) time2 / (double) time1) - 1;
+ System.out.printf("Change = %.02f%%\n", change * 100);
+ });
+ }
+
+ private String decode(Function<byte[], String> stringFunction, String s, int iterations) { // Runs stringFunction on the UTF-8 bytes of (s + i) for each i; returns the last result
+ String res = null; // Stays null when iterations <= 0
+ for (int i = 0; i < iterations; i++) {
+ res = stringFunction.apply((s + i).getBytes(StandardCharsets.UTF_8)); // Explicit charset: the functions under test decode UTF-8, so do not encode with the platform default
+ }
+ return res;
+ }
+
+ private long benchmarkDecoding(Function<byte[], String> stringFunction, String s, int iterations) {
+ decode(stringFunction, s, iterations); // Warmup
+ long start = System.currentTimeMillis();
+ decode(stringFunction, s, iterations);
+ long end = System.currentTimeMillis();
+ return end - start;
+ }
+
}