aboutsummaryrefslogtreecommitdiffstats
path: root/vespajlib
diff options
context:
space:
mode:
authorTor Brede Vekterli <vekterli@yahooinc.com>2021-10-29 15:41:14 +0200
committerTor Brede Vekterli <vekterli@yahooinc.com>2021-11-01 15:15:10 +0100
commitbac8ab58a18d25db1871d3e933cb0cc018be5439 (patch)
treee1bdd02017a1bfdb1390442247e0a9351b748ec5 /vespajlib
parent1163edf3b7d94e9581a6670fc6b725e056e87023 (diff)
Use UTF-8 bytewise ordering for StringResultNode comparisons
The C++ backend uses `memcmp` ordering of UTF-8 strings for its `StringResultNode` instances and expects the container to feed it nodes in the same order. However, the Java code used `String` internally, which compares UTF-16 code units instead of UTF-8 octets. These may not agree on the ordering, particularly in the presence of surrogate pairs. Java `StringResultNode` now uses a raw UTF-8 byte array as its value backing, which has the added benefit that (de-)serializing is effectively a no-op. Some extra `String` round-trip work is now needed to support the various type-erased `ResultNode` functionality, but that functionality is not expected to be called in a hot path.
Diffstat (limited to 'vespajlib')
-rw-r--r--vespajlib/abi-spec.json1
-rw-r--r--vespajlib/src/main/java/com/yahoo/vespa/objects/Identifiable.java9
2 files changed, 7 insertions, 3 deletions
diff --git a/vespajlib/abi-spec.json b/vespajlib/abi-spec.json
index c426195bc37..5eeee267cf6 100644
--- a/vespajlib/abi-spec.json
+++ b/vespajlib/abi-spec.json
@@ -3428,6 +3428,7 @@
"protected static com.yahoo.vespa.objects.Identifiable deserializeOptional(com.yahoo.vespa.objects.Deserializer)",
"protected static boolean equals(java.lang.Object, java.lang.Object)",
"public void visitMembers(com.yahoo.vespa.objects.ObjectVisitor)",
+ "protected static byte[] getRawUtf8Bytes(com.yahoo.vespa.objects.Deserializer)",
"protected java.lang.String getUtf8(com.yahoo.vespa.objects.Deserializer)",
"protected void putUtf8(com.yahoo.vespa.objects.Serializer, java.lang.String)",
"public bridge synthetic java.lang.Object clone()"
diff --git a/vespajlib/src/main/java/com/yahoo/vespa/objects/Identifiable.java b/vespajlib/src/main/java/com/yahoo/vespa/objects/Identifiable.java
index 8c11a0cbda1..947b312ac3b 100644
--- a/vespajlib/src/main/java/com/yahoo/vespa/objects/Identifiable.java
+++ b/vespajlib/src/main/java/com/yahoo/vespa/objects/Identifiable.java
@@ -354,10 +354,13 @@ public class Identifiable extends Selectable implements Cloneable {
}
}
- protected String getUtf8(Deserializer buf) {
+ protected static byte[] getRawUtf8Bytes(Deserializer buf) {
int len = buf.getInt(null);
- byte[] arr = buf.getBytes(null, len);
- return Utf8.toString(arr);
+ return buf.getBytes(null, len);
+ }
+
+ protected String getUtf8(Deserializer buf) {
+ return Utf8.toString(getRawUtf8Bytes(buf));
}
protected void putUtf8(Serializer buf, String val) {