aboutsummaryrefslogtreecommitdiffstats
path: root/documentapi
diff options
context:
space:
mode:
authorTor Brede Vekterli <vekterli@vespa.ai>2024-02-23 13:22:49 +0000
committerTor Brede Vekterli <vekterli@vespa.ai>2024-02-23 13:22:49 +0000
commite4870e5b0e1e9c807ea3ce30630acfe252e879fa (patch)
tree76a7d3a8673f6709cf1c88ac46594fbd5c38d18c /documentapi
parent10267c6347197b5e9731d0a1e9169d74608178f7 (diff)
Optimize Java DocumentProtocol encoding memory usage
This commit allows protocol implementations to directly construct and return a payload byte array that contains both the message identifier and the serialized message itself _without_ having to go through a `DocumentSerializer` indirection.

A new method has been added to the `RoutableFactory` interface whose default implementation defers to the legacy `DocumentSerializer`-accepting method. This means the v6 protocol has the same semantics and performance characteristics as before.

The new Protobuf protocol implementation now allocates the result byte array once with the correct size and writes both the message ID header and the protobuf data into it.

This has the following performance benefits for the new protocol:
- Reduces the number of buffer _allocations_ from 3 to 1.
- Avoids 2 buffer _copies_ since we now directly allocate and write into the resulting array.
- Encoding allocates the exact number of required bytes instead of always allocating 8K at a minimum. This also avoids the need for growing (by realloc and copy) the buffer during encoding.
Diffstat (limited to 'documentapi')
-rw-r--r--documentapi/abi-spec.json1
-rw-r--r--documentapi/src/main/java/com/yahoo/documentapi/messagebus/protocol/RoutableFactories80.java28
-rwxr-xr-xdocumentapi/src/main/java/com/yahoo/documentapi/messagebus/protocol/RoutableFactory.java28
-rwxr-xr-xdocumentapi/src/main/java/com/yahoo/documentapi/messagebus/protocol/RoutableRepository.java9
4 files changed, 52 insertions, 14 deletions
diff --git a/documentapi/abi-spec.json b/documentapi/abi-spec.json
index d00c89ae737..02d764deab8 100644
--- a/documentapi/abi-spec.json
+++ b/documentapi/abi-spec.json
@@ -3015,6 +3015,7 @@
],
"methods" : [
"public abstract boolean encode(com.yahoo.messagebus.Routable, com.yahoo.document.serialization.DocumentSerializer)",
+ "public byte[] encode(int, com.yahoo.messagebus.Routable)",
"public abstract com.yahoo.messagebus.Routable decode(com.yahoo.document.serialization.DocumentDeserializer)"
],
"fields" : [ ]
diff --git a/documentapi/src/main/java/com/yahoo/documentapi/messagebus/protocol/RoutableFactories80.java b/documentapi/src/main/java/com/yahoo/documentapi/messagebus/protocol/RoutableFactories80.java
index 83392679f11..4712f6d2442 100644
--- a/documentapi/src/main/java/com/yahoo/documentapi/messagebus/protocol/RoutableFactories80.java
+++ b/documentapi/src/main/java/com/yahoo/documentapi/messagebus/protocol/RoutableFactories80.java
@@ -57,17 +57,31 @@ abstract class RoutableFactories80 {
}
@Override
- public boolean encode(Routable obj, DocumentSerializer out) {
+ public byte[] encode(int msgType, Routable obj) {
try {
var protoMsg = encoderFn.apply(apiClass.cast(obj));
- // TODO avoid this buffer indirection by directly exposing an OutputStream to write into...!
- // ... or at the very least have a way to preallocate buffer output of protoMsg.getSerializedSize() bytes!
- out.getBuf().put(protoMsg.toByteArray());
- } catch (RuntimeException e) {
+ int protoSize = protoMsg.getSerializedSize();
+ // The message payload contains a 4-byte header int which specifies the type of the message
+ // that follows. We want to write this header and the subsequent message bytes using a single
+ // allocation and without unneeded copying, so we create one array for both purposes and encode
+ // directly into it. Aside from the header, this is pretty much a mirror image of what the
+ // toByteArray() method on Protobuf message objects already does.
+ var buf = new byte[4 + protoSize];
+ ByteBuffer.wrap(buf).putInt(msgType); // In network order (default setting)
+ var protoStream = CodedOutputStream.newInstance(buf, 4, protoSize);
+ protoMsg.writeTo(protoStream); // Writing straight to array, no need to flush
+ protoStream.checkNoSpaceLeft();
+ return buf;
+ } catch (IOException | RuntimeException e) {
logCodecError("encoding", e);
- return false;
+ return null;
}
- return true;
+ }
+
+ @Override
+ public boolean encode(Routable obj, DocumentSerializer out) {
+ // Legacy encode; not supported
+ return false;
}
@Override
diff --git a/documentapi/src/main/java/com/yahoo/documentapi/messagebus/protocol/RoutableFactory.java b/documentapi/src/main/java/com/yahoo/documentapi/messagebus/protocol/RoutableFactory.java
index e98c9ab3a40..c635aa1581e 100755
--- a/documentapi/src/main/java/com/yahoo/documentapi/messagebus/protocol/RoutableFactory.java
+++ b/documentapi/src/main/java/com/yahoo/documentapi/messagebus/protocol/RoutableFactory.java
@@ -3,6 +3,8 @@ package com.yahoo.documentapi.messagebus.protocol;
import com.yahoo.document.serialization.DocumentDeserializer;
import com.yahoo.document.serialization.DocumentSerializer;
+import com.yahoo.document.serialization.DocumentSerializerFactory;
+import com.yahoo.io.GrowableByteBuffer;
import com.yahoo.messagebus.Routable;
/**
@@ -31,6 +33,32 @@ public interface RoutableFactory {
boolean encode(Routable obj, DocumentSerializer out);
/**
+ * <p>Encode a message type and object payload to a byte array. This is an alternative,
+ * optional method to {@link #encode(Routable, DocumentSerializer)}, but which defers all
+ * buffer management to the callee. This allows protocol implementations to make more
+ * efficient use of memory, as they do not have to deal with DocumentSerializer indirections.</p>
+ *
+ * <p>Implementations <strong>must</strong> ensure that the first 4 bytes of the returned
+ * byte array contain a 32-bit integer (in network order) equal to the provided msgType value.</p>
+ *
+ * @param msgType A positive integer indicating the concrete message type of obj.
+ * @param obj The message to encode.
+ * @return A byte array encapsulating the message type and the serialized representation
+ * of obj, or null if encoding failed.
+ */
+ default byte[] encode(int msgType, Routable obj) {
+ var out = DocumentSerializerFactory.createHead(new GrowableByteBuffer(8192));
+ out.putInt(null, msgType);
+ if (!encode(obj, out)) {
+ return null;
+ }
+ byte[] ret = new byte[out.getBuf().position()];
+ out.getBuf().rewind();
+ out.getBuf().get(ret);
+ return ret;
+ }
+
+ /**
* <p>This method decodes the given byte buffer to a routable.</p> <p>Return false to signal failure.</p> <p>This
* method is NOT exception safe.</p>
*
diff --git a/documentapi/src/main/java/com/yahoo/documentapi/messagebus/protocol/RoutableRepository.java b/documentapi/src/main/java/com/yahoo/documentapi/messagebus/protocol/RoutableRepository.java
index 47117471615..56d23d36811 100755
--- a/documentapi/src/main/java/com/yahoo/documentapi/messagebus/protocol/RoutableRepository.java
+++ b/documentapi/src/main/java/com/yahoo/documentapi/messagebus/protocol/RoutableRepository.java
@@ -93,17 +93,12 @@ final class RoutableRepository {
log.log(Level.SEVERE,"Can not encode routable type " + type + " (version " + version + "). Only major version 5 and up supported.");
return new byte[0];
}
- DocumentSerializer out= DocumentSerializerFactory.createHead(new GrowableByteBuffer(8192));
-
- out.putInt(null, type);
- if (!factory.encode(obj, out)) {
+ byte[] ret = factory.encode(type, obj);
+ if (ret == null) {
log.log(Level.SEVERE, "Routable factory " + factory.getClass().getName() + " failed to serialize " +
"routable of type " + type + " (version " + version + ").");
return new byte[0];
}
- byte[] ret = new byte[out.getBuf().position()];
- out.getBuf().rewind();
- out.getBuf().get(ret);
return ret;
}