diff options
author | Tor Brede Vekterli <vekterli@vespa.ai> | 2024-02-23 13:22:49 +0000 |
---|---|---|
committer | Tor Brede Vekterli <vekterli@vespa.ai> | 2024-02-23 13:22:49 +0000 |
commit | e4870e5b0e1e9c807ea3ce30630acfe252e879fa (patch) | |
tree | 76a7d3a8673f6709cf1c88ac46594fbd5c38d18c /documentapi | |
parent | 10267c6347197b5e9731d0a1e9169d74608178f7 (diff) |
Optimize Java DocumentProtocol encoding memory usage
This commit allows protocol implementations to directly construct
and return a payload byte array that contains both the message
identifier and the serialized message itself _without_ having to
go through a `DocumentSerializer` indirection.
A new method has been added to the `RoutableFactory` interface. Its
default implementation defers to the legacy `DocumentSerializer`-accepting
method. This means the v6 protocol has the same semantics and
performance characteristics as before.
The new Protobuf protocol implementation now allocates the result
byte array once with the correct size and writes both the message
ID header and the protobuf data directly into it.
This has the following performance benefits for the new protocol:
- Reduces the number of buffer _allocations_ from 3 to 1.
- Avoids 2 buffer _copies_ since we now directly allocate and write
into the resulting array.
- Encoding allocates the exact number of required bytes instead of
always allocating 8K at a minimum. This also avoids the need
for growing (by realloc and copy) the buffer during encoding.
Diffstat (limited to 'documentapi')
4 files changed, 52 insertions, 14 deletions
diff --git a/documentapi/abi-spec.json b/documentapi/abi-spec.json index d00c89ae737..02d764deab8 100644 --- a/documentapi/abi-spec.json +++ b/documentapi/abi-spec.json @@ -3015,6 +3015,7 @@ ], "methods" : [ "public abstract boolean encode(com.yahoo.messagebus.Routable, com.yahoo.document.serialization.DocumentSerializer)", + "public byte[] encode(int, com.yahoo.messagebus.Routable)", "public abstract com.yahoo.messagebus.Routable decode(com.yahoo.document.serialization.DocumentDeserializer)" ], "fields" : [ ] diff --git a/documentapi/src/main/java/com/yahoo/documentapi/messagebus/protocol/RoutableFactories80.java b/documentapi/src/main/java/com/yahoo/documentapi/messagebus/protocol/RoutableFactories80.java index 83392679f11..4712f6d2442 100644 --- a/documentapi/src/main/java/com/yahoo/documentapi/messagebus/protocol/RoutableFactories80.java +++ b/documentapi/src/main/java/com/yahoo/documentapi/messagebus/protocol/RoutableFactories80.java @@ -57,17 +57,31 @@ abstract class RoutableFactories80 { } @Override - public boolean encode(Routable obj, DocumentSerializer out) { + public byte[] encode(int msgType, Routable obj) { try { var protoMsg = encoderFn.apply(apiClass.cast(obj)); - // TODO avoid this buffer indirection by directly exposing an OutputStream to write into...! - // ... or at the very least have a way to preallocate buffer output of protoMsg.getSerializedSize() bytes! - out.getBuf().put(protoMsg.toByteArray()); - } catch (RuntimeException e) { + int protoSize = protoMsg.getSerializedSize(); + // The message payload contains a 4-byte header int which specifies the type of the message + // that follows. We want to write this header and the subsequence message bytes using a single + // allocation and without unneeded copying, so we create one array for both purposes and encode + // directly into it. Aside from the header, this is pretty much a mirror image of what the + // toByteArray() method on Protobuf message objects already does. 
+ var buf = new byte[4 + protoSize]; + ByteBuffer.wrap(buf).putInt(msgType); // In network order (default setting) + var protoStream = CodedOutputStream.newInstance(buf, 4, protoSize); + protoMsg.writeTo(protoStream); // Writing straight to array, no need to flush + protoStream.checkNoSpaceLeft(); + return buf; + } catch (IOException | RuntimeException e) { logCodecError("encoding", e); - return false; + return null; } - return true; + } + + @Override + public boolean encode(Routable obj, DocumentSerializer out) { + // Legacy encode; not supported + return false; } @Override diff --git a/documentapi/src/main/java/com/yahoo/documentapi/messagebus/protocol/RoutableFactory.java b/documentapi/src/main/java/com/yahoo/documentapi/messagebus/protocol/RoutableFactory.java index e98c9ab3a40..c635aa1581e 100755 --- a/documentapi/src/main/java/com/yahoo/documentapi/messagebus/protocol/RoutableFactory.java +++ b/documentapi/src/main/java/com/yahoo/documentapi/messagebus/protocol/RoutableFactory.java @@ -3,6 +3,8 @@ package com.yahoo.documentapi.messagebus.protocol; import com.yahoo.document.serialization.DocumentDeserializer; import com.yahoo.document.serialization.DocumentSerializer; +import com.yahoo.document.serialization.DocumentSerializerFactory; +import com.yahoo.io.GrowableByteBuffer; import com.yahoo.messagebus.Routable; /** @@ -31,6 +33,32 @@ public interface RoutableFactory { boolean encode(Routable obj, DocumentSerializer out); /** + * <p>Encode a message type and object payload to a byte array. This is an alternative, + * optional method to {@link #encode(Routable, DocumentSerializer)}, but which defers all + * buffer management to the callee. 
This allows protocol implementations to make more + * efficient use of memory, as they do not have to deal with DocumentSerializer indirections.</p> + * + * <p>Implementations <strong>must</strong> ensure that the first 4 bytes of the returned + * byte array contain a 32-bit integer (in network order) equal to the provided msgType value.</p> + * + * @param msgType A positive integer indicating the concrete message type of obj. + * @param obj The message to encode. + * @return A byte buffer encapsulating the message type and the serialized representation + * of obj, or null if encoding failed. + */ + default byte[] encode(int msgType, Routable obj) { + var out = DocumentSerializerFactory.createHead(new GrowableByteBuffer(8192)); + out.putInt(null, msgType); + if (!encode(obj, out)) { + return null; + } + byte[] ret = new byte[out.getBuf().position()]; + out.getBuf().rewind(); + out.getBuf().get(ret); + return ret; + } + + /** * <p>This method decodes the given byte buffer to a routable.</p> <p>Return false to signal failure.</p> <p>This * method is NOT exception safe.</p> * diff --git a/documentapi/src/main/java/com/yahoo/documentapi/messagebus/protocol/RoutableRepository.java b/documentapi/src/main/java/com/yahoo/documentapi/messagebus/protocol/RoutableRepository.java index 47117471615..56d23d36811 100755 --- a/documentapi/src/main/java/com/yahoo/documentapi/messagebus/protocol/RoutableRepository.java +++ b/documentapi/src/main/java/com/yahoo/documentapi/messagebus/protocol/RoutableRepository.java @@ -93,17 +93,12 @@ final class RoutableRepository { log.log(Level.SEVERE,"Can not encode routable type " + type + " (version " + version + "). 
Only major version 5 and up supported."); return new byte[0]; } - DocumentSerializer out= DocumentSerializerFactory.createHead(new GrowableByteBuffer(8192)); - - out.putInt(null, type); - if (!factory.encode(obj, out)) { + byte[] ret = factory.encode(type, obj); + if (ret == null) { log.log(Level.SEVERE, "Routable factory " + factory.getClass().getName() + " failed to serialize " + "routable of type " + type + " (version " + version + ")."); return new byte[0]; } - byte[] ret = new byte[out.getBuf().position()]; - out.getBuf().rewind(); - out.getBuf().get(ret); return ret; } |