summaryrefslogtreecommitdiffstats
path: root/vespajlib/src/main/java/com/yahoo/slime
diff options
context:
space:
mode:
authorHåvard Pettersen <havardpe@yahooinc.com>2023-03-22 14:04:17 +0000
committerHåvard Pettersen <havardpe@yahooinc.com>2023-03-28 15:10:13 +0000
commitff93a0645424847199c9696863a3fbd4bc8aa394 (patch)
tree4392b23f4b835d239c9f1973d5ff7e8e420f87e1 /vespajlib/src/main/java/com/yahoo/slime
parent3ec23c024c62ab2e073343660ca8e349c4001372 (diff)
BinaryView; inspect slime value in binary format
Use int instead of long for stand-alone compressed values (sizes and symbol ids). Also added overflow/wrap-around checks for these values to avoid things like infinite recursion due to negative buffer skips during DecodeIndex creation. This makes decoding fail in more deterministic ways and also aligns with Java using int for sizes.
Diffstat (limited to 'vespajlib/src/main/java/com/yahoo/slime')
-rw-r--r--vespajlib/src/main/java/com/yahoo/slime/BinaryDecoder.java53
-rw-r--r--vespajlib/src/main/java/com/yahoo/slime/BinaryEncoder.java14
-rw-r--r--vespajlib/src/main/java/com/yahoo/slime/BinaryView.java307
-rw-r--r--vespajlib/src/main/java/com/yahoo/slime/BufferedInput.java30
-rw-r--r--vespajlib/src/main/java/com/yahoo/slime/Slime.java2
-rw-r--r--vespajlib/src/main/java/com/yahoo/slime/Value.java4
6 files changed, 363 insertions, 47 deletions
diff --git a/vespajlib/src/main/java/com/yahoo/slime/BinaryDecoder.java b/vespajlib/src/main/java/com/yahoo/slime/BinaryDecoder.java
index b9aa8e5cf22..af6a2ae80a3 100644
--- a/vespajlib/src/main/java/com/yahoo/slime/BinaryDecoder.java
+++ b/vespajlib/src/main/java/com/yahoo/slime/BinaryDecoder.java
@@ -23,7 +23,7 @@ final class BinaryDecoder {
public Slime decode(byte[] bytes, int offset, int length) {
Slime slime = new Slime();
in = new BufferedInput(bytes, offset, length);
- decodeSymbolTable(slime);
+ decodeSymbolTable(in, slime.symbolTable());
decodeValue(slimeInserter.adjust(slime));
if (in.failed()) {
slime.wrap("partial_result");
@@ -33,22 +33,6 @@ final class BinaryDecoder {
return slime;
}
- long read_cmpr_long() {
- long next = in.getByte();
- long value = (next & 0x7f);
- int shift = 7;
- while ((next & 0x80) != 0) {
- next = in.getByte();
- value |= ((next & 0x7f) << shift);
- shift += 7;
- }
- return value;
- }
-
- long read_size(int meta) {
- return (meta == 0) ? read_cmpr_long() : (meta - 1);
- }
-
long read_bytes_le(int bytes) {
long value = 0;
int shift = 0;
@@ -90,22 +74,20 @@ final class BinaryDecoder {
}
Cursor decodeSTRING(Inserter inserter, int meta) {
- long size = read_size(meta);
- int sz = (int)size; // XXX
- byte[] image = in.getBytes(sz);
+ int size = in.read_size(meta);
+ byte[] image = in.getBytes(size);
return inserter.insertSTRING(image);
}
Cursor decodeDATA(Inserter inserter, int meta) {
- long size = read_size(meta);
- int sz = (int)size; // XXX
- byte[] image = in.getBytes(sz);
+ int size = in.read_size(meta);
+ byte[] image = in.getBytes(size);
return inserter.insertDATA(image);
}
Cursor decodeARRAY(Inserter inserter, int meta) {
Cursor cursor = inserter.insertARRAY();
- long size = read_size(meta);
+ int size = in.read_size(meta);
for (int i = 0; i < size; ++i) {
decodeValue(arrayInserter.adjust(cursor));
}
@@ -114,10 +96,9 @@ final class BinaryDecoder {
Cursor decodeOBJECT(Inserter inserter, int meta) {
Cursor cursor = inserter.insertOBJECT();
- long size = read_size(meta);
+ int size = in.read_size(meta);
for (int i = 0; i < size; ++i) {
- long l = read_cmpr_long();
- int symbol = (int)l; // check for overflow?
+ int symbol = in.read_cmpr_int();
decodeValue(objectInserter.adjust(cursor, symbol));
}
return cursor;
@@ -146,20 +127,18 @@ final class BinaryDecoder {
}
}
- void decodeSymbolTable(Slime slime) {
- long numSymbols = read_cmpr_long();
- final byte [] backing = in.getBacking();
+ static void decodeSymbolTable(BufferedInput input, SymbolTable names) {
+ int numSymbols = input.read_cmpr_int();
+ final byte[] backing = input.getBacking();
for (int i = 0; i < numSymbols; ++i) {
- long size = read_cmpr_long();
- int sz = (int)size; // XXX
- int offset = in.getPosition();
- in.skip(sz);
- int symbol = slime.insert(Utf8Codec.decode(backing, offset, sz));
+ int size = input.read_cmpr_int();
+ int offset = input.getPosition();
+ input.skip(size);
+ int symbol = names.insert(Utf8Codec.decode(backing, offset, size));
if (symbol != i) {
- in.fail("duplicate symbols in symbol table");
+ input.fail("duplicate symbols in symbol table");
return;
}
}
}
-
}
diff --git a/vespajlib/src/main/java/com/yahoo/slime/BinaryEncoder.java b/vespajlib/src/main/java/com/yahoo/slime/BinaryEncoder.java
index 7da85b5cb63..f12496f7a76 100644
--- a/vespajlib/src/main/java/com/yahoo/slime/BinaryEncoder.java
+++ b/vespajlib/src/main/java/com/yahoo/slime/BinaryEncoder.java
@@ -24,7 +24,7 @@ final class BinaryEncoder implements ArrayTraverser, ObjectSymbolTraverser {
}
- void encode_cmpr_long(long value) {
+ void encode_cmpr_int(int value) {
byte next = (byte)(value & 0x7f);
value >>>= 7; // unsigned shift
while (value != 0) {
@@ -36,12 +36,12 @@ final class BinaryEncoder implements ArrayTraverser, ObjectSymbolTraverser {
out.put(next);
}
- void write_type_and_size(int type, long size) {
+ void write_type_and_size(int type, int size) {
if (size <= 30) {
- out.put(encode_type_and_meta(type, (int)(size + 1)));
+ out.put(encode_type_and_meta(type, size + 1));
} else {
out.put(encode_type_and_meta(type, 0));
- encode_cmpr_long(size);
+ encode_cmpr_int(size);
}
}
@@ -125,11 +125,11 @@ final class BinaryEncoder implements ArrayTraverser, ObjectSymbolTraverser {
void encodeSymbolTable(Slime slime) {
int numSymbols = slime.symbols();
- encode_cmpr_long(numSymbols);
+ encode_cmpr_int(numSymbols);
for (int i = 0 ; i < numSymbols; ++i) {
String name = slime.inspect(i);
byte[] bytes = Utf8Codec.encode(name);
- encode_cmpr_long(bytes.length);
+ encode_cmpr_int(bytes.length);
out.put(bytes);
}
}
@@ -139,7 +139,7 @@ final class BinaryEncoder implements ArrayTraverser, ObjectSymbolTraverser {
}
public void field(int symbol, Inspector inspector) {
- encode_cmpr_long(symbol);
+ encode_cmpr_int(symbol);
encodeValue(inspector);
}
diff --git a/vespajlib/src/main/java/com/yahoo/slime/BinaryView.java b/vespajlib/src/main/java/com/yahoo/slime/BinaryView.java
new file mode 100644
index 00000000000..0e111d42061
--- /dev/null
+++ b/vespajlib/src/main/java/com/yahoo/slime/BinaryView.java
@@ -0,0 +1,307 @@
+// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.slime;
+
+import java.util.function.Consumer;
+import static com.yahoo.slime.BinaryFormat.decode_double;
+import static com.yahoo.slime.BinaryFormat.decode_meta;
+import static com.yahoo.slime.BinaryFormat.decode_type;
+import static com.yahoo.slime.BinaryFormat.decode_zigzag;
+
+/**
+ * A read-only view of a Slime value that is stored in binary format.
+ **/
+public final class BinaryView implements Inspector {
+
+ private final byte[] data;
+ private final SymbolTable names;
+ private final DecodeIndex index;
+ private final int self;
+
+ private BinaryView(byte[] data, SymbolTable names, DecodeIndex index, int self) {
+ this.data = data;
+ this.names = names;
+ this.index = index;
+ this.self = self;
+ }
+ private int peek_cmpr_int(int idx) {
+ long next = data[idx++];
+ long value = (next & 0x7f);
+ int shift = 7;
+ while ((next & 0x80) != 0) {
+ next = data[idx++];
+ value |= ((next & 0x7f) << shift);
+ shift += 7;
+ }
+ return (int)value;
+ }
+ private int skip_cmpr_int(int idx) {
+ while ((data[idx++] & 0x80) != 0);
+ return idx;
+ }
+ private int extract_children(int idx) {
+ int bytes = decode_meta(data[idx++]);
+ return (bytes == 0)
+ ? peek_cmpr_int(idx)
+ : (bytes - 1);
+ }
+ private long extract_long(int idx) {
+ int bytes = decode_meta(data[idx++]);
+ long value = 0;
+ int shift = 0;
+ for (int i = 0; i < bytes; ++i) {
+ long b = data[idx++];
+ value |= (b & 0xff) << shift;
+ shift += 8;
+ }
+ return decode_zigzag(value);
+ }
+ private double extract_double(int idx) {
+ int bytes = decode_meta(data[idx++]);
+ long value = 0;
+ int shift = 56;
+ for (int i = 0; i < bytes; ++i) {
+ long b = data[idx++];
+ value |= (b & 0xff) << shift;
+ shift -= 8;
+ }
+ return decode_double(value);
+ }
+ private String extract_string(int idx) {
+ int bytes = decode_meta(data[idx++]);
+ if (bytes == 0) {
+ bytes = peek_cmpr_int(idx);
+ idx = skip_cmpr_int(idx);
+ } else {
+ --bytes;
+ }
+ return Utf8Codec.decode(data, idx, bytes);
+ }
+ private byte[] extract_bytes(int idx) {
+ int bytes = decode_meta(data[idx++]);
+ if (bytes == 0) {
+ bytes = peek_cmpr_int(idx);
+ idx = skip_cmpr_int(idx);
+ } else {
+ --bytes;
+ }
+ byte[] ret = new byte[bytes];
+ for (int i = 0; i < bytes; ++i) {
+ ret[i] = data[idx++];
+ }
+ return ret;
+ }
+ private Inspector find_field(int pos, int len, int sym) {
+ for (int i = 0; i < len; ++i) {
+ int idx = index.getByteOffset(pos + i);
+ if (peek_cmpr_int(idx - (index.getExtBits(pos + i) + 1)) == sym) {
+ return new BinaryView(data, names, index, pos + i);
+ }
+ }
+ return NixValue.invalid();
+ }
+
+ @Override public boolean valid() { return true; }
+ @Override public void ifValid(Consumer<Inspector> consumer) { consumer.accept(this); }
+ @Override public Type type() { return decode_type(data[index.getByteOffset(self)]); }
+ @Override public int children() {
+ return switch (type()) {
+ case OBJECT, ARRAY -> extract_children(index.getByteOffset(self));
+ default -> 0;
+ };
+ }
+ @Override public int entries() {
+ return switch (type()) {
+ case ARRAY -> extract_children(index.getByteOffset(self));
+ default -> 0;
+ };
+ }
+ @Override public int fields() {
+ return switch (type()) {
+ case OBJECT -> extract_children(index.getByteOffset(self));
+ default -> 0;
+ };
+ }
+ @Override public boolean asBool() {
+ return switch (type()) {
+ case BOOL -> (decode_meta(data[index.getByteOffset(self)]) != 0);
+ default -> false;
+ };
+ }
+ @Override public long asLong() {
+ return switch (type()) {
+ case LONG -> extract_long(index.getByteOffset(self));
+ case DOUBLE -> (long)extract_double(index.getByteOffset(self));
+ default -> 0;
+ };
+ }
+ @Override public double asDouble() {
+ return switch (type()) {
+ case LONG -> extract_long(index.getByteOffset(self));
+ case DOUBLE -> extract_double(index.getByteOffset(self));
+ default -> 0.0;
+ };
+ }
+ @Override public String asString() {
+ return switch (type()) {
+ case STRING -> extract_string(index.getByteOffset(self));
+ default -> Value.emptyString;
+ };
+ }
+ @Override public byte[] asUtf8() {
+ return switch (type()) {
+ case STRING -> extract_bytes(index.getByteOffset(self));
+ default -> Value.emptyData;
+ };
+ }
+ @Override public byte[] asData() {
+ return switch (type()) {
+ case DATA -> extract_bytes(index.getByteOffset(self));
+ default -> Value.emptyData;
+ };
+ }
+ @Override public void accept(Visitor v) {
+ switch (type()) {
+ case NIX: v.visitNix(); break;
+ case BOOL: v.visitBool(decode_meta(data[index.getByteOffset(self)]) != 0); break;
+ case LONG: v.visitLong(extract_long(index.getByteOffset(self))); break;
+ case DOUBLE: v.visitDouble(extract_double(index.getByteOffset(self))); break;
+ case STRING: v.visitString(extract_bytes(index.getByteOffset(self))); break;
+ case DATA: v.visitData(extract_bytes(index.getByteOffset(self))); break;
+ case ARRAY: v.visitArray(this); break;
+ case OBJECT: v.visitObject(this); break;
+ default: throw new RuntimeException("should not be reached");
+ }
+ }
+ @Override public void traverse(ArrayTraverser at) {
+ int pos = index.getFirstChild(self);
+ int len = entries();
+ for (int i = 0; i < len; ++i) {
+ at.entry(i, new BinaryView(data, names, index, pos + i));
+ }
+ }
+ @Override public void traverse(ObjectSymbolTraverser ot) {
+ int pos = index.getFirstChild(self);
+ int len = fields();
+ for (int i = 0; i < len; ++i) {
+ int sym = peek_cmpr_int(index.getByteOffset(pos + i) - (index.getExtBits(pos + i) + 1));
+ ot.field(sym, new BinaryView(data, names, index, pos + i));
+ }
+ }
+ @Override public void traverse(ObjectTraverser ot) {
+ int pos = index.getFirstChild(self);
+ int len = fields();
+ for (int i = 0; i < len; ++i) {
+ int sym = peek_cmpr_int(index.getByteOffset(pos + i) - (index.getExtBits(pos + i) + 1));
+ ot.field(names.inspect(sym), new BinaryView(data, names, index, pos + i));
+ }
+ }
+ @Override public Inspector entry(int idx) {
+ int limit = entries();
+ if (idx >= 0 && idx < limit) {
+ return new BinaryView(data, names, index, index.getFirstChild(self) + idx);
+ }
+ return NixValue.invalid();
+ }
+ @Override public Inspector field(int sym) {
+ int limit = fields();
+ if (limit > 0 && sym != SymbolTable.INVALID) {
+ return find_field(index.getFirstChild(self), limit, sym);
+ }
+ return NixValue.invalid();
+ }
+ @Override public Inspector field(String name) {
+ int limit = fields();
+ if (limit > 0) {
+ int sym = names.lookup(name);
+ if (sym != SymbolTable.INVALID) {
+ return find_field(index.getFirstChild(self), limit, sym);
+ }
+ }
+ return NixValue.invalid();
+ }
+
+ private static void buildIndex(BufferedInput input, DecodeIndex index, int self, int extBits) {
+ int pos = input.getPosition();
+ byte tag = input.getByte();
+ Type type = decode_type(tag);
+ int meta = decode_meta(tag);
+ switch (type) {
+ case NIX:
+ case BOOL:
+ index.set(self, pos, 0, extBits);
+ break;
+ case LONG:
+ case DOUBLE:
+ input.skip(meta);
+ index.set(self, pos, 0, extBits);
+ break;
+ case STRING:
+ case DATA: {
+ int size = input.read_size(meta);
+ input.skip(size);
+ index.set(self, pos, 0, extBits);
+ break; }
+ case ARRAY: {
+ int size = input.read_size(meta);
+ if (size > input.getBacking().length - index.size()) {
+ input.fail("decode index too big");
+ return;
+ }
+ int firstChild = index.reserve(size);
+ index.set(self, pos, firstChild, extBits);
+ for (int i = 0; i < size; ++i) {
+ buildIndex(input, index, firstChild + i, 0);
+ }
+ break; }
+ case OBJECT: {
+ int size = input.read_size(meta);
+ if (size > input.getBacking().length - index.size()) {
+ input.fail("decode index too big");
+ return;
+ }
+ int firstChild = index.reserve(size);
+ index.set(self, pos, firstChild, extBits);
+ for (int i = 0; i < size; ++i) {
+ int childExtBits = input.skip_cmpr_int();
+ if (childExtBits > 3) {
+ input.fail("symbol id too big");
+ return;
+ }
+ buildIndex(input, index, firstChild + i, childExtBits);
+ }
+ break; }
+ default: throw new RuntimeException("should not be reached");
+ }
+ }
+
+ static Inspector inspectImpl(BufferedInput input) {
+ var names = new SymbolTable();
+ var index = new DecodeIndex();
+ BinaryDecoder.decodeSymbolTable(input, names);
+ buildIndex(input, index, index.reserve(1), 0);
+ if (input.failed()) {
+ return NixValue.invalid();
+ }
+ return new BinaryView(input.getBacking(), names, index, 0);
+ }
+
+ public static Inspector inspect(byte[] data) {
+ return inspectImpl(new BufferedInput(data));
+ }
+
+ static int peek_cmpr_int_for_testing(byte[] data, int idx) {
+ return new BinaryView(data, null, null, -1).peek_cmpr_int(idx);
+ }
+ static int skip_cmpr_int_for_testing(byte[] data, int idx) {
+ return new BinaryView(data, null, null, -1).skip_cmpr_int(idx);
+ }
+ static int extract_children_for_testing(byte[] data, int idx) {
+ return new BinaryView(data, null, null, -1).extract_children(idx);
+ }
+ static long extract_long_for_testing(byte[] data, int idx) {
+ return new BinaryView(data, null, null, -1).extract_long(idx);
+ }
+ static double extract_double_for_testing(byte[] data, int idx) {
+ return new BinaryView(data, null, null, -1).extract_double(idx);
+ }
+}
diff --git a/vespajlib/src/main/java/com/yahoo/slime/BufferedInput.java b/vespajlib/src/main/java/com/yahoo/slime/BufferedInput.java
index ddbb25196b5..5c994d7f793 100644
--- a/vespajlib/src/main/java/com/yahoo/slime/BufferedInput.java
+++ b/vespajlib/src/main/java/com/yahoo/slime/BufferedInput.java
@@ -59,7 +59,7 @@ final class BufferedInput {
return ret;
}
- byte [] getBacking() { return source; }
+ byte[] getBacking() { return source; }
int getPosition() { return position; }
void skip(int size) {
if (position + size > end) {
@@ -80,4 +80,32 @@ final class BufferedInput {
}
return ret;
}
+
+ int read_cmpr_int() {
+ long next = getByte();
+ long value = (next & 0x7f);
+ int shift = 7;
+ while (shift < 32 && (next & 0x80) != 0) {
+ next = getByte();
+ value |= ((next & 0x7f) << shift);
+ shift += 7;
+ }
+ if (value > 0x7fff_ffffL) {
+ fail("compressed int overflow");
+ value = 0;
+ }
+ return (int)value;
+ }
+
+ int skip_cmpr_int() {
+ int extBits = 0;
+ while ((getByte() & 0x80) != 0) {
+ ++extBits;
+ }
+ return extBits;
+ }
+
+ int read_size(int meta) {
+ return (meta == 0) ? read_cmpr_int() : (meta - 1);
+ }
}
diff --git a/vespajlib/src/main/java/com/yahoo/slime/Slime.java b/vespajlib/src/main/java/com/yahoo/slime/Slime.java
index eba9226c8ef..7d29131cbdb 100644
--- a/vespajlib/src/main/java/com/yahoo/slime/Slime.java
+++ b/vespajlib/src/main/java/com/yahoo/slime/Slime.java
@@ -13,6 +13,8 @@ public final class Slime {
private final SymbolTable names = new SymbolTable();
private Value root = NixValue.instance();
+ SymbolTable symbolTable() { return names; }
+
/**
* Construct an empty Slime with an empty top-level value.
*/
diff --git a/vespajlib/src/main/java/com/yahoo/slime/Value.java b/vespajlib/src/main/java/com/yahoo/slime/Value.java
index 1943e77663f..6a1d7b2dd8e 100644
--- a/vespajlib/src/main/java/com/yahoo/slime/Value.java
+++ b/vespajlib/src/main/java/com/yahoo/slime/Value.java
@@ -13,8 +13,8 @@ import java.util.function.Consumer;
*/
abstract class Value implements Cursor {
- private static final String emptyString = "";
- private static final byte[] emptyData = new byte[0];
+ static final String emptyString = "";
+ static final byte[] emptyData = new byte[0];
public final boolean valid() { return this != NixValue.invalid(); }