summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author Jon Bratseth <bratseth@oath.com> 2019-06-16 18:26:26 +0200
committer GitHub <noreply@github.com> 2019-06-16 18:26:26 +0200
commit c42b06f4843c55233197bc0eafe3426f6f5d7f3a (patch)
tree dc9407032f3028449b9694057deb8e3e2f009869
parent f1110ab151c0dcaf9790dbbdfac986f32d00384c (diff)
parent 51e46a95176c1e24f878a8bee874818c390c0652 (diff)
Merge pull request #9818 from vespa-engine/bratseth/tensor-summary-features
Decode tensor summary features
-rw-r--r-- container-search/abi-spec.json | 5
-rw-r--r-- container-search/src/main/java/com/yahoo/data/JsonProducer.java | 4
-rw-r--r-- container-search/src/main/java/com/yahoo/prelude/fastsearch/FeatureDataField.java | 13
-rw-r--r-- container-search/src/main/java/com/yahoo/prelude/fastsearch/LongstringField.java | 4
-rw-r--r-- container-search/src/main/java/com/yahoo/prelude/searcher/JSONDebugSearcher.java | 3
-rw-r--r-- container-search/src/main/java/com/yahoo/search/result/FeatureData.java | 90
-rw-r--r-- container-search/src/main/java/com/yahoo/search/result/PositionsData.java | 2
-rw-r--r-- container-search/src/test/java/com/yahoo/search/result/FeatureDataTestCase.java | 52
-rw-r--r-- vespajlib/abi-spec.json | 17
-rw-r--r-- vespajlib/src/main/java/com/yahoo/data/access/simple/JsonRender.java | 49
-rw-r--r-- vespajlib/src/main/java/com/yahoo/tensor/serialization/JsonFormat.java | 20
11 files changed, 205 insertions, 54 deletions
diff --git a/container-search/abi-spec.json b/container-search/abi-spec.json
index 9474c9f9160..06713d14d88 100644
--- a/container-search/abi-spec.json
+++ b/container-search/abi-spec.json
@@ -7148,7 +7148,10 @@
"public com.yahoo.data.access.Inspector inspect()",
"public java.lang.String toString()",
"public java.lang.String toJson()",
- "public java.lang.StringBuilder writeJson(java.lang.StringBuilder)"
+ "public java.lang.StringBuilder writeJson(java.lang.StringBuilder)",
+ "public java.lang.Double getDouble(java.lang.String)",
+ "public com.yahoo.tensor.Tensor getTensor(java.lang.String)",
+ "public java.util.Set featureNames()"
],
"fields": []
},
diff --git a/container-search/src/main/java/com/yahoo/data/JsonProducer.java b/container-search/src/main/java/com/yahoo/data/JsonProducer.java
index 6d925b41379..c9dc0946a3e 100644
--- a/container-search/src/main/java/com/yahoo/data/JsonProducer.java
+++ b/container-search/src/main/java/com/yahoo/data/JsonProducer.java
@@ -12,6 +12,7 @@ public interface JsonProducer {
* be human-readable and containing embedded newlines; also the
* exact indentation etc may change, so use compact=true for a
* canonical format.
+ *
* @param target the StringBuilder to append to.
* @return the target passed in is also returned (to allow chaining).
*/
@@ -20,7 +21,8 @@ public interface JsonProducer {
/**
* Convenience method equivalent to:
* writeJson(new StringBuilder()).toString()
- * @return String containing JSON representation of this object's data.
+ *
+ * @return a String containing JSON representation of this object's data.
*/
default String toJson() {
return writeJson(new StringBuilder()).toString();
diff --git a/container-search/src/main/java/com/yahoo/prelude/fastsearch/FeatureDataField.java b/container-search/src/main/java/com/yahoo/prelude/fastsearch/FeatureDataField.java
index 1f60dd3d1cf..b0003f4321e 100644
--- a/container-search/src/main/java/com/yahoo/prelude/fastsearch/FeatureDataField.java
+++ b/container-search/src/main/java/com/yahoo/prelude/fastsearch/FeatureDataField.java
@@ -6,9 +6,8 @@ import com.yahoo.data.access.Type;
import com.yahoo.search.result.FeatureData;
/**
- * Class representing a "feature data" field. This was historically
- * just a string containing JSON; now it's a structure of
- * data (that will be rendered as JSON by default).
+ * Class representing a "feature data" field: A map of values which are
+ * either floats or tensors.
*/
public class FeatureDataField extends LongstringField {
@@ -23,12 +22,8 @@ public class FeatureDataField extends LongstringField {
@Override
public Object convert(Inspector value) {
- if (! value.valid()) {
- return null;
- }
- if (value.type() == Type.STRING) {
- return value.asString();
- }
+ if ( ! value.valid()) return null;
+ if (value.type() == Type.STRING) return value.asString();
return new FeatureData(value);
}
diff --git a/container-search/src/main/java/com/yahoo/prelude/fastsearch/LongstringField.java b/container-search/src/main/java/com/yahoo/prelude/fastsearch/LongstringField.java
index 2f9c6d5b325..5de38e43c96 100644
--- a/container-search/src/main/java/com/yahoo/prelude/fastsearch/LongstringField.java
+++ b/container-search/src/main/java/com/yahoo/prelude/fastsearch/LongstringField.java
@@ -5,10 +5,6 @@
*/
package com.yahoo.prelude.fastsearch;
-import java.nio.ByteBuffer;
-
-import com.yahoo.io.SlowInflate;
-import com.yahoo.text.Utf8;
import com.yahoo.data.access.Inspector;
/**
diff --git a/container-search/src/main/java/com/yahoo/prelude/searcher/JSONDebugSearcher.java b/container-search/src/main/java/com/yahoo/prelude/searcher/JSONDebugSearcher.java
index 2330ca2382a..5f921b67702 100644
--- a/container-search/src/main/java/com/yahoo/prelude/searcher/JSONDebugSearcher.java
+++ b/container-search/src/main/java/com/yahoo/prelude/searcher/JSONDebugSearcher.java
@@ -3,6 +3,7 @@ package com.yahoo.prelude.searcher;
import com.yahoo.prelude.fastsearch.FastHit;
import com.yahoo.prelude.hitfield.JSONString;
+import com.yahoo.search.Query;
import com.yahoo.search.Result;
import com.yahoo.search.Searcher;
import com.yahoo.processing.request.CompoundName;
@@ -27,7 +28,7 @@ public class JSONDebugSearcher extends Searcher {
private static CompoundName PROPERTYNAME = new CompoundName("dumpjson");
@Override
- public Result search(com.yahoo.search.Query query, Execution execution) {
+ public Result search(Query query, Execution execution) {
Result r = execution.search(query);
String propertyName = query.properties().getString(PROPERTYNAME);
if (propertyName != null) {
diff --git a/container-search/src/main/java/com/yahoo/search/result/FeatureData.java b/container-search/src/main/java/com/yahoo/search/result/FeatureData.java
index 53e77631ff9..7e5d6b12f30 100644
--- a/container-search/src/main/java/com/yahoo/search/result/FeatureData.java
+++ b/container-search/src/main/java/com/yahoo/search/result/FeatureData.java
@@ -6,29 +6,42 @@ import com.yahoo.data.access.Inspectable;
import com.yahoo.data.access.Type;
import com.yahoo.data.JsonProducer;
import com.yahoo.data.access.simple.JsonRender;
+import com.yahoo.io.GrowableByteBuffer;
+import com.yahoo.tensor.Tensor;
+import com.yahoo.tensor.serialization.JsonFormat;
+import com.yahoo.tensor.serialization.TypedBinaryFormat;
+
+import java.nio.charset.StandardCharsets;
+import java.util.HashSet;
+import java.util.Optional;
+import java.util.Set;
/**
- * A wrapper for structured data representing feature values.
+ * A wrapper for structured data representing feature values: A map of floats and tensors.
+ * This class is not thread safe even when it is only consumed.
*/
public class FeatureData implements Inspectable, JsonProducer {
private final Inspector value;
+ private Set<String> featureNames = null;
+
public FeatureData(Inspector value) {
this.value = value;
}
+ /**
+ * Returns the fields of this as an inspector, where tensors are represented as binary data
+ * which can be decoded using
+ * <code>com.yahoo.tensor.serialization.TypedBinaryFormat.decode(Optional.empty(), GrowableByteBuffer.wrap(featureValue.asData()))</code>
+ */
@Override
- public Inspector inspect() {
- return value;
- }
+ public Inspector inspect() { return value; }
+ @Override
public String toString() {
- if (value.type() == Type.EMPTY) {
- return "";
- } else {
- return toJson();
- }
+ if (value.type() == Type.EMPTY) return "";
+ return toJson();
}
@Override
@@ -38,7 +51,64 @@ public class FeatureData implements Inspectable, JsonProducer {
@Override
public StringBuilder writeJson(StringBuilder target) {
- return JsonRender.render(value, target, true);
+ return JsonRender.render(value, new Encoder(target, true));
+ }
+
+ /**
+ * Returns the value of a scalar feature, or null if it is not present.
+ *
+ * @throws IllegalArgumentException if the value exists but isn't a scalar
+ * (that is, if it is a tensor with nonzero rank)
+ */
+ public Double getDouble(String featureName) {
+ Inspector featureValue = value.field(featureName);
+ if ( ! featureValue.valid()) return null;
+
+ switch (featureValue.type()) {
+ case DOUBLE: return featureValue.asDouble();
+ case DATA: throw new IllegalArgumentException("Feature '" + featureName + "' is a tensor, not a double");
+ default: throw new IllegalStateException("Unexpected feature value type " + featureValue.type());
+ }
+ }
+
+ /**
+ * Returns the value of a tensor feature, or null if it is not present.
+ * This will return any feature value: Scalars are returned as a rank 0 tensor.
+ */
+ public Tensor getTensor(String featureName) {
+ Inspector featureValue = value.field(featureName);
+ if ( ! featureValue.valid()) return null;
+
+ switch (featureValue.type()) {
+ case DOUBLE: return Tensor.from(featureValue.asDouble());
+ case DATA: return TypedBinaryFormat.decode(Optional.empty(), GrowableByteBuffer.wrap(featureValue.asData()));
+ default: throw new IllegalStateException("Unexpected feature value type " + featureValue.type());
+ }
+ }
+
+ /** Returns the names of the features available in this */
+ public Set<String> featureNames() {
+ if (featureNames != null) return featureNames;
+
+ featureNames = new HashSet<>();
+ value.fields().forEach(field -> featureNames.add(field.getKey()));
+ return featureNames;
+ }
+
+ /** A JSON encoder which encodes DATA as a tensor */
+ private static class Encoder extends JsonRender.StringEncoder {
+
+ Encoder(StringBuilder out, boolean compact) {
+ super(out, compact);
+ }
+
+ @Override
+ public void encodeDATA(byte[] value) {
+ // This could be done more efficiently ...
+ target().append(new String(JsonFormat.encodeWithType(TypedBinaryFormat.decode(Optional.empty(), GrowableByteBuffer.wrap(value))),
+ StandardCharsets.UTF_8));
+ }
+
}
}
diff --git a/container-search/src/main/java/com/yahoo/search/result/PositionsData.java b/container-search/src/main/java/com/yahoo/search/result/PositionsData.java
index 483849a5435..203e0206f1e 100644
--- a/container-search/src/main/java/com/yahoo/search/result/PositionsData.java
+++ b/container-search/src/main/java/com/yahoo/search/result/PositionsData.java
@@ -10,7 +10,7 @@ import com.yahoo.data.access.simple.JsonRender;
/**
* A wrapper for structured data representing an array of position values.
- **/
+ */
public class PositionsData implements Inspectable, JsonProducer, XmlProducer {
private final Inspector value;
diff --git a/container-search/src/test/java/com/yahoo/search/result/FeatureDataTestCase.java b/container-search/src/test/java/com/yahoo/search/result/FeatureDataTestCase.java
new file mode 100644
index 00000000000..9cc7cc743fc
--- /dev/null
+++ b/container-search/src/test/java/com/yahoo/search/result/FeatureDataTestCase.java
@@ -0,0 +1,52 @@
+// Copyright 2019 Oath Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.search.result;
+
+import com.yahoo.data.access.slime.SlimeAdapter;
+import com.yahoo.slime.Cursor;
+import com.yahoo.slime.Slime;
+import com.yahoo.tensor.Tensor;
+import com.yahoo.tensor.serialization.TypedBinaryFormat;
+import org.junit.Test;
+
+import java.util.stream.Collectors;
+
+import static org.junit.Assert.assertEquals;
+
+/**
+ * @author bratseth
+ */
+public class FeatureDataTestCase {
+
+ private static final double delta = 0.00000001;
+
+ @Test
+ public void testFeatureData() {
+ Cursor features = new Slime().setObject();
+ features.setDouble("scalar1", 1.5);
+ features.setDouble("scalar2", 2.5);
+ Tensor tensor1 = Tensor.from("tensor(x[3]):[1.5, 2, 2.5]");
+ features.setData("tensor1", TypedBinaryFormat.encode(tensor1));
+ Tensor tensor2 = Tensor.from(0.5);
+ features.setData("tensor2", TypedBinaryFormat.encode(tensor2));
+
+ FeatureData featureData = new FeatureData(new SlimeAdapter(features));
+ assertEquals("scalar1,scalar2,tensor1,tensor2",
+ featureData.featureNames().stream().sorted().collect(Collectors.joining(",")));
+ assertEquals(1.5, featureData.getDouble("scalar1"), delta);
+ assertEquals(2.5, featureData.getDouble("scalar2"), delta);
+ assertEquals(Tensor.from(1.5), featureData.getTensor("scalar1"));
+ assertEquals(Tensor.from(2.5), featureData.getTensor("scalar2"));
+ assertEquals(tensor1, featureData.getTensor("tensor1"));
+ assertEquals(tensor2, featureData.getTensor("tensor2"));
+
+ String expectedJson =
+ "{" +
+ "\"scalar1\":1.5," +
+ "\"scalar2\":2.5," +
+ "\"tensor1\":{\"type\":\"tensor(x[3])\",\"cells\":[{\"address\":{\"x\":\"0\"},\"value\":1.5},{\"address\":{\"x\":\"1\"},\"value\":2.0},{\"address\":{\"x\":\"2\"},\"value\":2.5}]}," +
+ "\"tensor2\":{\"type\":\"tensor()\",\"cells\":[{\"address\":{},\"value\":0.5}]}" +
+ "}";
+ assertEquals(expectedJson, featureData.toJson());
+ }
+
+}
diff --git a/vespajlib/abi-spec.json b/vespajlib/abi-spec.json
index b2b895040bc..3b733105d2e 100644
--- a/vespajlib/abi-spec.json
+++ b/vespajlib/abi-spec.json
@@ -104,14 +104,22 @@
"com.yahoo.data.access.ObjectTraverser"
],
"attributes": [
- "public",
- "final"
+ "public"
],
"methods": [
"public void <init>(java.lang.StringBuilder, boolean)",
"public void encode(com.yahoo.data.access.Inspector)",
+ "protected void encodeEMPTY()",
+ "protected void encodeBOOL(boolean)",
+ "protected void encodeLONG(long)",
+ "protected void encodeDOUBLE(double)",
+ "protected void encodeSTRING(java.lang.String)",
+ "protected void encodeDATA(byte[])",
+ "protected void encodeARRAY(com.yahoo.data.access.Inspector)",
+ "protected void encodeOBJECT(com.yahoo.data.access.Inspector)",
"public void entry(int, com.yahoo.data.access.Inspector)",
- "public void field(java.lang.String, com.yahoo.data.access.Inspector)"
+ "public void field(java.lang.String, com.yahoo.data.access.Inspector)",
+ "public java.lang.StringBuilder target()"
],
"fields": []
},
@@ -124,7 +132,8 @@
],
"methods": [
"public void <init>()",
- "public static java.lang.StringBuilder render(com.yahoo.data.access.Inspectable, java.lang.StringBuilder, boolean)"
+ "public static java.lang.StringBuilder render(com.yahoo.data.access.Inspectable, java.lang.StringBuilder, boolean)",
+ "public static java.lang.StringBuilder render(com.yahoo.data.access.Inspectable, com.yahoo.data.access.simple.JsonRender$StringEncoder)"
],
"fields": []
},
diff --git a/vespajlib/src/main/java/com/yahoo/data/access/simple/JsonRender.java b/vespajlib/src/main/java/com/yahoo/data/access/simple/JsonRender.java
index 253b0c60927..9f662c77c59 100644
--- a/vespajlib/src/main/java/com/yahoo/data/access/simple/JsonRender.java
+++ b/vespajlib/src/main/java/com/yahoo/data/access/simple/JsonRender.java
@@ -11,19 +11,25 @@ import com.yahoo.data.access.ObjectTraverser;
*
* @author arnej27959
*/
-public final class JsonRender
-{
+public final class JsonRender {
+
public static StringBuilder render(Inspectable value,
StringBuilder target,
- boolean compact)
- {
- StringEncoder enc = new StringEncoder(target, compact);
- enc.encode(value.inspect());
- return target;
+ boolean compact) {
+ return render(value, new StringEncoder(target, compact));
+ }
+
+ /**
+ * Renders the given value to the target stringbuilder with a given encoder.
+ * This is useful to use an encoder where rendering of some value types is customized.
+ */
+ public static StringBuilder render(Inspectable value, StringEncoder encoder) {
+ encoder.encode(value.inspect());
+ return encoder.target();
}
- public static final class StringEncoder implements ArrayTraverser, ObjectTraverser
- {
+ public static class StringEncoder implements ArrayTraverser, ObjectTraverser {
+
private final StringBuilder out;
private boolean head = true;
private boolean compact;
@@ -41,21 +47,21 @@ public final class JsonRender
}
}
- private void encodeEMPTY() {
+ protected void encodeEMPTY() {
out.append("null");
}
- private void encodeBOOL(boolean value) {
+ protected void encodeBOOL(boolean value) {
out.append(value ? "true" : "false");
}
- private void encodeLONG(long value) {
- out.append(String.valueOf(value));
+ protected void encodeLONG(long value) {
+ out.append(value);
}
- private void encodeDOUBLE(double value) {
+ protected void encodeDOUBLE(double value) {
if (Double.isFinite(value)) {
- out.append(String.valueOf(value));
+ out.append(value);
} else {
out.append("null");
}
@@ -63,7 +69,7 @@ public final class JsonRender
static final char[] hex = "0123456789ABCDEF".toCharArray();
- private void encodeSTRING(String value) {
+ protected void encodeSTRING(String value) {
out.append('"');
for (char c : value.toCharArray()) {
switch (c) {
@@ -89,7 +95,7 @@ public final class JsonRender
out.append('"');
}
- private void encodeDATA(byte[] value) {
+ protected void encodeDATA(byte[] value) {
out.append('"');
out.append("0x");
for (int pos = 0; pos < value.length; pos++) {
@@ -99,14 +105,14 @@ public final class JsonRender
out.append('"');
}
- private void encodeARRAY(Inspector inspector) {
+ protected void encodeARRAY(Inspector inspector) {
openScope("[");
ArrayTraverser at = this;
inspector.traverse(at);
closeScope("]");
}
- private void encodeOBJECT(Inspector inspector) {
+ protected void encodeOBJECT(Inspector inspector) {
openScope("{");
ObjectTraverser ot = this;
inspector.traverse(ot);
@@ -164,5 +170,10 @@ public final class JsonRender
out.append(' ');
encodeValue(inspector);
}
+
+ /** Returns the target this is encoding values to */
+ public StringBuilder target() { return out; }
+
}
+
}
diff --git a/vespajlib/src/main/java/com/yahoo/tensor/serialization/JsonFormat.java b/vespajlib/src/main/java/com/yahoo/tensor/serialization/JsonFormat.java
index 52635905d72..1a210a614cc 100644
--- a/vespajlib/src/main/java/com/yahoo/tensor/serialization/JsonFormat.java
+++ b/vespajlib/src/main/java/com/yahoo/tensor/serialization/JsonFormat.java
@@ -19,21 +19,33 @@ import java.util.Iterator;
* A JSON map containing a 'cells' array.
* See http://docs.vespa.ai/documentation/reference/document-json-put-format.html#tensor
*/
-// TODO: We should probably move reading of this format from the document module to here
public class JsonFormat {
- /** Serializes the given tensor into JSON format */
+ /** Serializes the given tensor value into JSON format */
public static byte[] encode(Tensor tensor) {
Slime slime = new Slime();
Cursor root = slime.setObject();
- Cursor cellsArray = root.setArray("cells");
+ encodeCells(tensor, root);
+ return com.yahoo.slime.JsonFormat.toJsonBytes(slime);
+ }
+
+ /** Serializes the given tensor type and value into JSON format */
+ public static byte[] encodeWithType(Tensor tensor) {
+ Slime slime = new Slime();
+ Cursor root = slime.setObject();
+ root.setString("type", tensor.type().toString());
+ encodeCells(tensor, root);
+ return com.yahoo.slime.JsonFormat.toJsonBytes(slime);
+ }
+
+ private static void encodeCells(Tensor tensor, Cursor rootObject) {
+ Cursor cellsArray = rootObject.setArray("cells");
for (Iterator<Tensor.Cell> i = tensor.cellIterator(); i.hasNext(); ) {
Tensor.Cell cell = i.next();
Cursor cellObject = cellsArray.addObject();
encodeAddress(tensor.type(), cell.getKey(), cellObject.setObject("address"));
cellObject.setDouble("value", cell.getValue());
}
- return com.yahoo.slime.JsonFormat.toJsonBytes(slime);
}
private static void encodeAddress(TensorType type, TensorAddress address, Cursor addressObject) {