diff options
11 files changed, 205 insertions, 54 deletions
diff --git a/container-search/abi-spec.json b/container-search/abi-spec.json index 9474c9f9160..06713d14d88 100644 --- a/container-search/abi-spec.json +++ b/container-search/abi-spec.json @@ -7148,7 +7148,10 @@ "public com.yahoo.data.access.Inspector inspect()", "public java.lang.String toString()", "public java.lang.String toJson()", - "public java.lang.StringBuilder writeJson(java.lang.StringBuilder)" + "public java.lang.StringBuilder writeJson(java.lang.StringBuilder)", + "public java.lang.Double getDouble(java.lang.String)", + "public com.yahoo.tensor.Tensor getTensor(java.lang.String)", + "public java.util.Set featureNames()" ], "fields": [] }, diff --git a/container-search/src/main/java/com/yahoo/data/JsonProducer.java b/container-search/src/main/java/com/yahoo/data/JsonProducer.java index 6d925b41379..c9dc0946a3e 100644 --- a/container-search/src/main/java/com/yahoo/data/JsonProducer.java +++ b/container-search/src/main/java/com/yahoo/data/JsonProducer.java @@ -12,6 +12,7 @@ public interface JsonProducer { * be human-readable and containing embedded newlines; also the * exact indentation etc may change, so use compact=true for a * canonical format. + * * @param target the StringBuilder to append to. * @return the target passed in is also returned (to allow chaining). */ @@ -20,7 +21,8 @@ public interface JsonProducer { /** * Convenience method equivalent to: * writeJson(new StringBuilder()).toString() - * @return String containing JSON representation of this object's data. + * + * @return a String containing JSON representation of this object's data. */ default String toJson() { return writeJson(new StringBuilder()).toString(); diff --git a/container-search/src/main/java/com/yahoo/prelude/fastsearch/FeatureDataField.java b/container-search/src/main/java/com/yahoo/prelude/fastsearch/FeatureDataField.java index 1f60dd3d1cf..b0003f4321e 100644 --- a/container-search/src/main/java/com/yahoo/prelude/fastsearch/FeatureDataField.java +++ b/container-search/src/main/java/com/yahoo/prelude/fastsearch/FeatureDataField.java @@ -6,9 +6,8 @@ import com.yahoo.data.access.Type; import com.yahoo.search.result.FeatureData; /** - * Class representing a "feature data" field. This was historically - * just a string containing JSON; now it's a structure of - * data (that will be rendered as JSON by default). + * Class representing a "feature data" field: A map of values which are + * either floats or tensors. */ public class FeatureDataField extends LongstringField { @@ -23,12 +22,8 @@ public class FeatureDataField extends LongstringField { @Override public Object convert(Inspector value) { - if (! value.valid()) { - return null; - } - if (value.type() == Type.STRING) { - return value.asString(); - } + if ( ! value.valid()) return null; + if (value.type() == Type.STRING) return value.asString(); return new FeatureData(value); } diff --git a/container-search/src/main/java/com/yahoo/prelude/fastsearch/LongstringField.java b/container-search/src/main/java/com/yahoo/prelude/fastsearch/LongstringField.java index 2f9c6d5b325..5de38e43c96 100644 --- a/container-search/src/main/java/com/yahoo/prelude/fastsearch/LongstringField.java +++ b/container-search/src/main/java/com/yahoo/prelude/fastsearch/LongstringField.java @@ -5,10 +5,6 @@ */ package com.yahoo.prelude.fastsearch; -import java.nio.ByteBuffer; - -import com.yahoo.io.SlowInflate; -import com.yahoo.text.Utf8; import com.yahoo.data.access.Inspector; /** diff --git a/container-search/src/main/java/com/yahoo/prelude/searcher/JSONDebugSearcher.java b/container-search/src/main/java/com/yahoo/prelude/searcher/JSONDebugSearcher.java index 2330ca2382a..5f921b67702 100644 --- a/container-search/src/main/java/com/yahoo/prelude/searcher/JSONDebugSearcher.java +++ b/container-search/src/main/java/com/yahoo/prelude/searcher/JSONDebugSearcher.java @@ -3,6 +3,7 @@ package com.yahoo.prelude.searcher; import com.yahoo.prelude.fastsearch.FastHit; import com.yahoo.prelude.hitfield.JSONString; +import com.yahoo.search.Query; import com.yahoo.search.Result; import com.yahoo.search.Searcher; import com.yahoo.processing.request.CompoundName; @@ -27,7 +28,7 @@ public class JSONDebugSearcher extends Searcher { private static CompoundName PROPERTYNAME = new CompoundName("dumpjson"); @Override - public Result search(com.yahoo.search.Query query, Execution execution) { + public Result search(Query query, Execution execution) { Result r = execution.search(query); String propertyName = query.properties().getString(PROPERTYNAME); if (propertyName != null) { diff --git a/container-search/src/main/java/com/yahoo/search/result/FeatureData.java b/container-search/src/main/java/com/yahoo/search/result/FeatureData.java index 53e77631ff9..7e5d6b12f30 100644 --- a/container-search/src/main/java/com/yahoo/search/result/FeatureData.java +++ b/container-search/src/main/java/com/yahoo/search/result/FeatureData.java @@ -6,29 +6,42 @@ import com.yahoo.data.access.Inspectable; import com.yahoo.data.access.Type; import com.yahoo.data.JsonProducer; import com.yahoo.data.access.simple.JsonRender; +import com.yahoo.io.GrowableByteBuffer; +import com.yahoo.tensor.Tensor; +import com.yahoo.tensor.serialization.JsonFormat; +import com.yahoo.tensor.serialization.TypedBinaryFormat; + +import java.nio.charset.StandardCharsets; +import java.util.HashSet; +import java.util.Optional; +import java.util.Set; /** - * A wrapper for structured data representing feature values. + * A wrapper for structured data representing feature values: A map of floats and tensors. + * This class is not thread safe even when it is only consumed. */ public class FeatureData implements Inspectable, JsonProducer { private final Inspector value; + private Set<String> featureNames = null; + public FeatureData(Inspector value) { this.value = value; } + /** + * Returns the fields of this as an inspector, where tensors are represented as binary data + * which can be decoded using + * <code>com.yahoo.tensor.serialization.TypedBinaryFormat.decode(Optional.empty(), GrowableByteBuffer.wrap(featureValue.asData()))</code> + */ @Override - public Inspector inspect() { - return value; - } + public Inspector inspect() { return value; } + @Override public String toString() { - if (value.type() == Type.EMPTY) { - return ""; - } else { - return toJson(); - } + if (value.type() == Type.EMPTY) return ""; + return toJson(); } @Override @@ -38,7 +51,64 @@ public class FeatureData implements Inspectable, JsonProducer { @Override public StringBuilder writeJson(StringBuilder target) { - return JsonRender.render(value, target, true); + return JsonRender.render(value, new Encoder(target, true)); + } + + /** + * Returns the value of a scalar feature, or null if it is not present. + * + * @throws IllegalArgumentException if the value exists but isn't a scalar + * (that is, if it is a tensor with nonzero rank) + */ + public Double getDouble(String featureName) { + Inspector featureValue = value.field(featureName); + if ( ! featureValue.valid()) return null; + + switch (featureValue.type()) { + case DOUBLE: return featureValue.asDouble(); + case DATA: throw new IllegalArgumentException("Feature '" + featureName + "' is a tensor, not a double"); + default: throw new IllegalStateException("Unexpected feature value type " + featureValue.type()); + } + } + + /** + * Returns the value of a tensor feature, or null if it is not present. + * This will return any feature value: Scalars are returned as a rank 0 tensor. + */ + public Tensor getTensor(String featureName) { + Inspector featureValue = value.field(featureName); + if ( ! featureValue.valid()) return null; + + switch (featureValue.type()) { + case DOUBLE: return Tensor.from(featureValue.asDouble()); + case DATA: return TypedBinaryFormat.decode(Optional.empty(), GrowableByteBuffer.wrap(featureValue.asData())); + default: throw new IllegalStateException("Unexpected feature value type " + featureValue.type()); + } + } + + /** Returns the names of the features available in this */ + public Set<String> featureNames() { + if (featureNames != null) return featureNames; + + featureNames = new HashSet<>(); + value.fields().forEach(field -> featureNames.add(field.getKey())); + return featureNames; + } + + /** A JSON encoder which encodes DATA as a tensor */ + private static class Encoder extends JsonRender.StringEncoder { + + Encoder(StringBuilder out, boolean compact) { + super(out, compact); + } + + @Override + public void encodeDATA(byte[] value) { + // This could be done more efficiently ... + target().append(new String(JsonFormat.encodeWithType(TypedBinaryFormat.decode(Optional.empty(), GrowableByteBuffer.wrap(value))), + StandardCharsets.UTF_8)); + } + } } diff --git a/container-search/src/main/java/com/yahoo/search/result/PositionsData.java b/container-search/src/main/java/com/yahoo/search/result/PositionsData.java index 483849a5435..203e0206f1e 100644 --- a/container-search/src/main/java/com/yahoo/search/result/PositionsData.java +++ b/container-search/src/main/java/com/yahoo/search/result/PositionsData.java @@ -10,7 +10,7 @@ import com.yahoo.data.access.simple.JsonRender; /** * A wrapper for structured data representing an array of position values. - **/ + */ public class PositionsData implements Inspectable, JsonProducer, XmlProducer { private final Inspector value; diff --git a/container-search/src/test/java/com/yahoo/search/result/FeatureDataTestCase.java b/container-search/src/test/java/com/yahoo/search/result/FeatureDataTestCase.java new file mode 100644 index 00000000000..9cc7cc743fc --- /dev/null +++ b/container-search/src/test/java/com/yahoo/search/result/FeatureDataTestCase.java @@ -0,0 +1,52 @@ +// Copyright 2019 Oath Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.search.result; + +import com.yahoo.data.access.slime.SlimeAdapter; +import com.yahoo.slime.Cursor; +import com.yahoo.slime.Slime; +import com.yahoo.tensor.Tensor; +import com.yahoo.tensor.serialization.TypedBinaryFormat; +import org.junit.Test; + +import java.util.stream.Collectors; + +import static org.junit.Assert.assertEquals; + +/** + * @author bratseth + */ +public class FeatureDataTestCase { + + private static final double delta = 0.00000001; + + @Test + public void testFeatureData() { + Cursor features = new Slime().setObject(); + features.setDouble("scalar1", 1.5); + features.setDouble("scalar2", 2.5); + Tensor tensor1 = Tensor.from("tensor(x[3]):[1.5, 2, 2.5]"); + features.setData("tensor1", TypedBinaryFormat.encode(tensor1)); + Tensor tensor2 = Tensor.from(0.5); + features.setData("tensor2", TypedBinaryFormat.encode(tensor2)); + + FeatureData featureData = new FeatureData(new SlimeAdapter(features)); + assertEquals("scalar1,scalar2,tensor1,tensor2", + featureData.featureNames().stream().sorted().collect(Collectors.joining(","))); + assertEquals(1.5, featureData.getDouble("scalar1"), delta); + assertEquals(2.5, featureData.getDouble("scalar2"), delta); + assertEquals(Tensor.from(1.5), featureData.getTensor("scalar1")); + assertEquals(Tensor.from(2.5), featureData.getTensor("scalar2")); + assertEquals(tensor1, featureData.getTensor("tensor1")); + assertEquals(tensor2, featureData.getTensor("tensor2")); + + String expectedJson = + "{" + + "\"scalar1\":1.5," + + "\"scalar2\":2.5," + + "\"tensor1\":{\"type\":\"tensor(x[3])\",\"cells\":[{\"address\":{\"x\":\"0\"},\"value\":1.5},{\"address\":{\"x\":\"1\"},\"value\":2.0},{\"address\":{\"x\":\"2\"},\"value\":2.5}]}," + + "\"tensor2\":{\"type\":\"tensor()\",\"cells\":[{\"address\":{},\"value\":0.5}]}" + + "}"; + assertEquals(expectedJson, featureData.toJson()); + } + +} diff --git a/vespajlib/abi-spec.json b/vespajlib/abi-spec.json index b2b895040bc..3b733105d2e 100644 --- a/vespajlib/abi-spec.json +++ b/vespajlib/abi-spec.json @@ -104,14 +104,22 @@ "com.yahoo.data.access.ObjectTraverser" ], "attributes": [ - "public", - "final" + "public" ], "methods": [ "public void <init>(java.lang.StringBuilder, boolean)", "public void encode(com.yahoo.data.access.Inspector)", + "protected void encodeEMPTY()", + "protected void encodeBOOL(boolean)", + "protected void encodeLONG(long)", + "protected void encodeDOUBLE(double)", + "protected void encodeSTRING(java.lang.String)", + "protected void encodeDATA(byte[])", + "protected void encodeARRAY(com.yahoo.data.access.Inspector)", + "protected void encodeOBJECT(com.yahoo.data.access.Inspector)", "public void entry(int, com.yahoo.data.access.Inspector)", - "public void field(java.lang.String, com.yahoo.data.access.Inspector)" + "public void field(java.lang.String, com.yahoo.data.access.Inspector)", + "public java.lang.StringBuilder target()" ], "fields": [] }, @@ -124,7 +132,8 @@ ], "methods": [ "public void <init>()", - "public static java.lang.StringBuilder render(com.yahoo.data.access.Inspectable, java.lang.StringBuilder, boolean)" + "public static java.lang.StringBuilder render(com.yahoo.data.access.Inspectable, java.lang.StringBuilder, boolean)", + "public static java.lang.StringBuilder render(com.yahoo.data.access.Inspectable, com.yahoo.data.access.simple.JsonRender$StringEncoder)" ], "fields": [] }, diff --git a/vespajlib/src/main/java/com/yahoo/data/access/simple/JsonRender.java b/vespajlib/src/main/java/com/yahoo/data/access/simple/JsonRender.java index 253b0c60927..9f662c77c59 100644 --- a/vespajlib/src/main/java/com/yahoo/data/access/simple/JsonRender.java +++ b/vespajlib/src/main/java/com/yahoo/data/access/simple/JsonRender.java @@ -11,19 +11,25 @@ import com.yahoo.data.access.ObjectTraverser; * * @author arnej27959 */ -public final class JsonRender -{ +public final class JsonRender { + public static StringBuilder render(Inspectable value, StringBuilder target, - boolean compact) - { - StringEncoder enc = new StringEncoder(target, compact); - enc.encode(value.inspect()); - return target; + boolean compact) { + return render(value, new StringEncoder(target, compact)); + } + + /** + * Renders the given value to the target stringbuilder with a given encoder. + * This is useful to use an encoder where rendering of some value types is customized. + */ + public static StringBuilder render(Inspectable value, StringEncoder encoder) { + encoder.encode(value.inspect()); + return encoder.target(); } - public static final class StringEncoder implements ArrayTraverser, ObjectTraverser - { + public static class StringEncoder implements ArrayTraverser, ObjectTraverser { + private final StringBuilder out; private boolean head = true; private boolean compact; @@ -41,21 +47,21 @@ public final class JsonRender } } - private void encodeEMPTY() { + protected void encodeEMPTY() { out.append("null"); } - private void encodeBOOL(boolean value) { + protected void encodeBOOL(boolean value) { out.append(value ? "true" : "false"); } - private void encodeLONG(long value) { - out.append(String.valueOf(value)); + protected void encodeLONG(long value) { + out.append(value); } - private void encodeDOUBLE(double value) { + protected void encodeDOUBLE(double value) { if (Double.isFinite(value)) { - out.append(String.valueOf(value)); + out.append(value); } else { out.append("null"); } @@ -63,7 +69,7 @@ public final class JsonRender static final char[] hex = "0123456789ABCDEF".toCharArray(); - private void encodeSTRING(String value) { + protected void encodeSTRING(String value) { out.append('"'); for (char c : value.toCharArray()) { switch (c) { @@ -89,7 +95,7 @@ public final class JsonRender out.append('"'); } - private void encodeDATA(byte[] value) { + protected void encodeDATA(byte[] value) { out.append('"'); out.append("0x"); for (int pos = 0; pos < value.length; pos++) { @@ -99,14 +105,14 @@ public final class JsonRender out.append('"'); } - private void encodeARRAY(Inspector inspector) { + protected void encodeARRAY(Inspector inspector) { openScope("["); ArrayTraverser at = this; inspector.traverse(at); closeScope("]"); } - private void encodeOBJECT(Inspector inspector) { + protected void encodeOBJECT(Inspector inspector) { openScope("{"); ObjectTraverser ot = this; inspector.traverse(ot); @@ -164,5 +170,10 @@ public final class JsonRender out.append(' '); encodeValue(inspector); } + + /** Returns the target this is encoding values to */ + public StringBuilder target() { return out; } + } + } diff --git a/vespajlib/src/main/java/com/yahoo/tensor/serialization/JsonFormat.java b/vespajlib/src/main/java/com/yahoo/tensor/serialization/JsonFormat.java index 52635905d72..1a210a614cc 100644 --- a/vespajlib/src/main/java/com/yahoo/tensor/serialization/JsonFormat.java +++ b/vespajlib/src/main/java/com/yahoo/tensor/serialization/JsonFormat.java @@ -19,21 +19,33 @@ import java.util.Iterator; * A JSON map containing a 'cells' array. * See http://docs.vespa.ai/documentation/reference/document-json-put-format.html#tensor */ -// TODO: We should probably move reading of this format from the document module to here public class JsonFormat { - /** Serializes the given tensor into JSON format */ + /** Serializes the given tensor value into JSON format */ public static byte[] encode(Tensor tensor) { Slime slime = new Slime(); Cursor root = slime.setObject(); - Cursor cellsArray = root.setArray("cells"); + encodeCells(tensor, root); + return com.yahoo.slime.JsonFormat.toJsonBytes(slime); + } + + /** Serializes the given tensor type and value into JSON format */ + public static byte[] encodeWithType(Tensor tensor) { + Slime slime = new Slime(); + Cursor root = slime.setObject(); + root.setString("type", tensor.type().toString()); + encodeCells(tensor, root); + return com.yahoo.slime.JsonFormat.toJsonBytes(slime); + } + + private static void encodeCells(Tensor tensor, Cursor rootObject) { + Cursor cellsArray = rootObject.setArray("cells"); for (Iterator<Tensor.Cell> i = tensor.cellIterator(); i.hasNext(); ) { Tensor.Cell cell = i.next(); Cursor cellObject = cellsArray.addObject(); encodeAddress(tensor.type(), cell.getKey(), cellObject.setObject("address")); cellObject.setDouble("value", cell.getValue()); } - return com.yahoo.slime.JsonFormat.toJsonBytes(slime); } private static void encodeAddress(TensorType type, TensorAddress address, Cursor addressObject) { |