diff options
author | Jon Bratseth <bratseth@oath.com> | 2019-06-16 18:26:26 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2019-06-16 18:26:26 +0200 |
commit | c42b06f4843c55233197bc0eafe3426f6f5d7f3a (patch) | |
tree | dc9407032f3028449b9694057deb8e3e2f009869 /container-search | |
parent | f1110ab151c0dcaf9790dbbdfac986f32d00384c (diff) | |
parent | 51e46a95176c1e24f878a8bee874818c390c0652 (diff) |
Merge pull request #9818 from vespa-engine/bratseth/tensor-summary-features
Decode tensor summary features
Diffstat (limited to 'container-search')
8 files changed, 146 insertions, 27 deletions
diff --git a/container-search/abi-spec.json b/container-search/abi-spec.json index 9474c9f9160..06713d14d88 100644 --- a/container-search/abi-spec.json +++ b/container-search/abi-spec.json @@ -7148,7 +7148,10 @@ "public com.yahoo.data.access.Inspector inspect()", "public java.lang.String toString()", "public java.lang.String toJson()", - "public java.lang.StringBuilder writeJson(java.lang.StringBuilder)" + "public java.lang.StringBuilder writeJson(java.lang.StringBuilder)", + "public java.lang.Double getDouble(java.lang.String)", + "public com.yahoo.tensor.Tensor getTensor(java.lang.String)", + "public java.util.Set featureNames()" ], "fields": [] }, diff --git a/container-search/src/main/java/com/yahoo/data/JsonProducer.java b/container-search/src/main/java/com/yahoo/data/JsonProducer.java index 6d925b41379..c9dc0946a3e 100644 --- a/container-search/src/main/java/com/yahoo/data/JsonProducer.java +++ b/container-search/src/main/java/com/yahoo/data/JsonProducer.java @@ -12,6 +12,7 @@ public interface JsonProducer { * be human-readable and containing embedded newlines; also the * exact indentation etc may change, so use compact=true for a * canonical format. + * * @param target the StringBuilder to append to. * @return the target passed in is also returned (to allow chaining). */ @@ -20,7 +21,8 @@ public interface JsonProducer { /** * Convenience method equivalent to: * writeJson(new StringBuilder()).toString() - * @return String containing JSON representation of this object's data. + * + * @return a String containing JSON representation of this object's data. */ default String toJson() { return writeJson(new StringBuilder()).toString(); diff --git a/container-search/src/main/java/com/yahoo/prelude/fastsearch/FeatureDataField.java b/container-search/src/main/java/com/yahoo/prelude/fastsearch/FeatureDataField.java index 1f60dd3d1cf..b0003f4321e 100644 --- a/container-search/src/main/java/com/yahoo/prelude/fastsearch/FeatureDataField.java +++ b/container-search/src/main/java/com/yahoo/prelude/fastsearch/FeatureDataField.java @@ -6,9 +6,8 @@ import com.yahoo.data.access.Type; import com.yahoo.search.result.FeatureData; /** - * Class representing a "feature data" field. This was historically - * just a string containing JSON; now it's a structure of - * data (that will be rendered as JSON by default). + * Class representing a "feature data" field: A map of values which are + * either floats or tensors. */ public class FeatureDataField extends LongstringField { @@ -23,12 +22,8 @@ public class FeatureDataField extends LongstringField { @Override public Object convert(Inspector value) { - if (! value.valid()) { - return null; - } - if (value.type() == Type.STRING) { - return value.asString(); - } + if ( ! value.valid()) return null; + if (value.type() == Type.STRING) return value.asString(); return new FeatureData(value); } diff --git a/container-search/src/main/java/com/yahoo/prelude/fastsearch/LongstringField.java b/container-search/src/main/java/com/yahoo/prelude/fastsearch/LongstringField.java index 2f9c6d5b325..5de38e43c96 100644 --- a/container-search/src/main/java/com/yahoo/prelude/fastsearch/LongstringField.java +++ b/container-search/src/main/java/com/yahoo/prelude/fastsearch/LongstringField.java @@ -5,10 +5,6 @@ */ package com.yahoo.prelude.fastsearch; -import java.nio.ByteBuffer; - -import com.yahoo.io.SlowInflate; -import com.yahoo.text.Utf8; import com.yahoo.data.access.Inspector; /** diff --git a/container-search/src/main/java/com/yahoo/prelude/searcher/JSONDebugSearcher.java b/container-search/src/main/java/com/yahoo/prelude/searcher/JSONDebugSearcher.java index 2330ca2382a..5f921b67702 100644 --- a/container-search/src/main/java/com/yahoo/prelude/searcher/JSONDebugSearcher.java +++ b/container-search/src/main/java/com/yahoo/prelude/searcher/JSONDebugSearcher.java @@ -3,6 +3,7 @@ package com.yahoo.prelude.searcher; import com.yahoo.prelude.fastsearch.FastHit; import com.yahoo.prelude.hitfield.JSONString; +import com.yahoo.search.Query; import com.yahoo.search.Result; import com.yahoo.search.Searcher; import com.yahoo.processing.request.CompoundName; @@ -27,7 +28,7 @@ public class JSONDebugSearcher extends Searcher { private static CompoundName PROPERTYNAME = new CompoundName("dumpjson"); @Override - public Result search(com.yahoo.search.Query query, Execution execution) { + public Result search(Query query, Execution execution) { Result r = execution.search(query); String propertyName = query.properties().getString(PROPERTYNAME); if (propertyName != null) { diff --git a/container-search/src/main/java/com/yahoo/search/result/FeatureData.java b/container-search/src/main/java/com/yahoo/search/result/FeatureData.java index 53e77631ff9..7e5d6b12f30 100644 --- a/container-search/src/main/java/com/yahoo/search/result/FeatureData.java +++ b/container-search/src/main/java/com/yahoo/search/result/FeatureData.java @@ -6,29 +6,42 @@ import com.yahoo.data.access.Inspectable; import com.yahoo.data.access.Type; import com.yahoo.data.JsonProducer; import com.yahoo.data.access.simple.JsonRender; +import com.yahoo.io.GrowableByteBuffer; +import com.yahoo.tensor.Tensor; +import com.yahoo.tensor.serialization.JsonFormat; +import com.yahoo.tensor.serialization.TypedBinaryFormat; + +import java.nio.charset.StandardCharsets; +import java.util.HashSet; +import java.util.Optional; +import java.util.Set; /** - * A wrapper for structured data representing feature values. + * A wrapper for structured data representing feature values: A map of floats and tensors. + * This class is not thread safe even when it is only consumed. */ public class FeatureData implements Inspectable, JsonProducer { private final Inspector value; + private Set<String> featureNames = null; + public FeatureData(Inspector value) { this.value = value; } + /** + * Returns the fields of this as an inspector, where tensors are represented as binary data + * which can be decoded using + * <code>com.yahoo.tensor.serialization.TypedBinaryFormat.decode(Optional.empty(), GrowableByteBuffer.wrap(featureValue.asData()))</code> + */ @Override - public Inspector inspect() { - return value; - } + public Inspector inspect() { return value; } + @Override public String toString() { - if (value.type() == Type.EMPTY) { - return ""; - } else { - return toJson(); - } + if (value.type() == Type.EMPTY) return ""; + return toJson(); } @Override @@ -38,7 +51,64 @@ public class FeatureData implements Inspectable, JsonProducer { @Override public StringBuilder writeJson(StringBuilder target) { - return JsonRender.render(value, target, true); + return JsonRender.render(value, new Encoder(target, true)); + } + + /** + * Returns the value of a scalar feature, or null if it is not present. + * + * @throws IllegalArgumentException if the value exists but isn't a scalar + * (that is, if it is a tensor with nonzero rank) + */ + public Double getDouble(String featureName) { + Inspector featureValue = value.field(featureName); + if ( ! featureValue.valid()) return null; + + switch (featureValue.type()) { + case DOUBLE: return featureValue.asDouble(); + case DATA: throw new IllegalArgumentException("Feature '" + featureName + "' is a tensor, not a double"); + default: throw new IllegalStateException("Unexpected feature value type " + featureValue.type()); + } + } + + /** + * Returns the value of a tensor feature, or null if it is not present. + * This will return any feature value: Scalars are returned as a rank 0 tensor. + */ + public Tensor getTensor(String featureName) { + Inspector featureValue = value.field(featureName); + if ( ! featureValue.valid()) return null; + + switch (featureValue.type()) { + case DOUBLE: return Tensor.from(featureValue.asDouble()); + case DATA: return TypedBinaryFormat.decode(Optional.empty(), GrowableByteBuffer.wrap(featureValue.asData())); + default: throw new IllegalStateException("Unexpected feature value type " + featureValue.type()); + } + } + + /** Returns the names of the features available in this */ + public Set<String> featureNames() { + if (featureNames != null) return featureNames; + + featureNames = new HashSet<>(); + value.fields().forEach(field -> featureNames.add(field.getKey())); + return featureNames; + } + + /** A JSON encoder which encodes DATA as a tensor */ + private static class Encoder extends JsonRender.StringEncoder { + + Encoder(StringBuilder out, boolean compact) { + super(out, compact); + } + + @Override + public void encodeDATA(byte[] value) { + // This could be done more efficiently ... + target().append(new String(JsonFormat.encodeWithType(TypedBinaryFormat.decode(Optional.empty(), GrowableByteBuffer.wrap(value))), + StandardCharsets.UTF_8)); + } + } } diff --git a/container-search/src/main/java/com/yahoo/search/result/PositionsData.java b/container-search/src/main/java/com/yahoo/search/result/PositionsData.java index 483849a5435..203e0206f1e 100644 --- a/container-search/src/main/java/com/yahoo/search/result/PositionsData.java +++ b/container-search/src/main/java/com/yahoo/search/result/PositionsData.java @@ -10,7 +10,7 @@ import com.yahoo.data.access.simple.JsonRender; /** * A wrapper for structured data representing an array of position values. - **/ + */ public class PositionsData implements Inspectable, JsonProducer, XmlProducer { private final Inspector value; diff --git a/container-search/src/test/java/com/yahoo/search/result/FeatureDataTestCase.java b/container-search/src/test/java/com/yahoo/search/result/FeatureDataTestCase.java new file mode 100644 index 00000000000..9cc7cc743fc --- /dev/null +++ b/container-search/src/test/java/com/yahoo/search/result/FeatureDataTestCase.java @@ -0,0 +1,52 @@ +// Copyright 2019 Oath Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.search.result; + +import com.yahoo.data.access.slime.SlimeAdapter; +import com.yahoo.slime.Cursor; +import com.yahoo.slime.Slime; +import com.yahoo.tensor.Tensor; +import com.yahoo.tensor.serialization.TypedBinaryFormat; +import org.junit.Test; + +import java.util.stream.Collectors; + +import static org.junit.Assert.assertEquals; + +/** + * @author bratseth + */ +public class FeatureDataTestCase { + + private static final double delta = 0.00000001; + + @Test + public void testFeatureData() { + Cursor features = new Slime().setObject(); + features.setDouble("scalar1", 1.5); + features.setDouble("scalar2", 2.5); + Tensor tensor1 = Tensor.from("tensor(x[3]):[1.5, 2, 2.5]"); + features.setData("tensor1", TypedBinaryFormat.encode(tensor1)); + Tensor tensor2 = Tensor.from(0.5); + features.setData("tensor2", TypedBinaryFormat.encode(tensor2)); + + FeatureData featureData = new FeatureData(new SlimeAdapter(features)); + assertEquals("scalar1,scalar2,tensor1,tensor2", + featureData.featureNames().stream().sorted().collect(Collectors.joining(","))); + assertEquals(1.5, featureData.getDouble("scalar1"), delta); + assertEquals(2.5, featureData.getDouble("scalar2"), delta); + assertEquals(Tensor.from(1.5), featureData.getTensor("scalar1")); + assertEquals(Tensor.from(2.5), featureData.getTensor("scalar2")); + assertEquals(tensor1, featureData.getTensor("tensor1")); + assertEquals(tensor2, featureData.getTensor("tensor2")); + + String expectedJson = + "{" + + "\"scalar1\":1.5," + + "\"scalar2\":2.5," + + "\"tensor1\":{\"type\":\"tensor(x[3])\",\"cells\":[{\"address\":{\"x\":\"0\"},\"value\":1.5},{\"address\":{\"x\":\"1\"},\"value\":2.0},{\"address\":{\"x\":\"2\"},\"value\":2.5}]}," + + "\"tensor2\":{\"type\":\"tensor()\",\"cells\":[{\"address\":{},\"value\":0.5}]}" + + "}"; + assertEquals(expectedJson, featureData.toJson()); + } + +} |