diff options
author | Jon Bratseth <bratseth@yahoo-inc.com> | 2016-12-04 15:29:32 -0800 |
---|---|---|
committer | Jon Bratseth <bratseth@yahoo-inc.com> | 2016-12-04 15:29:32 -0800 |
commit | 65190a02569bef23f3c0d3383e4c333f640ef292 (patch) | |
tree | c48a11e88141ba1eee7f732fcda27bf97642de27 /document | |
parent | ee6783f2201988e22ef91d1f354255599c8c0165 (diff) |
Towards always typed, fully specified tensors
- Tensor addresses do not repeat dimensions.
- Tensor addresses must provide a value for all dimensions.
- Tensor dimensions are not serialized in JSON (but still are in binary).
- Tensor types are required everywhere, except for a workaround in JSON deserialization.
- Tensor operations are about 50% faster.
- Tensor join of two tensors in the same space is about 4000% faster.
Diffstat (limited to 'document')
7 files changed, 51 insertions, 27 deletions
diff --git a/document/src/main/java/com/yahoo/document/json/JsonReader.java b/document/src/main/java/com/yahoo/document/json/JsonReader.java index 5774f9258ea..6011ee59176 100644 --- a/document/src/main/java/com/yahoo/document/json/JsonReader.java +++ b/document/src/main/java/com/yahoo/document/json/JsonReader.java @@ -6,6 +6,7 @@ import com.fasterxml.jackson.core.JsonParser; import com.fasterxml.jackson.core.JsonToken; import com.google.common.annotations.Beta; import com.google.common.base.Preconditions; +import com.yahoo.collections.Pair; import com.yahoo.document.ArrayDataType; import com.yahoo.document.CollectionDataType; import com.yahoo.document.DataType; @@ -34,6 +35,7 @@ import com.yahoo.document.update.FieldUpdate; import com.yahoo.document.update.MapValueUpdate; import com.yahoo.document.update.ValueUpdate; import com.yahoo.tensor.MapTensorBuilder; +import com.yahoo.tensor.TensorType; import org.apache.commons.codec.binary.Base64; import java.io.IOException; @@ -572,8 +574,7 @@ public class JsonReader { } } - private void fillWeightedSet(DataType valueType, - @SuppressWarnings("rawtypes") WeightedSet weightedSet) { + private void fillWeightedSet(DataType valueType, @SuppressWarnings("rawtypes") WeightedSet weightedSet) { int initNesting = buffer.nesting(); expectObjectStart(buffer.currentToken()); buffer.next(); @@ -583,39 +584,69 @@ public class JsonReader { private void fillTensor(TensorFieldValue tensorFieldValue) { expectObjectStart(buffer.currentToken()); int initNesting = buffer.nesting(); - MapTensorBuilder tensorBuilder = new MapTensorBuilder(); + MapTensorBuilder tensorBuilder = null; // read tensor cell fields and ignore everything else for (buffer.next(); buffer.nesting() >= initNesting; buffer.next()) { if (TENSOR_CELLS.equals(buffer.currentName())) - readTensorCells(tensorBuilder); + tensorBuilder = readTensorCells(tensorBuilder); } expectObjectEnd(buffer.currentToken()); + if (tensorBuilder == null) // no cells + no type: empty tensor type 
+ tensorBuilder = new MapTensorBuilder(TensorType.empty); tensorFieldValue.assign(tensorBuilder.build()); } - private void readTensorCells(MapTensorBuilder tensorBuilder) { + private MapTensorBuilder readTensorCells(MapTensorBuilder tensorBuilder) { expectArrayStart(buffer.currentToken()); int initNesting = buffer.nesting(); for (buffer.next(); buffer.nesting() >= initNesting; buffer.next()) { - readTensorCell(tensorBuilder.cell()); + tensorBuilder = readTensorCell(tensorBuilder); } expectCompositeEnd(buffer.currentToken()); + return tensorBuilder; } - private void readTensorCell(MapTensorBuilder.CellBuilder cellBuilder) { + private MapTensorBuilder readTensorCell(MapTensorBuilder tensorBuilder) { expectObjectStart(buffer.currentToken()); int initNesting = buffer.nesting(); double cellValue = 0.0; + MapTensorBuilder.CellBuilder cellBuilder = null; for (buffer.next(); buffer.nesting() >= initNesting; buffer.next()) { String currentName = buffer.currentName(); if (TENSOR_ADDRESS.equals(currentName)) { - readTensorAddress(cellBuilder); + if (tensorBuilder != null) { + cellBuilder = tensorBuilder.cell(); + readTensorAddress(cellBuilder); + } + else { // gnarly temporary path to create a type on the fly TODO; Remove when we always have a type + expectObjectStart(buffer.currentToken()); + int initNesting2 = buffer.nesting(); + List<Pair<String,String>> entries = new ArrayList<>(); + for (buffer.next(); buffer.nesting() >= initNesting2; buffer.next()) { + String dimension = buffer.currentName(); + String label = buffer.currentText(); + entries.add(new Pair<>(dimension, label)); + } + TensorType.Builder typeBuilder = new TensorType.Builder(); + for (Pair<String,String> entry : entries) + typeBuilder.mapped(entry.getFirst()); + tensorBuilder = new MapTensorBuilder(typeBuilder.build()); + cellBuilder = tensorBuilder.cell(); + for (Pair<String,String> entry : entries) + cellBuilder.label(entry.getFirst(), entry.getSecond()); + expectObjectEnd(buffer.currentToken()); + } } 
else if (TENSOR_VALUE.equals(currentName)) { cellValue = Double.valueOf(buffer.currentText()); } } expectObjectEnd(buffer.currentToken()); + if (tensorBuilder == null) { // no content TODO; This will go away with the above + tensorBuilder = new MapTensorBuilder(TensorType.empty); + cellBuilder = tensorBuilder.cell(); + } cellBuilder.value(cellValue); + return tensorBuilder; } private void readTensorAddress(MapTensorBuilder.CellBuilder cellBuilder) { @@ -653,12 +684,7 @@ public class JsonReader { buffer.bufferObject(current, parser); } - private boolean jsonTokenIsBooleanOrString(JsonToken jsonToken) { - return jsonToken == JsonToken.VALUE_STRING || jsonToken == JsonToken.VALUE_TRUE || jsonToken == JsonToken.VALUE_FALSE; - } - Optional<DocumentParseInfo> parseDocument() { - Optional<Boolean> create = Optional.empty(); // we should now be at the start of a feed operation or at the end of the feed JsonToken token = nextToken(); if (token == JsonToken.END_ARRAY) { @@ -672,7 +698,7 @@ public class JsonReader { try { token = nextToken(); if ((token == JsonToken.VALUE_TRUE || token == JsonToken.VALUE_FALSE) && - CREATE_IF_NON_EXISTENT.equals(parser.getCurrentName())) { + CREATE_IF_NON_EXISTENT.equals(parser.getCurrentName())) { documentParseInfo.create = Optional.of(token == JsonToken.VALUE_TRUE); continue; } diff --git a/document/src/test/java/com/yahoo/document/json/DocumentUpdateJsonSerializerTest.java b/document/src/test/java/com/yahoo/document/json/DocumentUpdateJsonSerializerTest.java index 182150167a4..ffec7927ab3 100644 --- a/document/src/test/java/com/yahoo/document/json/DocumentUpdateJsonSerializerTest.java +++ b/document/src/test/java/com/yahoo/document/json/DocumentUpdateJsonSerializerTest.java @@ -240,7 +240,7 @@ public class DocumentUpdateJsonSerializerTest { " 'assign': {", " 'cells': [", " { 'address': { 'x': 'a', 'y': 'b' }, 'value': 2.0 },", - " { 'address': { 'x': 'c' }, 'value': 3.0 }", + " { 'address': { 'x': 'c', 'y': 'b' }, 'value': 3.0 }", " ]", " 
}", " }", diff --git a/document/src/test/java/com/yahoo/document/json/JsonReaderTestCase.java b/document/src/test/java/com/yahoo/document/json/JsonReaderTestCase.java index f727daec24f..2c455658528 100644 --- a/document/src/test/java/com/yahoo/document/json/JsonReaderTestCase.java +++ b/document/src/test/java/com/yahoo/document/json/JsonReaderTestCase.java @@ -1049,12 +1049,12 @@ public class JsonReaderTestCase { @Test public void testParsingOfTensorWithCells() { - assertTensorField("{{x:a,y:b}:2.0,{x:c}:3.0}}", + assertTensorField("{{x:a,y:b}:2.0,{x:c,y:b}:3.0}}", createPutWithTensor("{ " + " \"cells\": [ " + " { \"address\": { \"x\": \"a\", \"y\": \"b\" }, " + " \"value\": 2.0 }, " - + " { \"address\": { \"x\": \"c\" }, " + + " { \"address\": { \"x\": \"c\", \"y\": \"b\" }, " + " \"value\": 3.0 } " + " ]" + "}")); @@ -1105,12 +1105,12 @@ public class JsonReaderTestCase { @Test public void testAssignUpdateOfTensorWithCells() { - assertTensorAssignUpdate("{{x:a,y:b}:2.0,{x:c}:3.0}}", + assertTensorAssignUpdate("{{x:a,y:b}:2.0,{x:c,y:b}:3.0}}", createAssignUpdateWithTensor("{ " + " \"cells\": [ " + " { \"address\": { \"x\": \"a\", \"y\": \"b\" }, " + " \"value\": 2.0 }, " - + " { \"address\": { \"x\": \"c\" }, " + + " { \"address\": { \"x\": \"c\", \"y\": \"b\" }, " + " \"value\": 3.0 } " + " ]" + "}")); diff --git a/document/src/test/java/com/yahoo/document/json/JsonWriterTestCase.java b/document/src/test/java/com/yahoo/document/json/JsonWriterTestCase.java index 43b1f3cafe4..171676be694 100644 --- a/document/src/test/java/com/yahoo/document/json/JsonWriterTestCase.java +++ b/document/src/test/java/com/yahoo/document/json/JsonWriterTestCase.java @@ -260,10 +260,8 @@ public class JsonWriterTestCase { assertEquals(populateMap(inputMap), populateMap(generatedMap)); } - private Document readDocumentFromJson(final String docId, - final String fields) { - InputStream rawDoc = new ByteArrayInputStream(asFeed( - docId, fields)); + private Document 
readDocumentFromJson(String docId, String fields) { + InputStream rawDoc = new ByteArrayInputStream(asFeed(docId, fields)); JsonReader r = new JsonReader(types, rawDoc, parserFactory); JsonReader.DocumentParseInfo raw = r.parseDocument().get(); DocumentType docType = r.readDocumentType(raw.documentId); @@ -322,14 +320,14 @@ public class JsonWriterTestCase { + " \"cells\": [ " + " { \"address\": { \"x\": \"a\", \"y\": \"b\" }, " + " \"value\": 2.0 }, " - + " { \"address\": { \"x\": \"c\" }, " + + " { \"address\": { \"x\": \"c\", \"y\": \"b\" }, " + " \"value\": 3.0 } " + " ]" + "}", "{ " + " \"cells\": [ " + " { \"address\": { \"x\": \"a\", \"y\": \"b\" }, " + " \"value\": 2.0 }, " - + " { \"address\": { \"x\": \"c\" }, " + + " { \"address\": { \"x\": \"c\", \"y\": \"b\" }, " + " \"value\": 3.0 } " + " ]" + "}"); diff --git a/document/src/test/java/com/yahoo/document/serialization/TensorFieldValueSerializationTestCase.java b/document/src/test/java/com/yahoo/document/serialization/TensorFieldValueSerializationTestCase.java index f0334ee7e4f..a170e388896 100644 --- a/document/src/test/java/com/yahoo/document/serialization/TensorFieldValueSerializationTestCase.java +++ b/document/src/test/java/com/yahoo/document/serialization/TensorFieldValueSerializationTestCase.java @@ -34,7 +34,7 @@ public class TensorFieldValueSerializationTestCase { public void requireThatTensorFieldValueIsSerializedAndDeserialized() { assertSerialization(new TensorFieldValue()); assertSerialization(createTensor("{}")); - assertSerialization(createTensor("{{dimX:a,dimY:bb}:2.0,{dimX:ccc,dimY:dddd}:3.0,{dimX:e}:5.0}")); + assertSerialization(createTensor("{{dimX:a,dimY:bb}:2.0,{dimX:ccc,dimY:dddd}:3.0,{dimX:e,dimY:ff}:5.0}")); } @Test @@ -42,7 +42,7 @@ public class TensorFieldValueSerializationTestCase { assertSerializationMatchesCpp("non_existing_tensor", new TensorFieldValue()); assertSerializationMatchesCpp("empty_tensor", createTensor("{}")); assertSerializationMatchesCpp("multi_cell_tensor", - 
createTensor("{{dimX:a,dimY:bb}:2.0,{dimX:ccc,dimY:dddd}:3.0,{dimX:e}:5.0}")); + createTensor("{{dimX:a,dimY:bb}:2.0,{dimX:ccc,dimY:dddd}:3.0,{dimX:e,dimY:ff}:5.0}")); } private static void assertSerialization(TensorFieldValue tensor) { diff --git a/document/src/test/resources/tensor/multi_cell_tensor__cpp b/document/src/test/resources/tensor/multi_cell_tensor__cpp Binary files differ index cda97503f15..d923fc10559 100644 --- a/document/src/test/resources/tensor/multi_cell_tensor__cpp +++ b/document/src/test/resources/tensor/multi_cell_tensor__cpp diff --git a/document/src/test/resources/tensor/multi_cell_tensor__java b/document/src/test/resources/tensor/multi_cell_tensor__java Binary files differ index a202c1a09ab..d923fc10559 100644 --- a/document/src/test/resources/tensor/multi_cell_tensor__java +++ b/document/src/test/resources/tensor/multi_cell_tensor__java |