summary | refs | log | tree | commit | diff | stats
path: root/document
diff options
context:
space:
mode:
author: Jon Bratseth <bratseth@yahoo-inc.com> 2016-12-04 15:29:32 -0800
committer: Jon Bratseth <bratseth@yahoo-inc.com> 2016-12-04 15:29:32 -0800
commit: 65190a02569bef23f3c0d3383e4c333f640ef292 (patch)
tree: c48a11e88141ba1eee7f732fcda27bf97642de27 /document
parent: ee6783f2201988e22ef91d1f354255599c8c0165 (diff)
Towards always typed, fully specified tensors
- Tensor addresses do not repeat dimensions.
- Tensor addresses must provide a value for all dimensions.
- Tensor dimensions are not serialized in JSON (but still are in binary).
- Tensor types are required everywhere, except a workaround for JSON deserialization.
- Tensor operations are about 50% faster.
- Tensor join of two tensors in the same space is about 4000% faster.
Diffstat (limited to 'document')
-rw-r--r-- document/src/main/java/com/yahoo/document/json/JsonReader.java | 54
-rw-r--r-- document/src/test/java/com/yahoo/document/json/DocumentUpdateJsonSerializerTest.java | 2
-rw-r--r-- document/src/test/java/com/yahoo/document/json/JsonReaderTestCase.java | 8
-rw-r--r-- document/src/test/java/com/yahoo/document/json/JsonWriterTestCase.java | 10
-rw-r--r-- document/src/test/java/com/yahoo/document/serialization/TensorFieldValueSerializationTestCase.java | 4
-rw-r--r-- document/src/test/resources/tensor/multi_cell_tensor__cpp | bin 105 -> 107 bytes
-rw-r--r-- document/src/test/resources/tensor/multi_cell_tensor__java | bin 105 -> 107 bytes
7 files changed, 51 insertions, 27 deletions
diff --git a/document/src/main/java/com/yahoo/document/json/JsonReader.java b/document/src/main/java/com/yahoo/document/json/JsonReader.java
index 5774f9258ea..6011ee59176 100644
--- a/document/src/main/java/com/yahoo/document/json/JsonReader.java
+++ b/document/src/main/java/com/yahoo/document/json/JsonReader.java
@@ -6,6 +6,7 @@ import com.fasterxml.jackson.core.JsonParser;
import com.fasterxml.jackson.core.JsonToken;
import com.google.common.annotations.Beta;
import com.google.common.base.Preconditions;
+import com.yahoo.collections.Pair;
import com.yahoo.document.ArrayDataType;
import com.yahoo.document.CollectionDataType;
import com.yahoo.document.DataType;
@@ -34,6 +35,7 @@ import com.yahoo.document.update.FieldUpdate;
import com.yahoo.document.update.MapValueUpdate;
import com.yahoo.document.update.ValueUpdate;
import com.yahoo.tensor.MapTensorBuilder;
+import com.yahoo.tensor.TensorType;
import org.apache.commons.codec.binary.Base64;
import java.io.IOException;
@@ -572,8 +574,7 @@ public class JsonReader {
}
}
- private void fillWeightedSet(DataType valueType,
- @SuppressWarnings("rawtypes") WeightedSet weightedSet) {
+ private void fillWeightedSet(DataType valueType, @SuppressWarnings("rawtypes") WeightedSet weightedSet) {
int initNesting = buffer.nesting();
expectObjectStart(buffer.currentToken());
buffer.next();
@@ -583,39 +584,69 @@ public class JsonReader {
private void fillTensor(TensorFieldValue tensorFieldValue) {
expectObjectStart(buffer.currentToken());
int initNesting = buffer.nesting();
- MapTensorBuilder tensorBuilder = new MapTensorBuilder();
+ MapTensorBuilder tensorBuilder = null;
// read tensor cell fields and ignore everything else
for (buffer.next(); buffer.nesting() >= initNesting; buffer.next()) {
if (TENSOR_CELLS.equals(buffer.currentName()))
- readTensorCells(tensorBuilder);
+ tensorBuilder = readTensorCells(tensorBuilder);
}
expectObjectEnd(buffer.currentToken());
+ if (tensorBuilder == null) // no cells + no type: empty tensor type
+ tensorBuilder = new MapTensorBuilder(TensorType.empty);
tensorFieldValue.assign(tensorBuilder.build());
}
- private void readTensorCells(MapTensorBuilder tensorBuilder) {
+ private MapTensorBuilder readTensorCells(MapTensorBuilder tensorBuilder) {
expectArrayStart(buffer.currentToken());
int initNesting = buffer.nesting();
for (buffer.next(); buffer.nesting() >= initNesting; buffer.next()) {
- readTensorCell(tensorBuilder.cell());
+ tensorBuilder = readTensorCell(tensorBuilder);
}
expectCompositeEnd(buffer.currentToken());
+ return tensorBuilder;
}
- private void readTensorCell(MapTensorBuilder.CellBuilder cellBuilder) {
+ private MapTensorBuilder readTensorCell(MapTensorBuilder tensorBuilder) {
expectObjectStart(buffer.currentToken());
int initNesting = buffer.nesting();
double cellValue = 0.0;
+ MapTensorBuilder.CellBuilder cellBuilder = null;
for (buffer.next(); buffer.nesting() >= initNesting; buffer.next()) {
String currentName = buffer.currentName();
if (TENSOR_ADDRESS.equals(currentName)) {
- readTensorAddress(cellBuilder);
+ if (tensorBuilder != null) {
+ cellBuilder = tensorBuilder.cell();
+ readTensorAddress(cellBuilder);
+ }
+ else { // gnarly temporary path to create a type on the fly TODO; Remove when we always have a type
+ expectObjectStart(buffer.currentToken());
+ int initNesting2 = buffer.nesting();
+ List<Pair<String,String>> entries = new ArrayList<>();
+ for (buffer.next(); buffer.nesting() >= initNesting2; buffer.next()) {
+ String dimension = buffer.currentName();
+ String label = buffer.currentText();
+ entries.add(new Pair<>(dimension, label));
+ }
+ TensorType.Builder typeBuilder = new TensorType.Builder();
+ for (Pair<String,String> entry : entries)
+ typeBuilder.mapped(entry.getFirst());
+ tensorBuilder = new MapTensorBuilder(typeBuilder.build());
+ cellBuilder = tensorBuilder.cell();
+ for (Pair<String,String> entry : entries)
+ cellBuilder.label(entry.getFirst(), entry.getSecond());
+ expectObjectEnd(buffer.currentToken());
+ }
} else if (TENSOR_VALUE.equals(currentName)) {
cellValue = Double.valueOf(buffer.currentText());
}
}
expectObjectEnd(buffer.currentToken());
+ if (tensorBuilder == null) { // no content TODO; This will go away with the above
+ tensorBuilder = new MapTensorBuilder(TensorType.empty);
+ cellBuilder = tensorBuilder.cell();
+ }
cellBuilder.value(cellValue);
+ return tensorBuilder;
}
private void readTensorAddress(MapTensorBuilder.CellBuilder cellBuilder) {
@@ -653,12 +684,7 @@ public class JsonReader {
buffer.bufferObject(current, parser);
}
- private boolean jsonTokenIsBooleanOrString(JsonToken jsonToken) {
- return jsonToken == JsonToken.VALUE_STRING || jsonToken == JsonToken.VALUE_TRUE || jsonToken == JsonToken.VALUE_FALSE;
- }
-
Optional<DocumentParseInfo> parseDocument() {
- Optional<Boolean> create = Optional.empty();
// we should now be at the start of a feed operation or at the end of the feed
JsonToken token = nextToken();
if (token == JsonToken.END_ARRAY) {
@@ -672,7 +698,7 @@ public class JsonReader {
try {
token = nextToken();
if ((token == JsonToken.VALUE_TRUE || token == JsonToken.VALUE_FALSE) &&
- CREATE_IF_NON_EXISTENT.equals(parser.getCurrentName())) {
+ CREATE_IF_NON_EXISTENT.equals(parser.getCurrentName())) {
documentParseInfo.create = Optional.of(token == JsonToken.VALUE_TRUE);
continue;
}
diff --git a/document/src/test/java/com/yahoo/document/json/DocumentUpdateJsonSerializerTest.java b/document/src/test/java/com/yahoo/document/json/DocumentUpdateJsonSerializerTest.java
index 182150167a4..ffec7927ab3 100644
--- a/document/src/test/java/com/yahoo/document/json/DocumentUpdateJsonSerializerTest.java
+++ b/document/src/test/java/com/yahoo/document/json/DocumentUpdateJsonSerializerTest.java
@@ -240,7 +240,7 @@ public class DocumentUpdateJsonSerializerTest {
" 'assign': {",
" 'cells': [",
" { 'address': { 'x': 'a', 'y': 'b' }, 'value': 2.0 },",
- " { 'address': { 'x': 'c' }, 'value': 3.0 }",
+ " { 'address': { 'x': 'c', 'y': 'b' }, 'value': 3.0 }",
" ]",
" }",
" }",
diff --git a/document/src/test/java/com/yahoo/document/json/JsonReaderTestCase.java b/document/src/test/java/com/yahoo/document/json/JsonReaderTestCase.java
index f727daec24f..2c455658528 100644
--- a/document/src/test/java/com/yahoo/document/json/JsonReaderTestCase.java
+++ b/document/src/test/java/com/yahoo/document/json/JsonReaderTestCase.java
@@ -1049,12 +1049,12 @@ public class JsonReaderTestCase {
@Test
public void testParsingOfTensorWithCells() {
- assertTensorField("{{x:a,y:b}:2.0,{x:c}:3.0}}",
+ assertTensorField("{{x:a,y:b}:2.0,{x:c,y:b}:3.0}}",
createPutWithTensor("{ "
+ " \"cells\": [ "
+ " { \"address\": { \"x\": \"a\", \"y\": \"b\" }, "
+ " \"value\": 2.0 }, "
- + " { \"address\": { \"x\": \"c\" }, "
+ + " { \"address\": { \"x\": \"c\", \"y\": \"b\" }, "
+ " \"value\": 3.0 } "
+ " ]"
+ "}"));
@@ -1105,12 +1105,12 @@ public class JsonReaderTestCase {
@Test
public void testAssignUpdateOfTensorWithCells() {
- assertTensorAssignUpdate("{{x:a,y:b}:2.0,{x:c}:3.0}}",
+ assertTensorAssignUpdate("{{x:a,y:b}:2.0,{x:c,y:b}:3.0}}",
createAssignUpdateWithTensor("{ "
+ " \"cells\": [ "
+ " { \"address\": { \"x\": \"a\", \"y\": \"b\" }, "
+ " \"value\": 2.0 }, "
- + " { \"address\": { \"x\": \"c\" }, "
+ + " { \"address\": { \"x\": \"c\", \"y\": \"b\" }, "
+ " \"value\": 3.0 } "
+ " ]"
+ "}"));
diff --git a/document/src/test/java/com/yahoo/document/json/JsonWriterTestCase.java b/document/src/test/java/com/yahoo/document/json/JsonWriterTestCase.java
index 43b1f3cafe4..171676be694 100644
--- a/document/src/test/java/com/yahoo/document/json/JsonWriterTestCase.java
+++ b/document/src/test/java/com/yahoo/document/json/JsonWriterTestCase.java
@@ -260,10 +260,8 @@ public class JsonWriterTestCase {
assertEquals(populateMap(inputMap), populateMap(generatedMap));
}
- private Document readDocumentFromJson(final String docId,
- final String fields) {
- InputStream rawDoc = new ByteArrayInputStream(asFeed(
- docId, fields));
+ private Document readDocumentFromJson(String docId, String fields) {
+ InputStream rawDoc = new ByteArrayInputStream(asFeed(docId, fields));
JsonReader r = new JsonReader(types, rawDoc, parserFactory);
JsonReader.DocumentParseInfo raw = r.parseDocument().get();
DocumentType docType = r.readDocumentType(raw.documentId);
@@ -322,14 +320,14 @@ public class JsonWriterTestCase {
+ " \"cells\": [ "
+ " { \"address\": { \"x\": \"a\", \"y\": \"b\" }, "
+ " \"value\": 2.0 }, "
- + " { \"address\": { \"x\": \"c\" }, "
+ + " { \"address\": { \"x\": \"c\", \"y\": \"b\" }, "
+ " \"value\": 3.0 } "
+ " ]"
+ "}", "{ "
+ " \"cells\": [ "
+ " { \"address\": { \"x\": \"a\", \"y\": \"b\" }, "
+ " \"value\": 2.0 }, "
- + " { \"address\": { \"x\": \"c\" }, "
+ + " { \"address\": { \"x\": \"c\", \"y\": \"b\" }, "
+ " \"value\": 3.0 } "
+ " ]"
+ "}");
diff --git a/document/src/test/java/com/yahoo/document/serialization/TensorFieldValueSerializationTestCase.java b/document/src/test/java/com/yahoo/document/serialization/TensorFieldValueSerializationTestCase.java
index f0334ee7e4f..a170e388896 100644
--- a/document/src/test/java/com/yahoo/document/serialization/TensorFieldValueSerializationTestCase.java
+++ b/document/src/test/java/com/yahoo/document/serialization/TensorFieldValueSerializationTestCase.java
@@ -34,7 +34,7 @@ public class TensorFieldValueSerializationTestCase {
public void requireThatTensorFieldValueIsSerializedAndDeserialized() {
assertSerialization(new TensorFieldValue());
assertSerialization(createTensor("{}"));
- assertSerialization(createTensor("{{dimX:a,dimY:bb}:2.0,{dimX:ccc,dimY:dddd}:3.0,{dimX:e}:5.0}"));
+ assertSerialization(createTensor("{{dimX:a,dimY:bb}:2.0,{dimX:ccc,dimY:dddd}:3.0,{dimX:e,dimY:ff}:5.0}"));
}
@Test
@@ -42,7 +42,7 @@ public class TensorFieldValueSerializationTestCase {
assertSerializationMatchesCpp("non_existing_tensor", new TensorFieldValue());
assertSerializationMatchesCpp("empty_tensor", createTensor("{}"));
assertSerializationMatchesCpp("multi_cell_tensor",
- createTensor("{{dimX:a,dimY:bb}:2.0,{dimX:ccc,dimY:dddd}:3.0,{dimX:e}:5.0}"));
+ createTensor("{{dimX:a,dimY:bb}:2.0,{dimX:ccc,dimY:dddd}:3.0,{dimX:e,dimY:ff}:5.0}"));
}
private static void assertSerialization(TensorFieldValue tensor) {
diff --git a/document/src/test/resources/tensor/multi_cell_tensor__cpp b/document/src/test/resources/tensor/multi_cell_tensor__cpp
index cda97503f15..d923fc10559 100644
--- a/document/src/test/resources/tensor/multi_cell_tensor__cpp
+++ b/document/src/test/resources/tensor/multi_cell_tensor__cpp
Binary files differ
diff --git a/document/src/test/resources/tensor/multi_cell_tensor__java b/document/src/test/resources/tensor/multi_cell_tensor__java
index a202c1a09ab..d923fc10559 100644
--- a/document/src/test/resources/tensor/multi_cell_tensor__java
+++ b/document/src/test/resources/tensor/multi_cell_tensor__java
Binary files differ