From c93826132f9256ce5597659a521494e6a3370a6c Mon Sep 17 00:00:00 2001 From: Jon Bratseth Date: Fri, 7 Jun 2019 17:28:12 +0200 Subject: Dense string form --- .../java/com/yahoo/tensor/IndexedDoubleTensor.java | 8 +- .../main/java/com/yahoo/tensor/IndexedTensor.java | 7 +- .../src/main/java/com/yahoo/tensor/Tensor.java | 6 +- .../main/java/com/yahoo/tensor/TensorParser.java | 160 ++++++++++++++++----- .../java/com/yahoo/tensor/TensorTypeParser.java | 5 +- .../com/yahoo/tensor/TensorParserTestCase.java | 47 +++++- .../test/java/com/yahoo/tensor/TensorTestCase.java | 2 +- 7 files changed, 187 insertions(+), 48 deletions(-) (limited to 'vespajlib') diff --git a/vespajlib/src/main/java/com/yahoo/tensor/IndexedDoubleTensor.java b/vespajlib/src/main/java/com/yahoo/tensor/IndexedDoubleTensor.java index 7f1351cc42b..219a3fa2278 100644 --- a/vespajlib/src/main/java/com/yahoo/tensor/IndexedDoubleTensor.java +++ b/vespajlib/src/main/java/com/yahoo/tensor/IndexedDoubleTensor.java @@ -108,7 +108,13 @@ class IndexedDoubleTensor extends IndexedTensor { @Override public void cellByDirectIndex(long index, double value) { - values[(int)index] = value; + try { + values[(int) index] = value; + } + catch (IndexOutOfBoundsException e) { + throw new IllegalArgumentException("Can not set the cell at position " + index + " in a tensor " + + "of type " + type + ": Index is too large"); + } } } diff --git a/vespajlib/src/main/java/com/yahoo/tensor/IndexedTensor.java b/vespajlib/src/main/java/com/yahoo/tensor/IndexedTensor.java index aeb3da8ac40..aca2bfc1b0f 100644 --- a/vespajlib/src/main/java/com/yahoo/tensor/IndexedTensor.java +++ b/vespajlib/src/main/java/com/yahoo/tensor/IndexedTensor.java @@ -16,7 +16,7 @@ import java.util.Set; import java.util.function.DoubleBinaryOperator; /** - * An indexed (dense) tensor backed by a double array. + * An indexed (dense) tensor backed by an array. 
* * @author bratseth */ @@ -143,9 +143,8 @@ public abstract class IndexedTensor implements Tensor { long valueIndex = 0; for (int i = 0; i < indexes.length; i++) { - if (indexes[i] >= sizes.size(i)) { - throw new IllegalArgumentException(indexes + " are not within bounds"); - } + if (indexes[i] >= sizes.size(i)) + throw new IllegalArgumentException(Arrays.toString(indexes) + " are not within bounds"); valueIndex += productOfDimensionsAfter(i, sizes) * indexes[i]; } return valueIndex; diff --git a/vespajlib/src/main/java/com/yahoo/tensor/Tensor.java b/vespajlib/src/main/java/com/yahoo/tensor/Tensor.java index 22ff793e6fa..c2aa155d6bb 100644 --- a/vespajlib/src/main/java/com/yahoo/tensor/Tensor.java +++ b/vespajlib/src/main/java/com/yahoo/tensor/Tensor.java @@ -333,7 +333,7 @@ public interface Tensor { } else { x = Math.nextAfter(x, y); } - return x==y; + return x == y; } // ----------------- Factories @@ -367,9 +367,7 @@ public interface Tensor { return TensorParser.tensorFrom(tensorString, Optional.empty()); } - /** - * Returns a double as a tensor: A dimensionless tensor containing the value as its cell - */ + /** Returns a double as a tensor: A dimensionless tensor containing the value as its cell */ static Tensor from(double value) { return Tensor.Builder.of(TensorType.empty).cell(value).build(); } diff --git a/vespajlib/src/main/java/com/yahoo/tensor/TensorParser.java b/vespajlib/src/main/java/com/yahoo/tensor/TensorParser.java index 45a9992c9ad..4d9bb258423 100644 --- a/vespajlib/src/main/java/com/yahoo/tensor/TensorParser.java +++ b/vespajlib/src/main/java/com/yahoo/tensor/TensorParser.java @@ -8,44 +8,59 @@ import java.util.Optional; */ class TensorParser { - static Tensor tensorFrom(String tensorString, Optional type) { + static Tensor tensorFrom(String tensorString, Optional explicitType) { + Optional type; + String valueString; + tensorString = tensorString.trim(); - try { - if (tensorString.startsWith("tensor")) { - int colonIndex = 
tensorString.indexOf(':'); - String typeString = tensorString.substring(0, colonIndex); - String valueString = tensorString.substring(colonIndex + 1); - TensorType typeFromString = TensorTypeParser.fromSpec(typeString); - if (type.isPresent() && ! type.get().equals(typeFromString)) - throw new IllegalArgumentException("Got tensor with type string '" + typeString + "', but was " + - "passed type " + type.get()); - return tensorFromValueString(valueString, typeFromString); - } - else if (tensorString.startsWith("{")) { - return tensorFromValueString(tensorString, type.orElse(typeFromValueString(tensorString))); - } - else { - if (type.isPresent() && ! type.get().equals(TensorType.empty)) - throw new IllegalArgumentException("Got zero-dimensional tensor '" + tensorString + - "' where type " + type.get() + " is required"); + if (tensorString.startsWith("tensor")) { + int colonIndex = tensorString.indexOf(':'); + String typeString = tensorString.substring(0, colonIndex); + TensorType typeFromString = TensorTypeParser.fromSpec(typeString); + if (explicitType.isPresent() && ! explicitType.get().equals(typeFromString)) + throw new IllegalArgumentException("Got tensor with type string '" + typeString + "', but was " + + "passed type " + explicitType.get()); + type = Optional.of(typeFromString); + valueString = tensorString.substring(colonIndex + 1); + } + else { + type = explicitType; + valueString = tensorString; + } + + valueString = valueString.trim(); + if (valueString.startsWith("{")) { + return tensorFromSparseValueString(valueString, type); + } + else if (valueString.startsWith("[")) { + return tensorFromDenseValueString(valueString, type); + } + else { + if (explicitType.isPresent() && ! 
explicitType.get().equals(TensorType.empty)) + throw new IllegalArgumentException("Got a zero-dimensional tensor value ('" + tensorString + + "') where type " + explicitType.get() + " is required"); + try { return Tensor.Builder.of(TensorType.empty).cell(Double.parseDouble(tensorString)).build(); } - } - catch (NumberFormatException e) { - throw new IllegalArgumentException("Excepted a number or a string starting by { or tensor(, got '" + - tensorString + "'"); + catch (NumberFormatException e) { + throw new IllegalArgumentException("Excepted a number or a string starting by {, [ or tensor(...):, got '" + + tensorString + "'"); + } } } - /** Derive the tensor type from the first address string in the given tensor string */ - private static TensorType typeFromValueString(String s) { - s = s.substring(1).trim(); // remove tensor start + /** Derives the tensor type from the first address string in the given tensor string */ + private static TensorType typeFromSparseValueString(String valueString) { + String s = valueString.substring(1).trim(); // remove tensor start int firstKeyOrTensorEnd = s.indexOf('}'); + if (firstKeyOrTensorEnd < 0) + throw new IllegalArgumentException("Excepted a number or a string starting by {, [ or tensor(...):, got '" + + valueString + "'"); String addressBody = s.substring(0, firstKeyOrTensorEnd).trim(); if (addressBody.isEmpty()) return TensorType.empty; // Empty tensor if ( ! 
addressBody.startsWith("{")) return TensorType.empty; // Single value tensor - addressBody = addressBody.substring(1); // remove key start + addressBody = addressBody.substring(1, addressBody.length()); // remove key start if (addressBody.isEmpty()) return TensorType.empty; // Empty key TensorType.Builder builder = new TensorType.Builder(TensorType.Value.DOUBLE); @@ -60,19 +75,94 @@ class TensorParser { return builder.build(); } - private static Tensor tensorFromValueString(String tensorValueString, TensorType type) { - Tensor.Builder builder = Tensor.Builder.of(type); - tensorValueString = tensorValueString.trim(); + private static Tensor tensorFromSparseValueString(String valueString, Optional type) { try { - if (tensorValueString.startsWith("{")) - return fromCellString(builder, tensorValueString); - else - return builder.cell(Double.parseDouble(tensorValueString)).build(); + valueString = valueString.trim(); + Tensor.Builder builder = Tensor.Builder.of(type.orElse(typeFromSparseValueString(valueString))); + return fromCellString(builder, valueString); } catch (NumberFormatException e) { throw new IllegalArgumentException("Excepted a number or a string starting by { or tensor(, got '" + - tensorValueString + "'"); + valueString + "'"); + } + } + + private static Tensor tensorFromDenseValueString(String valueString, Optional type) { + if (type.isEmpty()) + throw new IllegalArgumentException("The dense tensor form requires an explicit tensor type " + + "on the form 'tensor(dimensions):..."); + if (type.get().dimensions().stream().anyMatch(d -> ( d.size().isEmpty()))) + throw new IllegalArgumentException("The dense tensor form requires a tensor type containing " + + "only dense dimensions with a given size"); + IndexedTensor.BoundBuilder builder = (IndexedTensor.BoundBuilder)IndexedTensor.Builder.of(type.get()); + + // Since we know the dimensions the brackets are just syntactic sugar + long[] indexes = new long[builder.type().rank()]; + int currentChar; + int 
nextNumberEnd = 0; + while ((currentChar = nextStartCharIndex(nextNumberEnd + 1, valueString)) < valueString.length()) { + nextNumberEnd = nextStopCharIndex(currentChar, valueString); + if (currentChar == nextNumberEnd) return builder.build(); + + if (builder.type().valueType() == TensorType.Value.DOUBLE) + builder.cellByDirectIndex(nextCellIndex(indexes, builder), Double.parseDouble(valueString.substring(currentChar, nextNumberEnd))); + else if (builder.type().valueType() == TensorType.Value.FLOAT) + builder.cellByDirectIndex(nextCellIndex(indexes, builder), Float.parseFloat(valueString.substring(currentChar, nextNumberEnd))); + else + throw new IllegalArgumentException(builder.type().valueType() + " is not supported"); + } + return builder.build(); + } + + // ----- + + /** + * Advance to the next cell in left-adjacent order. + * + * On rightmost vs. leftmost adjacency: + * A dense tensor is laid out with the rightmost dimension as adjacent numbers, + * but when we parse a dense tensor we encounter numbers in the leftmost-adjacent order, since + * that is the most natural way to write it: tensor(x,y)[[1,2],[3,4]] + * should mean {{x:0, y:0}:1, {x:1, y:0}:2, {x:0, y:1}:3, {x:1, y:1}:4}. + * Therefore we need to convert the encounter order (numberIndex) from left-adjacent to right-adjacent. 
+ */ + private static long nextCellIndex(long[] indexes, IndexedTensor.BoundBuilder builder) { + long cellIndex = IndexedTensor.toValueIndex(indexes, builder.sizes()); + + // Find next dimension to advance + int nextInDimension = 0; + while (nextInDimension < indexes.length && indexes[nextInDimension] + 1 >= builder.sizes().size(nextInDimension)) { + indexes[nextInDimension] = 0; + nextInDimension++; + } + if (nextInDimension < indexes.length) + indexes[nextInDimension]++; + else // there is no next - become invalid + indexes[0]++; + + return cellIndex; + } + + /** Returns the position of the next character that should contain a number, or if none the string length */ + private static int nextStartCharIndex(int charIndex, String valueString) { + for (; charIndex < valueString.length(); charIndex++) { + if (valueString.charAt(charIndex) == ']') continue; + if (valueString.charAt(charIndex) == '[') continue; + if (valueString.charAt(charIndex) == ',') continue; + if (valueString.charAt(charIndex) == ' ') continue; + return charIndex; + } + return valueString.length(); + } + + private static int nextStopCharIndex(int charIndex, String valueString) { + while (charIndex < valueString.length()) { + if (valueString.charAt(charIndex) == ',') return charIndex; + if (valueString.charAt(charIndex) == ']') return charIndex; + charIndex++; } + throw new IllegalArgumentException("Malformed tensor value '" + valueString + + "': Expected a ',' or ']' after position " + charIndex); } private static Tensor fromCellString(Tensor.Builder builder, String s) { diff --git a/vespajlib/src/main/java/com/yahoo/tensor/TensorTypeParser.java b/vespajlib/src/main/java/com/yahoo/tensor/TensorTypeParser.java index d5f77be0dd0..1f426942c5f 100644 --- a/vespajlib/src/main/java/com/yahoo/tensor/TensorTypeParser.java +++ b/vespajlib/src/main/java/com/yahoo/tensor/TensorTypeParser.java @@ -24,6 +24,7 @@ public class TensorTypeParser { private static final Pattern mappedPattern = 
Pattern.compile("(\\w+)\\{\\}"); public static TensorType fromSpec(String specString) { + specString = specString.trim(); if ( ! specString.startsWith(START_STRING) || ! specString.endsWith(END_STRING)) throw formatException(specString); String specBody = specString.substring(START_STRING.length(), specString.length() - END_STRING.length()); @@ -112,9 +113,9 @@ public class TensorTypeParser { private static IllegalArgumentException formatException(String spec, Optional errorDetail) { throw new IllegalArgumentException("A tensor type spec must be on the form " + - "tensor[]?(dimensionidentifier[{}|[length?]*), but was '" + spec + "'. " + + "tensor[]?(dimensionidentifier[{}|[length]*), but was '" + spec + "'. " + errorDetail.map(s -> s + ". ").orElse("") + - "Examples: tensor(x[]), tensor(name{}, x[10])"); + "Examples: tensor(x[3]), tensor(name{}, x[10])"); } } diff --git a/vespajlib/src/test/java/com/yahoo/tensor/TensorParserTestCase.java b/vespajlib/src/test/java/com/yahoo/tensor/TensorParserTestCase.java index 04ea118280c..313cca833f1 100644 --- a/vespajlib/src/test/java/com/yahoo/tensor/TensorParserTestCase.java +++ b/vespajlib/src/test/java/com/yahoo/tensor/TensorParserTestCase.java @@ -9,13 +9,58 @@ import static org.junit.Assert.fail; public class TensorParserTestCase { @Test - public void testParsing() { + public void testSparseParsing() { assertEquals(Tensor.Builder.of(TensorType.fromSpec("tensor()")).build(), Tensor.from("{}")); assertEquals(Tensor.Builder.of(TensorType.fromSpec("tensor(x{})")).cell(1.0, 0).build(), Tensor.from("{{x:0}:1.0}")); assertEquals(Tensor.Builder.of(TensorType.fromSpec("tensor(x{})")).cell().label("x", "l0").value(1.0).build(), Tensor.from("{{x:l0}:1.0}")); + assertEquals("If the type is specified, a dense tensor can be created from the sparse text form", + Tensor.Builder.of(TensorType.fromSpec("tensor(x[1])")).cell(1.0, 0).build(), + Tensor.from("tensor(x[1]):{{x:0}:1.0}")); + } + + @Test + public void testDenseParsing() { + 
assertEquals(Tensor.Builder.of(TensorType.fromSpec("tensor()")).build(), + Tensor.from("tensor():[]")); + assertEquals(Tensor.Builder.of(TensorType.fromSpec("tensor(x[1])")).cell(1.0, 0).build(), + Tensor.from("tensor(x[1]):[1.0]")); + assertEquals(Tensor.Builder.of(TensorType.fromSpec("tensor(x[2])")).cell(1.0, 0).cell(2.0, 1).build(), + Tensor.from("tensor(x[2]):[1.0, 2.0]")); + assertEquals(Tensor.Builder.of(TensorType.fromSpec("tensor(x[2],y[3])")) + .cell(1.0, 0, 0) + .cell(2.0, 1, 0) + .cell(3.0, 0, 1) + .cell(4.0, 1, 1) + .cell(5.0, 0, 2) + .cell(6.0, 1, 2).build(), + Tensor.from("tensor(x[2],y[3]):[[1.0, 2.0], [3.0, 4.0], [5.0, 6.0]]")); + assertEquals(Tensor.Builder.of(TensorType.fromSpec("tensor(x[1],y[2],z[3])")) + .cell(1.0, 0, 0, 0) + .cell(2.0, 0, 1, 0) + .cell(3.0, 0, 0, 1) + .cell(4.0, 0, 1, 1) + .cell(5.0, 0, 0, 2) + .cell(6.0, 0, 1, 2).build(), + Tensor.from("tensor(x[1],y[2],z[3]):[[[1.0], [2.0]], [[3.0], [4.0]], [[5.0], [6.0]]]")); + assertEquals(Tensor.Builder.of(TensorType.fromSpec("tensor(x[3],y[2],z[1])")) + .cell(1.0, 0, 0, 0) + .cell(2.0, 1, 0, 0) + .cell(3.0, 2, 0, 0) + .cell(4.0, 0, 1, 0) + .cell(5.0, 1, 1, 0) + .cell(6.0, 2, 1, 0).build(), + Tensor.from("tensor(x[3],y[2],z[1]):[[[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]]]")); + assertEquals(Tensor.Builder.of(TensorType.fromSpec("tensor(x[3],y[2],z[1])")) + .cell(1.0, 0, 0, 0) + .cell(2.0, 1, 0, 0) + .cell(3.0, 2, 0, 0) + .cell(4.0, 0, 1, 0) + .cell(5.0, 1, 1, 0) + .cell(6.0, 2, 1, 0).build(), + Tensor.from("tensor( x[3],y[2],z[1]) : [ [ [1.0, 2.0, 3.0] , [4.0, 5,6.0] ] ]")); } @Test diff --git a/vespajlib/src/test/java/com/yahoo/tensor/TensorTestCase.java b/vespajlib/src/test/java/com/yahoo/tensor/TensorTestCase.java index b01d171792c..c53db160806 100644 --- a/vespajlib/src/test/java/com/yahoo/tensor/TensorTestCase.java +++ b/vespajlib/src/test/java/com/yahoo/tensor/TensorTestCase.java @@ -54,7 +54,7 @@ public class TensorTestCase { fail("Expected parse error"); } catch (IllegalArgumentException 
expected) { - assertEquals("Excepted a number or a string starting by { or tensor(, got '--'", expected.getMessage()); + assertEquals("Excepted a number or a string starting by {, [ or tensor(...):, got '--'", expected.getMessage()); } } -- cgit v1.2.3