aboutsummaryrefslogtreecommitdiffstats
path: root/vespajlib
diff options
context:
space:
mode:
author Jon Bratseth <bratseth@verizonmedia.com> 2019-06-07 17:28:12 +0200
committer Jon Bratseth <bratseth@verizonmedia.com> 2019-06-07 17:28:12 +0200
commit c93826132f9256ce5597659a521494e6a3370a6c (patch)
tree 47407074424970532df70a1e79aa27f6c9313dcc /vespajlib
parent 842d1d79e6f429c13805207b7a516445abaa6446 (diff)
Dense string form
Diffstat (limited to 'vespajlib')
-rw-r--r-- vespajlib/src/main/java/com/yahoo/tensor/IndexedDoubleTensor.java 8
-rw-r--r-- vespajlib/src/main/java/com/yahoo/tensor/IndexedTensor.java 7
-rw-r--r-- vespajlib/src/main/java/com/yahoo/tensor/Tensor.java 6
-rw-r--r-- vespajlib/src/main/java/com/yahoo/tensor/TensorParser.java 160
-rw-r--r-- vespajlib/src/main/java/com/yahoo/tensor/TensorTypeParser.java 5
-rw-r--r-- vespajlib/src/test/java/com/yahoo/tensor/TensorParserTestCase.java 47
-rw-r--r-- vespajlib/src/test/java/com/yahoo/tensor/TensorTestCase.java 2
7 files changed, 187 insertions, 48 deletions
diff --git a/vespajlib/src/main/java/com/yahoo/tensor/IndexedDoubleTensor.java b/vespajlib/src/main/java/com/yahoo/tensor/IndexedDoubleTensor.java
index 7f1351cc42b..219a3fa2278 100644
--- a/vespajlib/src/main/java/com/yahoo/tensor/IndexedDoubleTensor.java
+++ b/vespajlib/src/main/java/com/yahoo/tensor/IndexedDoubleTensor.java
@@ -108,7 +108,13 @@ class IndexedDoubleTensor extends IndexedTensor {
@Override
public void cellByDirectIndex(long index, double value) {
- values[(int)index] = value;
+ try {
+ values[(int) index] = value;
+ }
+ catch (IndexOutOfBoundsException e) {
+ throw new IllegalArgumentException("Can not set the cell at position " + index + " in a tensor " +
+ "of type " + type + ": Index is too large");
+ }
}
}
diff --git a/vespajlib/src/main/java/com/yahoo/tensor/IndexedTensor.java b/vespajlib/src/main/java/com/yahoo/tensor/IndexedTensor.java
index aeb3da8ac40..aca2bfc1b0f 100644
--- a/vespajlib/src/main/java/com/yahoo/tensor/IndexedTensor.java
+++ b/vespajlib/src/main/java/com/yahoo/tensor/IndexedTensor.java
@@ -16,7 +16,7 @@ import java.util.Set;
import java.util.function.DoubleBinaryOperator;
/**
- * An indexed (dense) tensor backed by a double array.
+ * An indexed (dense) tensor backed by an array.
*
* @author bratseth
*/
@@ -143,9 +143,8 @@ public abstract class IndexedTensor implements Tensor {
long valueIndex = 0;
for (int i = 0; i < indexes.length; i++) {
- if (indexes[i] >= sizes.size(i)) {
- throw new IllegalArgumentException(indexes + " are not within bounds");
- }
+ if (indexes[i] >= sizes.size(i))
+ throw new IllegalArgumentException(Arrays.toString(indexes) + " are not within bounds");
valueIndex += productOfDimensionsAfter(i, sizes) * indexes[i];
}
return valueIndex;
diff --git a/vespajlib/src/main/java/com/yahoo/tensor/Tensor.java b/vespajlib/src/main/java/com/yahoo/tensor/Tensor.java
index 22ff793e6fa..c2aa155d6bb 100644
--- a/vespajlib/src/main/java/com/yahoo/tensor/Tensor.java
+++ b/vespajlib/src/main/java/com/yahoo/tensor/Tensor.java
@@ -333,7 +333,7 @@ public interface Tensor {
} else {
x = Math.nextAfter(x, y);
}
- return x==y;
+ return x == y;
}
// ----------------- Factories
@@ -367,9 +367,7 @@ public interface Tensor {
return TensorParser.tensorFrom(tensorString, Optional.empty());
}
- /**
- * Returns a double as a tensor: A dimensionless tensor containing the value as its cell
- */
+ /** Returns a double as a tensor: A dimensionless tensor containing the value as its cell */
static Tensor from(double value) {
return Tensor.Builder.of(TensorType.empty).cell(value).build();
}
diff --git a/vespajlib/src/main/java/com/yahoo/tensor/TensorParser.java b/vespajlib/src/main/java/com/yahoo/tensor/TensorParser.java
index 45a9992c9ad..4d9bb258423 100644
--- a/vespajlib/src/main/java/com/yahoo/tensor/TensorParser.java
+++ b/vespajlib/src/main/java/com/yahoo/tensor/TensorParser.java
@@ -8,44 +8,59 @@ import java.util.Optional;
*/
class TensorParser {
- static Tensor tensorFrom(String tensorString, Optional<TensorType> type) {
+ static Tensor tensorFrom(String tensorString, Optional<TensorType> explicitType) {
+ Optional<TensorType> type;
+ String valueString;
+
tensorString = tensorString.trim();
- try {
- if (tensorString.startsWith("tensor")) {
- int colonIndex = tensorString.indexOf(':');
- String typeString = tensorString.substring(0, colonIndex);
- String valueString = tensorString.substring(colonIndex + 1);
- TensorType typeFromString = TensorTypeParser.fromSpec(typeString);
- if (type.isPresent() && ! type.get().equals(typeFromString))
- throw new IllegalArgumentException("Got tensor with type string '" + typeString + "', but was " +
- "passed type " + type.get());
- return tensorFromValueString(valueString, typeFromString);
- }
- else if (tensorString.startsWith("{")) {
- return tensorFromValueString(tensorString, type.orElse(typeFromValueString(tensorString)));
- }
- else {
- if (type.isPresent() && ! type.get().equals(TensorType.empty))
- throw new IllegalArgumentException("Got zero-dimensional tensor '" + tensorString +
- "' where type " + type.get() + " is required");
+ if (tensorString.startsWith("tensor")) {
+ int colonIndex = tensorString.indexOf(':');
+ String typeString = tensorString.substring(0, colonIndex);
+ TensorType typeFromString = TensorTypeParser.fromSpec(typeString);
+ if (explicitType.isPresent() && ! explicitType.get().equals(typeFromString))
+ throw new IllegalArgumentException("Got tensor with type string '" + typeString + "', but was " +
+ "passed type " + explicitType.get());
+ type = Optional.of(typeFromString);
+ valueString = tensorString.substring(colonIndex + 1);
+ }
+ else {
+ type = explicitType;
+ valueString = tensorString;
+ }
+
+ valueString = valueString.trim();
+ if (valueString.startsWith("{")) {
+ return tensorFromSparseValueString(valueString, type);
+ }
+ else if (valueString.startsWith("[")) {
+ return tensorFromDenseValueString(valueString, type);
+ }
+ else {
+ if (explicitType.isPresent() && ! explicitType.get().equals(TensorType.empty))
+ throw new IllegalArgumentException("Got a zero-dimensional tensor value ('" + tensorString +
+ "') where type " + explicitType.get() + " is required");
+ try {
return Tensor.Builder.of(TensorType.empty).cell(Double.parseDouble(tensorString)).build();
}
- }
- catch (NumberFormatException e) {
- throw new IllegalArgumentException("Excepted a number or a string starting by { or tensor(, got '" +
- tensorString + "'");
+ catch (NumberFormatException e) {
+ throw new IllegalArgumentException("Excepted a number or a string starting by {, [ or tensor(...):, got '" +
+ tensorString + "'");
+ }
}
}
- /** Derive the tensor type from the first address string in the given tensor string */
- private static TensorType typeFromValueString(String s) {
- s = s.substring(1).trim(); // remove tensor start
+ /** Derives the tensor type from the first address string in the given tensor string */
+ private static TensorType typeFromSparseValueString(String valueString) {
+ String s = valueString.substring(1).trim(); // remove tensor start
int firstKeyOrTensorEnd = s.indexOf('}');
+ if (firstKeyOrTensorEnd < 0)
+ throw new IllegalArgumentException("Excepted a number or a string starting by {, [ or tensor(...):, got '" +
+ valueString + "'");
String addressBody = s.substring(0, firstKeyOrTensorEnd).trim();
if (addressBody.isEmpty()) return TensorType.empty; // Empty tensor
if ( ! addressBody.startsWith("{")) return TensorType.empty; // Single value tensor
- addressBody = addressBody.substring(1); // remove key start
+ addressBody = addressBody.substring(1, addressBody.length()); // remove key start
if (addressBody.isEmpty()) return TensorType.empty; // Empty key
TensorType.Builder builder = new TensorType.Builder(TensorType.Value.DOUBLE);
@@ -60,19 +75,94 @@ class TensorParser {
return builder.build();
}
- private static Tensor tensorFromValueString(String tensorValueString, TensorType type) {
- Tensor.Builder builder = Tensor.Builder.of(type);
- tensorValueString = tensorValueString.trim();
+ private static Tensor tensorFromSparseValueString(String valueString, Optional<TensorType> type) {
try {
- if (tensorValueString.startsWith("{"))
- return fromCellString(builder, tensorValueString);
- else
- return builder.cell(Double.parseDouble(tensorValueString)).build();
+ valueString = valueString.trim();
+ Tensor.Builder builder = Tensor.Builder.of(type.orElse(typeFromSparseValueString(valueString)));
+ return fromCellString(builder, valueString);
}
catch (NumberFormatException e) {
throw new IllegalArgumentException("Excepted a number or a string starting by { or tensor(, got '" +
- tensorValueString + "'");
+ valueString + "'");
+ }
+ }
+
+ private static Tensor tensorFromDenseValueString(String valueString, Optional<TensorType> type) {
+ if (type.isEmpty())
+ throw new IllegalArgumentException("The dense tensor form requires an explicit tensor type " +
+ "on the form 'tensor(dimensions):...");
+ if (type.get().dimensions().stream().anyMatch(d -> ( d.size().isEmpty())))
+ throw new IllegalArgumentException("The dense tensor form requires a tensor type containing " +
+ "only dense dimensions with a given size");
+ IndexedTensor.BoundBuilder builder = (IndexedTensor.BoundBuilder)IndexedTensor.Builder.of(type.get());
+
+ // Since we know the dimensions the brackets are just syntactic sugar
+ long[] indexes = new long[builder.type().rank()];
+ int currentChar;
+ int nextNumberEnd = 0;
+ while ((currentChar = nextStartCharIndex(nextNumberEnd + 1, valueString)) < valueString.length()) {
+ nextNumberEnd = nextStopCharIndex(currentChar, valueString);
+ if (currentChar == nextNumberEnd) return builder.build();
+
+ if (builder.type().valueType() == TensorType.Value.DOUBLE)
+ builder.cellByDirectIndex(nextCellIndex(indexes, builder), Double.parseDouble(valueString.substring(currentChar, nextNumberEnd)));
+ else if (builder.type().valueType() == TensorType.Value.FLOAT)
+ builder.cellByDirectIndex(nextCellIndex(indexes, builder), Float.parseFloat(valueString.substring(currentChar, nextNumberEnd)));
+ else
+ throw new IllegalArgumentException(builder.type().valueType() + " is not supported");
+ }
+ return builder.build();
+ }
+
+ // -----
+
+ /**
+ * Advance to the next cell in left-adjacent order.
+ *
+ * On rightmost vs. leftmost adjacency:
+ * A dense tensor is laid out with the rightmost dimension as adjacent numbers,
+ * but when we parse a dense tensor we encounter numbers in the leftmost-adjacent order, since
+ * that is the most natural way to write it: tensor(x,y)[[1,2],[3,4]]
+ * should mean {{x:0, y:0}:1, {x:1, y:0}:2, {x:0, y:1}:3, {x:1, y:1}:4}.
+ * Therefore we need to convert the encounter order (numberIndex) from left-adjacent to right-adjacent.
+ */
+ private static long nextCellIndex(long[] indexes, IndexedTensor.BoundBuilder builder) {
+ long cellIndex = IndexedTensor.toValueIndex(indexes, builder.sizes());
+
+ // Find next dimension to advance
+ int nextInDimension = 0;
+ while (nextInDimension < indexes.length && indexes[nextInDimension] + 1 >= builder.sizes().size(nextInDimension)) {
+ indexes[nextInDimension] = 0;
+ nextInDimension++;
+ }
+ if (nextInDimension < indexes.length)
+ indexes[nextInDimension]++;
+ else // there is no next - become invalid
+ indexes[0]++;
+
+ return cellIndex;
+ }
+
+ /** Returns the position of the next character that should contain a number, or the string length if there is none */
+ private static int nextStartCharIndex(int charIndex, String valueString) {
+ for (; charIndex < valueString.length(); charIndex++) {
+ if (valueString.charAt(charIndex) == ']') continue;
+ if (valueString.charAt(charIndex) == '[') continue;
+ if (valueString.charAt(charIndex) == ',') continue;
+ if (valueString.charAt(charIndex) == ' ') continue;
+ return charIndex;
+ }
+ return valueString.length();
+ }
+
+ private static int nextStopCharIndex(int charIndex, String valueString) {
+ while (charIndex < valueString.length()) {
+ if (valueString.charAt(charIndex) == ',') return charIndex;
+ if (valueString.charAt(charIndex) == ']') return charIndex;
+ charIndex++;
}
+ throw new IllegalArgumentException("Malformed tensor value '" + valueString +
+ "': Expected a ',' or ']' after position " + charIndex);
}
private static Tensor fromCellString(Tensor.Builder builder, String s) {
diff --git a/vespajlib/src/main/java/com/yahoo/tensor/TensorTypeParser.java b/vespajlib/src/main/java/com/yahoo/tensor/TensorTypeParser.java
index d5f77be0dd0..1f426942c5f 100644
--- a/vespajlib/src/main/java/com/yahoo/tensor/TensorTypeParser.java
+++ b/vespajlib/src/main/java/com/yahoo/tensor/TensorTypeParser.java
@@ -24,6 +24,7 @@ public class TensorTypeParser {
private static final Pattern mappedPattern = Pattern.compile("(\\w+)\\{\\}");
public static TensorType fromSpec(String specString) {
+ specString = specString.trim();
if ( ! specString.startsWith(START_STRING) || ! specString.endsWith(END_STRING))
throw formatException(specString);
String specBody = specString.substring(START_STRING.length(), specString.length() - END_STRING.length());
@@ -112,9 +113,9 @@ public class TensorTypeParser {
private static IllegalArgumentException formatException(String spec, Optional<String> errorDetail) {
throw new IllegalArgumentException("A tensor type spec must be on the form " +
- "tensor[<valuetype>]?(dimensionidentifier[{}|[length?]*), but was '" + spec + "'. " +
+ "tensor[<valuetype>]?(dimensionidentifier[{}|[length]*), but was '" + spec + "'. " +
errorDetail.map(s -> s + ". ").orElse("") +
- "Examples: tensor(x[]), tensor<float>(name{}, x[10])");
+ "Examples: tensor(x[3]), tensor<float>(name{}, x[10])");
}
}
diff --git a/vespajlib/src/test/java/com/yahoo/tensor/TensorParserTestCase.java b/vespajlib/src/test/java/com/yahoo/tensor/TensorParserTestCase.java
index 04ea118280c..313cca833f1 100644
--- a/vespajlib/src/test/java/com/yahoo/tensor/TensorParserTestCase.java
+++ b/vespajlib/src/test/java/com/yahoo/tensor/TensorParserTestCase.java
@@ -9,13 +9,58 @@ import static org.junit.Assert.fail;
public class TensorParserTestCase {
@Test
- public void testParsing() {
+ public void testSparseParsing() {
assertEquals(Tensor.Builder.of(TensorType.fromSpec("tensor()")).build(),
Tensor.from("{}"));
assertEquals(Tensor.Builder.of(TensorType.fromSpec("tensor(x{})")).cell(1.0, 0).build(),
Tensor.from("{{x:0}:1.0}"));
assertEquals(Tensor.Builder.of(TensorType.fromSpec("tensor(x{})")).cell().label("x", "l0").value(1.0).build(),
Tensor.from("{{x:l0}:1.0}"));
+ assertEquals("If the type is specified, a dense tensor can be created from the sparse text form",
+ Tensor.Builder.of(TensorType.fromSpec("tensor(x[1])")).cell(1.0, 0).build(),
+ Tensor.from("tensor(x[1]):{{x:0}:1.0}"));
+ }
+
+ @Test
+ public void testDenseParsing() {
+ assertEquals(Tensor.Builder.of(TensorType.fromSpec("tensor()")).build(),
+ Tensor.from("tensor():[]"));
+ assertEquals(Tensor.Builder.of(TensorType.fromSpec("tensor(x[1])")).cell(1.0, 0).build(),
+ Tensor.from("tensor(x[1]):[1.0]"));
+ assertEquals(Tensor.Builder.of(TensorType.fromSpec("tensor(x[2])")).cell(1.0, 0).cell(2.0, 1).build(),
+ Tensor.from("tensor(x[2]):[1.0, 2.0]"));
+ assertEquals(Tensor.Builder.of(TensorType.fromSpec("tensor(x[2],y[3])"))
+ .cell(1.0, 0, 0)
+ .cell(2.0, 1, 0)
+ .cell(3.0, 0, 1)
+ .cell(4.0, 1, 1)
+ .cell(5.0, 0, 2)
+ .cell(6.0, 1, 2).build(),
+ Tensor.from("tensor(x[2],y[3]):[[1.0, 2.0], [3.0, 4.0], [5.0, 6.0]]"));
+ assertEquals(Tensor.Builder.of(TensorType.fromSpec("tensor(x[1],y[2],z[3])"))
+ .cell(1.0, 0, 0, 0)
+ .cell(2.0, 0, 1, 0)
+ .cell(3.0, 0, 0, 1)
+ .cell(4.0, 0, 1, 1)
+ .cell(5.0, 0, 0, 2)
+ .cell(6.0, 0, 1, 2).build(),
+ Tensor.from("tensor(x[1],y[2],z[3]):[[[1.0], [2.0]], [[3.0], [4.0]], [[5.0], [6.0]]]"));
+ assertEquals(Tensor.Builder.of(TensorType.fromSpec("tensor(x[3],y[2],z[1])"))
+ .cell(1.0, 0, 0, 0)
+ .cell(2.0, 1, 0, 0)
+ .cell(3.0, 2, 0, 0)
+ .cell(4.0, 0, 1, 0)
+ .cell(5.0, 1, 1, 0)
+ .cell(6.0, 2, 1, 0).build(),
+ Tensor.from("tensor(x[3],y[2],z[1]):[[[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]]]"));
+ assertEquals(Tensor.Builder.of(TensorType.fromSpec("tensor(x[3],y[2],z[1])"))
+ .cell(1.0, 0, 0, 0)
+ .cell(2.0, 1, 0, 0)
+ .cell(3.0, 2, 0, 0)
+ .cell(4.0, 0, 1, 0)
+ .cell(5.0, 1, 1, 0)
+ .cell(6.0, 2, 1, 0).build(),
+ Tensor.from("tensor( x[3],y[2],z[1]) : [ [ [1.0, 2.0, 3.0] , [4.0, 5,6.0] ] ]"));
}
@Test
diff --git a/vespajlib/src/test/java/com/yahoo/tensor/TensorTestCase.java b/vespajlib/src/test/java/com/yahoo/tensor/TensorTestCase.java
index b01d171792c..c53db160806 100644
--- a/vespajlib/src/test/java/com/yahoo/tensor/TensorTestCase.java
+++ b/vespajlib/src/test/java/com/yahoo/tensor/TensorTestCase.java
@@ -54,7 +54,7 @@ public class TensorTestCase {
fail("Expected parse error");
}
catch (IllegalArgumentException expected) {
- assertEquals("Excepted a number or a string starting by { or tensor(, got '--'", expected.getMessage());
+ assertEquals("Excepted a number or a string starting by {, [ or tensor(...):, got '--'", expected.getMessage());
}
}