summaryrefslogtreecommitdiffstats
path: root/vespajlib/src/main/java/com/yahoo/tensor/MixedTensor.java
diff options
context:
space:
mode:
Diffstat (limited to 'vespajlib/src/main/java/com/yahoo/tensor/MixedTensor.java')
-rw-r--r--vespajlib/src/main/java/com/yahoo/tensor/MixedTensor.java441
1 files changed, 441 insertions, 0 deletions
diff --git a/vespajlib/src/main/java/com/yahoo/tensor/MixedTensor.java b/vespajlib/src/main/java/com/yahoo/tensor/MixedTensor.java
new file mode 100644
index 00000000000..79bb27fcd1b
--- /dev/null
+++ b/vespajlib/src/main/java/com/yahoo/tensor/MixedTensor.java
@@ -0,0 +1,441 @@
+// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+package com.yahoo.tensor;
+
+import com.google.common.annotations.Beta;
+import com.google.common.collect.ImmutableList;
+import com.google.common.collect.ImmutableMap;
+
+import java.util.HashMap;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Map;
+import java.util.stream.Collectors;
+
+/**
+ * A mixed tensor type. This is class is currently suitable for serialization
+ * and deserialization, not yet for computation.
+ *
+ * A mixed tensor has a combination of mapped and indexed dimensions. By
+ * reordering the mapped dimensions before the indexed dimensions, one can
+ * think of mixed tensors as the mapped dimensions mapping to a
+ * dense tensor. This dense tensor is called a dense subspace.
+ *
+ * @author lesters
+ */
+@Beta
+public class MixedTensor implements Tensor {
+
+ /** The dimension specification for this tensor */
+ private final TensorType type;
+
+ /** The list of cells in the tensor */
+ private final ImmutableList<Cell> cells;
+
+ /** An index structure over the cell list */
+ private final Index index;
+
+ private MixedTensor(TensorType type, ImmutableList<Cell> cells, Index index) {
+ this.type = type;
+ this.cells = ImmutableList.copyOf(cells);
+ this.index = index;
+ }
+
+ /** Returns the tensor type */
+ @Override
+ public TensorType type() { return type; }
+
+ /** Returns the size of the tensor measured in number of cells */
+ @Override
+ public int size() { return cells.size(); }
+
+ /** Returns the value at the given address */
+ @Override
+ public double get(TensorAddress address) {
+ int cellIndex = index.indexOf(address);
+ Cell cell = cells.get(cellIndex);
+ if (!address.equals(cell.getKey())) {
+ throw new IllegalStateException("Unable to find correct cell by direct index.");
+ }
+ return cell.getValue();
+ }
+
+ /**
+ * Returns an iterator over the cells of this tensor.
+ * Cells are returned in order of increasing indexes in the
+ * indexed dimensions, increasing indexes of later dimensions
+ * in the dimension type before earlier. No guarantee is
+ * given for the order of sparse dimensions.
+ */
+ @Override
+ public Iterator<Cell> cellIterator() {
+ return cells.iterator();
+ }
+
+ /**
+ * Returns an iterator over the values of this tensor.
+ * The iteration order is the same as for cellIterator.
+ */
+ @Override
+ public Iterator<Double> valueIterator() {
+ return new Iterator<Double>() {
+ Iterator<Cell> cellIterator = cellIterator();
+ @Override
+ public boolean hasNext() {
+ return cellIterator.hasNext();
+ }
+ @Override
+ public Double next() {
+ return cellIterator.next().getValue();
+ }
+ };
+ }
+
+ @Override
+ public Map<TensorAddress, Double> cells() {
+ ImmutableMap.Builder<TensorAddress, Double> builder = new ImmutableMap.Builder<>();
+ for (Cell cell : cells) {
+ builder.put(cell.getKey(), cell.getValue());
+ }
+ return builder.build();
+ }
+
+ @Override
+ public int hashCode() { return cells.hashCode(); }
+
+ @Override
+ public String toString() { return Tensor.toStandardString(this); }
+
+ @Override
+ public boolean equals(Object other) {
+ if ( ! ( other instanceof Tensor)) return false;
+ return Tensor.equals(this, ((Tensor)other));
+ }
+
+ /** Returns the size of dense subspaces */
+ public int denseSubspaceSize() {
+ return index.denseSubspaceSize();
+ }
+
+
+ /**
+ * Base class for building mixed tensors.
+ */
+ public abstract static class Builder implements Tensor.Builder {
+
+ final TensorType type;
+
+ /**
+ * Create a builder depending upon the type of indexed dimensions.
+ * If at least one indexed dimension is unbound, we create
+ * a temporary structure while finding dimension bounds.
+ */
+ public static Builder of(TensorType type) {
+ if (type.dimensions().stream().anyMatch(d -> d instanceof TensorType.IndexedUnboundDimension)) {
+ return new UnboundBuilder(type);
+ } else {
+ return new BoundBuilder(type);
+ }
+ }
+
+ private Builder(TensorType type) {
+ this.type = type;
+ }
+
+ @Override
+ public TensorType type() {
+ return type;
+ }
+
+ @Override
+ public Tensor.Builder cell(double value, int... labels) {
+ throw new UnsupportedOperationException("Not implemented.");
+ }
+
+ @Override
+ public CellBuilder cell() {
+ return new CellBuilder(type(), this);
+ }
+
+ @Override
+ public abstract MixedTensor build();
+
+ }
+
+
+ /**
+ * Builder for mixed tensors with bound indexed dimensions.
+ */
+ public static class BoundBuilder extends Builder {
+
+ /** For each sparse partial address, hold a dense subspace */
+ final private Map<TensorAddress, double[]> denseSubspaceMap = new HashMap<>();
+ final private Index.Builder indexBuilder;
+ final private Index index;
+
+ private BoundBuilder(TensorType type) {
+ super(type);
+ indexBuilder = new Index.Builder(type);
+ index = indexBuilder.index();
+ }
+
+ public int denseSubspaceSize() {
+ return index.denseSubspaceSize();
+ }
+
+ private double[] denseSubspace(TensorAddress sparsePartial) {
+ if (!denseSubspaceMap.containsKey(sparsePartial)) {
+ denseSubspaceMap.put(sparsePartial, new double[denseSubspaceSize()]);
+ }
+ return denseSubspaceMap.get(sparsePartial);
+ }
+
+ @Override
+ public Tensor.Builder cell(TensorAddress address, double value) {
+ TensorAddress sparsePart = index.sparsePartialAddress(address);
+ int denseOffset = index.denseOffset(address);
+ double[] denseSubspace = denseSubspace(sparsePart);
+ denseSubspace[denseOffset] = value;
+ return this;
+ }
+
+ public Tensor.Builder block(TensorAddress sparsePart, double[] values) {
+ double[] denseSubspace = denseSubspace(sparsePart);
+ System.arraycopy(values, 0, denseSubspace, 0, denseSubspaceSize());
+ return this;
+ }
+
+ @Override
+ public MixedTensor build() {
+ int count = 0;
+ ImmutableList.Builder<Cell> builder = new ImmutableList.Builder<>();
+
+ for (Map.Entry<TensorAddress, double[]> entry : denseSubspaceMap.entrySet()) {
+ TensorAddress sparsePart = entry.getKey();
+ indexBuilder.put(sparsePart, count);
+
+ double[] denseSubspace = entry.getValue();
+ for (int offset = 0; offset < denseSubspace.length; ++offset) {
+ TensorAddress cellAddress = index.addressOf(sparsePart, offset);
+ double value = denseSubspace[offset];
+ builder.add(new Cell(cellAddress, value));
+ count++;
+ }
+ }
+ return new MixedTensor(type, builder.build(), indexBuilder.build());
+ }
+
+ }
+
+
+ /**
+ * Temporarily stores all cells to find bounds of indexed dimensions,
+ * then creates a tensor using BoundBuilder. This is due to the
+ * fact that for serialization the size of the dense subspace must be
+ * known, and equal for all dense subspaces. A side effect is that the
+ * tensor type is effectively changed, such that unbound indexed
+ * dimensions become bound.
+ */
+ public static class UnboundBuilder extends Builder {
+
+ private Map<TensorAddress, Double> cells;
+ private final int[] dimensionBounds;
+
+ private UnboundBuilder(TensorType type) {
+ super(type);
+ cells = new HashMap<>();
+ dimensionBounds = new int[type.dimensions().size()];
+ }
+
+ @Override
+ public Tensor.Builder cell(TensorAddress address, double value) {
+ cells.put(address, value);
+ trackBounds(address);
+ return this;
+ }
+
+ @Override
+ public MixedTensor build() {
+ TensorType boundType = createBoundType();
+ BoundBuilder builder = new BoundBuilder(boundType);
+ for (Map.Entry<TensorAddress, Double> cell : cells.entrySet()) {
+ builder.cell(cell.getKey(), cell.getValue());
+ }
+ return builder.build();
+ }
+
+ public void trackBounds(TensorAddress address) {
+ for (int i = 0; i < type.dimensions().size(); ++i) {
+ TensorType.Dimension dimension = type.dimensions().get(i);
+ if (dimension.isIndexed()) {
+ dimensionBounds[i] = Math.max(address.intLabel(i), dimensionBounds[i]);
+ }
+ }
+ }
+
+ public TensorType createBoundType() {
+ TensorType.Builder typeBuilder = new TensorType.Builder();
+ for (int i = 0; i < type.dimensions().size(); ++i) {
+ TensorType.Dimension dimension = type.dimensions().get(i);
+ if (!dimension.isIndexed()) {
+ typeBuilder.mapped(dimension.name());
+ } else {
+ int size = dimension.size().orElse(dimensionBounds[i] + 1);
+ typeBuilder.indexed(dimension.name(), size);
+ }
+ }
+ return typeBuilder.build();
+ }
+
+ }
+
+ /**
+ * An immutable index into a list of cells.
+ * Contains additional information required
+ * for handling mixed tensor addresses.
+ * Assumes indexed dimensions are bound.
+ */
+ private static class Index {
+
+ private final TensorType type;
+ private final TensorType sparseType;
+ private final TensorType denseType;
+ private final List<TensorType.Dimension> mappedDimensions;
+ private final List<TensorType.Dimension> indexedDimensions;
+
+ private ImmutableMap<TensorAddress, Integer> sparseMap;
+ private int denseSubspaceSize = -1;
+
+ private Index(TensorType type) {
+ this.type = type;
+ this.mappedDimensions = type.dimensions().stream().filter(d -> !d.isIndexed()).collect(Collectors.toList());
+ this.indexedDimensions = type.dimensions().stream().filter(d -> d.isIndexed()).collect(Collectors.toList());
+ this.sparseType = createPartialType(mappedDimensions);
+ this.denseType = createPartialType(indexedDimensions);
+ }
+
+ public int indexOf(TensorAddress address) {
+ TensorAddress sparsePart = sparsePartialAddress(address);
+ if (!sparseMap.containsKey(sparsePart)) {
+ throw new IllegalArgumentException("Address not found");
+ }
+ int base = sparseMap.get(sparsePart);
+ int offset = denseOffset(address);
+ return base + offset;
+ }
+
+ public static class Builder {
+ private final Index index;
+ private final ImmutableMap.Builder<TensorAddress, Integer> builder;
+
+ public Builder(TensorType type) {
+ index = new Index(type);
+ builder = new ImmutableMap.Builder<>();
+ }
+
+ public void put(TensorAddress address, int index) {
+ builder.put(address, index);
+ }
+
+ public Index build() {
+ index.sparseMap = builder.build();
+ return index;
+ }
+
+ public Index index() {
+ return index;
+ }
+ }
+
+ public int denseSubspaceSize() {
+ if (denseSubspaceSize == -1) {
+ denseSubspaceSize = 1;
+ for (int i = 0; i < type.dimensions().size(); ++i) {
+ TensorType.Dimension dimension = type.dimensions().get(i);
+ if (dimension.isIndexed()) {
+ denseSubspaceSize *= dimension.size().orElseThrow(() ->
+ new IllegalArgumentException("Unknown size of indexed dimension."));
+ }
+ }
+ }
+ return denseSubspaceSize;
+ }
+
+ private TensorAddress sparsePartialAddress(TensorAddress address) {
+ if (type.dimensions().size() != address.size()) {
+ throw new IllegalArgumentException("Tensor type and address are not of same size.");
+ }
+ TensorAddress.Builder builder = new TensorAddress.Builder(sparseType);
+ for (int i = 0; i < type.dimensions().size(); ++i) {
+ TensorType.Dimension dimension = type.dimensions().get(i);
+ if (!dimension.isIndexed()) {
+ builder.add(dimension.name(), address.label(i));
+ }
+ }
+ return builder.build();
+ }
+
+ private int denseOffset(TensorAddress address) {
+ int innerSize = 1;
+ int offset = 0;
+ for (int i = type.dimensions().size(); --i >= 0; ) {
+ TensorType.Dimension dimension = type.dimensions().get(i);
+ if (dimension.isIndexed()) {
+ int label = address.intLabel(i);
+ offset += label * innerSize;
+ innerSize *= dimension.size().orElseThrow(() ->
+ new IllegalArgumentException("Unknown size of indexed dimension."));
+ }
+ }
+ return offset;
+ }
+
+ private TensorAddress denseOffsetToAddress(int denseOffset) {
+ if (denseOffset < 0 || denseOffset > denseSubspaceSize) {
+ throw new IllegalArgumentException("Offset out of bounds");
+ }
+
+ int restSize = denseOffset;
+ int innerSize = denseSubspaceSize;
+ int[] labels = new int[indexedDimensions.size()];
+
+ for (int i = 0; i < labels.length; ++i) {
+ TensorType.Dimension dimension = indexedDimensions.get(i);
+ int dimensionSize = dimension.size().orElseThrow(() ->
+ new IllegalArgumentException("Unknown size of indexed dimension."));
+
+ innerSize /= dimensionSize;
+ labels[i] = restSize / innerSize;
+ restSize %= innerSize;
+ }
+ return TensorAddress.of(labels);
+ }
+
+ private TensorAddress addressOf(TensorAddress sparsePart, int denseOffset) {
+ TensorAddress densePart = denseOffsetToAddress(denseOffset);
+ String[] labels = new String[type.dimensions().size()];
+ int mappedIndex = 0;
+ int indexedIndex = 0;
+ for (TensorType.Dimension d : type.dimensions()) {
+ if (d.isIndexed()) {
+ labels[mappedIndex + indexedIndex] = densePart.label(indexedIndex);
+ indexedIndex++;
+ } else {
+ labels[mappedIndex + indexedIndex] = sparsePart.label(mappedIndex);
+ mappedIndex++;
+ }
+ }
+ return TensorAddress.of(labels);
+ }
+
+ }
+
+ public static TensorType createPartialType(List<TensorType.Dimension> dimensions) {
+ TensorType.Builder builder = new TensorType.Builder();
+ for (TensorType.Dimension dimension : dimensions) {
+ builder.set(dimension);
+ }
+ return builder.build();
+ }
+
+}