diff options
author | Jon Bratseth <bratseth@yahoo-inc.com> | 2017-01-05 15:04:56 +0100 |
---|---|---|
committer | Jon Bratseth <bratseth@yahoo-inc.com> | 2017-01-05 15:04:56 +0100 |
commit | fd22e7e254528bea682a2e585f5cbb1fc625c93d (patch) | |
tree | bcce373b5f5f9c2a982db099ba12e00266281a9b /vespajlib | |
parent | 5977d8e45b6306a8516c06bbf57190ab3931380b (diff) |
Optimize indexed iterator access
Diffstat (limited to 'vespajlib')
5 files changed, 195 insertions, 102 deletions
diff --git a/vespajlib/src/main/java/com/yahoo/tensor/IndexedTensor.java b/vespajlib/src/main/java/com/yahoo/tensor/IndexedTensor.java index 509877f48f6..2c3cb6ebde2 100644 --- a/vespajlib/src/main/java/com/yahoo/tensor/IndexedTensor.java +++ b/vespajlib/src/main/java/com/yahoo/tensor/IndexedTensor.java @@ -164,7 +164,7 @@ public class IndexedTensor implements Tensor { Indexes indexes = Indexes.of(dimensionSizes, dimensionSizes, values.length); for (int i = 0; i < values.length; i++) { indexes.next(); - builder.put(indexes.toAddress(i), values[i]); + builder.put(indexes.toAddress(), values[i]); } return builder.build(); } @@ -218,9 +218,6 @@ public class IndexedTensor implements Tensor { public abstract Builder cell(double value, int ... indexes); - /** Add a cell by internal index */ - public abstract Builder cellWithInternalIndex(int internalIndex, double value); - protected double[] arrayFor(int[] dimensionSizes) { int productSize = 1; for (int dimensionSize : dimensionSizes) @@ -293,8 +290,15 @@ public class IndexedTensor implements Tensor { } @Override - public Builder cellWithInternalIndex(int internalIndex, double value) { - values[internalIndex] = value; + public Builder cell(Cell cell, double value) { + // TODO: Use internal index if applicable + // values[internalIndex] = value; + // return this; + int directIndex = cell.getDirectIndex(); + if (directIndex >= 0) // optimization + values[directIndex] = value; + else + super.cell(cell, value); return this; } @@ -417,17 +421,13 @@ public class IndexedTensor implements Tensor { list.add(list.size(), null); } - @Override - public Builder cellWithInternalIndex(int internalIndex, double value) { - throw new UnsupportedOperationException("Not supoprted for unbound builders"); - } - } private final class CellIterator implements Iterator<Cell> { private int count = 0; private final Indexes indexes = Indexes.of(dimensionSizes, dimensionSizes, values.length); + private final LazyCell reusedCell = new LazyCell(indexes, Double.NaN); @Override public boolean hasNext() { @@ -439,9 +439,8 @@ public class IndexedTensor implements Tensor { if ( ! hasNext()) throw new NoSuchElementException("No cell at " + indexes); count++; indexes.next(); - int valueIndex = toValueIndex(indexes.indexesForReading(), IndexedTensor.this.dimensionSizes); - TensorAddress address = indexes.toAddress(valueIndex); - return new Cell(address, get(valueIndex)); + reusedCell.value = get(indexes.toSourceValueIndex()); + return reusedCell; } } @@ -514,8 +513,10 @@ public class IndexedTensor implements Tensor { /** * An iterator over a subspace of this tensor. This is exposed to allow clients to query the size. + * NOTE THAT the Cell returned by next is only valid until the next() call is made. + * This is a concession to performance due to this typically being used in inner loops. */ - public final class SubspaceIterator implements Iterator<Map.Entry<TensorAddress, Double>> { + public final class SubspaceIterator implements Iterator<Tensor.Cell> { /** * This iterator will iterate over the given dimensions, in the order given @@ -529,6 +530,9 @@ public class IndexedTensor implements Tensor { private Indexes indexes; private int count = 0; + /** A lazy cell for reuse */ + private final LazyCell reusedCell; + /** * Creates a new subspace iterator * @@ -546,6 +550,7 @@ public class IndexedTensor implements Tensor { this.address = address; this.iterateDimensionSizes = iterateDimensionSizes; this.indexes = Indexes.of(IndexedTensor.this.dimensionSizes, iterateDimensionSizes, iterateDimensions, address); + reusedCell = new LazyCell(indexes, Double.NaN); } /** Returns the total number of cells in this subspace */ @@ -554,7 +559,7 @@ public class IndexedTensor implements Tensor { } /** Returns the address of the cell this currently points to (which may be an invalid position) */ - public TensorAddress address() { return indexes.toAddress(-1); } + public TensorAddress address() { return indexes.toAddress(); } /** Rewind this iterator to the first element */ public void reset() { @@ -567,15 +572,39 @@ public class IndexedTensor implements Tensor { return count < indexes.size(); } + /** Returns the next cell, which is valid until next() is called again */ @Override - public Map.Entry<TensorAddress, Double> next() { + public Cell next() { if ( ! hasNext()) throw new NoSuchElementException("No cell at " + indexes); count++; indexes.next(); - int valueIndex = indexes.toValueIndex(); - TensorAddress address = indexes.toAddress(valueIndex); - return new Cell(address, get(valueIndex)); // TODO: Change type to Cell, then change Cell to work with indexes + valueIndex instead of creating an address + reusedCell.value = get(indexes.toSourceValueIndex()); + return reusedCell; + } + + } + + /** A Cell which does not compute its TensorAddress unless it really has to */ + private final static class LazyCell extends Tensor.Cell { + + private double value; + private Indexes indexes; + + private LazyCell(Indexes indexes, Double value) { + super(null, value); + this.indexes = indexes; + } + + @Override + int getDirectIndex() { return indexes.toIterationValueIndex(); } + + @Override + public TensorAddress getKey() { + return indexes.toAddress(); } + + @Override + public Double getValue() { return value; } } @@ -590,10 +619,10 @@ public class IndexedTensor implements Tensor { private final int[] sourceDimensionSizes; - private final int[] iterateDimensionSizes; + private final int[] iterationDimensionSizes; protected final int[] indexes; - + public static Indexes of(int[] dimensionSizes) { return of(dimensionSizes, dimensionSizes); } @@ -619,14 +648,24 @@ public class IndexedTensor implements Tensor { } private static Indexes of(int[] sourceDimensionSizes, int[] iterateDimensionSizes, List<Integer> iterateDimensions, int[] initialIndexes, int size) { - if (size == 0) + if (size == 0) { return new EmptyIndexes(sourceDimensionSizes, iterateDimensionSizes, initialIndexes); // we're told explicitly there are truly no values available - else if (size == 1) + } + else if (size == 1) { return new SingleValueIndexes(sourceDimensionSizes, iterateDimensionSizes, initialIndexes); // with no (iterating) dimensions, we still return one value, not zero - else if (iterateDimensions.size() == 1) - return new SingleDimensionIndexes(sourceDimensionSizes, iterateDimensionSizes, iterateDimensions.get(0), initialIndexes, size); // optimization - else - return new MultivalueIndexes(sourceDimensionSizes, iterateDimensionSizes, iterateDimensions, initialIndexes, size); + } + else if (iterateDimensions.size() == 1) { + if (Arrays.equals(sourceDimensionSizes, iterateDimensionSizes)) + return new EqualSizeSingleDimensionIndexes(sourceDimensionSizes, iterateDimensions.get(0), initialIndexes, size); + else + return new SingleDimensionIndexes(sourceDimensionSizes, iterateDimensionSizes, iterateDimensions.get(0), initialIndexes, size); // optimization + } + else { + if (Arrays.equals(sourceDimensionSizes, iterateDimensionSizes)) + return new EqualSizeMultiDimensionIndexes(sourceDimensionSizes, iterateDimensions, initialIndexes, size); + else + return new MultiDimensionIndexes(sourceDimensionSizes, iterateDimensionSizes, iterateDimensions, initialIndexes, size); + } } private static List<Integer> completeIterationOrder(int length) { @@ -636,9 +675,9 @@ public class IndexedTensor implements Tensor { return iterationDimensions; } - private Indexes(int[] sourceDimensionSizes, int[] iterateDimensionSizes, int[] indexes) { + private Indexes(int[] sourceDimensionSizes, int[] iterationDimensionSizes, int[] indexes) { this.sourceDimensionSizes = sourceDimensionSizes; - this.iterateDimensionSizes = iterateDimensionSizes; + this.iterationDimensionSizes = iterationDimensionSizes; this.indexes = indexes; } @@ -650,8 +689,8 @@ public class IndexedTensor implements Tensor { } /** Returns the address of the current position of these indexes */ - private TensorAddress toAddress(int valueIndex) { - return TensorAddress.withValueIndex(valueIndex, indexes); + private TensorAddress toAddress() { + return TensorAddress.of(indexes); } public int[] indexesCopy() { @@ -661,13 +700,14 @@ public class IndexedTensor implements Tensor { /** Returns a copy of the indexes of this which must not be modified */ public int[] indexesForReading() { return indexes; } - /** Returns the value index for this in the tensor we are iterating over */ - int toValueIndex() { - return IndexedTensor.toValueIndex(indexes, sourceDimensionSizes); + int toSourceValueIndex() { + return IndexedTensor.toValueIndex(indexes, sourceDimensionSizes); } - + + int toIterationValueIndex() { return IndexedTensor.toValueIndex(indexes, iterationDimensionSizes); } + /** Returns the dimension sizes of this. Do not modify the return value */ - int[] dimensionSizes() { return iterateDimensionSizes; } + int[] dimensionSizes() { return iterationDimensionSizes; } /** Returns an immutable list containing a copy of the indexes in this */ public List<Integer> toList() { @@ -716,13 +756,13 @@ public class IndexedTensor implements Tensor { } - private final static class MultivalueIndexes extends Indexes { + private static class MultiDimensionIndexes extends Indexes { private final int size; private final List<Integer> iterateDimensions; - - private MultivalueIndexes(int[] sourceDimensionSizes, int[] iterateDimensionSizes, List<Integer> iterateDimensions, int[] initialIndexes, int size) { + + private MultiDimensionIndexes(int[] sourceDimensionSizes, int[] iterateDimensionSizes, List<Integer> iterateDimensions, int[] initialIndexes, int size) { super(sourceDimensionSizes, iterateDimensionSizes, initialIndexes); this.iterateDimensions = iterateDimensions; this.size = size; @@ -754,7 +794,26 @@ public class IndexedTensor implements Tensor { } } + + /** In this case we can reuse the source index computation for the iteration index */ + private final static class EqualSizeMultiDimensionIndexes extends MultiDimensionIndexes { + + private int lastComputedSourceValueIndex = -1; + + private EqualSizeMultiDimensionIndexes(int[] dimensionSizes, List<Integer> iterateDimensions, int[] initialIndexes, int size) { + super(dimensionSizes, dimensionSizes, iterateDimensions, initialIndexes, size); + } + + int toSourceValueIndex() { + return lastComputedSourceValueIndex = super.toSourceValueIndex(); + } + + // NOTE: We assume the source index always gets computed first. Otherwise using this will produce a runtime exception + int toIterationValueIndex() { return lastComputedSourceValueIndex; } + + } + /** In this case we can keep track of indexes using a step instead of using the more elaborate computation */ private final static class SingleDimensionIndexes extends Indexes { private final int size; @@ -762,21 +821,23 @@ public class IndexedTensor implements Tensor { private final int iterateDimension; /** Maintain this directly as an optimization for 1-d iteration */ - private int currentValueIndex; + private int currentSourceValueIndex, currentIterationValueIndex; /** The iteration step in the value index space */ - private final int step; + private final int sourceStep, iterationStep; private SingleDimensionIndexes(int[] sourceDimensionSizes, int[] iterateDimensionSizes, int iterateDimension, int[] initialIndexes, int size) { super(sourceDimensionSizes, iterateDimensionSizes, initialIndexes); this.iterateDimension = iterateDimension; this.size = size; - this.step = productOfDimensionsAfter(iterateDimension, sourceDimensionSizes); + this.sourceStep = productOfDimensionsAfter(iterateDimension, sourceDimensionSizes); + this.iterationStep = productOfDimensionsAfter(iterateDimension, iterateDimensionSizes); // Initialize to the (virtual) position before the first cell indexes[iterateDimension]--; - currentValueIndex = IndexedTensor.toValueIndex(indexes, sourceDimensionSizes); + currentSourceValueIndex = IndexedTensor.toValueIndex(indexes, sourceDimensionSizes); + currentIterationValueIndex = IndexedTensor.toValueIndex(indexes, iterateDimensionSizes); } /** Returns the number of values this will iterate over - i.e the product if the iterating dimension sizes */ @@ -794,14 +855,67 @@ public class IndexedTensor implements Tensor { @Override public void next() { indexes[iterateDimension]++; - currentValueIndex += step; + currentSourceValueIndex += sourceStep; + currentIterationValueIndex += iterationStep; + } + + @Override + int toSourceValueIndex() { return currentSourceValueIndex; } + + @Override + int toIterationValueIndex() { return currentIterationValueIndex; } + + } + + /** In this case we only need to keep track of one index */ + private final static class EqualSizeSingleDimensionIndexes extends Indexes { + + private final int size; + + private final int iterateDimension; + + /** Maintain this directly as an optimization for 1-d iteration */ + private int currentValueIndex; + + /** The iteration step in the value index space */ + private final int step; + + private EqualSizeSingleDimensionIndexes(int[] dimensionSizes, + int iterateDimension, int[] initialIndexes, int size) { + super(dimensionSizes, dimensionSizes, initialIndexes); + this.iterateDimension = iterateDimension; + this.size = size; + this.step = productOfDimensionsAfter(iterateDimension, dimensionSizes); + + // Initialize to the (virtual) position before the first cell + indexes[iterateDimension]--; + currentValueIndex = IndexedTensor.toValueIndex(indexes, dimensionSizes); } + /** Returns the number of values this will iterate over - i.e the product if the iterating dimension sizes */ @Override - int toValueIndex() { - return currentValueIndex; + public int size() { + return size; } + /** + * Advances this to the next cell in the standard indexed tensor cell order. + * The first call to this will put it at the first position. + * + * @throws RuntimeException if this is called more times than its size + */ + @Override + public void next() { + indexes[iterateDimension]++; + currentValueIndex += step; + } + + @Override + int toSourceValueIndex() { return currentValueIndex; } + + @Override + int toIterationValueIndex() { return currentValueIndex; } + } } diff --git a/vespajlib/src/main/java/com/yahoo/tensor/Tensor.java b/vespajlib/src/main/java/com/yahoo/tensor/Tensor.java index a898ea5ad0c..800de360369 100644 --- a/vespajlib/src/main/java/com/yahoo/tensor/Tensor.java +++ b/vespajlib/src/main/java/com/yahoo/tensor/Tensor.java @@ -299,6 +299,13 @@ public interface Tensor { @Override public TensorAddress getKey() { return address; } + /** + * Returns the direct index which can be used to locate this cell, or -1 if not available. + * This is for optimizations mapping between tensors where this is possible without creating a + * TensorAddress. + */ + int getDirectIndex() { return -1; } + @Override public Double getValue() { return value; } @@ -361,7 +368,17 @@ public interface Tensor { /** Add a cell */ Builder cell(double value, int ... labels); - + + /** + * Add a cell + * + * @param cell a cell providing the location at which to add this cell + * @param value the value to assign to the cell + */ + default Builder cell(Cell cell, double value) { + return cell(cell.getKey(), value); + } + Tensor build(); class CellBuilder { diff --git a/vespajlib/src/main/java/com/yahoo/tensor/TensorAddress.java b/vespajlib/src/main/java/com/yahoo/tensor/TensorAddress.java index 9bf88cc8213..d62e7575358 100644 --- a/vespajlib/src/main/java/com/yahoo/tensor/TensorAddress.java +++ b/vespajlib/src/main/java/com/yahoo/tensor/TensorAddress.java @@ -32,11 +32,6 @@ public abstract class TensorAddress implements Comparable<TensorAddress> { return new IntTensorAddress(labels); } - /** A tensor address which knows its value index (computed from the labels and the size) in some tensor */ - static TensorAddress withValueIndex(int valueIndex, int[] labels) { - return new IntTensorAddress(valueIndex, labels); - } - /** Returns the number of labels in this */ public abstract int size(); @@ -59,12 +54,6 @@ public abstract class TensorAddress implements Comparable<TensorAddress> { public final boolean isEmpty() { return size() == 0; } - /** - * Returns the value index of this address (computed from the labels and the size) in some tensor. - * This may be retained as an optimization. It is -1 if not set. - */ - public int valueIndex() { return -1; } - @Override public int compareTo(TensorAddress other) { // TODO: Formal issue (only): Ordering with different address sizes @@ -148,16 +137,9 @@ public abstract class TensorAddress implements Comparable<TensorAddress> { private static final class IntTensorAddress extends TensorAddress { - private final int valueIndex; - private final int[] labels; private IntTensorAddress(int[] labels) { - this(-1, labels); - } - - private IntTensorAddress(int valueIndex, int[] labels) { - this.valueIndex = valueIndex; this.labels = Arrays.copyOf(labels, labels.length); } @@ -182,9 +164,6 @@ public abstract class TensorAddress implements Comparable<TensorAddress> { return Arrays.toString(labels); } - @Override - public int valueIndex() { return valueIndex; } - } /** Supports building of a tensor address */ diff --git a/vespajlib/src/main/java/com/yahoo/tensor/functions/Join.java b/vespajlib/src/main/java/com/yahoo/tensor/functions/Join.java index d55fe9e1ac8..0844877ba29 100644 --- a/vespajlib/src/main/java/com/yahoo/tensor/functions/Join.java +++ b/vespajlib/src/main/java/com/yahoo/tensor/functions/Join.java @@ -122,7 +122,6 @@ public class Join extends PrimitiveTensorFunction { return Tensor.Builder.of(joinedType, new int[joinedType.dimensions().size()]).build(); int[] joinedSizes = joinedSize(joinedType, subspace, superspace); - boolean equalTargetAndSourceSize = superspace.dimensionSizesAre(joinedSizes); IndexedTensor.Builder builder = (IndexedTensor.Builder)Tensor.Builder.of(joinedType, joinedSizes); @@ -134,41 +133,25 @@ public class Join extends PrimitiveTensorFunction { IndexedTensor.SubspaceIterator subspaceInSuper = i.next(); joinSubspaces(subspace.valueIterator(), subspace.size(), subspaceInSuper, subspaceInSuper.size(), - reversedArgumentOrder, builder, equalTargetAndSourceSize); + reversedArgumentOrder, builder); } return builder.build(); } private void joinSubspaces(Iterator<Double> subspace, int subspaceSize, - Iterator<Map.Entry<TensorAddress, Double>> superspace, int superspaceSize, - boolean reversedArgumentOrder, IndexedTensor.Builder builder, boolean equalTargetAndSourceSize) { + Iterator<Tensor.Cell> superspace, int superspaceSize, + boolean reversedArgumentOrder, IndexedTensor.Builder builder) { int joinedLength = Math.min(subspaceSize, superspaceSize); - // This is inner loop and therefore it is suplicated four times to move checks out of it - if (equalTargetAndSourceSize) { // we can write cells without recomputing the address index - if (reversedArgumentOrder) { - for (int i = 0; i < joinedLength; i++) { - Map.Entry<TensorAddress, Double> supercell = superspace.next(); - builder.cellWithInternalIndex(supercell.getKey().valueIndex(), combinator.applyAsDouble(supercell.getValue(), subspace.next())); - } - } else { - for (int i = 0; i < joinedLength; i++) { - Map.Entry<TensorAddress, Double> supercell = superspace.next(); - builder.cellWithInternalIndex(supercell.getKey().valueIndex(), combinator.applyAsDouble(subspace.next(), supercell.getValue())); - } + if (reversedArgumentOrder) { + for (int i = 0; i < joinedLength; i++) { + Tensor.Cell supercell = superspace.next(); + builder.cell(supercell, combinator.applyAsDouble(supercell.getValue(), subspace.next())); } - } - else { - if (reversedArgumentOrder) { - for (int i = 0; i < joinedLength; i++) { - Map.Entry<TensorAddress, Double> supercell = superspace.next(); - builder.cell(supercell.getKey(), combinator.applyAsDouble(supercell.getValue(), subspace.next())); - } - } else { - for (int i = 0; i < joinedLength; i++) { - Map.Entry<TensorAddress, Double> supercell = superspace.next(); - builder.cell(supercell.getKey(), combinator.applyAsDouble(subspace.next(), supercell.getValue())); - } + } else { + for (int i = 0; i < joinedLength; i++) { + Tensor.Cell supercell = superspace.next(); + builder.cell(supercell, combinator.applyAsDouble(subspace.next(), supercell.getValue())); } } } diff --git a/vespajlib/src/test/java/com/yahoo/tensor/TensorFunctionBenchmark.java b/vespajlib/src/test/java/com/yahoo/tensor/TensorFunctionBenchmark.java index 8e2dc19e1c7..de7e49c46e4 100644 --- a/vespajlib/src/test/java/com/yahoo/tensor/TensorFunctionBenchmark.java +++ b/vespajlib/src/test/java/com/yahoo/tensor/TensorFunctionBenchmark.java @@ -102,7 +102,7 @@ public class TensorFunctionBenchmark { } public static void main(String[] args) { - double time; + double time = 0; // ---------------- Mapped with extra space (sidesteps current special-case optimizations): // 410 ms @@ -132,16 +132,16 @@ public class TensorFunctionBenchmark { // 0.14 ms time = new TensorFunctionBenchmark().benchmark(50000, vectors(100, 300, TensorType.Dimension.Type.indexedUnbound), TensorType.Dimension.Type.indexedUnbound, false); System.out.printf("Indexed unbound vectors, time per join: %1$8.3f ms\n", time); - // 0.75 ms - time = new TensorFunctionBenchmark().benchmark(5000, matrix(100, 300, TensorType.Dimension.Type.indexedUnbound), TensorType.Dimension.Type.indexedUnbound, false); + // 0.14 ms + time = new TensorFunctionBenchmark().benchmark(50000, matrix(100, 300, TensorType.Dimension.Type.indexedUnbound), TensorType.Dimension.Type.indexedUnbound, false); System.out.printf("Indexed unbound matrix, time per join: %1$8.3f ms\n", time); // ---------------- Indexed bound: // 0.14 ms time = new TensorFunctionBenchmark().benchmark(50000, vectors(100, 300, TensorType.Dimension.Type.indexedBound), TensorType.Dimension.Type.indexedBound, false); System.out.printf("Indexed bound vectors, time per join: %1$8.3f ms\n", time); - // 0.78 ms - time = new TensorFunctionBenchmark().benchmark(5000, matrix(100, 300, TensorType.Dimension.Type.indexedBound), TensorType.Dimension.Type.indexedBound, false); + // 0.14 ms + time = new TensorFunctionBenchmark().benchmark(50000, matrix(100, 300, TensorType.Dimension.Type.indexedBound), TensorType.Dimension.Type.indexedBound, false); System.out.printf("Indexed bound matrix, time per join: %1$8.3f ms\n", time); } |