From 5c2610e9fa5ae8c293ebe872263d5fd76f525306 Mon Sep 17 00:00:00 2001 From: Arne Juul Date: Wed, 8 Nov 2023 19:23:57 +0000 Subject: store cells in blocks for MixedTensor --- vespajlib/abi-spec.json | 31 --- .../main/java/com/yahoo/tensor/MixedTensor.java | 271 ++++++++++++--------- 2 files changed, 152 insertions(+), 150 deletions(-) diff --git a/vespajlib/abi-spec.json b/vespajlib/abi-spec.json index 5dbdb7d157b..3e9fb760e18 100644 --- a/vespajlib/abi-spec.json +++ b/vespajlib/abi-spec.json @@ -1002,37 +1002,6 @@ ], "fields" : [ ] }, - "com.yahoo.tensor.MixedTensor$Index$Builder" : { - "superClass" : "java.lang.Object", - "interfaces" : [ ], - "attributes" : [ - "public" - ], - "methods" : [ - "public void (com.yahoo.tensor.TensorType)", - "public void put(com.yahoo.tensor.TensorAddress, long)", - "public com.yahoo.tensor.MixedTensor$Index build()", - "public com.yahoo.tensor.MixedTensor$Index index()" - ], - "fields" : [ ] - }, - "com.yahoo.tensor.MixedTensor$UnboundBuilder" : { - "superClass" : "com.yahoo.tensor.MixedTensor$Builder", - "interfaces" : [ ], - "attributes" : [ - "public" - ], - "methods" : [ - "public com.yahoo.tensor.Tensor$Builder cell(com.yahoo.tensor.TensorAddress, float)", - "public com.yahoo.tensor.Tensor$Builder cell(com.yahoo.tensor.TensorAddress, double)", - "public com.yahoo.tensor.MixedTensor build()", - "public void trackBounds(com.yahoo.tensor.TensorAddress)", - "public com.yahoo.tensor.TensorType createBoundType()", - "public static com.yahoo.tensor.MixedTensor$UnboundBuilder of(com.yahoo.tensor.TensorType)", - "public bridge synthetic com.yahoo.tensor.Tensor build()" - ], - "fields" : [ ] - }, "com.yahoo.tensor.MixedTensor" : { "superClass" : "java.lang.Object", "interfaces" : [ diff --git a/vespajlib/src/main/java/com/yahoo/tensor/MixedTensor.java b/vespajlib/src/main/java/com/yahoo/tensor/MixedTensor.java index 05d3218e900..8298285da19 100644 --- a/vespajlib/src/main/java/com/yahoo/tensor/MixedTensor.java +++ b/vespajlib/src/main/java/com/yahoo/tensor/MixedTensor.java @@ -26,17 +26,44 @@ public class MixedTensor implements Tensor { /** The dimension specification for this tensor */ private final TensorType type; + private final int blockSize; // aka dense subspace size - /** The list of cells in the tensor */ - private final List cells; + static class DenseBlock { + final TensorAddress sparseAddr; + final double[] cells; + DenseBlock(TensorAddress sparseAddr, double[] cells) { + this.sparseAddr = sparseAddr; + this.cells = cells; + } + } + + /** The cells in the tensor */ + private final List cellBlocks; /** An index structure over the cell list */ private final Index index; - private MixedTensor(TensorType type, List cells, Index index) { + private MixedTensor(TensorType type, List cellBlocks, Index index) { this.type = type; - this.cells = List.copyOf(cells); + this.blockSize = index.denseSubspaceSize(); + this.cellBlocks = List.copyOf(cellBlocks); this.index = index; + if (this.blockSize < 1) { + throw new IllegalStateException("invalid dense subspace size " + blockSize); + } + long count = 0; + for (var block : this.cellBlocks) { + if (index.sparseMap.get(block.sparseAddr) != count) { + throw new IllegalStateException("map vs list mismatch"); + } + if (block.cells.length != blockSize) { + throw new IllegalStateException("block length mismatch"); + } + ++count; + } + if (count != index.sparseMap.size()) { + throw new IllegalStateException("map vs list size mismatch"); + } } /** Returns the tensor type */ @@ -45,29 +72,32 @@ public class MixedTensor implements Tensor { /** Returns the size of the tensor measured in number of cells */ @Override - public long size() { return cells.size(); } + public long size() { return cellBlocks.size() * blockSize; } /** Returns the value at the given address */ @Override public double get(TensorAddress address) { - long cellIndex = index.indexOf(address); - if (cellIndex < 0 || cellIndex >= cells.size()) + int blockNum = index.blockIndexOf(address); + if (blockNum < 0 || blockNum > cellBlocks.size()) { return 0.0; - Cell cell = cells.get((int)cellIndex); - if ( ! address.equals(cell.getKey())) + } + int denseOffset = index.denseOffsetOf(address); + var block = cellBlocks.get(blockNum); + if (denseOffset < 0 || denseOffset >= block.cells.length) { return 0.0; - return cell.getValue(); + } + return block.cells[denseOffset]; } @Override public boolean has(TensorAddress address) { - long cellIndex = index.indexOf(address); - if (cellIndex < 0 || cellIndex >= cells.size()) - return false; - Cell cell = cells.get((int)cellIndex); - if ( ! address.equals(cell.getKey())) + int blockNum = index.blockIndexOf(address); + if (blockNum < 0 || blockNum > cellBlocks.size()) { return false; - return true; + } + int denseOffset = index.denseOffsetOf(address); + var block = cellBlocks.get(blockNum); + return (denseOffset >= 0 && denseOffset < block.cells.length); } /** @@ -79,7 +109,25 @@ public class MixedTensor implements Tensor { */ @Override public Iterator cellIterator() { - return cells.iterator(); + return new Iterator<>() { + final Iterator blockIterator = cellBlocks.iterator(); + DenseBlock currBlock = null; + int currOffset = blockSize; + @Override + public boolean hasNext() { + return (currOffset < blockSize || blockIterator.hasNext()); + } + @Override + public Cell next() { + if (currOffset == blockSize) { + currBlock = blockIterator.next(); + currOffset = 0; + } + TensorAddress fullAddr = index.fullAddressOf(currBlock.sparseAddr, currOffset); + double value = currBlock.cells[currOffset++]; + return new Cell(fullAddr, value); + } + }; } /** @@ -89,14 +137,20 @@ public class MixedTensor implements Tensor { @Override public Iterator valueIterator() { return new Iterator<>() { - final Iterator cellIterator = cellIterator(); + final Iterator blockIterator = cellBlocks.iterator(); + double[] currBlock = null; + int currOffset = blockSize; @Override public boolean hasNext() { - return cellIterator.hasNext(); + return (currOffset < blockSize || blockIterator.hasNext()); } @Override public Double next() { - return cellIterator.next().getValue(); + if (currOffset == blockSize) { + currBlock = blockIterator.next().cells; + currOffset = 0; + } + return currBlock[currOffset++]; } }; } @@ -104,7 +158,9 @@ public class MixedTensor implements Tensor { @Override public Map cells() { ImmutableMap.Builder builder = new ImmutableMap.Builder<>(); - for (Cell cell : cells) { + var iter = cellIterator(); + while (iter.hasNext()) { + Cell cell = iter.next(); builder.put(cell.getKey(), cell.getValue()); } return builder.build(); @@ -116,29 +172,24 @@ public class MixedTensor implements Tensor { throw new IllegalArgumentException("MixedTensor.withType: types are not compatible. Current type: '" + this.type + "', requested type: '" + type + "'"); } - return new MixedTensor(other, cells, index); + return new MixedTensor(other, cellBlocks, index); } @Override public Tensor remove(Set addresses) { - Tensor.Builder builder = Tensor.Builder.of(type()); - - // iterate through all sparse addresses referencing a dense subspace - for (Map.Entry entry : index.sparseMap.entrySet()) { - TensorAddress sparsePartialAddress = entry.getKey(); - if ( ! addresses.contains(sparsePartialAddress)) { // assumption: addresses only contain the sparse part - long offset = entry.getValue(); - for (int i = 0; i < index.denseSubspaceSize; ++i) { - Cell cell = cells.get((int)offset + i); - builder.cell(cell.getKey(), cell.getValue()); - } + var indexBuilder = new Index.Builder(type); + List list = new ArrayList<>(); + for (var block : cellBlocks) { + if ( ! addresses.contains(block.sparseAddr)) { // assumption: addresses only contain the sparse part + indexBuilder.addBlock(block.sparseAddr, list.size()); + list.add(block); } } - return builder.build(); + return new MixedTensor(type, list, indexBuilder.build()); } @Override - public int hashCode() { return cells.hashCode(); } + public int hashCode() { return cellBlocks.hashCode(); } @Override public String toString() { @@ -173,7 +224,7 @@ public class MixedTensor implements Tensor { /** Returns the size of dense subspaces */ public long denseSubspaceSize() { - return index.denseSubspaceSize(); + return blockSize; } /** @@ -222,7 +273,6 @@ public class MixedTensor implements Tensor { @Override public abstract MixedTensor build(); - } /** @@ -269,9 +319,9 @@ public class MixedTensor implements Tensor { @Override public Tensor.Builder cell(TensorAddress address, double value) { TensorAddress sparsePart = index.sparsePartialAddress(address); - long denseOffset = index.denseOffset(address); + int denseOffset = index.denseOffsetOf(address); double[] denseSubspace = denseSubspace(sparsePart); - denseSubspace[(int)denseOffset] = value; + denseSubspace[denseOffset] = value; return this; } @@ -287,28 +337,20 @@ public class MixedTensor implements Tensor { @Override public MixedTensor build() { - long count = 0; - List builder = new ArrayList<>(); - + List list = new ArrayList<>(); for (Map.Entry entry : denseSubspaceMap.entrySet()) { TensorAddress sparsePart = entry.getKey(); - indexBuilder.put(sparsePart, count); - double[] denseSubspace = entry.getValue(); - for (long offset = 0; offset < denseSubspace.length; ++offset) { - TensorAddress cellAddress = index.addressOf(sparsePart, offset); - double value = denseSubspace[(int)offset]; - builder.add(new Cell(cellAddress, value)); - count++; - } + var block = new DenseBlock(sparsePart, denseSubspace); + indexBuilder.addBlock(sparsePart, list.size()); + list.add(block); } - return new MixedTensor(type, builder, indexBuilder.build()); + return new MixedTensor(type, list, indexBuilder.build()); } public static BoundBuilder of(TensorType type) { return new BoundBuilder(type); } - } /** @@ -319,7 +361,7 @@ public class MixedTensor implements Tensor { * tensor type is effectively changed, such that unbound indexed * dimensions become bound. */ - public static class UnboundBuilder extends Builder { + private static class UnboundBuilder extends Builder { private final Map cells; private final long[] dimensionBounds; @@ -352,7 +394,7 @@ public class MixedTensor implements Tensor { return builder.build(); } - public void trackBounds(TensorAddress address) { + private void trackBounds(TensorAddress address) { for (int i = 0; i < type.dimensions().size(); ++i) { TensorType.Dimension dimension = type.dimensions().get(i); if (dimension.isIndexed()) { @@ -361,7 +403,7 @@ public class MixedTensor implements Tensor { } } - public TensorType createBoundType() { + private TensorType createBoundType() { TensorType.Builder typeBuilder = new TensorType.Builder(type().valueType()); for (int i = 0; i < type.dimensions().size(); ++i) { TensorType.Dimension dimension = type.dimensions().get(i); @@ -378,7 +420,6 @@ public class MixedTensor implements Tensor { public static UnboundBuilder of(TensorType type) { return new UnboundBuilder(type); } - } /** @@ -395,8 +436,17 @@ public class MixedTensor implements Tensor { private final List mappedDimensions; private final List indexedDimensions; - private ImmutableMap sparseMap; - private long denseSubspaceSize = -1; + private ImmutableMap sparseMap; + private final int denseSubspaceSize; + + static private int computeDSS(List dimensions) { + long denseSubspaceSize = 1; + for (var dimension : dimensions) { + denseSubspaceSize *= dimension.size() + .orElseThrow(() -> new IllegalArgumentException("Unknown size of indexed dimension")); + } + return (int) denseSubspaceSize; + } private Index(TensorType type) { this.type = type; @@ -404,53 +454,30 @@ public class MixedTensor implements Tensor { this.indexedDimensions = type.dimensions().stream().filter(TensorType.Dimension::isIndexed).toList(); this.sparseType = createPartialType(type.valueType(), mappedDimensions); this.denseType = createPartialType(type.valueType(), indexedDimensions); + this.denseSubspaceSize = computeDSS(this.indexedDimensions); } - /** Returns the index of the given address, or -1 if it is not present */ - public long indexOf(TensorAddress address) { + int blockIndexOf(TensorAddress address) { TensorAddress sparsePart = sparsePartialAddress(address); - if ( ! sparseMap.containsKey(sparsePart)) - return -1; - long base = sparseMap.get(sparsePart); - long offset = denseOffset(address); - return base + offset; + return sparseMap.getOrDefault(sparsePart, -1); } - public static class Builder { - - private final Index index; - private final ImmutableMap.Builder builder; - - public Builder(TensorType type) { - index = new Index(type); - builder = new ImmutableMap.Builder<>(); - } - - public void put(TensorAddress address, long index) { - builder.put(address, index); - } - - public Index build() { - index.sparseMap = builder.build(); - return index; - } - - public Index index() { - return index; + int denseOffsetOf(TensorAddress address) { + long innerSize = 1; + long offset = 0; + for (int i = type.dimensions().size(); --i >= 0; ) { + TensorType.Dimension dimension = type.dimensions().get(i); + if (dimension.isIndexed()) { + long label = address.numericLabel(i); + offset += label * innerSize; + innerSize *= dimension.size().orElseThrow(() -> + new IllegalArgumentException("Unknown size of indexed dimension.")); + } } + return (int) offset; } - public long denseSubspaceSize() { - if (denseSubspaceSize == -1) { - denseSubspaceSize = 1; - for (int i = 0; i < type.dimensions().size(); ++i) { - TensorType.Dimension dimension = type.dimensions().get(i); - if (dimension.isIndexed()) { - denseSubspaceSize *= dimension.size().orElseThrow(() -> - new IllegalArgumentException("Unknown size of indexed dimension")); - } - } - } + public int denseSubspaceSize() { return denseSubspaceSize; } @@ -466,21 +493,6 @@ public class MixedTensor implements Tensor { return builder.build(); } - private long denseOffset(TensorAddress address) { - long innerSize = 1; - long offset = 0; - for (int i = type.dimensions().size(); --i >= 0; ) { - TensorType.Dimension dimension = type.dimensions().get(i); - if (dimension.isIndexed()) { - long label = address.numericLabel(i); - offset += label * innerSize; - innerSize *= dimension.size().orElseThrow(() -> - new IllegalArgumentException("Unknown size of indexed dimension.")); - } - } - return offset; - } - private TensorAddress denseOffsetToAddress(long denseOffset) { if (denseOffset < 0 || denseOffset > denseSubspaceSize) { throw new IllegalArgumentException("Offset out of bounds"); @@ -502,7 +514,7 @@ public class MixedTensor implements Tensor { return TensorAddress.of(labels); } - private TensorAddress addressOf(TensorAddress sparsePart, long denseOffset) { + TensorAddress fullAddressOf(TensorAddress sparsePart, long denseOffset) { TensorAddress densePart = denseOffsetToAddress(denseOffset); String[] labels = new String[type.dimensions().size()]; int mappedIndex = 0; @@ -542,7 +554,7 @@ public class MixedTensor implements Tensor { for (int index = 0; index < cellEntries.size() && cellsWritten < maxCells; index++) { if (index > 0) b.append(", "); - b.append(TensorAddress.labelToString(cellEntries.get(index).getKey().label(0 ))); + b.append(TensorAddress.labelToString(cellEntries.get(index).getKey().label(0))); b.append(":"); cellsWritten += denseSubspaceToString(tensor, cellEntries.get(index).getValue(), maxCells - cellsWritten, b); } @@ -552,16 +564,14 @@ public class MixedTensor implements Tensor { return b.toString(); } - private int denseSubspaceToString(MixedTensor tensor, long subspaceIndex, long maxCells, StringBuilder b) { + private int denseSubspaceToString(MixedTensor tensor, int subspaceIndex, long maxCells, StringBuilder b) { if (maxCells <= 0) { return 0; } - if (denseSubspaceSize == 1) { b.append(getDouble(subspaceIndex, 0, tensor)); return 1; } - IndexedTensor.Indexes indexes = IndexedTensor.Indexes.of(denseType); int index = 0; for (; index < denseSubspaceSize && index < maxCells; index++) { @@ -590,10 +600,33 @@ public class MixedTensor implements Tensor { return index; } - private double getDouble(long indexedSubspaceIndex, long indexInIndexedSubspace, MixedTensor tensor) { - return tensor.cells.get((int)(indexedSubspaceIndex + indexInIndexedSubspace)).getDoubleValue(); + private double getDouble(int subspaceIndex, int denseOffset, MixedTensor tensor) { + return tensor.cellBlocks.get(subspaceIndex).cells[denseOffset]; } + static class Builder { + + private final Index index; + private final ImmutableMap.Builder builder; + + Builder(TensorType type) { + index = new Index(type); + builder = new ImmutableMap.Builder<>(); + } + + void addBlock(TensorAddress address, int sz) { + builder.put(address, sz); + } + + Index build() { + index.sparseMap = builder.build(); + return index; + } + + Index index() { + return index; + } + } } private static class DenseSubspaceBuilder implements IndexedTensor.DirectIndexBuilder { -- cgit v1.2.3 From 3956563e17ea78418657f264d8efae93494c7172 Mon Sep 17 00:00:00 2001 From: Arne Juul Date: Wed, 8 Nov 2023 20:02:24 +0000 Subject: try to improve hashCode/equals --- vespajlib/src/main/java/com/yahoo/tensor/MixedTensor.java | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/vespajlib/src/main/java/com/yahoo/tensor/MixedTensor.java b/vespajlib/src/main/java/com/yahoo/tensor/MixedTensor.java index 8298285da19..5b277e4716f 100644 --- a/vespajlib/src/main/java/com/yahoo/tensor/MixedTensor.java +++ b/vespajlib/src/main/java/com/yahoo/tensor/MixedTensor.java @@ -5,10 +5,12 @@ package com.yahoo.tensor; import com.google.common.collect.ImmutableMap; import java.util.ArrayList; +import java.util.Arrays; import java.util.HashMap; import java.util.Iterator; import java.util.List; import java.util.Map; +import java.util.Objects; import java.util.Set; /** @@ -28,13 +30,22 @@ public class MixedTensor implements Tensor { private final TensorType type; private final int blockSize; // aka dense subspace size - static class DenseBlock { + static final class DenseBlock { final TensorAddress sparseAddr; final double[] cells; DenseBlock(TensorAddress sparseAddr, double[] cells) { this.sparseAddr = sparseAddr; this.cells = cells; } + @Override public int hashCode() { + return Objects.hash(sparseAddr, cells); + } + @Override public boolean equals(Object other) { + if (other instanceof DenseBlock o) { + return sparseAddr.equals(o.sparseAddr) && Arrays.equals(cells, o.cells); + } + return false; + } } /** The cells in the tensor */ @@ -189,7 +200,7 @@ public class MixedTensor implements Tensor { } @Override - public int hashCode() { return cellBlocks.hashCode(); } + public int hashCode() { return Objects.hash(type, cellBlocks); } @Override public String toString() { -- cgit v1.2.3 From ff05a8fd7f067a90826ec19045257fcb897af500 Mon Sep 17 00:00:00 2001 From: Arne Juul Date: Thu, 9 Nov 2023 10:47:12 +0000 Subject: add more details if validation fails --- .../src/main/java/com/yahoo/tensor/MixedTensor.java | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/vespajlib/src/main/java/com/yahoo/tensor/MixedTensor.java b/vespajlib/src/main/java/com/yahoo/tensor/MixedTensor.java index 5b277e4716f..1d55fa78b83 100644 --- a/vespajlib/src/main/java/com/yahoo/tensor/MixedTensor.java +++ b/vespajlib/src/main/java/com/yahoo/tensor/MixedTensor.java @@ -60,20 +60,29 @@ public class MixedTensor implements Tensor { this.cellBlocks = List.copyOf(cellBlocks); this.index = index; if (this.blockSize < 1) { - throw new IllegalStateException("invalid dense subspace size " + blockSize); + throw new IllegalStateException("invalid dense subspace size: " + blockSize); } long count = 0; for (var block : this.cellBlocks) { if (index.sparseMap.get(block.sparseAddr) != count) { - throw new IllegalStateException("map vs list mismatch"); + throw new IllegalStateException("map vs list mismatch: block #" + + count + + " address maps to #" + + index.sparseMap.get(block.sparseAddr)); } if (block.cells.length != blockSize) { - throw new IllegalStateException("block length mismatch"); + throw new IllegalStateException("dense subspace size mismatch, expected " + + blockSize + + " cells, but got: " + + block.cells.length); } ++count; } if (count != index.sparseMap.size()) { - throw new IllegalStateException("map vs list size mismatch"); + throw new IllegalStateException("mismatch: list size is " + + count + + " but map size is " + + index.sparseMap.size()); } } -- cgit v1.2.3