From 789bb72d01278396fa9e1be37a5c05263b212502 Mon Sep 17 00:00:00 2001
From: Tor Egge <Tor.Egge@broadpark.no>
Date: Wed, 13 Mar 2019 15:14:07 +0100
Subject: Enable dynamic cluster size in dense tensor store.

---
 .../dense_tensor_store/dense_tensor_store_test.cpp | 20 ++++++
 .../vespa/searchlib/tensor/dense_tensor_store.cpp  | 74 +++++++++++++---------
 .../vespa/searchlib/tensor/dense_tensor_store.h    | 25 +++++---
 3 files changed, 81 insertions(+), 38 deletions(-)
diff --git a/searchlib/src/tests/tensor/dense_tensor_store/dense_tensor_store_test.cpp b/searchlib/src/tests/tensor/dense_tensor_store/dense_tensor_store_test.cpp
index 2e88f0e90b0..ab43e19251a 100644
--- a/searchlib/src/tests/tensor/dense_tensor_store/dense_tensor_store_test.cpp
+++ b/searchlib/src/tests/tensor/dense_tensor_store/dense_tensor_store_test.cpp
@@ -111,5 +111,25 @@ TEST_F("require that empty 3d tensor has size 1 in un-bound dimensions", Fixture
                                    add({{"x", 0}, {"y", 1}, {"z", 0}}, 0));
 }
 
+void
+assertClusterSize(const vespalib::string &tensorType, uint32_t expClusterSize) { // checks the cluster size chosen for one tensor type spec
+    Fixture f(tensorType); // Fixture is defined earlier in this test file; presumably it builds f.store for the given type
+    EXPECT_EQUAL(expClusterSize, f.store.getClusterSize()); // getClusterSize() is the test-only accessor on DenseTensorStore
+}
+
+TEST("require that cluster size is calculated correctly")
+{   // expected = (bound cells * sizeof(double) + unbound dims * sizeof(uint32_t)) rounded up to a multiple of 32
+    TEST_DO(assertClusterSize("tensor(x[1])", 32)); // 1 * 8 = 8 -> 32
+    TEST_DO(assertClusterSize("tensor(x[10])", 96)); // 10 * 8 = 80 -> 96
+    TEST_DO(assertClusterSize("tensor(x[3])", 32)); // 3 * 8 = 24 -> 32
+    TEST_DO(assertClusterSize("tensor(x[3],y[])", 32)); // 24 + 1 * 4 = 28 -> 32
+    TEST_DO(assertClusterSize("tensor(x[3],y[],z[])", 32)); // 24 + 2 * 4 = 32 -> 32 (exact fit)
+    TEST_DO(assertClusterSize("tensor(x[3],y[],z[],z2[])", 64)); // 24 + 3 * 4 = 36 -> 64
+    TEST_DO(assertClusterSize("tensor(x[10],y[10])", 800)); // 100 * 8 = 800, already a multiple of 32
+    TEST_DO(assertClusterSize("tensor(x[])", 32)); // 1 * 8 + 1 * 4 = 12 -> 32
+    TEST_DO(assertClusterSize("tensor(x[],x2[],x3[],x4[],x5[],x6[])", 32)); // 8 + 6 * 4 = 32 -> 32 (exact fit)
+    TEST_DO(assertClusterSize("tensor(x[],x2[],x3[],x4[],x5[],x6[],x7[])", 64)); // 8 + 7 * 4 = 36 -> 64
+}
+
 TEST_MAIN() { TEST_RUN_ALL(); }
 
diff --git a/searchlib/src/vespa/searchlib/tensor/dense_tensor_store.cpp b/searchlib/src/vespa/searchlib/tensor/dense_tensor_store.cpp
index 3cb31534bb5..18c52c21206 100644
--- a/searchlib/src/vespa/searchlib/tensor/dense_tensor_store.cpp
+++ b/searchlib/src/vespa/searchlib/tensor/dense_tensor_store.cpp
@@ -17,12 +17,38 @@ using vespalib::eval::ValueType;
 
 namespace search::tensor {
 
+namespace {
+
 constexpr size_t MIN_BUFFER_CLUSTERS = 1024;
 constexpr size_t DENSE_TENSOR_ALIGNMENT = 32;
 
-DenseTensorStore::BufferType::BufferType()
-    : datastore::BufferType<char>(DENSE_TENSOR_ALIGNMENT, MIN_BUFFER_CLUSTERS, RefType::offsetSize()),
-      _unboundDimSizesSize(0u)
+}
+
+DenseTensorStore::TensorSizeCalc::TensorSizeCalc(const ValueType &type)
+    : _numBoundCells(1u),            // multiplicative identity; scaled by each bound dimension below
+      _numUnboundDims(0u),
+      _cellSize(sizeof(double))      // cells are stored as double
+{
+    for (const auto &dimension : type.dimensions()) { // classify every dimension as bound or unbound
+        if (!dimension.is_bound()) {
+            ++_numUnboundDims;       // unbound: only counted, contributes no fixed cells
+            continue;
+        }
+        _numBoundCells *= dimension.size;
+    }
+}
+
+size_t
+DenseTensorStore::TensorSizeCalc::clusterSize() const
+{   // Aligned byte size of a tensor of this type whose unbound dimensions all have size 1.
+    const size_t cellBytes = _numBoundCells * _cellSize;             // payload of the bound cells
+    const size_t dimSizeBytes = _numUnboundDims * sizeof(uint32_t);  // one uint32_t size per unbound dimension
+    return DenseTensorStore::BufferType::align(cellBytes + dimSizeBytes, DENSE_TENSOR_ALIGNMENT);
+}
+
+DenseTensorStore::BufferType::BufferType(const TensorSizeCalc &tensorSizeCalc) // configures the buffer type from the tensor type's size figures
+    : datastore::BufferType<char>(tensorSizeCalc.clusterSize(), MIN_BUFFER_CLUSTERS, RefType::offsetSize()), // first argument was the fixed DENSE_TENSOR_ALIGNMENT before this change
+      _unboundDimSizesSize(tensorSizeCalc._numUnboundDims * sizeof(uint32_t)) // bytes holding one uint32_t size per unbound dimension
 {}
 
 DenseTensorStore::BufferType::~BufferType() = default;
@@ -45,22 +71,12 @@ DenseTensorStore::BufferType::getReservedElements(uint32_t bufferId) const
 DenseTensorStore::DenseTensorStore(const ValueType &type)
     : TensorStore(_concreteStore),
       _concreteStore(),
-      _bufferType(),
+      _tensorSizeCalc(type), // built from the ctor argument, not _type: _type is declared (and thus initialized) later
+      _bufferType(_tensorSizeCalc), // relies on _tensorSizeCalc being declared before _bufferType in the class
       _type(type),
-      _numBoundCells(1u),
-      _numUnboundDims(0u),
-      _cellSize(sizeof(double)),
       _emptyCells()
 {
-    for (const auto & dim : _type.dimensions()) {
-        if (dim.is_bound()) {
-            _numBoundCells *= dim.size;
-        } else {
-            ++_numUnboundDims;
-        }
-    }
-    _emptyCells.resize(_numBoundCells, 0.0);
-    _bufferType.setUnboundDimSizesSize(_numUnboundDims * sizeof(uint32_t));
+    _emptyCells.resize(_tensorSizeCalc._numBoundCells, 0.0); // zero-filled cells handed out by getTensor() for invalid refs
     _store.addType(&_bufferType);
     _store.initActiveBuffers();
 }
@@ -82,8 +98,8 @@ size_t
 DenseTensorStore::getNumCells(const void *buffer) const
 {
     const uint32_t *unboundDimSizeEnd = static_cast<const uint32_t *>(buffer);
-    const uint32_t *unboundDimSizeStart = unboundDimSizeEnd - _numUnboundDims;
-    size_t numCells = _numBoundCells;
+    const uint32_t *unboundDimSizeStart = unboundDimSizeEnd - _tensorSizeCalc._numUnboundDims;
+    size_t numCells = _tensorSizeCalc._numBoundCells;
     for (auto unboundDimSize = unboundDimSizeStart; unboundDimSize != unboundDimSizeEnd; ++unboundDimSize) {
         numCells *= *unboundDimSize;
     }
@@ -102,7 +118,7 @@ void clearPadAreaAfterBuffer(char *buffer, size_t bufSize, size_t alignedBufSize
 Handle<char>
 DenseTensorStore::allocRawBuffer(size_t numCells)
 {
-    size_t bufSize = numCells * _cellSize;
+    size_t bufSize = numCells * _tensorSizeCalc._cellSize;
     size_t alignedBufSize = alignedSize(numCells);
     auto result = _concreteStore.rawAllocator<char>(_typeId).alloc(alignedBufSize);
     clearPadAreaAfterBuffer(result.data, bufSize, alignedBufSize, unboundDimSizesSize());
@@ -113,9 +129,9 @@ Handle<char>
 DenseTensorStore::allocRawBuffer(size_t numCells,
                                  const std::vector<uint32_t> &unboundDimSizes)
 {
-    assert(unboundDimSizes.size() == _numUnboundDims);
+    assert(unboundDimSizes.size() == _tensorSizeCalc._numUnboundDims);
     auto ret = allocRawBuffer(numCells);
-    if (_numUnboundDims > 0) {
+    if (_tensorSizeCalc._numUnboundDims > 0) {
         memcpy(ret.data - unboundDimSizesSize(),
                &unboundDimSizes[0], unboundDimSizesSize());
     }
@@ -145,7 +161,7 @@ DenseTensorStore::move(EntryRef ref)
     auto newraw = allocRawBuffer(numCells);
     memcpy(newraw.data - unboundDimSizesSize(),
            static_cast<const char *>(oldraw) - unboundDimSizesSize(),
-           numCells * _cellSize + unboundDimSizesSize());
+           numCells * _tensorSizeCalc._cellSize + unboundDimSizesSize());
     _concreteStore.holdElem(ref, alignedSize(numCells));
     return newraw.ref;
 }
@@ -172,11 +188,11 @@ DenseTensorStore::getTensor(EntryRef ref) const
     }
     auto raw = getRawBuffer(ref);
     size_t numCells = getNumCells(raw);
-    if (_numUnboundDims == 0) {
+    if (_tensorSizeCalc._numUnboundDims == 0) {
         return std::make_unique<DenseTensorView>(_type, CellsRef(static_cast<const double *>(raw), numCells));
     } else {
         auto result = std::make_unique<MutableDenseTensorView>(_type, CellsRef(static_cast<const double *>(raw), numCells));
-        makeConcreteType(*result, raw, _numUnboundDims);
+        makeConcreteType(*result, raw, _tensorSizeCalc._numUnboundDims);
         return result;
     }
 }
@@ -186,15 +202,15 @@ DenseTensorStore::getTensor(EntryRef ref, MutableDenseTensorView &tensor) const
 {
     if (!ref.valid()) {
         tensor.setCells(DenseTensorView::CellsRef(&_emptyCells[0], _emptyCells.size()));
-        if (_numUnboundDims > 0) {
+        if (_tensorSizeCalc._numUnboundDims > 0) {
             tensor.setUnboundDimensionsForEmptyTensor();
         }
     } else {
         auto raw = getRawBuffer(ref);
         size_t numCells = getNumCells(raw);
         tensor.setCells(DenseTensorView::CellsRef(static_cast<const double *>(raw), numCells));
-        if (_numUnboundDims > 0) {
-            makeConcreteType(tensor, raw, _numUnboundDims);
+        if (_tensorSizeCalc._numUnboundDims > 0) {
+            makeConcreteType(tensor, raw, _tensorSizeCalc._numUnboundDims);
         }
     }
 }
@@ -252,8 +268,8 @@ DenseTensorStore::setDenseTensor(const TensorType &tensor)
     size_t numCells = tensor.cellsRef().size();
     checkMatchingType(_type, tensor.type(), numCells);
     auto raw = allocRawBuffer(numCells);
-    setDenseTensorUnboundDimSizes(raw.data, _type, _numUnboundDims, tensor.type());
-    memcpy(raw.data, &tensor.cellsRef()[0], numCells * _cellSize);
+    setDenseTensorUnboundDimSizes(raw.data, _type, _tensorSizeCalc._numUnboundDims, tensor.type());
+    memcpy(raw.data, &tensor.cellsRef()[0], numCells * _tensorSizeCalc._cellSize);
     return raw.ref;
 }
 
diff --git a/searchlib/src/vespa/searchlib/tensor/dense_tensor_store.h b/searchlib/src/vespa/searchlib/tensor/dense_tensor_store.h
index 9810ff43a19..55ddd2ec9e4 100644
--- a/searchlib/src/vespa/searchlib/tensor/dense_tensor_store.h
+++ b/searchlib/src/vespa/searchlib/tensor/dense_tensor_store.h
@@ -30,18 +30,25 @@ public:
     using DataStoreType = datastore::DataStoreT<RefType>;
     using ValueType = vespalib::eval::ValueType;
 
+    struct TensorSizeCalc // size figures derived once from a dense tensor ValueType
+    {
+        size_t   _numBoundCells; // product of bound dimension sizes
+        uint32_t _numUnboundDims; // number of dimensions for which is_bound() is false
+        uint32_t _cellSize; // size of a cell (e.g. double => 8)
+        
+        TensorSizeCalc(const ValueType &type); // walks type.dimensions() to fill the three fields above
+        size_t clusterSize() const; // bound-cell bytes plus one uint32_t per unbound dimension, rounded up via BufferType::align
+    };
+
     class BufferType : public datastore::BufferType<char>
     {
         using CleanContext = datastore::BufferType<char>::CleanContext;
         uint32_t _unboundDimSizesSize;
     public:
-        BufferType();
+        BufferType(const TensorSizeCalc &tensorSizeCalc);
         ~BufferType() override;
         void cleanHold(void *buffer, uint64_t offset, uint64_t len, CleanContext cleanCtx) override;
         uint32_t unboundDimSizesSize() const { return _unboundDimSizesSize; }
-        void setUnboundDimSizesSize(uint32_t unboundDimSizesSize_in) {
-            _unboundDimSizesSize = unboundDimSizesSize_in;
-        }
         size_t getReservedElements(uint32_t bufferId) const override;
         static size_t align(size_t size, size_t alignment) {
             size += alignment - 1;
@@ -51,11 +58,9 @@ public:
     };
 private:
     DataStoreType _concreteStore;
+    TensorSizeCalc _tensorSizeCalc;
     BufferType _bufferType;
     ValueType _type; // type of dense tensor
-    size_t _numBoundCells; // product of bound dimension sizes
-    uint32_t _numUnboundDims;
-    uint32_t _cellSize; // size of a cell (e.g. double => 8)
     std::vector<double> _emptyCells;
 
     size_t unboundCells(const void *buffer) const;
@@ -65,7 +70,7 @@ private:
     setDenseTensor(const TensorType &tensor);
     datastore::Handle<char> allocRawBuffer(size_t numCells);
     size_t alignedSize(size_t numCells) const {
-        return _bufferType.align(numCells * _cellSize + unboundDimSizesSize());
+        return _bufferType.align(numCells * _tensorSizeCalc._cellSize + unboundDimSizesSize()); // cell bytes plus the unbound-dimension-size bytes, passed through BufferType::align
     }
 
 public:
@@ -75,7 +80,7 @@ public:
     const ValueType &type() const { return _type; }
     uint32_t unboundDimSizesSize() const { return _bufferType.unboundDimSizesSize(); }
     size_t getNumCells(const void *buffer) const;
-    uint32_t getCellSize() const { return _cellSize; }
+    uint32_t getCellSize() const { return _tensorSizeCalc._cellSize; } // bytes per cell; TensorSizeCalc sets this to sizeof(double)
     const void *getRawBuffer(RefType ref) const;
     datastore::Handle<char> allocRawBuffer(size_t numCells, const std::vector<uint32_t> &unboundDimSizes);
     void holdTensor(EntryRef ref) override;
@@ -83,6 +88,8 @@ public:
     std::unique_ptr<Tensor> getTensor(EntryRef ref) const;
     void getTensor(EntryRef ref, vespalib::tensor::MutableDenseTensorView &tensor) const;
     EntryRef setTensor(const Tensor &tensor);
+    // Intended for unit tests only: exposes the cluster size held by the buffer type.
+    uint32_t getClusterSize() const { return _bufferType.getClusterSize(); } // forwards to _bufferType; no computation here
 };
 
 }
-- 
cgit v1.2.3