diff options
author    Håvard Pettersen <havardpe@oath.com>    2019-07-10 13:12:04 +0000
committer Håvard Pettersen <havardpe@oath.com>    2019-07-10 15:03:09 +0000
commit a9510442554818a2288b9bb7ea1f2d8a584ef6a0 (patch)
tree   45aa8d38114f69ad52cb46ad69163ed412edc782 /searchlib
parent 6a7d07037ce40a637441078afe6951c00d189e7b (diff)
remove concept of dimension 'bound-ness'
clean up tensor attribute code
Diffstat (limited to 'searchlib')
5 files changed, 55 insertions, 246 deletions
diff --git a/searchlib/src/vespa/searchlib/tensor/dense_tensor_attribute.cpp b/searchlib/src/vespa/searchlib/tensor/dense_tensor_attribute.cpp index ba4c64f1744..a2b9f136ed9 100644 --- a/searchlib/src/vespa/searchlib/tensor/dense_tensor_attribute.cpp +++ b/searchlib/src/vespa/searchlib/tensor/dense_tensor_attribute.cpp @@ -27,57 +27,30 @@ class TensorReader : public ReaderBase private: static constexpr uint8_t tensorIsNotPresent = 0; static constexpr uint8_t tensorIsPresent = 1; - vespalib::eval::ValueType _tensorType; - uint32_t _numUnboundDims; - size_t _numBoundCells; - std::vector<uint32_t> _unboundDimSizes; public: TensorReader(AttributeVector &attr); ~TensorReader(); - size_t getNumCells(); - const vespalib::eval::ValueType &tensorType() const { return _tensorType; } - const std::vector<uint32_t> &getUnboundDimSizes() const { return _unboundDimSizes; } + bool is_present(); void readTensor(void *buf, size_t len) { _datFile->ReadBuf(buf, len); } }; TensorReader::TensorReader(AttributeVector &attr) - : ReaderBase(attr), - _tensorType(vespalib::eval::ValueType::from_spec(getDatHeader().getTag(tensorTypeTag).asString())), - _numUnboundDims(0), - _numBoundCells(1), - _unboundDimSizes() + : ReaderBase(attr) { - for (const auto & dim : _tensorType.dimensions()) { - if (dim.is_bound()) { - _numBoundCells *= dim.size; - } else { - ++_numUnboundDims; - } - } - _unboundDimSizes.resize(_numUnboundDims); } TensorReader::~TensorReader() = default; -size_t -TensorReader::getNumCells() { +bool +TensorReader::is_present() { unsigned char detect; _datFile->ReadBuf(&detect, sizeof(detect)); if (detect == tensorIsNotPresent) { - return 0u; + return false; } if (detect != tensorIsPresent) { LOG_ABORT("should not be reached"); } - size_t numCells = _numBoundCells; - if (_numUnboundDims != 0) { - _datFile->ReadBuf(&_unboundDimSizes[0], _numUnboundDims * sizeof(uint32_t)); - for (auto i = 0u; i < _numUnboundDims; ++i) { - assert(_unboundDimSizes[i] != 0u); - numCells *= 
_unboundDimSizes[i]; - // TODO: sanity check numCells - } - } - return numCells; + return true; } } @@ -140,16 +113,12 @@ DenseTensorAttribute::onLoad() assert(getConfig().tensorType().to_spec() == tensorReader.getDatHeader().getTag(tensorTypeTag).asString()); uint32_t numDocs(tensorReader.getDocIdLimit()); - uint32_t cellSize(_denseTensorStore.getCellSize()); _refVector.reset(); _refVector.unsafe_reserve(numDocs); for (uint32_t lid = 0; lid < numDocs; ++lid) { - size_t numCells = tensorReader.getNumCells(); - if (numCells != 0u) { - const auto &unboundDimSizes = tensorReader.getUnboundDimSizes(); - auto raw = _denseTensorStore.allocRawBuffer(numCells, unboundDimSizes); - size_t rawLen = numCells * cellSize; - tensorReader.readTensor(raw.data, rawLen); + if (tensorReader.is_present()) { + auto raw = _denseTensorStore.allocRawBuffer(); + tensorReader.readTensor(raw.data, _denseTensorStore.getBufSize()); _refVector.push_back(raw.ref); } else { _refVector.push_back(EntryRef()); diff --git a/searchlib/src/vespa/searchlib/tensor/dense_tensor_attribute_saver.cpp b/searchlib/src/vespa/searchlib/tensor/dense_tensor_attribute_saver.cpp index fb0554112ef..d78adab81b5 100644 --- a/searchlib/src/vespa/searchlib/tensor/dense_tensor_attribute_saver.cpp +++ b/searchlib/src/vespa/searchlib/tensor/dense_tensor_attribute_saver.cpp @@ -40,17 +40,15 @@ DenseTensorAttributeSaver::onSave(IAttributeSaveTarget &saveTarget) { std::unique_ptr<BufferWriter> datWriter(saveTarget.datWriter().allocBufferWriter()); - const uint32_t unboundDimSizesSize = _tensorStore.unboundDimSizesSize(); const uint32_t docIdLimit(_refs.size()); const uint32_t cellSize = _tensorStore.getCellSize(); for (uint32_t lid = 0; lid < docIdLimit; ++lid) { if (_refs[lid].valid()) { auto raw = _tensorStore.getRawBuffer(_refs[lid]); datWriter->write(&tensorIsPresent, sizeof(tensorIsPresent)); - size_t numCells = _tensorStore.getNumCells(raw); - size_t rawLen = numCells * cellSize + unboundDimSizesSize; - 
datWriter->write(static_cast<const char *>(raw) - unboundDimSizesSize, - rawLen); + size_t numCells = _tensorStore.getNumCells(); + size_t rawLen = numCells * cellSize; + datWriter->write(static_cast<const char *>(raw), rawLen); } else { datWriter->write(&tensorIsNotPresent, sizeof(tensorIsNotPresent)); } diff --git a/searchlib/src/vespa/searchlib/tensor/dense_tensor_store.cpp b/searchlib/src/vespa/searchlib/tensor/dense_tensor_store.cpp index 11a6839ca59..dc459d7d246 100644 --- a/searchlib/src/vespa/searchlib/tensor/dense_tensor_store.cpp +++ b/searchlib/src/vespa/searchlib/tensor/dense_tensor_store.cpp @@ -30,32 +30,30 @@ size_t size_of(CellType type) { abort(); } +size_t my_align(size_t size, size_t alignment) { + size += alignment - 1; + return (size - (size % alignment)); +} + } DenseTensorStore::TensorSizeCalc::TensorSizeCalc(const ValueType &type) - : _numBoundCells(1u), - _numUnboundDims(0u), + : _numCells(1u), _cellSize(size_of(type.cell_type())) { - for (const auto & dim : type.dimensions()) { - if (dim.is_bound()) { - _numBoundCells *= dim.size; - } else { - ++_numUnboundDims; - } + for (const auto &dim: type.dimensions()) { + _numCells *= dim.size; } } size_t -DenseTensorStore::TensorSizeCalc::arraySize() const +DenseTensorStore::TensorSizeCalc::alignedSize() const { - size_t tensorSize = (_numBoundCells * _cellSize) + (_numUnboundDims * sizeof(uint32_t)); - return DenseTensorStore::BufferType::align(tensorSize, DENSE_TENSOR_ALIGNMENT); + return my_align(bufSize(), DENSE_TENSOR_ALIGNMENT); } DenseTensorStore::BufferType::BufferType(const TensorSizeCalc &tensorSizeCalc) - : datastore::BufferType<char>(tensorSizeCalc.arraySize(), MIN_BUFFER_ARRAYS, RefType::offsetSize()), - _unboundDimSizesSize(tensorSizeCalc._numUnboundDims * sizeof(uint32_t)) + : datastore::BufferType<char>(tensorSizeCalc.alignedSize(), MIN_BUFFER_ARRAYS, RefType::offsetSize()) {} DenseTensorStore::BufferType::~BufferType() = default; @@ -64,15 +62,7 @@ void 
DenseTensorStore::BufferType::cleanHold(void *buffer, size_t offset, size_t numElems, CleanContext) { - // Clear both tensor dimension size information and cells. - memset(static_cast<char *>(buffer) + offset - _unboundDimSizesSize, 0, numElems); -} - -size_t -DenseTensorStore::BufferType::getReservedElements(uint32_t bufferId) const -{ - return datastore::BufferType<char>::getReservedElements(bufferId) + - align(_unboundDimSizesSize); + memset(static_cast<char *>(buffer) + offset, 0, numElems); } DenseTensorStore::DenseTensorStore(const ValueType &type) @@ -81,15 +71,12 @@ DenseTensorStore::DenseTensorStore(const ValueType &type) _tensorSizeCalc(type), _bufferType(_tensorSizeCalc), _type(type), - _emptyCells() + _emptySpace() { - _emptyCells.resize(_tensorSizeCalc._numBoundCells, 0.0); + _emptySpace.resize(getBufSize(), 0); _store.addType(&_bufferType); _store.initActiveBuffers(); - if (_tensorSizeCalc._numUnboundDims == 0) { - // In this case each tensor use the same amount of memory and we can re-use previously allocated raw buffers by using free lists. 
- _store.enableFreeLists(); - } + _store.enableFreeLists(); } DenseTensorStore::~DenseTensorStore() @@ -103,61 +90,32 @@ DenseTensorStore::getRawBuffer(RefType ref) const return _store.getEntryArray<char>(ref, _bufferType.getArraySize()); } - -size_t -DenseTensorStore::getNumCells(const void *buffer) const -{ - const uint32_t *unboundDimSizeEnd = static_cast<const uint32_t *>(buffer); - const uint32_t *unboundDimSizeStart = unboundDimSizeEnd - _tensorSizeCalc._numUnboundDims; - size_t numCells = _tensorSizeCalc._numBoundCells; - for (auto unboundDimSize = unboundDimSizeStart; unboundDimSize != unboundDimSizeEnd; ++unboundDimSize) { - numCells *= *unboundDimSize; - } - return numCells; -} - namespace { -void clearPadAreaAfterBuffer(char *buffer, size_t bufSize, size_t alignedBufSize, uint32_t unboundDimSizesSize) { - size_t padSize = alignedBufSize - unboundDimSizesSize - bufSize; +void clearPadAreaAfterBuffer(char *buffer, size_t bufSize, size_t alignedBufSize) { + size_t padSize = alignedBufSize - bufSize; memset(buffer + bufSize, 0, padSize); } } Handle<char> -DenseTensorStore::allocRawBuffer(size_t numCells) +DenseTensorStore::allocRawBuffer() { - size_t bufSize = numCells * _tensorSizeCalc._cellSize; - size_t alignedBufSize = alignedSize(numCells); + size_t bufSize = getBufSize(); + size_t alignedBufSize = _tensorSizeCalc.alignedSize(); auto result = _concreteStore.freeListRawAllocator<char>(_typeId).alloc(alignedBufSize); - clearPadAreaAfterBuffer(result.data, bufSize, alignedBufSize, unboundDimSizesSize()); + clearPadAreaAfterBuffer(result.data, bufSize, alignedBufSize); return result; } -Handle<char> -DenseTensorStore::allocRawBuffer(size_t numCells, - const std::vector<uint32_t> &unboundDimSizes) -{ - assert(unboundDimSizes.size() == _tensorSizeCalc._numUnboundDims); - auto ret = allocRawBuffer(numCells); - if (_tensorSizeCalc._numUnboundDims > 0) { - memcpy(ret.data - unboundDimSizesSize(), - &unboundDimSizes[0], unboundDimSizesSize()); - } - 
assert(numCells == getNumCells(ret.data)); - return ret; -} - void DenseTensorStore::holdTensor(EntryRef ref) { if (!ref.valid()) { return; } - const void *buffer = getRawBuffer(ref); - size_t numCells = getNumCells(buffer); - _concreteStore.holdElem(ref, alignedSize(numCells)); + _concreteStore.holdElem(ref, _tensorSizeCalc.alignedSize()); } TensorStore::EntryRef @@ -167,111 +125,32 @@ DenseTensorStore::move(EntryRef ref) return RefType(); } auto oldraw = getRawBuffer(ref); - size_t numCells = getNumCells(oldraw); - auto newraw = allocRawBuffer(numCells); - memcpy(newraw.data - unboundDimSizesSize(), - static_cast<const char *>(oldraw) - unboundDimSizesSize(), - numCells * _tensorSizeCalc._cellSize + unboundDimSizesSize()); - _concreteStore.holdElem(ref, alignedSize(numCells)); + auto newraw = allocRawBuffer(); + memcpy(newraw.data, static_cast<const char *>(oldraw), getBufSize()); + _concreteStore.holdElem(ref, _tensorSizeCalc.alignedSize()); return newraw.ref; } -namespace { - -void makeConcreteType(MutableDenseTensorView &tensor, - const void *buffer, - uint32_t numUnboundDims) -{ - const uint32_t *unboundDimSizeEnd = static_cast<const uint32_t *>(buffer); - const uint32_t *unboundDimSizeBegin = unboundDimSizeEnd - numUnboundDims; - tensor.setUnboundDimensions(unboundDimSizeBegin, unboundDimSizeEnd); -} - -} - std::unique_ptr<Tensor> DenseTensorStore::getTensor(EntryRef ref) const { if (!ref.valid()) { return std::unique_ptr<Tensor>(); } - auto raw = getRawBuffer(ref); - size_t numCells = getNumCells(raw); - vespalib::tensor::TypedCells cells_ref(raw, _type.cell_type(), numCells); - if (_tensorSizeCalc._numUnboundDims == 0) { - return std::make_unique<DenseTensorView>(_type, cells_ref); - } else { - auto result = std::make_unique<MutableDenseTensorView>(_type, cells_ref); - makeConcreteType(*result, raw, _tensorSizeCalc._numUnboundDims); - return result; - } + vespalib::tensor::TypedCells cells_ref(getRawBuffer(ref), _type.cell_type(), getNumCells()); + return 
std::make_unique<DenseTensorView>(_type, cells_ref); } void DenseTensorStore::getTensor(EntryRef ref, MutableDenseTensorView &tensor) const { if (!ref.valid()) { - vespalib::tensor::TypedCells cells_ref(&_emptyCells[0], _type.cell_type(), _emptyCells.size()); + vespalib::tensor::TypedCells cells_ref(&_emptySpace[0], _type.cell_type(), getNumCells()); tensor.setCells(cells_ref); - if (_tensorSizeCalc._numUnboundDims > 0) { - tensor.setUnboundDimensionsForEmptyTensor(); - } } else { - auto raw = getRawBuffer(ref); - size_t numCells = getNumCells(raw); - vespalib::tensor::TypedCells cells_ref(raw, _type.cell_type(), numCells); + vespalib::tensor::TypedCells cells_ref(getRawBuffer(ref), _type.cell_type(), getNumCells()); tensor.setCells(cells_ref); - if (_tensorSizeCalc._numUnboundDims > 0) { - makeConcreteType(tensor, raw, _tensorSizeCalc._numUnboundDims); - } - } -} - -namespace { - -void -checkMatchingType(const ValueType &lhs, const ValueType &rhs, size_t numCells) -{ - (void) numCells; - size_t checkNumCells = 1u; - auto rhsItr = rhs.dimensions().cbegin(); - auto rhsItrEnd = rhs.dimensions().cend(); - (void) rhsItrEnd; - for (const auto &dim : lhs.dimensions()) { - (void) dim; - assert(rhsItr != rhsItrEnd); - assert(dim.name == rhsItr->name); - assert(rhsItr->is_bound()); - assert(!dim.is_bound() || dim.size == rhsItr->size); - checkNumCells *= rhsItr->size; - ++rhsItr; - } - assert(lhs.cell_type() == rhs.cell_type()); - assert(numCells == checkNumCells); - assert(rhsItr == rhsItrEnd); -} - -void -setDenseTensorUnboundDimSizes(void *buffer, const ValueType &lhs, uint32_t numUnboundDims, const ValueType &rhs) -{ - uint32_t *unboundDimSizeEnd = static_cast<uint32_t *>(buffer); - uint32_t *unboundDimSize = unboundDimSizeEnd - numUnboundDims; - auto rhsItr = rhs.dimensions().cbegin(); - auto rhsItrEnd = rhs.dimensions().cend(); - (void) rhsItrEnd; - for (const auto &dim : lhs.dimensions()) { - assert (rhsItr != rhsItrEnd); - if (!dim.is_bound()) { - 
assert(unboundDimSize != unboundDimSizeEnd); - *unboundDimSize = rhsItr->size; - ++unboundDimSize; - } - ++rhsItr; } - assert (rhsItr == rhsItrEnd); - assert(unboundDimSize == unboundDimSizeEnd); -} - } template <class TensorType> @@ -279,10 +158,10 @@ TensorStore::EntryRef DenseTensorStore::setDenseTensor(const TensorType &tensor) { size_t numCells = tensor.cellsRef().size; - checkMatchingType(_type, tensor.type(), numCells); - auto raw = allocRawBuffer(numCells); - setDenseTensorUnboundDimSizes(raw.data, _type, _tensorSizeCalc._numUnboundDims, tensor.type()); - memcpy(raw.data, tensor.cellsRef().data, numCells * _tensorSizeCalc._cellSize); + assert(numCells == getNumCells()); + assert(tensor.type() == _type); + auto raw = allocRawBuffer(); + memcpy(raw.data, tensor.cellsRef().data, getBufSize()); return raw.ref; } diff --git a/searchlib/src/vespa/searchlib/tensor/dense_tensor_store.h b/searchlib/src/vespa/searchlib/tensor/dense_tensor_store.h index 6b87bb76b87..bd52709e423 100644 --- a/searchlib/src/vespa/searchlib/tensor/dense_tensor_store.h +++ b/searchlib/src/vespa/searchlib/tensor/dense_tensor_store.h @@ -12,16 +12,6 @@ namespace search::tensor { /** * Class for storing dense tensors with known bounds in memory, used * by DenseTensorAttribute. - * - * Tensor dimension size information for unbound dimensions is at - * negative offset to preserve cell array aligment without - * introducing excessive padding, e.g. if tensor store is setup for - * tensors of type tensor(x[]) then a tensor of type tensor(x[3]) will - * use 32 bytes (inclusive 4 bytes padding). - * - * If both start of tensor dimension size information and start of - * tensor cells were to be 32 byte aligned then tensors of type tensor(x[3]) - * would use 64 bytes. */ class DenseTensorStore : public TensorStore { @@ -32,57 +22,45 @@ public: struct TensorSizeCalc { - size_t _numBoundCells; // product of bound dimension sizes - uint32_t _numUnboundDims; - uint32_t _cellSize; // size of a cell (e.g. 
double => 8) - + size_t _numCells; // product of dimension sizes + uint32_t _cellSize; // size of a cell (e.g. double => 8, float => 4) + TensorSizeCalc(const ValueType &type); - size_t arraySize() const; + size_t bufSize() const { return (_numCells * _cellSize); } + size_t alignedSize() const; }; class BufferType : public datastore::BufferType<char> { using CleanContext = datastore::BufferType<char>::CleanContext; - uint32_t _unboundDimSizesSize; public: BufferType(const TensorSizeCalc &tensorSizeCalc); ~BufferType() override; void cleanHold(void *buffer, size_t offset, size_t numElems, CleanContext cleanCtx) override; - uint32_t unboundDimSizesSize() const { return _unboundDimSizesSize; } - size_t getReservedElements(uint32_t bufferId) const override; - static size_t align(size_t size, size_t alignment) { - size += alignment - 1; - return (size - (size % alignment)); - } - size_t align(size_t size) const { return align(size, _arraySize); } }; private: DataStoreType _concreteStore; TensorSizeCalc _tensorSizeCalc; BufferType _bufferType; ValueType _type; // type of dense tensor - std::vector<double> _emptyCells; + std::vector<char> _emptySpace; size_t unboundCells(const void *buffer) const; template <class TensorType> TensorStore::EntryRef setDenseTensor(const TensorType &tensor); - datastore::Handle<char> allocRawBuffer(size_t numCells); - size_t alignedSize(size_t numCells) const { - return _bufferType.align(numCells * _tensorSizeCalc._cellSize + unboundDimSizesSize()); - } public: DenseTensorStore(const ValueType &type); ~DenseTensorStore() override; const ValueType &type() const { return _type; } - uint32_t unboundDimSizesSize() const { return _bufferType.unboundDimSizesSize(); } - size_t getNumCells(const void *buffer) const; + size_t getNumCells() const { return _tensorSizeCalc._numCells; } uint32_t getCellSize() const { return _tensorSizeCalc._cellSize; } + size_t getBufSize() const { return _tensorSizeCalc.bufSize(); } const void *getRawBuffer(RefType ref) 
const; - datastore::Handle<char> allocRawBuffer(size_t numCells, const std::vector<uint32_t> &unboundDimSizes); + datastore::Handle<char> allocRawBuffer(); void holdTensor(EntryRef ref) override; EntryRef move(EntryRef ref) override; std::unique_ptr<Tensor> getTensor(EntryRef ref) const; diff --git a/searchlib/src/vespa/searchlib/tensor/tensor_attribute.cpp b/searchlib/src/vespa/searchlib/tensor/tensor_attribute.cpp index 7dd666ac74f..89b8e77e136 100644 --- a/searchlib/src/vespa/searchlib/tensor/tensor_attribute.cpp +++ b/searchlib/src/vespa/searchlib/tensor/tensor_attribute.cpp @@ -30,21 +30,6 @@ constexpr uint32_t TENSOR_ATTRIBUTE_VERSION = 0; // minimum dead bytes in tensor attribute before consider compaction constexpr size_t DEAD_SLACK = 0x10000u; - -ValueType -createEmptyTensorType(const ValueType &type) -{ - std::vector<ValueType::Dimension> list; - for (const auto &dim : type.dimensions()) { - if (dim.is_indexed() && !dim.is_bound()) { - list.emplace_back(dim.name, 1); - } else { - list.emplace_back(dim); - } - } - return ValueType::tensor_type(std::move(list)); -} - struct CallMakeEmptyTensor { template <typename CT> static Tensor::UP call(const ValueType &type) { @@ -81,7 +66,7 @@ TensorAttribute::TensorAttribute(vespalib::stringref name, const Config &cfg, Te cfg.getGrowStrategy().getDocsGrowDelta(), getGenerationHolder()), _tensorStore(tensorStore), - _emptyTensor(createEmptyTensor(createEmptyTensorType(cfg.tensorType()))), + _emptyTensor(createEmptyTensor(cfg.tensorType())), _compactGeneration(0) { } |