author      Henning Baldersheim <balder@yahoo-inc.com>   2022-02-14 13:40:24 +0000
committer   Henning Baldersheim <balder@yahoo-inc.com>   2022-02-14 16:20:46 +0000
commit      2cf04edd1930887a04559864d29fa180aac1ae6b (patch)
tree        1565794819c1d922b561eb6b4a3a3f9dd178d32a /vespamalloc
parent      6e231823aa201df4ef82158d969864974727777b (diff)
Replace uses of templates with an interface.
None of these uses were necessary for speed;
they just complicated code generation.
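
The pattern in miniature: DataSegment used to be a class template that called static
functions on its MemBlock parameter, so every allocator flavour needed its own explicit
instantiation; now one non-template class reaches the same functions through a small
virtual interface that the templated owner implements. Below is a minimal, hypothetical
sketch of that shape; the names are simplified stand-ins, not the commit's real API
(the real interface is IHelper/IMemblockInfo in datasegment.h, implemented by
MemoryManager in malloc.h).

    // Sketch only: simplified stand-ins for the real IHelper/DataSegment pair.
    #include <cstddef>
    #include <cstdio>

    // Before: the segment is a template and calls MemBlockPtrT statically,
    // so each memblock flavour forces a separate instantiation unit.
    template <typename MemBlockPtrT>
    struct TemplatedSegment {
        size_t blockSize(int sc) const { return MemBlockPtrT::classSize(sc); }
    };

    // After: the memblock-specific calls go through a virtual interface...
    struct Helper {
        virtual ~Helper() = default;
        virtual size_t classSize(int sc) const = 0;
    };

    // ...so the segment itself is a plain class, compiled once.
    struct Segment {
        explicit Segment(const Helper & h) : _helper(h) {}
        size_t blockSize(int sc) const { return _helper.classSize(sc); }
        const Helper & _helper;
    };

    // The owner stays a template and adapts the static calls to the interface.
    template <typename MemBlockPtrT>
    struct Manager : Helper {
        Manager() : _segment(*this) {}   // the segment keeps only a reference
        size_t classSize(int sc) const override { return MemBlockPtrT::classSize(sc); }
        Segment _segment;
    };

    struct DemoBlock {  // stand-in memblock: power-of-two size classes
        static size_t classSize(int sc) { return size_t(1) << sc; }
    };

    int main() {
        Manager<DemoBlock> mgr;
        printf("class 5 -> %zu bytes\n", mgr._segment.blockSize(5)); // prints 32
        return 0;
    }

The cost is a virtual call per lookup instead of an inlined static call, which, per the
message above, was never what made these paths fast; in exchange the per-flavour
instantiation units (datasegmentd.cpp, datasegmentdst.cpp) disappear.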
Diffstat (limited to 'vespamalloc')
-rw-r--r--   vespamalloc/src/vespamalloc/malloc/CMakeLists.txt            6
-rw-r--r--   vespamalloc/src/vespamalloc/malloc/common.h                  4
-rw-r--r--   vespamalloc/src/vespamalloc/malloc/datasegment.cpp         329
-rw-r--r--   vespamalloc/src/vespamalloc/malloc/datasegment.h            47
-rw-r--r--   vespamalloc/src/vespamalloc/malloc/datasegment.hpp         339
-rw-r--r--   vespamalloc/src/vespamalloc/malloc/datasegmentd.cpp          9
-rw-r--r--   vespamalloc/src/vespamalloc/malloc/datasegmentdst.cpp        9
-rw-r--r--   vespamalloc/src/vespamalloc/malloc/globalpool.h              2
-rw-r--r--   vespamalloc/src/vespamalloc/malloc/malloc.h                 29
-rw-r--r--   vespamalloc/src/vespamalloc/malloc/memblock.h                2
-rw-r--r--   vespamalloc/src/vespamalloc/malloc/memblockboundscheck.h     2
-rw-r--r--   vespamalloc/src/vespamalloc/malloc/threadpool.h              2
-rw-r--r--   vespamalloc/src/vespamalloc/util/callgraph.h                 6
13 files changed, 394 insertions, 392 deletions
diff --git a/vespamalloc/src/vespamalloc/malloc/CMakeLists.txt b/vespamalloc/src/vespamalloc/malloc/CMakeLists.txt
index c3a49161f32..985cd9948ad 100644
--- a/vespamalloc/src/vespamalloc/malloc/CMakeLists.txt
+++ b/vespamalloc/src/vespamalloc/malloc/CMakeLists.txt
@@ -24,7 +24,7 @@ vespa_add_library(vespamalloc_mallocd OBJECT
     threadproxy.cpp
     memblockboundscheck.cpp
     memblockboundscheck_d.cpp
-    datasegmentd.cpp
+    datasegment.cpp
     globalpoold.cpp
     threadpoold.cpp
     threadlistd.cpp
@@ -40,7 +40,7 @@ vespa_add_library(vespamalloc_mallocdst16 OBJECT
     threadproxy.cpp
     memblockboundscheck.cpp
     memblockboundscheck_dst.cpp
-    datasegmentdst.cpp
+    datasegment.cpp
    globalpooldst.cpp
     threadpooldst.cpp
     threadlistdst.cpp
@@ -57,7 +57,7 @@ vespa_add_library(vespamalloc_mallocdst16_nl OBJECT
     threadproxy.cpp
     memblockboundscheck.cpp
     memblockboundscheck_dst.cpp
-    datasegmentdst.cpp
+    datasegment.cpp
     globalpooldst.cpp
     threadpooldst.cpp
     threadlistdst.cpp
diff --git a/vespamalloc/src/vespamalloc/malloc/common.h b/vespamalloc/src/vespamalloc/malloc/common.h
index 65a86b89bf6..892df72def4 100644
--- a/vespamalloc/src/vespamalloc/malloc/common.h
+++ b/vespamalloc/src/vespamalloc/malloc/common.h
@@ -67,11 +67,11 @@ class CommonT
 public:
     static constexpr size_t MAX_ALIGN = 0x200000ul;
     enum {MinClassSize = MinClassSizeC};
-    static inline constexpr SizeClassT sizeClass(size_t sz) {
+    static constexpr SizeClassT sizeClass(size_t sz) noexcept {
         SizeClassT tmp(msbIdx(sz - 1) - (MinClassSizeC - 1));
         return (sz <= (1 << MinClassSizeC )) ? 0 : tmp;
     }
-    static inline constexpr size_t classSize(SizeClassT sc) { return (size_t(1) << (sc + MinClassSizeC)); }
+    static constexpr size_t classSize(SizeClassT sc) noexcept { return (size_t(1) << (sc + MinClassSizeC)); }
 };
 
 inline void crash() { *((volatile unsigned *) nullptr) = 0; }
diff --git a/vespamalloc/src/vespamalloc/malloc/datasegment.cpp b/vespamalloc/src/vespamalloc/malloc/datasegment.cpp
index d794fb88419..4c815476dab 100644
--- a/vespamalloc/src/vespamalloc/malloc/datasegment.cpp
+++ b/vespamalloc/src/vespamalloc/malloc/datasegment.cpp
@@ -1,9 +1,332 @@
 // Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
-#include "datasegment.hpp"
-#include "memblock.h"
+
+#include "datasegment.h"
 
 namespace vespamalloc::segment {
 
-template class DataSegment<MemBlock>;
+DataSegment::~DataSegment() = default;
+
+#define INIT_LOG_LIMIT 0x400000000ul // 16G
+
+DataSegment::DataSegment(const IHelper & helper) :
+    _osMemory(BlockSize),
+    _bigSegmentLogLevel(0),
+    _bigIncrement (0x4000000),
+    _allocs2Show (8),
+    _unmapSize(0x100000),
+    _nextLogLimit(INIT_LOG_LIMIT),
+    _partialExtension(0),
+    _helper(helper),
+    _mutex(),
+    _freeList(_blockList),
+    _unMappedList(_blockList)
+{
+    size_t wanted(0x1000000000ul); //64G
+    void * everything = _osMemory.reserve(wanted);
+    if (everything) {
+        for (BlockIdT i = blockId(everything), m = blockId(everything) + (wanted / BlockSize); i < m; i++) {
+            if (i > BlockCount) {
+                abort();
+            }
+            _blockList[i].sizeClass(UNUSED_BLOCK);
+            _blockList[i].freeChainLength(m-i);
+        }
+        _freeList.add(blockId(everything));
+    }
+    _nextLogLimit = std::max(size_t(end()) + _nextLogLimit, _nextLogLimit);
+}
+
+size_t
+DataSegment::freeSize() const {
+    return _freeList.numFreeBlocks() * BlockSize;
+}
+
+void * DataSegment::getBlock(size_t & oldBlockSize, SizeClassT sc)
+{
+    const size_t minBlockSize = std::max(BlockSize, _osMemory.getMinBlockSize());
+    oldBlockSize = ((oldBlockSize + (minBlockSize-1))/minBlockSize)*minBlockSize;
+    BlockIdT numBlocks((oldBlockSize + (BlockSize - 1)) / BlockSize);
+    size_t blockSize = BlockSize * numBlocks;
+    void * newBlock;
+    {
+        Guard sync(_mutex);
+        newBlock = _freeList.sub(numBlocks);
+        if ( newBlock == nullptr ) {
+            newBlock = _unMappedList.sub(numBlocks);
+            if ( newBlock == nullptr ) {
+                BlockIdT nextBlock = blockId(end());
+                BlockIdT startBlock = _freeList.lastBlock(nextBlock);
+                if (startBlock) {
+                    size_t adjustedBlockSize = blockSize - BlockSize*(nextBlock-startBlock);
+                    newBlock = _osMemory.get(adjustedBlockSize);
+                    if (newBlock != nullptr) {
+                        assert (newBlock == fromBlockId(nextBlock));
+                        _freeList.removeLastBlock();
+                        newBlock = fromBlockId(startBlock);
+                        _partialExtension++;
+                    }
+                } else {
+                    newBlock = _osMemory.get(blockSize);
+                }
+            } else {
+                bool result(_osMemory.reclaim(newBlock, blockSize));
+                assert (result);
+                (void) result;
+            }
+        } else {
+            DEBUG(fprintf(stderr, "Reuse segment %p(%d, %d)\n", newBlock, sc, numBlocks));
+        }
+    }
+    if (newBlock == (void *) -1) {
+        newBlock = nullptr;
+        blockSize = 0;
+    } else if (newBlock == nullptr) {
+        blockSize = 0;
+    } else {
+        assert(blockId(newBlock)+numBlocks < BlockCount);
+        // assumes _osMemory.get will always return a value that does not make
+        // "i" overflow the _blockList array; this will break when hitting the
+        // 2T address space boundary.
+        for (BlockIdT i = blockId(newBlock), m = blockId(newBlock) + numBlocks; i < m; i++) {
+            _blockList[i].sizeClass(sc);
+            _blockList[i].freeChainLength(m-i);
+            _blockList[i].realNumBlocks(m-i);
+        }
+    }
+    oldBlockSize = blockSize;
+    if (newBlock == nullptr) {
+        static int recurse = 0;
+        if (recurse++ == 0) {
+            perror("Failed extending datasegment: ");
+            assert(false);
+        }
+        return nullptr;
+    }
+    checkAndLogBigSegment();
+    return newBlock;
+}
+
+void DataSegment::checkAndLogBigSegment()
+{
+    if (size_t(end()) >= _nextLogLimit) {
+        fprintf(stderr, "Datasegment is growing ! Start:%p - End:%p : nextLogLimit = %lx\n", start(), end(), _nextLogLimit);
+        _nextLogLimit = ((size_t(end()) + _bigIncrement)/_bigIncrement)*_bigIncrement;
+        static int recurse = 0;
+        if (recurse++ == 0) {
+            if (_bigSegmentLogLevel > 0) {
+                _helper.dumpInfo(_bigSegmentLogLevel);
+            }
+        }
+        recurse--;
+    }
+}
+
+void DataSegment::returnBlock(void *ptr)
+{
+    BlockIdT bId(blockId(ptr));
+    SizeClassT sc = _blockList[bId].sizeClass();
+    size_t bsz = _helper.classSize(sc);
+    if (bsz >= BlockSize) {
+        BlockIdT numBlocks = bsz / BlockSize;
+        if (numBlocks > _blockList[bId].realNumBlocks()) {
+            numBlocks = _blockList[bId].realNumBlocks();
+        }
+        assert(_blockList[bId].freeChainLength() >= numBlocks);
+        if ((_unmapSize < bsz) && _osMemory.release(ptr, numBlocks*BlockSize)) {
+            for(BlockIdT i=0; i < numBlocks; i++) {
+                BlockT & b = _blockList[bId + i];
+                b.sizeClass(UNMAPPED_BLOCK);
+                b.freeChainLength(numBlocks - i);
+            }
+            {
+                Guard sync(_mutex);
+                _unMappedList.add(bId);
+            }
+        } else {
+            for(BlockIdT i=0; i < numBlocks; i++) {
+                BlockT & b = _blockList[bId + i];
+                b.sizeClass(FREE_BLOCK);
+                b.freeChainLength(numBlocks - i);
+            }
+            {
+                Guard sync(_mutex);
+                _freeList.add(bId);
+            }
+        }
+    }
+}
+
+namespace {
+
+std::vector<uint32_t>
+createHistogram(bool allThreads, uint32_t maxThreads) {
+    if (allThreads) {
+        return std::vector<uint32_t>(maxThreads, 0);
+    }
+    return std::vector<uint32_t>();
+}
+
+}
+
+size_t DataSegment::infoThread(FILE * os, int level, uint32_t thread, SizeClassT sct, uint32_t maxThreadId) const
+{
+    using CallGraphLT = CallGraph<StackEntry, 0x10000, Index>;
+    bool allThreads(thread == 0);
+    size_t usedCount(0);
+    size_t checkedCount(0);
+    size_t allocatedCount(0);
+    size_t notAccounted(0);
+    size_t invalidCallStacks(0);
+    std::unique_ptr<CallGraphLT> callGraph = std::make_unique<CallGraphLT>();
+    std::vector<uint32_t> threadHistogram = createHistogram(allThreads, maxThreadId);
+    for (size_t i=0; i < NELEMS(_blockList); ) {
+        const BlockT & b = _blockList[i];
+        SizeClassT sc = b.sizeClass();
+        if (sc == sct) {
+            size_t sz = _helper.classSize(sc);
+            size_t numB(b.freeChainLength());
+            for(char *m((char *)(fromBlockId(i))), *em((char*)(fromBlockId(i+numB))); (m + sz) <= em; m += sz) {
+                (void) m;
+                (void) em;
+                auto mem = _helper.createMemblockInfo(m);
+                checkedCount++;
+                if (mem->allocated()) {
+                    allocatedCount++;
+                    if (allThreads || (mem->threadId() == thread)) {
+                        usedCount++;
+                        if (mem->threadId() < threadHistogram.size()) {
+                            threadHistogram[mem->threadId()]++;
+                        }
+                        if (usedCount < _allocs2Show) {
+                            mem->info(os, level);
+                        }
+                        if (mem->callStackLen() && mem->callStack()[0].valid()) {
+                            size_t csl(mem->callStackLen());
+                            for (size_t j(0); j < csl; j++) {
+                                if ( ! mem->callStack()[j].valid()) {
+                                    csl = j;
+                                }
+                            }
+                            if ( ! callGraph->addStack(mem->callStack(), csl)) {
+                                notAccounted++;
+                            }
+                        } else {
+                            if (mem->callStackLen()) {
+                                invalidCallStacks++;
+                            }
+                        }
+                    }
+                }
+            }
+            i += numB;
+        } else {
+            i++;
+        }
+    }
+    if (checkedCount == 0) return 0;
+
+    fprintf(os, "\nCallTree SC %d(Checked=%ld, GlobalAlloc=%ld(%ld%%)," "By%sAlloc=%ld(%2.2f%%) NotAccountedDue2FullGraph=%ld InvalidCallStacks=%ld:\n",
+            sct, checkedCount, allocatedCount, allocatedCount*100/checkedCount,
+            allThreads ? "Us" : "Me",
+            usedCount, static_cast<double>(usedCount*100)/checkedCount, notAccounted, invalidCallStacks);
+    if ( ! callGraph->empty()) {
+        Aggregator agg;
+        DumpGraph<typename CallGraphLT::Node> dump(&agg, "{ ", " }");
+        callGraph->traverseDepth(dump);;
+        asciistream ost;
+        ost << agg;
+        fprintf(os, "%s\n", ost.c_str());
+    }
+    if ( !threadHistogram.empty()) {
+        uint32_t nonZeroCount(0);
+        for (uint32_t i(0); i < threadHistogram.size(); i++) {
+            if (threadHistogram[i] > 0) {
+                nonZeroCount++;
+            }
+        }
+        using Pair = std::pair<uint32_t, uint32_t>;
+        std::vector<Pair> orderedHisto;
+        orderedHisto.reserve(nonZeroCount);
+        for (uint32_t i(0); i < threadHistogram.size(); i++) {
+            if (threadHistogram[i] > 0) {
+                orderedHisto.emplace_back(i, threadHistogram[i]);
+            }
+        }
+        std::sort(orderedHisto.begin(), orderedHisto.end(), [](const Pair & a, const Pair & b) { return a.second > b.second;});
+        fprintf(os, "ThreadHistogram SC %d: [", sct);
+
+        bool first(true);
+        for (const Pair & entry : orderedHisto) {
+            if ( !first) {
+                fprintf(os, ", ");
+            }
+            fprintf(os, "{%u, %u}", entry.first, entry.second);
+            first = false;
+        }
+        fprintf(os, " ]");
+    }
+    return usedCount;
+}
+
+void DataSegment::info(FILE * os, size_t level)
+{
+    fprintf(os, "Start at %p, End at %p(%p) size(%ld) partialExtension(%ld) NextLogLimit(%lx) logLevel(%ld)\n",
+            _osMemory.getStart(), _osMemory.getEnd(), sbrk(0), dataSize(), _partialExtension, _nextLogLimit, level);
+    size_t numAllocatedBlocks(0);
+    size_t numFreeBlocks = _freeList.numFreeBlocks();
+    _freeList.info(os);
+    _unMappedList.info(os);
+    if (level >= 1) {
+#ifdef PRINT_ALOT
+        SizeClassT oldSc(-17);
+        size_t oldChainLength(0);
+#endif
+        size_t scTable[32+NUM_ADMIN_CLASSES];
+        memset(scTable, 0, sizeof(scTable));
+        for (size_t i=0; (i < NELEMS(_blockList)) && ((i*BlockSize) < dataSize()); i++) {
+            BlockT & b = _blockList[i];
+#ifdef PRINT_ALOT
+            if ((b.sizeClass() != oldSc)
+                || ((oldChainLength < (b.freeChainLength()+1))
+                    && b.freeChainLength()))
+            {
+                scTable[b.sizeClass()+NUM_ADMIN_CLASSES] += b.freeChainLength();
+                oldSc = b.sizeClass();
+                if (level & 0x2) {
+                    fprintf(os, "Block %d at address %p with chainLength %d "
+                                "freeCount %d sizeClass %d and size %d\n",
+                            i, fromBlockId(i), b.freeChainLength(), b.freeCount(),
+                            b.sizeClass(), classSize(b.sizeClass()));
+                }
+            }
+            oldChainLength = b.freeChainLength();
+#else
+            scTable[b.sizeClass()+NUM_ADMIN_CLASSES]++;
+#endif
+        }
+        size_t numAdminBlocks(0);
+        for(size_t i=0; i < NUM_ADMIN_CLASSES; i++) {
+            if (scTable[i] != 0ul) {
+                numAllocatedBlocks += scTable[i];
+                numAdminBlocks += scTable[i];
+                fprintf(os, "SizeClass %2ld(%s) has %5ld blocks with %10lu bytes\n",
+                        i-NUM_ADMIN_CLASSES, getAdminClassName(i-NUM_ADMIN_CLASSES), scTable[i], scTable[i]*BlockSize);
+            }
+        }
+        for(size_t i=NUM_ADMIN_CLASSES; i < NELEMS(scTable); i++) {
+            if (scTable[i] != 0ul) {
+                numAllocatedBlocks += scTable[i];
+                fprintf(os, "SizeClass %2ld has %5ld blocks with %10lu bytes\n",
+                        i-NUM_ADMIN_CLASSES, scTable[i], scTable[i]*BlockSize);
+            }
+        }
+        size_t total(dataSize()/BlockSize);
+        fprintf(os, "Usage: Total=%ld(100%%), admin=%ld(%ld%%), unused=%ld(%ld%%), allocated=%ld(%ld%%)\n",
+                total*BlockSize,
+                numAdminBlocks*BlockSize, numAdminBlocks*100/total,
+                numFreeBlocks*BlockSize, numFreeBlocks*100/total,
+                (numAllocatedBlocks-numAdminBlocks)*BlockSize, (numAllocatedBlocks-numAdminBlocks)*100/total);
+    }
+}
 
 }
diff --git a/vespamalloc/src/vespamalloc/malloc/datasegment.h b/vespamalloc/src/vespamalloc/malloc/datasegment.h
index ada9b46a241..a72d6160324 100644
--- a/vespamalloc/src/vespamalloc/malloc/datasegment.h
+++ b/vespamalloc/src/vespamalloc/malloc/datasegment.h
@@ -8,19 +8,36 @@
 
 namespace vespamalloc::segment {
 
-template<typename MemBlockPtrT>
+class IMemblockInfo {
+public:
+    virtual ~IMemblockInfo() = default;
+    virtual bool allocated() const = 0;
+    virtual uint32_t threadId() const = 0;
+    virtual void info(FILE * os, int level) const = 0;
+    virtual uint32_t callStackLen() const = 0;
+    virtual const StackEntry * callStack() const = 0;
+};
+class IHelper {
+public:
+    virtual ~IHelper() = default;
+    virtual size_t classSize(SizeClassT sc) const = 0;
+    virtual void dumpInfo(int level) const = 0;
+    virtual std::unique_ptr<IMemblockInfo> createMemblockInfo(void * ptr) const = 0;
+};
+
 class DataSegment {
 public:
     DataSegment(const DataSegment & rhs) = delete;
     DataSegment & operator = (const DataSegment & rhs) = delete;
-    DataSegment() __attribute__((noinline));
+    DataSegment(const IHelper & helper) __attribute__((noinline));
     ~DataSegment() __attribute__((noinline));
 
     void * getBlock(size_t & oldBlockSize, SizeClassT sc) __attribute__((noinline));
     void returnBlock(void *ptr) __attribute__((noinline));
     SizeClassT sizeClass(const void * ptr) const { return _blockList[blockId(ptr)].sizeClass(); }
     bool containsPtr(const void * ptr) const { return blockId(ptr) < BlockCount; }
+    template<typename MemBlockPtrT>
     size_t getMaxSize(const void * ptr) const { return _blockList[blockId(ptr)].getMaxSize<MemBlockPtrT>(); }
     const void * start() const { return _osMemory.getStart(); }
     const void * end() const { return _osMemory.getEnd(); }
@@ -30,8 +47,7 @@ public:
     size_t freeSize() const;
     size_t infoThread(FILE * os, int level, uint32_t thread, SizeClassT sct, uint32_t maxThreadId=0) const __attribute__((noinline));
     void info(FILE * os, size_t level) __attribute__((noinline));
-    void setupLog(size_t bigMemLogLevel, size_t bigLimit, size_t bigIncrement, size_t allocs2Show)
-    {
+    void setupLog(size_t bigMemLogLevel, size_t bigLimit, size_t bigIncrement, size_t allocs2Show) {
         _bigSegmentLogLevel = bigMemLogLevel;
         if ((size_t(end()) < _nextLogLimit) || (size_t(end()) < (size_t(start()) + bigLimit))) {
             _nextLogLimit = size_t(start()) + bigLimit;
@@ -48,18 +64,19 @@ private:
     typedef BlockT BlockList[BlockCount];
     typedef FreeListT<BlockCount/2> FreeList;
 
-    OSMemory _osMemory;
-    size_t _bigSegmentLogLevel;
-    size_t _bigIncrement;
-    size_t _allocs2Show;
-    size_t _unmapSize;
+    OSMemory       _osMemory;
+    size_t         _bigSegmentLogLevel;
+    size_t         _bigIncrement;
+    size_t         _allocs2Show;
+    size_t         _unmapSize;
+    size_t         _nextLogLimit;
+    size_t         _partialExtension;
+    const IHelper &_helper;
 
-    size_t _nextLogLimit;
-    size_t _partialExtension;
-    Mutex _mutex;
-    BlockList _blockList;
-    FreeList _freeList;
-    FreeList _unMappedList;
+    Mutex          _mutex;
+    BlockList      _blockList;
+    FreeList       _freeList;
+    FreeList       _unMappedList;
 };
 
 }
diff --git a/vespamalloc/src/vespamalloc/malloc/datasegment.hpp b/vespamalloc/src/vespamalloc/malloc/datasegment.hpp
deleted file mode 100644
index 9c92bb19f7c..00000000000
--- a/vespamalloc/src/vespamalloc/malloc/datasegment.hpp
+++ /dev/null
@@ -1,339 +0,0 @@
-// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
-#pragma once
-
-#include "datasegment.h"
-
-namespace vespamalloc::segment {
-
-template<typename MemBlockPtrT>
-DataSegment<MemBlockPtrT>::~DataSegment() = default;
-
-#define INIT_LOG_LIMIT 0x400000000ul // 16G
-
-template<typename MemBlockPtrT>
-DataSegment<MemBlockPtrT>::DataSegment() :
-    _osMemory(BlockSize),
-    _bigSegmentLogLevel(0),
-    _bigIncrement (0x4000000),
-    _allocs2Show (8),
-    _unmapSize(0x100000),
-    _nextLogLimit(INIT_LOG_LIMIT),
-    _partialExtension(0),
-    _mutex(),
-    _freeList(_blockList),
-    _unMappedList(_blockList)
-{
-    size_t wanted(0x1000000000ul); //64G
-    void * everything = _osMemory.reserve(wanted);
-    if (everything) {
-        for (BlockIdT i = blockId(everything), m = blockId(everything) + (wanted / BlockSize); i < m; i++) {
-            if (i > BlockCount) {
-                abort();
-            }
-            _blockList[i].sizeClass(UNUSED_BLOCK);
-            _blockList[i].freeChainLength(m-i);
-        }
-        _freeList.add(blockId(everything));
-    }
-    _nextLogLimit = std::max(size_t(end()) + _nextLogLimit, _nextLogLimit);
-}
-
-template<typename MemBlockPtrT>
-size_t
-DataSegment<MemBlockPtrT>::freeSize() const {
-    return _freeList.numFreeBlocks() * BlockSize;
-}
-
-template<typename MemBlockPtrT>
-void * DataSegment<MemBlockPtrT>::getBlock(size_t & oldBlockSize, SizeClassT sc)
-{
-    const size_t minBlockSize = std::max(BlockSize, _osMemory.getMinBlockSize());
-    oldBlockSize = ((oldBlockSize + (minBlockSize-1))/minBlockSize)*minBlockSize;
-    BlockIdT numBlocks((oldBlockSize + (BlockSize - 1)) / BlockSize);
-    size_t blockSize = BlockSize * numBlocks;
-    void * newBlock(nullptr);
-    {
-        Guard sync(_mutex);
-        newBlock = _freeList.sub(numBlocks);
-        if ( newBlock == nullptr ) {
-            newBlock = _unMappedList.sub(numBlocks);
-            if ( newBlock == nullptr ) {
-                BlockIdT nextBlock = blockId(end());
-                BlockIdT startBlock = _freeList.lastBlock(nextBlock);
-                if (startBlock) {
-                    size_t adjustedBlockSize = blockSize - BlockSize*(nextBlock-startBlock);
-                    newBlock = _osMemory.get(adjustedBlockSize);
-                    if (newBlock != nullptr) {
-                        assert (newBlock == fromBlockId(nextBlock));
-                        _freeList.removeLastBlock();
-                        newBlock = fromBlockId(startBlock);
-                        _partialExtension++;
-                    }
-                } else {
-                    newBlock = _osMemory.get(blockSize);
-                }
-            } else {
-                bool result(_osMemory.reclaim(newBlock, blockSize));
-                assert (result);
-                (void) result;
-            }
-        } else {
-            DEBUG(fprintf(stderr, "Reuse segment %p(%d, %d)\n", newBlock, sc, numBlocks));
-        }
-    }
-    if (newBlock == (void *) -1) {
-        newBlock = nullptr;
-        blockSize = 0;
-    } else if (newBlock == nullptr) {
-        blockSize = 0;
-    } else {
-        assert(blockId(newBlock)+numBlocks < BlockCount);
-        // assumes _osMemory.get will always return a value that does not make
-        // "i" overflow the _blockList array; this will break when hitting the
-        // 2T address space boundary.
-        for (BlockIdT i = blockId(newBlock), m = blockId(newBlock) + numBlocks; i < m; i++) {
-            _blockList[i].sizeClass(sc);
-            _blockList[i].freeChainLength(m-i);
-            _blockList[i].realNumBlocks(m-i);
-        }
-    }
-    oldBlockSize = blockSize;
-    if (newBlock == nullptr) {
-        static int recurse = 0;
-        if (recurse++ == 0) {
-            perror("Failed extending datasegment: ");
-            assert(false);
-        }
-        return nullptr;
-    }
-    checkAndLogBigSegment();
-    return newBlock;
-}
-
-template<typename MemBlockPtrT>
-void DataSegment<MemBlockPtrT>::checkAndLogBigSegment()
-{
-    if (size_t(end()) >= _nextLogLimit) {
-        fprintf(stderr, "Datasegment is growing ! Start:%p - End:%p : nextLogLimit = %lx\n", start(), end(), _nextLogLimit);
-        _nextLogLimit = ((size_t(end()) + _bigIncrement)/_bigIncrement)*_bigIncrement;
-        static int recurse = 0;
-        if (recurse++ == 0) {
-            if (_bigSegmentLogLevel > 0) {
-                MemBlockPtrT::dumpInfo(_bigSegmentLogLevel);
-            }
-        }
-        recurse--;
-    }
-}
-
-template<typename MemBlockPtrT>
-void DataSegment<MemBlockPtrT>::returnBlock(void *ptr)
-{
-    BlockIdT bId(blockId(ptr));
-    SizeClassT sc = _blockList[bId].sizeClass();
-    size_t bsz = MemBlockPtrT::classSize(sc);
-    if (bsz >= BlockSize) {
-        BlockIdT numBlocks = bsz / BlockSize;
-        if (numBlocks > _blockList[bId].realNumBlocks()) {
-            numBlocks = _blockList[bId].realNumBlocks();
-        }
-        assert(_blockList[bId].freeChainLength() >= numBlocks);
-        if ((_unmapSize < bsz) && _osMemory.release(ptr, numBlocks*BlockSize)) {
-            for(BlockIdT i=0; i < numBlocks; i++) {
-                BlockT & b = _blockList[bId + i];
-                b.sizeClass(UNMAPPED_BLOCK);
-                b.freeChainLength(numBlocks - i);
-            }
-            {
-                Guard sync(_mutex);
-                _unMappedList.add(bId);
-            }
-        } else {
-            for(BlockIdT i=0; i < numBlocks; i++) {
-                BlockT & b = _blockList[bId + i];
-                b.sizeClass(FREE_BLOCK);
-                b.freeChainLength(numBlocks - i);
-            }
-            {
-                Guard sync(_mutex);
-                _freeList.add(bId);
-            }
-        }
-    }
-}
-
-namespace {
-
-std::vector<uint32_t>
-createHistogram(bool allThreads, uint32_t maxThreads) {
-    if (allThreads) {
-        return std::vector<uint32_t>(maxThreads, 0);
-    }
-    return std::vector<uint32_t>();
-}
-
-}
-template<typename MemBlockPtrT>
-size_t DataSegment<MemBlockPtrT>::infoThread(FILE * os, int level, uint32_t thread, SizeClassT sct, uint32_t maxThreadId) const
-{
-    using CallGraphLT = CallGraph<typename MemBlockPtrT::Stack, 0x10000, Index>;
-    bool allThreads(thread == 0);
-    size_t usedCount(0);
-    size_t checkedCount(0);
-    size_t allocatedCount(0);
-    size_t notAccounted(0);
-    size_t invalidCallStacks(0);
-    std::unique_ptr<CallGraphLT> callGraph = std::make_unique<CallGraphLT>();
-    std::vector<uint32_t> threadHistogram = createHistogram(allThreads, maxThreadId);
-    for (size_t i=0; i < NELEMS(_blockList); ) {
-        const BlockT & b = _blockList[i];
-        SizeClassT sc = b.sizeClass();
-        if (sc == sct) {
-            size_t sz(MemBlockPtrT::classSize(sc));
-            size_t numB(b.freeChainLength());
-            for(char *m((char *)(fromBlockId(i))), *em((char*)(fromBlockId(i+numB))); (m + sz) <= em; m += sz) {
-                MemBlockPtrT mem(m,0,false);
-                checkedCount++;
-                if (mem.allocated()) {
-                    allocatedCount++;
-                    if (allThreads || (mem.threadId() == thread)) {
-                        usedCount++;
-                        if (mem.threadId() < threadHistogram.size()) {
-                            threadHistogram[mem.threadId()]++;
-                        }
-                        if (usedCount < _allocs2Show) {
-                            mem.info(os, level);
-                        }
-                        if (mem.callStackLen() && mem.callStack()[0].valid()) {
-                            size_t csl(mem.callStackLen());
-                            for (size_t j(0); j < csl; j++) {
-                                if ( ! mem.callStack()[j].valid()) {
-                                    csl = j;
-                                }
-                            }
-                            if ( ! callGraph->addStack(mem.callStack(), csl)) {
-                                notAccounted++;
-                            }
-                        } else {
-                            if (mem.callStackLen()) {
-                                invalidCallStacks++;
-                            }
-                        }
-                    }
-                }
-            }
-            i += numB;
-        } else {
-            i++;
-        }
-    }
-    if (checkedCount == 0) {
-        return 0;
-    }
-
-    fprintf(os, "\nCallTree SC %d(Checked=%ld, GlobalAlloc=%ld(%ld%%)," "By%sAlloc=%ld(%2.2f%%) NotAccountedDue2FullGraph=%ld InvalidCallStacks=%ld:\n",
-            sct, checkedCount, allocatedCount, checkedCount ? allocatedCount*100/checkedCount : 0,
-            allThreads ? "Us" : "Me",
-            usedCount, checkedCount ? static_cast<double>(usedCount*100)/checkedCount : 0.0, notAccounted, invalidCallStacks);
-    if ( ! callGraph->empty()) {
-        Aggregator agg;
-        DumpGraph<typename CallGraphLT::Node> dump(&agg, "{ ", " }");
-        callGraph->traverseDepth(dump);;
-        asciistream ost;
-        ost << agg;
-        fprintf(os, "%s\n", ost.c_str());
-    }
-    if ( !threadHistogram.empty()) {
-        uint32_t nonZeroCount(0);
-        for (uint32_t i(0); i < threadHistogram.size(); i++) {
-            if (threadHistogram[i] > 0) {
-                nonZeroCount++;
-            }
-        }
-        using Pair = std::pair<uint32_t, uint32_t>;
-        std::vector<Pair> orderedHisto;
-        orderedHisto.reserve(nonZeroCount);
-        for (uint32_t i(0); i < threadHistogram.size(); i++) {
-            if (threadHistogram[i] > 0) {
-                orderedHisto.emplace_back(i, threadHistogram[i]);
-            }
-        }
-        std::sort(orderedHisto.begin(), orderedHisto.end(), [](const Pair & a, const Pair & b) { return a.second > b.second;});
-        fprintf(os, "ThreadHistogram SC %d: [", sct);
-
-        bool first(true);
-        for (const Pair & entry : orderedHisto) {
-            if ( !first) {
-                fprintf(os, ", ");
-            }
-            fprintf(os, "{%u, %u}", entry.first, entry.second);
-            first = false;
-        }
-        fprintf(os, " ]");
-    }
-    return usedCount;
-}
-
-template<typename MemBlockPtrT>
-void DataSegment<MemBlockPtrT>::info(FILE * os, size_t level)
-{
-    fprintf(os, "Start at %p, End at %p(%p) size(%ld) partialExtension(%ld) NextLogLimit(%lx) logLevel(%ld)\n",
-            _osMemory.getStart(), _osMemory.getEnd(), sbrk(0), dataSize(), _partialExtension, _nextLogLimit, level);
-    size_t numAllocatedBlocks(0);
-    size_t numFreeBlocks = _freeList.numFreeBlocks();
-    _freeList.info(os);
-    _unMappedList.info(os);
-    if (level >= 1) {
-#ifdef PRINT_ALOT
-        SizeClassT oldSc(-17);
-        size_t oldChainLength(0);
-#endif
-        size_t scTable[32+NUM_ADMIN_CLASSES];
-        memset(scTable, 0, sizeof(scTable));
-        for (size_t i=0; (i < NELEMS(_blockList)) && ((i*BlockSize) < dataSize()); i++) {
-            BlockT & b = _blockList[i];
-#ifdef PRINT_ALOT
-            if ((b.sizeClass() != oldSc)
-                || ((oldChainLength < (b.freeChainLength()+1))
-                    && b.freeChainLength()))
-            {
-                scTable[b.sizeClass()+NUM_ADMIN_CLASSES] += b.freeChainLength();
-                oldSc = b.sizeClass();
-                if (level & 0x2) {
-                    fprintf(os, "Block %d at address %p with chainLength %d "
-                                "freeCount %d sizeClass %d and size %d\n",
-                            i, fromBlockId(i), b.freeChainLength(), b.freeCount(),
-                            b.sizeClass(), classSize(b.sizeClass()));
-                }
-            }
-            oldChainLength = b.freeChainLength();
-#else
-            scTable[b.sizeClass()+NUM_ADMIN_CLASSES]++;
-#endif
-        }
-        size_t numAdminBlocks(0);
-        for(size_t i=0; i < NUM_ADMIN_CLASSES; i++) {
-            if (scTable[i] != 0ul) {
-                numAllocatedBlocks += scTable[i];
-                numAdminBlocks += scTable[i];
-                fprintf(os, "SizeClass %2ld(%s) has %5ld blocks with %10lu bytes\n",
-                        i-NUM_ADMIN_CLASSES, getAdminClassName(i-NUM_ADMIN_CLASSES), scTable[i], scTable[i]*BlockSize);
-            }
-        }
-        for(size_t i=NUM_ADMIN_CLASSES; i < NELEMS(scTable); i++) {
-            if (scTable[i] != 0ul) {
-                numAllocatedBlocks += scTable[i];
-                fprintf(os, "SizeClass %2ld has %5ld blocks with %10lu bytes\n",
-                        i-NUM_ADMIN_CLASSES, scTable[i], scTable[i]*BlockSize);
-            }
-        }
-        size_t total(dataSize()/BlockSize);
-        fprintf(os, "Usage: Total=%ld(100%%), admin=%ld(%ld%%), unused=%ld(%ld%%), allocated=%ld(%ld%%)\n",
-                total*BlockSize,
-                numAdminBlocks*BlockSize, numAdminBlocks*100/total,
-                numFreeBlocks*BlockSize, numFreeBlocks*100/total,
-                (numAllocatedBlocks-numAdminBlocks)*BlockSize, (numAllocatedBlocks-numAdminBlocks)*100/total);
-    }
-}
-
-}
diff --git a/vespamalloc/src/vespamalloc/malloc/datasegmentd.cpp b/vespamalloc/src/vespamalloc/malloc/datasegmentd.cpp
deleted file mode 100644
index 3842aae6297..00000000000
--- a/vespamalloc/src/vespamalloc/malloc/datasegmentd.cpp
+++ /dev/null
@@ -1,9 +0,0 @@
-// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
-#include "datasegment.hpp"
-#include "memblockboundscheck_d.h"
-
-namespace vespamalloc::segment {
-
-template class DataSegment<MemBlockBoundsCheck>;
-
-}
diff --git a/vespamalloc/src/vespamalloc/malloc/datasegmentdst.cpp b/vespamalloc/src/vespamalloc/malloc/datasegmentdst.cpp
deleted file mode 100644
index 56504050a64..00000000000
--- a/vespamalloc/src/vespamalloc/malloc/datasegmentdst.cpp
+++ /dev/null
@@ -1,9 +0,0 @@
-// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
-#include "datasegment.hpp"
-#include "memblockboundscheck_dst.h"
-
-namespace vespamalloc::segment {
-
-template class DataSegment<MemBlockBoundsCheck>;
-
-}
diff --git a/vespamalloc/src/vespamalloc/malloc/globalpool.h b/vespamalloc/src/vespamalloc/malloc/globalpool.h
index ffe15921f3d..807d1498633 100644
--- a/vespamalloc/src/vespamalloc/malloc/globalpool.h
+++ b/vespamalloc/src/vespamalloc/malloc/globalpool.h
@@ -13,7 +13,7 @@ namespace vespamalloc {
 template <typename MemBlockPtrT>
 class AllocPoolT
 {
-    using DataSegment = segment::DataSegment<MemBlockPtrT>;
+    using DataSegment = segment::DataSegment;
 public:
     typedef AFList<MemBlockPtrT> ChunkSList;
     AllocPoolT(DataSegment & ds);
diff --git a/vespamalloc/src/vespamalloc/malloc/malloc.h b/vespamalloc/src/vespamalloc/malloc/malloc.h
index ce9a6e71d78..5ea9b48b421 100644
--- a/vespamalloc/src/vespamalloc/malloc/malloc.h
+++ b/vespamalloc/src/vespamalloc/malloc/malloc.h
@@ -11,10 +11,23 @@
 
 namespace vespamalloc {
 
+template <typename MemBlockPtrT>
+class MemblockInfoT final : public segment::IMemblockInfo {
+public:
+    MemblockInfoT(void *ptr) : _mem(ptr, 0, false) { }
+    bool allocated() const override { return _mem.allocated(); }
+    uint32_t threadId() const override { return _mem.threadId(); }
+    void info(FILE * os, int level) const override { _mem.info(os, level); }
+    uint32_t callStackLen() const override { return _mem.callStackLen(); }
+    const StackEntry * callStack() const override { return _mem.callStack(); }
+private:
+    MemBlockPtrT _mem;
+};
+
 template <typename MemBlockPtrT, typename ThreadListT>
-class MemoryManager : public IAllocator
+class MemoryManager : public IAllocator, public segment::IHelper
 {
-    using DataSegment = segment::DataSegment<MemBlockPtrT>;
+    using DataSegment = segment::DataSegment;
 public:
     MemoryManager(size_t logLimitAtStart);
     ~MemoryManager() override;
@@ -25,6 +38,12 @@ public:
         MemBlockPtrT::Stack::setStopAddress(returnAddressStop);
     }
     size_t getMaxNumThreads() const override { return _threadList.getMaxNumThreads(); }
+    size_t classSize(SizeClassT sc) const override { return MemBlockPtrT::classSize(sc); }
+    void dumpInfo(int level) const override { MemBlockPtrT::dumpInfo(level); }
+    std::unique_ptr<segment::IMemblockInfo>
+    createMemblockInfo(void * ptr) const override {
+        return std::make_unique<MemblockInfoT<MemBlockPtrT>>(ptr);
+    }
 
     int mallopt(int param, int value);
     void *malloc(size_t sz);
@@ -54,7 +73,7 @@ public:
     size_t getMinSizeForAlignment(size_t align, size_t sz) const { return MemBlockPtrT::getMinSizeForAlignment(align, sz); }
     size_t sizeClass(const void *ptr) const { return _segment.sizeClass(ptr); }
     size_t usable_size(void *ptr) const {
-        return MemBlockPtrT::usable_size(ptr, _segment.getMaxSize(ptr));
+        return MemBlockPtrT::usable_size(ptr, _segment.getMaxSize<MemBlockPtrT>(ptr));
     }
 
     void *calloc(size_t nelm, size_t esz) {
@@ -95,7 +114,7 @@ template <typename MemBlockPtrT, typename ThreadListT>
 MemoryManager<MemBlockPtrT, ThreadListT>::MemoryManager(size_t logLimitAtStart) :
     IAllocator(),
     _prAllocLimit(logLimitAtStart),
-    _segment(),
+    _segment(*this),
     _allocPool(_segment),
     _mmapPool(),
     _threadList(_allocPool, _mmapPool)
@@ -226,7 +245,7 @@ void * MemoryManager<MemBlockPtrT, ThreadListT>::realloc(void *oldPtr, size_t sz
     }
     SizeClassT sc(_segment.sizeClass(oldPtr));
     if (sc >= 0) {
-        size_t oldSz(_segment.getMaxSize(oldPtr));
+        size_t oldSz(_segment.getMaxSize<MemBlockPtrT>(oldPtr));
         if (sz > oldSz) {
            ptr = malloc(sz);
             if (ptr) {
diff --git a/vespamalloc/src/vespamalloc/malloc/memblock.h b/vespamalloc/src/vespamalloc/malloc/memblock.h
index 1d0b0e3da00..f7b5923ecff 100644
--- a/vespamalloc/src/vespamalloc/malloc/memblock.h
+++ b/vespamalloc/src/vespamalloc/malloc/memblock.h
@@ -40,7 +40,7 @@ public:
     bool allocated() const { return false; }
     uint32_t threadId() const { return 0; }
     void info(FILE *, unsigned level=0) const { (void) level; }
-    Stack * callStack() { return nullptr; }
+    const Stack * callStack() const { return nullptr; }
     size_t callStackLen() const { return 0; }
 
     void fillMemory(size_t) { }
diff --git a/vespamalloc/src/vespamalloc/malloc/memblockboundscheck.h b/vespamalloc/src/vespamalloc/malloc/memblockboundscheck.h
index d6421b9a7dd..b465a4e834c 100644
--- a/vespamalloc/src/vespamalloc/malloc/memblockboundscheck.h
+++ b/vespamalloc/src/vespamalloc/malloc/memblockboundscheck.h
@@ -24,7 +24,7 @@ public:
     bool allocated() const { return (static_cast<uint32_t*>(_ptr)[3] == ALLOC_MAGIC); }
     size_t size() const { return static_cast<const uint32_t *>(_ptr)[0]; }
     size_t alignment() const { return static_cast<const uint32_t *>(_ptr)[1]; }
-    uint32_t threadId()  const { return static_cast<uint32_t *>(_ptr)[2]; }
+    uint32_t threadId() const { return static_cast<uint32_t *>(_ptr)[2]; }
     Stack * callStack() { return reinterpret_cast<Stack *>((char *)_ptr + size() + alignment()); }
     const Stack * callStack() const { return reinterpret_cast<const Stack *>((const char *)_ptr + size() + alignment()); }
     void fillMemory(size_t sz) {
diff --git a/vespamalloc/src/vespamalloc/malloc/threadpool.h b/vespamalloc/src/vespamalloc/malloc/threadpool.h
index 0d7baa85781..f49e6cf24af 100644
--- a/vespamalloc/src/vespamalloc/malloc/threadpool.h
+++ b/vespamalloc/src/vespamalloc/malloc/threadpool.h
@@ -15,7 +15,7 @@ class ThreadPoolT
 public:
     using ChunkSList = AFList<MemBlockPtrT>;
     using AllocPool = AllocPoolT<MemBlockPtrT>;
-    using DataSegment = segment::DataSegment<MemBlockPtrT>;
+    using DataSegment = segment::DataSegment;
     ThreadPoolT();
     ~ThreadPoolT();
     void setPool(AllocPool & allocPool, MMapPool & mmapPool) {
diff --git a/vespamalloc/src/vespamalloc/util/callgraph.h b/vespamalloc/src/vespamalloc/util/callgraph.h
index 5dffe7f6a31..2d66fc8b717 100644
--- a/vespamalloc/src/vespamalloc/util/callgraph.h
+++ b/vespamalloc/src/vespamalloc/util/callgraph.h
@@ -21,7 +21,7 @@ public:
     size_t count() const { return _count; }
     void content(const T & v) { _content = v; }
     template <typename Store>
-    bool addStack(T * stack, size_t nelem, Store & store);
+    bool addStack(const T * stack, size_t nelem, Store & store);
     template<typename Object>
     void traverseDepth(size_t depth, size_t width, Object func);
     template<typename Object>
@@ -38,7 +38,7 @@ private:
 
 template<typename T, typename AddSub>
 template <typename Store>
-bool CallGraphNode<T, AddSub>::addStack(T * stack, size_t nelem, Store & store) {
+bool CallGraphNode<T, AddSub>::addStack(const T * stack, size_t nelem, Store & store) {
     bool retval(false);
     if (nelem == 0) {
         retval = true;
@@ -125,7 +125,7 @@ public:
     {
         checkOrSetRoot(root);
     }
-    bool addStack(Content * stack, size_t nelem) {
+    bool addStack(const Content * stack, size_t nelem) {
         checkOrSetRoot(stack[0]);
         return _root->addStack(stack, nelem, *_nodeStore);
     }