author     Henning Baldersheim <balder@yahoo-inc.com>  2022-02-14 18:31:21 +0100
committer  GitHub <noreply@github.com>                 2022-02-14 18:31:21 +0100
commit     4ef3877534bd0db8e1c23b2c05d889bfd53b96ef (patch)
tree       ccfdf7035883d3983dfbd954c015c171cfe2fb0a
parent     fb3bb1af840b9b22240e9563d991d32b6a86bfb2 (diff)
parent     1a0a23f1fc3f6ad6ebf847fe8c48fb9df3cae6a4 (diff)
Merge pull request #21181 from vespa-engine/balder/avoid-templates-in-datasegment
Replace uses of templates by using an interface instead.
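
For context, the shape of this refactoring: DataSegment used to be a class template parameterized on the memory-block type, so every malloc variant needed its own explicit instantiation in a separate translation unit (datasegmentd.cpp, datasegmentdst.cpp). The commit inverts the dependency: DataSegment now depends on two small virtual interfaces, segment::IHelper and segment::IMemBlockInfo, which MemoryManager implements on behalf of its concrete block type, and the whole implementation moves into a single datasegment.cpp. Below is a minimal, self-contained sketch of that pattern; the names (Segment, Manager, ToyBlock, IBlockInfo) are illustrative stand-ins, not the real vespamalloc types.

#include <cstddef>
#include <cstdio>
#include <memory>

// The narrow interfaces the segment actually needs from the block type.
class IBlockInfo {                       // cf. segment::IMemBlockInfo
public:
    virtual ~IBlockInfo() = default;
    virtual bool allocated() const = 0;
};

class IHelper {                          // cf. segment::IHelper
public:
    virtual ~IHelper() = default;
    virtual size_t classSize(int sc) const = 0;
    virtual std::unique_ptr<IBlockInfo> createBlockInfo(void *ptr) const = 0;
};

// The segment is now a plain class: compiled once, shared by every variant.
class Segment {
public:
    explicit Segment(const IHelper & helper) : _helper(helper) {}
    size_t sizeOf(int sc) const { return _helper.classSize(sc); }
private:
    const IHelper & _helper;
};

// A toy block type standing in for MemBlock / MemBlockBoundsCheck.
struct ToyBlock {
    explicit ToyBlock(void *p) : _p(p) {}
    bool allocated() const { return _p != nullptr; }
    static size_t classSize(int sc) { return size_t(1) << sc; }
    void *_p;
};

// Adapter that puts the concrete block type behind the interface,
// mirroring MemBlockInfoT in malloc.h.
template <typename BlockT>
class BlockInfoT final : public IBlockInfo {
public:
    explicit BlockInfoT(void *ptr) : _block(ptr) {}
    bool allocated() const override { return _block.allocated(); }
private:
    BlockT _block;
};

// The manager implements the interface for its block type and hands itself
// to the segment, as MemoryManager now does with _segment(*this).
template <typename BlockT>
class Manager final : public IHelper {
public:
    Manager() : _segment(*this) {}
    size_t classSize(int sc) const override { return BlockT::classSize(sc); }
    std::unique_ptr<IBlockInfo> createBlockInfo(void *ptr) const override {
        return std::make_unique<BlockInfoT<BlockT>>(ptr);
    }
    const Segment & segment() const { return _segment; }
private:
    Segment _segment;
};

int main() {
    Manager<ToyBlock> manager;
    printf("class 5 -> %zu bytes\n", manager.segment().sizeOf(5));
    int probe = 0;
    printf("allocated: %d\n", int(manager.createBlockInfo(&probe)->allocated()));
    return 0;
}

The trade-off is a virtual call on the introspective paths (classSize, createMemblockInfo) in exchange for compiling DataSegment once instead of once per variant; the one hot accessor that still needs the concrete type, getMaxSize, becomes a template member function instead (see datasegment.h below).
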
-rw-r--r--  vespamalloc/src/vespamalloc/malloc/CMakeLists.txt         |   6
-rw-r--r--  vespamalloc/src/vespamalloc/malloc/common.h               |   4
-rw-r--r--  vespamalloc/src/vespamalloc/malloc/datasegment.cpp        | 329
-rw-r--r--  vespamalloc/src/vespamalloc/malloc/datasegment.h          |  47
-rw-r--r--  vespamalloc/src/vespamalloc/malloc/datasegment.hpp        | 339
-rw-r--r--  vespamalloc/src/vespamalloc/malloc/datasegmentd.cpp       |   9
-rw-r--r--  vespamalloc/src/vespamalloc/malloc/datasegmentdst.cpp     |   9
-rw-r--r--  vespamalloc/src/vespamalloc/malloc/globalpool.h           |   2
-rw-r--r--  vespamalloc/src/vespamalloc/malloc/malloc.h               |  29
-rw-r--r--  vespamalloc/src/vespamalloc/malloc/memblock.h             |   2
-rw-r--r--  vespamalloc/src/vespamalloc/malloc/memblockboundscheck.h  |   2
-rw-r--r--  vespamalloc/src/vespamalloc/malloc/threadpool.h           |   2
-rw-r--r--  vespamalloc/src/vespamalloc/util/callgraph.h              |   6
13 files changed, 394 insertions(+), 392 deletions(-)
diff --git a/vespamalloc/src/vespamalloc/malloc/CMakeLists.txt b/vespamalloc/src/vespamalloc/malloc/CMakeLists.txt
index c3a49161f32..985cd9948ad 100644
--- a/vespamalloc/src/vespamalloc/malloc/CMakeLists.txt
+++ b/vespamalloc/src/vespamalloc/malloc/CMakeLists.txt
@@ -24,7 +24,7 @@ vespa_add_library(vespamalloc_mallocd OBJECT
threadproxy.cpp
memblockboundscheck.cpp
memblockboundscheck_d.cpp
- datasegmentd.cpp
+ datasegment.cpp
globalpoold.cpp
threadpoold.cpp
threadlistd.cpp
@@ -40,7 +40,7 @@ vespa_add_library(vespamalloc_mallocdst16 OBJECT
threadproxy.cpp
memblockboundscheck.cpp
memblockboundscheck_dst.cpp
- datasegmentdst.cpp
+ datasegment.cpp
globalpooldst.cpp
threadpooldst.cpp
threadlistdst.cpp
@@ -57,7 +57,7 @@ vespa_add_library(vespamalloc_mallocdst16_nl OBJECT
threadproxy.cpp
memblockboundscheck.cpp
memblockboundscheck_dst.cpp
- datasegmentdst.cpp
+ datasegment.cpp
globalpooldst.cpp
threadpooldst.cpp
threadlistdst.cpp
diff --git a/vespamalloc/src/vespamalloc/malloc/common.h b/vespamalloc/src/vespamalloc/malloc/common.h
index 65a86b89bf6..892df72def4 100644
--- a/vespamalloc/src/vespamalloc/malloc/common.h
+++ b/vespamalloc/src/vespamalloc/malloc/common.h
@@ -67,11 +67,11 @@ class CommonT
public:
static constexpr size_t MAX_ALIGN = 0x200000ul;
enum {MinClassSize = MinClassSizeC};
- static inline constexpr SizeClassT sizeClass(size_t sz) {
+ static constexpr SizeClassT sizeClass(size_t sz) noexcept {
SizeClassT tmp(msbIdx(sz - 1) - (MinClassSizeC - 1));
return (sz <= (1 << MinClassSizeC )) ? 0 : tmp;
}
- static inline constexpr size_t classSize(SizeClassT sc) { return (size_t(1) << (sc + MinClassSizeC)); }
+ static constexpr size_t classSize(SizeClassT sc) noexcept { return (size_t(1) << (sc + MinClassSizeC)); }
};
inline void crash() { *((volatile unsigned *) nullptr) = 0; }
diff --git a/vespamalloc/src/vespamalloc/malloc/datasegment.cpp b/vespamalloc/src/vespamalloc/malloc/datasegment.cpp
index d794fb88419..4c815476dab 100644
--- a/vespamalloc/src/vespamalloc/malloc/datasegment.cpp
+++ b/vespamalloc/src/vespamalloc/malloc/datasegment.cpp
@@ -1,9 +1,332 @@
// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
-#include "datasegment.hpp"
-#include "memblock.h"
+
+#include "datasegment.h"
namespace vespamalloc::segment {
-template class DataSegment<MemBlock>;
+DataSegment::~DataSegment() = default;
+
+#define INIT_LOG_LIMIT 0x400000000ul // 16G
+
+DataSegment::DataSegment(const IHelper & helper) :
+ _osMemory(BlockSize),
+ _bigSegmentLogLevel(0),
+ _bigIncrement (0x4000000),
+ _allocs2Show (8),
+ _unmapSize(0x100000),
+ _nextLogLimit(INIT_LOG_LIMIT),
+ _partialExtension(0),
+ _helper(helper),
+ _mutex(),
+ _freeList(_blockList),
+ _unMappedList(_blockList)
+{
+ size_t wanted(0x1000000000ul); //64G
+ void * everything = _osMemory.reserve(wanted);
+ if (everything) {
+ for (BlockIdT i = blockId(everything), m = blockId(everything) + (wanted / BlockSize); i < m; i++) {
+ if (i > BlockCount) {
+ abort();
+ }
+ _blockList[i].sizeClass(UNUSED_BLOCK);
+ _blockList[i].freeChainLength(m-i);
+ }
+ _freeList.add(blockId(everything));
+ }
+ _nextLogLimit = std::max(size_t(end()) + _nextLogLimit, _nextLogLimit);
+}
+
+size_t
+DataSegment::freeSize() const {
+ return _freeList.numFreeBlocks() * BlockSize;
+}
+
+void * DataSegment::getBlock(size_t & oldBlockSize, SizeClassT sc)
+{
+ const size_t minBlockSize = std::max(BlockSize, _osMemory.getMinBlockSize());
+ oldBlockSize = ((oldBlockSize + (minBlockSize-1))/minBlockSize)*minBlockSize;
+ BlockIdT numBlocks((oldBlockSize + (BlockSize - 1)) / BlockSize);
+ size_t blockSize = BlockSize * numBlocks;
+ void * newBlock;
+ {
+ Guard sync(_mutex);
+ newBlock = _freeList.sub(numBlocks);
+ if ( newBlock == nullptr ) {
+ newBlock = _unMappedList.sub(numBlocks);
+ if ( newBlock == nullptr ) {
+ BlockIdT nextBlock = blockId(end());
+ BlockIdT startBlock = _freeList.lastBlock(nextBlock);
+ if (startBlock) {
+ size_t adjustedBlockSize = blockSize - BlockSize*(nextBlock-startBlock);
+ newBlock = _osMemory.get(adjustedBlockSize);
+ if (newBlock != nullptr) {
+ assert (newBlock == fromBlockId(nextBlock));
+ _freeList.removeLastBlock();
+ newBlock = fromBlockId(startBlock);
+ _partialExtension++;
+ }
+ } else {
+ newBlock = _osMemory.get(blockSize);
+ }
+ } else {
+ bool result(_osMemory.reclaim(newBlock, blockSize));
+ assert (result);
+ (void) result;
+ }
+ } else {
+ DEBUG(fprintf(stderr, "Reuse segment %p(%d, %d)\n", newBlock, sc, numBlocks));
+ }
+ }
+ if (newBlock == (void *) -1) {
+ newBlock = nullptr;
+ blockSize = 0;
+ } else if (newBlock == nullptr) {
+ blockSize = 0;
+ } else {
+ assert(blockId(newBlock)+numBlocks < BlockCount);
+ // assumes _osMemory.get will always return a value that does not make
+ // "i" overflow the _blockList array; this will break when hitting the
+ // 2T address space boundary.
+ for (BlockIdT i = blockId(newBlock), m = blockId(newBlock) + numBlocks; i < m; i++) {
+ _blockList[i].sizeClass(sc);
+ _blockList[i].freeChainLength(m-i);
+ _blockList[i].realNumBlocks(m-i);
+ }
+ }
+ oldBlockSize = blockSize;
+ if (newBlock == nullptr) {
+ static int recurse = 0;
+ if (recurse++ == 0) {
+ perror("Failed extending datasegment: ");
+ assert(false);
+ }
+ return nullptr;
+ }
+ checkAndLogBigSegment();
+ return newBlock;
+}
+
+void DataSegment::checkAndLogBigSegment()
+{
+ if (size_t(end()) >= _nextLogLimit) {
+ fprintf(stderr, "Datasegment is growing ! Start:%p - End:%p : nextLogLimit = %lx\n", start(), end(), _nextLogLimit);
+ _nextLogLimit = ((size_t(end()) + _bigIncrement)/_bigIncrement)*_bigIncrement;
+ static int recurse = 0;
+ if (recurse++ == 0) {
+ if (_bigSegmentLogLevel > 0) {
+ _helper.dumpInfo(_bigSegmentLogLevel);
+ }
+ }
+ recurse--;
+ }
+}
+
+void DataSegment::returnBlock(void *ptr)
+{
+ BlockIdT bId(blockId(ptr));
+ SizeClassT sc = _blockList[bId].sizeClass();
+ size_t bsz = _helper.classSize(sc);
+ if (bsz >= BlockSize) {
+ BlockIdT numBlocks = bsz / BlockSize;
+ if (numBlocks > _blockList[bId].realNumBlocks()) {
+ numBlocks = _blockList[bId].realNumBlocks();
+ }
+ assert(_blockList[bId].freeChainLength() >= numBlocks);
+ if ((_unmapSize < bsz) && _osMemory.release(ptr, numBlocks*BlockSize)) {
+ for(BlockIdT i=0; i < numBlocks; i++) {
+ BlockT & b = _blockList[bId + i];
+ b.sizeClass(UNMAPPED_BLOCK);
+ b.freeChainLength(numBlocks - i);
+ }
+ {
+ Guard sync(_mutex);
+ _unMappedList.add(bId);
+ }
+ } else {
+ for(BlockIdT i=0; i < numBlocks; i++) {
+ BlockT & b = _blockList[bId + i];
+ b.sizeClass(FREE_BLOCK);
+ b.freeChainLength(numBlocks - i);
+ }
+ {
+ Guard sync(_mutex);
+ _freeList.add(bId);
+ }
+ }
+ }
+}
+
+namespace {
+
+std::vector<uint32_t>
+createHistogram(bool allThreads, uint32_t maxThreads) {
+ if (allThreads) {
+ return std::vector<uint32_t>(maxThreads, 0);
+ }
+ return std::vector<uint32_t>();
+}
+
+}
+
+size_t DataSegment::infoThread(FILE * os, int level, uint32_t thread, SizeClassT sct, uint32_t maxThreadId) const
+{
+ using CallGraphLT = CallGraph<StackEntry, 0x10000, Index>;
+ bool allThreads(thread == 0);
+ size_t usedCount(0);
+ size_t checkedCount(0);
+ size_t allocatedCount(0);
+ size_t notAccounted(0);
+ size_t invalidCallStacks(0);
+ std::unique_ptr<CallGraphLT> callGraph = std::make_unique<CallGraphLT>();
+ std::vector<uint32_t> threadHistogram = createHistogram(allThreads, maxThreadId);
+ for (size_t i=0; i < NELEMS(_blockList); ) {
+ const BlockT & b = _blockList[i];
+ SizeClassT sc = b.sizeClass();
+ if (sc == sct) {
+ size_t sz = _helper.classSize(sc);
+ size_t numB(b.freeChainLength());
+ for(char *m((char *)(fromBlockId(i))), *em((char*)(fromBlockId(i+numB))); (m + sz) <= em; m += sz) {
+ (void) m;
+ (void) em;
+ auto mem = _helper.createMemblockInfo(m);
+ checkedCount++;
+ if (mem->allocated()) {
+ allocatedCount++;
+ if (allThreads || (mem->threadId() == thread)) {
+ usedCount++;
+ if (mem->threadId() < threadHistogram.size()) {
+ threadHistogram[mem->threadId()]++;
+ }
+ if (usedCount < _allocs2Show) {
+ mem->info(os, level);
+ }
+ if (mem->callStackLen() && mem->callStack()[0].valid()) {
+ size_t csl(mem->callStackLen());
+ for (size_t j(0); j < csl; j++) {
+ if ( ! mem->callStack()[j].valid()) {
+ csl = j;
+ }
+ }
+ if ( ! callGraph->addStack(mem->callStack(), csl)) {
+ notAccounted++;
+ }
+ } else {
+ if (mem->callStackLen()) {
+ invalidCallStacks++;
+ }
+ }
+ }
+ }
+ }
+ i += numB;
+ } else {
+ i++;
+ }
+ }
+ if (checkedCount == 0) return 0;
+
+ fprintf(os, "\nCallTree SC %d(Checked=%ld, GlobalAlloc=%ld(%ld%%)," "By%sAlloc=%ld(%2.2f%%) NotAccountedDue2FullGraph=%ld InvalidCallStacks=%ld:\n",
+ sct, checkedCount, allocatedCount, allocatedCount*100/checkedCount,
+ allThreads ? "Us" : "Me",
+ usedCount, static_cast<double>(usedCount*100)/checkedCount, notAccounted, invalidCallStacks);
+ if ( ! callGraph->empty()) {
+ Aggregator agg;
+ DumpGraph<typename CallGraphLT::Node> dump(&agg, "{ ", " }");
+ callGraph->traverseDepth(dump);;
+ asciistream ost;
+ ost << agg;
+ fprintf(os, "%s\n", ost.c_str());
+ }
+ if ( !threadHistogram.empty()) {
+ uint32_t nonZeroCount(0);
+ for (uint32_t i(0); i < threadHistogram.size(); i++) {
+ if (threadHistogram[i] > 0) {
+ nonZeroCount++;
+ }
+ }
+ using Pair = std::pair<uint32_t, uint32_t>;
+ std::vector<Pair> orderedHisto;
+ orderedHisto.reserve(nonZeroCount);
+ for (uint32_t i(0); i < threadHistogram.size(); i++) {
+ if (threadHistogram[i] > 0) {
+ orderedHisto.emplace_back(i, threadHistogram[i]);
+ }
+ }
+ std::sort(orderedHisto.begin(), orderedHisto.end(), [](const Pair & a, const Pair & b) { return a.second > b.second;});
+ fprintf(os, "ThreadHistogram SC %d: [", sct);
+
+ bool first(true);
+ for (const Pair & entry : orderedHisto) {
+ if ( !first) {
+ fprintf(os, ", ");
+ }
+ fprintf(os, "{%u, %u}", entry.first, entry.second);
+ first = false;
+ }
+ fprintf(os, " ]");
+ }
+ return usedCount;
+}
+
+void DataSegment::info(FILE * os, size_t level)
+{
+ fprintf(os, "Start at %p, End at %p(%p) size(%ld) partialExtension(%ld) NextLogLimit(%lx) logLevel(%ld)\n",
+ _osMemory.getStart(), _osMemory.getEnd(), sbrk(0), dataSize(), _partialExtension, _nextLogLimit, level);
+ size_t numAllocatedBlocks(0);
+ size_t numFreeBlocks = _freeList.numFreeBlocks();
+ _freeList.info(os);
+ _unMappedList.info(os);
+ if (level >= 1) {
+#ifdef PRINT_ALOT
+ SizeClassT oldSc(-17);
+ size_t oldChainLength(0);
+#endif
+ size_t scTable[32+NUM_ADMIN_CLASSES];
+ memset(scTable, 0, sizeof(scTable));
+ for (size_t i=0; (i < NELEMS(_blockList)) && ((i*BlockSize) < dataSize()); i++) {
+ BlockT & b = _blockList[i];
+#ifdef PRINT_ALOT
+ if ((b.sizeClass() != oldSc)
+ || ((oldChainLength < (b.freeChainLength()+1))
+ && b.freeChainLength()))
+ {
+ scTable[b.sizeClass()+NUM_ADMIN_CLASSES] += b.freeChainLength();
+ oldSc = b.sizeClass();
+ if (level & 0x2) {
+ fprintf(os, "Block %d at address %p with chainLength %d "
+ "freeCount %d sizeClass %d and size %d\n",
+ i, fromBlockId(i), b.freeChainLength(), b.freeCount(),
+ b.sizeClass(), classSize(b.sizeClass()));
+ }
+ }
+ oldChainLength = b.freeChainLength();
+#else
+ scTable[b.sizeClass()+NUM_ADMIN_CLASSES]++;
+#endif
+ }
+ size_t numAdminBlocks(0);
+ for(size_t i=0; i < NUM_ADMIN_CLASSES; i++) {
+ if (scTable[i] != 0ul) {
+ numAllocatedBlocks += scTable[i];
+ numAdminBlocks += scTable[i];
+ fprintf(os, "SizeClass %2ld(%s) has %5ld blocks with %10lu bytes\n",
+ i-NUM_ADMIN_CLASSES, getAdminClassName(i-NUM_ADMIN_CLASSES), scTable[i], scTable[i]*BlockSize);
+ }
+ }
+ for(size_t i=NUM_ADMIN_CLASSES; i < NELEMS(scTable); i++) {
+ if (scTable[i] != 0ul) {
+ numAllocatedBlocks += scTable[i];
+ fprintf(os, "SizeClass %2ld has %5ld blocks with %10lu bytes\n",
+ i-NUM_ADMIN_CLASSES, scTable[i], scTable[i]*BlockSize);
+ }
+ }
+ size_t total(dataSize()/BlockSize);
+ fprintf(os, "Usage: Total=%ld(100%%), admin=%ld(%ld%%), unused=%ld(%ld%%), allocated=%ld(%ld%%)\n",
+ total*BlockSize,
+ numAdminBlocks*BlockSize, numAdminBlocks*100/total,
+ numFreeBlocks*BlockSize, numFreeBlocks*100/total,
+ (numAllocatedBlocks-numAdminBlocks)*BlockSize, (numAllocatedBlocks-numAdminBlocks)*100/total);
+ }
+}
}
diff --git a/vespamalloc/src/vespamalloc/malloc/datasegment.h b/vespamalloc/src/vespamalloc/malloc/datasegment.h
index ada9b46a241..a9f5e0046db 100644
--- a/vespamalloc/src/vespamalloc/malloc/datasegment.h
+++ b/vespamalloc/src/vespamalloc/malloc/datasegment.h
@@ -8,19 +8,36 @@
namespace vespamalloc::segment {
-template<typename MemBlockPtrT>
+class IMemBlockInfo {
+public:
+ virtual ~IMemBlockInfo() = default;
+ virtual bool allocated() const = 0;
+ virtual uint32_t threadId() const = 0;
+ virtual void info(FILE * os, int level) const = 0;
+ virtual uint32_t callStackLen() const = 0;
+ virtual const StackEntry * callStack() const = 0;
+};
+class IHelper {
+public:
+ virtual ~IHelper() = default;
+ virtual size_t classSize(SizeClassT sc) const = 0;
+ virtual void dumpInfo(int level) const = 0;
+ virtual std::unique_ptr<IMemBlockInfo> createMemblockInfo(void * ptr) const = 0;
+};
+
class DataSegment
{
public:
DataSegment(const DataSegment & rhs) = delete;
DataSegment & operator = (const DataSegment & rhs) = delete;
- DataSegment() __attribute__((noinline));
+ explicit DataSegment(const IHelper & helper) __attribute__((noinline));
~DataSegment() __attribute__((noinline));
void * getBlock(size_t & oldBlockSize, SizeClassT sc) __attribute__((noinline));
void returnBlock(void *ptr) __attribute__((noinline));
SizeClassT sizeClass(const void * ptr) const { return _blockList[blockId(ptr)].sizeClass(); }
bool containsPtr(const void * ptr) const { return blockId(ptr) < BlockCount; }
+ template<typename MemBlockPtrT>
size_t getMaxSize(const void * ptr) const { return _blockList[blockId(ptr)].getMaxSize<MemBlockPtrT>(); }
const void * start() const { return _osMemory.getStart(); }
const void * end() const { return _osMemory.getEnd(); }
@@ -30,8 +47,7 @@ public:
size_t freeSize() const;
size_t infoThread(FILE * os, int level, uint32_t thread, SizeClassT sct, uint32_t maxThreadId=0) const __attribute__((noinline));
void info(FILE * os, size_t level) __attribute__((noinline));
- void setupLog(size_t bigMemLogLevel, size_t bigLimit, size_t bigIncrement, size_t allocs2Show)
- {
+ void setupLog(size_t bigMemLogLevel, size_t bigLimit, size_t bigIncrement, size_t allocs2Show) {
_bigSegmentLogLevel = bigMemLogLevel;
if ((size_t(end()) < _nextLogLimit) || (size_t(end()) < (size_t(start()) + bigLimit))) {
_nextLogLimit = size_t(start()) + bigLimit;
@@ -48,18 +64,19 @@ private:
typedef BlockT BlockList[BlockCount];
typedef FreeListT<BlockCount/2> FreeList;
- OSMemory _osMemory;
- size_t _bigSegmentLogLevel;
- size_t _bigIncrement;
- size_t _allocs2Show;
- size_t _unmapSize;
+ OSMemory _osMemory;
+ size_t _bigSegmentLogLevel;
+ size_t _bigIncrement;
+ size_t _allocs2Show;
+ size_t _unmapSize;
+ size_t _nextLogLimit;
+ size_t _partialExtension;
+ const IHelper &_helper;
- size_t _nextLogLimit;
- size_t _partialExtension;
- Mutex _mutex;
- BlockList _blockList;
- FreeList _freeList;
- FreeList _unMappedList;
+ Mutex _mutex;
+ BlockList _blockList;
+ FreeList _freeList;
+ FreeList _unMappedList;
};
}
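
A side note on the header above: getMaxSize keeps its compile-time knowledge of the block type by becoming a template member function of the now non-template class, so only call sites that statically know their MemBlockPtrT (see malloc.h further down) name it. A tiny sketch of that shape, again with made-up names (Entry, PlainBlock, GuardedBlock):

#include <cstddef>
#include <cstdio>

// Non-template class with one template member: only this accessor still
// depends on the concrete block type, so only its call sites mention it.
class Entry {
public:
    template <typename BlockT>
    size_t getMaxSize() const {
        size_t oh = BlockT::overhead();
        return oh < _raw ? _raw - oh : 0;
    }
private:
    size_t _raw = 4096;
};

struct PlainBlock   { static size_t overhead() { return 0; } };
struct GuardedBlock { static size_t overhead() { return 64; } };  // e.g. bounds-check headers

int main() {
    Entry e;
    printf("plain: %zu, guarded: %zu\n",
           e.getMaxSize<PlainBlock>(), e.getMaxSize<GuardedBlock>());
    return 0;
}
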
diff --git a/vespamalloc/src/vespamalloc/malloc/datasegment.hpp b/vespamalloc/src/vespamalloc/malloc/datasegment.hpp
deleted file mode 100644
index 9c92bb19f7c..00000000000
--- a/vespamalloc/src/vespamalloc/malloc/datasegment.hpp
+++ /dev/null
@@ -1,339 +0,0 @@
-// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
-#pragma once
-
-#include "datasegment.h"
-
-namespace vespamalloc::segment {
-
-template<typename MemBlockPtrT>
-DataSegment<MemBlockPtrT>::~DataSegment() = default;
-
-#define INIT_LOG_LIMIT 0x400000000ul // 16G
-
-template<typename MemBlockPtrT>
-DataSegment<MemBlockPtrT>::DataSegment() :
- _osMemory(BlockSize),
- _bigSegmentLogLevel(0),
- _bigIncrement (0x4000000),
- _allocs2Show (8),
- _unmapSize(0x100000),
- _nextLogLimit(INIT_LOG_LIMIT),
- _partialExtension(0),
- _mutex(),
- _freeList(_blockList),
- _unMappedList(_blockList)
-{
- size_t wanted(0x1000000000ul); //64G
- void * everything = _osMemory.reserve(wanted);
- if (everything) {
- for (BlockIdT i = blockId(everything), m = blockId(everything) + (wanted / BlockSize); i < m; i++) {
- if (i > BlockCount) {
- abort();
- }
- _blockList[i].sizeClass(UNUSED_BLOCK);
- _blockList[i].freeChainLength(m-i);
- }
- _freeList.add(blockId(everything));
- }
- _nextLogLimit = std::max(size_t(end()) + _nextLogLimit, _nextLogLimit);
-}
-
-template<typename MemBlockPtrT>
-size_t
-DataSegment<MemBlockPtrT>::freeSize() const {
- return _freeList.numFreeBlocks() * BlockSize;
-}
-
-template<typename MemBlockPtrT>
-void * DataSegment<MemBlockPtrT>::getBlock(size_t & oldBlockSize, SizeClassT sc)
-{
- const size_t minBlockSize = std::max(BlockSize, _osMemory.getMinBlockSize());
- oldBlockSize = ((oldBlockSize + (minBlockSize-1))/minBlockSize)*minBlockSize;
- BlockIdT numBlocks((oldBlockSize + (BlockSize - 1)) / BlockSize);
- size_t blockSize = BlockSize * numBlocks;
- void * newBlock(nullptr);
- {
- Guard sync(_mutex);
- newBlock = _freeList.sub(numBlocks);
- if ( newBlock == nullptr ) {
- newBlock = _unMappedList.sub(numBlocks);
- if ( newBlock == nullptr ) {
- BlockIdT nextBlock = blockId(end());
- BlockIdT startBlock = _freeList.lastBlock(nextBlock);
- if (startBlock) {
- size_t adjustedBlockSize = blockSize - BlockSize*(nextBlock-startBlock);
- newBlock = _osMemory.get(adjustedBlockSize);
- if (newBlock != nullptr) {
- assert (newBlock == fromBlockId(nextBlock));
- _freeList.removeLastBlock();
- newBlock = fromBlockId(startBlock);
- _partialExtension++;
- }
- } else {
- newBlock = _osMemory.get(blockSize);
- }
- } else {
- bool result(_osMemory.reclaim(newBlock, blockSize));
- assert (result);
- (void) result;
- }
- } else {
- DEBUG(fprintf(stderr, "Reuse segment %p(%d, %d)\n", newBlock, sc, numBlocks));
- }
- }
- if (newBlock == (void *) -1) {
- newBlock = nullptr;
- blockSize = 0;
- } else if (newBlock == nullptr) {
- blockSize = 0;
- } else {
- assert(blockId(newBlock)+numBlocks < BlockCount);
- // assumes _osMemory.get will always return a value that does not make
- // "i" overflow the _blockList array; this will break when hitting the
- // 2T address space boundary.
- for (BlockIdT i = blockId(newBlock), m = blockId(newBlock) + numBlocks; i < m; i++) {
- _blockList[i].sizeClass(sc);
- _blockList[i].freeChainLength(m-i);
- _blockList[i].realNumBlocks(m-i);
- }
- }
- oldBlockSize = blockSize;
- if (newBlock == nullptr) {
- static int recurse = 0;
- if (recurse++ == 0) {
- perror("Failed extending datasegment: ");
- assert(false);
- }
- return nullptr;
- }
- checkAndLogBigSegment();
- return newBlock;
-}
-
-template<typename MemBlockPtrT>
-void DataSegment<MemBlockPtrT>::checkAndLogBigSegment()
-{
- if (size_t(end()) >= _nextLogLimit) {
- fprintf(stderr, "Datasegment is growing ! Start:%p - End:%p : nextLogLimit = %lx\n", start(), end(), _nextLogLimit);
- _nextLogLimit = ((size_t(end()) + _bigIncrement)/_bigIncrement)*_bigIncrement;
- static int recurse = 0;
- if (recurse++ == 0) {
- if (_bigSegmentLogLevel > 0) {
- MemBlockPtrT::dumpInfo(_bigSegmentLogLevel);
- }
- }
- recurse--;
- }
-}
-
-template<typename MemBlockPtrT>
-void DataSegment<MemBlockPtrT>::returnBlock(void *ptr)
-{
- BlockIdT bId(blockId(ptr));
- SizeClassT sc = _blockList[bId].sizeClass();
- size_t bsz = MemBlockPtrT::classSize(sc);
- if (bsz >= BlockSize) {
- BlockIdT numBlocks = bsz / BlockSize;
- if (numBlocks > _blockList[bId].realNumBlocks()) {
- numBlocks = _blockList[bId].realNumBlocks();
- }
- assert(_blockList[bId].freeChainLength() >= numBlocks);
- if ((_unmapSize < bsz) && _osMemory.release(ptr, numBlocks*BlockSize)) {
- for(BlockIdT i=0; i < numBlocks; i++) {
- BlockT & b = _blockList[bId + i];
- b.sizeClass(UNMAPPED_BLOCK);
- b.freeChainLength(numBlocks - i);
- }
- {
- Guard sync(_mutex);
- _unMappedList.add(bId);
- }
- } else {
- for(BlockIdT i=0; i < numBlocks; i++) {
- BlockT & b = _blockList[bId + i];
- b.sizeClass(FREE_BLOCK);
- b.freeChainLength(numBlocks - i);
- }
- {
- Guard sync(_mutex);
- _freeList.add(bId);
- }
- }
- }
-}
-
-namespace {
-
-std::vector<uint32_t>
-createHistogram(bool allThreads, uint32_t maxThreads) {
- if (allThreads) {
- return std::vector<uint32_t>(maxThreads, 0);
- }
- return std::vector<uint32_t>();
-}
-
-}
-template<typename MemBlockPtrT>
-size_t DataSegment<MemBlockPtrT>::infoThread(FILE * os, int level, uint32_t thread, SizeClassT sct, uint32_t maxThreadId) const
-{
- using CallGraphLT = CallGraph<typename MemBlockPtrT::Stack, 0x10000, Index>;
- bool allThreads(thread == 0);
- size_t usedCount(0);
- size_t checkedCount(0);
- size_t allocatedCount(0);
- size_t notAccounted(0);
- size_t invalidCallStacks(0);
- std::unique_ptr<CallGraphLT> callGraph = std::make_unique<CallGraphLT>();
- std::vector<uint32_t> threadHistogram = createHistogram(allThreads, maxThreadId);
- for (size_t i=0; i < NELEMS(_blockList); ) {
- const BlockT & b = _blockList[i];
- SizeClassT sc = b.sizeClass();
- if (sc == sct) {
- size_t sz(MemBlockPtrT::classSize(sc));
- size_t numB(b.freeChainLength());
- for(char *m((char *)(fromBlockId(i))), *em((char*)(fromBlockId(i+numB))); (m + sz) <= em; m += sz) {
- MemBlockPtrT mem(m,0,false);
- checkedCount++;
- if (mem.allocated()) {
- allocatedCount++;
- if (allThreads || (mem.threadId() == thread)) {
- usedCount++;
- if (mem.threadId() < threadHistogram.size()) {
- threadHistogram[mem.threadId()]++;
- }
- if (usedCount < _allocs2Show) {
- mem.info(os, level);
- }
- if (mem.callStackLen() && mem.callStack()[0].valid()) {
- size_t csl(mem.callStackLen());
- for (size_t j(0); j < csl; j++) {
- if ( ! mem.callStack()[j].valid()) {
- csl = j;
- }
- }
- if ( ! callGraph->addStack(mem.callStack(), csl)) {
- notAccounted++;
- }
- } else {
- if (mem.callStackLen()) {
- invalidCallStacks++;
- }
- }
- }
- }
- }
- i += numB;
- } else {
- i++;
- }
- }
- if (checkedCount == 0) {
- return 0;
- }
-
- fprintf(os, "\nCallTree SC %d(Checked=%ld, GlobalAlloc=%ld(%ld%%)," "By%sAlloc=%ld(%2.2f%%) NotAccountedDue2FullGraph=%ld InvalidCallStacks=%ld:\n",
- sct, checkedCount, allocatedCount, checkedCount ? allocatedCount*100/checkedCount : 0,
- allThreads ? "Us" : "Me",
- usedCount, checkedCount ? static_cast<double>(usedCount*100)/checkedCount : 0.0, notAccounted, invalidCallStacks);
- if ( ! callGraph->empty()) {
- Aggregator agg;
- DumpGraph<typename CallGraphLT::Node> dump(&agg, "{ ", " }");
- callGraph->traverseDepth(dump);;
- asciistream ost;
- ost << agg;
- fprintf(os, "%s\n", ost.c_str());
- }
- if ( !threadHistogram.empty()) {
- uint32_t nonZeroCount(0);
- for (uint32_t i(0); i < threadHistogram.size(); i++) {
- if (threadHistogram[i] > 0) {
- nonZeroCount++;
- }
- }
- using Pair = std::pair<uint32_t, uint32_t>;
- std::vector<Pair> orderedHisto;
- orderedHisto.reserve(nonZeroCount);
- for (uint32_t i(0); i < threadHistogram.size(); i++) {
- if (threadHistogram[i] > 0) {
- orderedHisto.emplace_back(i, threadHistogram[i]);
- }
- }
- std::sort(orderedHisto.begin(), orderedHisto.end(), [](const Pair & a, const Pair & b) { return a.second > b.second;});
- fprintf(os, "ThreadHistogram SC %d: [", sct);
-
- bool first(true);
- for (const Pair & entry : orderedHisto) {
- if ( !first) {
- fprintf(os, ", ");
- }
- fprintf(os, "{%u, %u}", entry.first, entry.second);
- first = false;
- }
- fprintf(os, " ]");
- }
- return usedCount;
-}
-
-template<typename MemBlockPtrT>
-void DataSegment<MemBlockPtrT>::info(FILE * os, size_t level)
-{
- fprintf(os, "Start at %p, End at %p(%p) size(%ld) partialExtension(%ld) NextLogLimit(%lx) logLevel(%ld)\n",
- _osMemory.getStart(), _osMemory.getEnd(), sbrk(0), dataSize(), _partialExtension, _nextLogLimit, level);
- size_t numAllocatedBlocks(0);
- size_t numFreeBlocks = _freeList.numFreeBlocks();
- _freeList.info(os);
- _unMappedList.info(os);
- if (level >= 1) {
-#ifdef PRINT_ALOT
- SizeClassT oldSc(-17);
- size_t oldChainLength(0);
-#endif
- size_t scTable[32+NUM_ADMIN_CLASSES];
- memset(scTable, 0, sizeof(scTable));
- for (size_t i=0; (i < NELEMS(_blockList)) && ((i*BlockSize) < dataSize()); i++) {
- BlockT & b = _blockList[i];
-#ifdef PRINT_ALOT
- if ((b.sizeClass() != oldSc)
- || ((oldChainLength < (b.freeChainLength()+1))
- && b.freeChainLength()))
- {
- scTable[b.sizeClass()+NUM_ADMIN_CLASSES] += b.freeChainLength();
- oldSc = b.sizeClass();
- if (level & 0x2) {
- fprintf(os, "Block %d at address %p with chainLength %d "
- "freeCount %d sizeClass %d and size %d\n",
- i, fromBlockId(i), b.freeChainLength(), b.freeCount(),
- b.sizeClass(), classSize(b.sizeClass()));
- }
- }
- oldChainLength = b.freeChainLength();
-#else
- scTable[b.sizeClass()+NUM_ADMIN_CLASSES]++;
-#endif
- }
- size_t numAdminBlocks(0);
- for(size_t i=0; i < NUM_ADMIN_CLASSES; i++) {
- if (scTable[i] != 0ul) {
- numAllocatedBlocks += scTable[i];
- numAdminBlocks += scTable[i];
- fprintf(os, "SizeClass %2ld(%s) has %5ld blocks with %10lu bytes\n",
- i-NUM_ADMIN_CLASSES, getAdminClassName(i-NUM_ADMIN_CLASSES), scTable[i], scTable[i]*BlockSize);
- }
- }
- for(size_t i=NUM_ADMIN_CLASSES; i < NELEMS(scTable); i++) {
- if (scTable[i] != 0ul) {
- numAllocatedBlocks += scTable[i];
- fprintf(os, "SizeClass %2ld has %5ld blocks with %10lu bytes\n",
- i-NUM_ADMIN_CLASSES, scTable[i], scTable[i]*BlockSize);
- }
- }
- size_t total(dataSize()/BlockSize);
- fprintf(os, "Usage: Total=%ld(100%%), admin=%ld(%ld%%), unused=%ld(%ld%%), allocated=%ld(%ld%%)\n",
- total*BlockSize,
- numAdminBlocks*BlockSize, numAdminBlocks*100/total,
- numFreeBlocks*BlockSize, numFreeBlocks*100/total,
- (numAllocatedBlocks-numAdminBlocks)*BlockSize, (numAllocatedBlocks-numAdminBlocks)*100/total);
- }
-}
-
-}
diff --git a/vespamalloc/src/vespamalloc/malloc/datasegmentd.cpp b/vespamalloc/src/vespamalloc/malloc/datasegmentd.cpp
deleted file mode 100644
index 3842aae6297..00000000000
--- a/vespamalloc/src/vespamalloc/malloc/datasegmentd.cpp
+++ /dev/null
@@ -1,9 +0,0 @@
-// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
-#include "datasegment.hpp"
-#include "memblockboundscheck_d.h"
-
-namespace vespamalloc::segment {
-
-template class DataSegment<MemBlockBoundsCheck>;
-
-}
diff --git a/vespamalloc/src/vespamalloc/malloc/datasegmentdst.cpp b/vespamalloc/src/vespamalloc/malloc/datasegmentdst.cpp
deleted file mode 100644
index 56504050a64..00000000000
--- a/vespamalloc/src/vespamalloc/malloc/datasegmentdst.cpp
+++ /dev/null
@@ -1,9 +0,0 @@
-// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
-#include "datasegment.hpp"
-#include "memblockboundscheck_dst.h"
-
-namespace vespamalloc::segment {
-
-template class DataSegment<MemBlockBoundsCheck>;
-
-}
diff --git a/vespamalloc/src/vespamalloc/malloc/globalpool.h b/vespamalloc/src/vespamalloc/malloc/globalpool.h
index ffe15921f3d..807d1498633 100644
--- a/vespamalloc/src/vespamalloc/malloc/globalpool.h
+++ b/vespamalloc/src/vespamalloc/malloc/globalpool.h
@@ -13,7 +13,7 @@ namespace vespamalloc {
template <typename MemBlockPtrT>
class AllocPoolT
{
- using DataSegment = segment::DataSegment<MemBlockPtrT>;
+ using DataSegment = segment::DataSegment;
public:
typedef AFList<MemBlockPtrT> ChunkSList;
AllocPoolT(DataSegment & ds);
diff --git a/vespamalloc/src/vespamalloc/malloc/malloc.h b/vespamalloc/src/vespamalloc/malloc/malloc.h
index ce9a6e71d78..cc69d7469c3 100644
--- a/vespamalloc/src/vespamalloc/malloc/malloc.h
+++ b/vespamalloc/src/vespamalloc/malloc/malloc.h
@@ -11,10 +11,23 @@
namespace vespamalloc {
+template <typename MemBlockPtrT>
+class MemBlockInfoT final : public segment::IMemBlockInfo {
+public:
+ MemBlockInfoT(void *ptr) : _mem(ptr, 0, false) { }
+ bool allocated() const override { return _mem.allocated(); }
+ uint32_t threadId() const override { return _mem.threadId(); }
+ void info(FILE * os, int level) const override { _mem.info(os, level); }
+ uint32_t callStackLen() const override { return _mem.callStackLen(); }
+ const StackEntry * callStack() const override { return _mem.callStack(); }
+private:
+ MemBlockPtrT _mem;
+};
+
template <typename MemBlockPtrT, typename ThreadListT>
-class MemoryManager : public IAllocator
+class MemoryManager : public IAllocator, public segment::IHelper
{
- using DataSegment = segment::DataSegment<MemBlockPtrT>;
+ using DataSegment = segment::DataSegment;
public:
MemoryManager(size_t logLimitAtStart);
~MemoryManager() override;
@@ -25,6 +38,12 @@ public:
MemBlockPtrT::Stack::setStopAddress(returnAddressStop);
}
size_t getMaxNumThreads() const override { return _threadList.getMaxNumThreads(); }
+ size_t classSize(SizeClassT sc) const override { return MemBlockPtrT::classSize(sc); }
+ void dumpInfo(int level) const override { MemBlockPtrT::dumpInfo(level); }
+ std::unique_ptr<segment::IMemBlockInfo>
+ createMemblockInfo(void * ptr) const override {
+ return std::make_unique<MemBlockInfoT<MemBlockPtrT>>(ptr);
+ }
int mallopt(int param, int value);
void *malloc(size_t sz);
@@ -54,7 +73,7 @@ public:
size_t getMinSizeForAlignment(size_t align, size_t sz) const { return MemBlockPtrT::getMinSizeForAlignment(align, sz); }
size_t sizeClass(const void *ptr) const { return _segment.sizeClass(ptr); }
size_t usable_size(void *ptr) const {
- return MemBlockPtrT::usable_size(ptr, _segment.getMaxSize(ptr));
+ return MemBlockPtrT::usable_size(ptr, _segment.getMaxSize<MemBlockPtrT>(ptr));
}
void *calloc(size_t nelm, size_t esz) {
@@ -95,7 +114,7 @@ template <typename MemBlockPtrT, typename ThreadListT>
MemoryManager<MemBlockPtrT, ThreadListT>::MemoryManager(size_t logLimitAtStart) :
IAllocator(),
_prAllocLimit(logLimitAtStart),
- _segment(),
+ _segment(*this),
_allocPool(_segment),
_mmapPool(),
_threadList(_allocPool, _mmapPool)
@@ -226,7 +245,7 @@ void * MemoryManager<MemBlockPtrT, ThreadListT>::realloc(void *oldPtr, size_t sz
}
SizeClassT sc(_segment.sizeClass(oldPtr));
if (sc >= 0) {
- size_t oldSz(_segment.getMaxSize(oldPtr));
+ size_t oldSz(_segment.getMaxSize<MemBlockPtrT>(oldPtr));
if (sz > oldSz) {
ptr = malloc(sz);
if (ptr) {
diff --git a/vespamalloc/src/vespamalloc/malloc/memblock.h b/vespamalloc/src/vespamalloc/malloc/memblock.h
index 1d0b0e3da00..f7b5923ecff 100644
--- a/vespamalloc/src/vespamalloc/malloc/memblock.h
+++ b/vespamalloc/src/vespamalloc/malloc/memblock.h
@@ -40,7 +40,7 @@ public:
bool allocated() const { return false; }
uint32_t threadId() const { return 0; }
void info(FILE *, unsigned level=0) const { (void) level; }
- Stack * callStack() { return nullptr; }
+ const Stack * callStack() const { return nullptr; }
size_t callStackLen() const { return 0; }
void fillMemory(size_t) { }
diff --git a/vespamalloc/src/vespamalloc/malloc/memblockboundscheck.h b/vespamalloc/src/vespamalloc/malloc/memblockboundscheck.h
index d6421b9a7dd..b465a4e834c 100644
--- a/vespamalloc/src/vespamalloc/malloc/memblockboundscheck.h
+++ b/vespamalloc/src/vespamalloc/malloc/memblockboundscheck.h
@@ -24,7 +24,7 @@ public:
bool allocated() const { return (static_cast<uint32_t*>(_ptr)[3] == ALLOC_MAGIC); }
size_t size() const { return static_cast<const uint32_t *>(_ptr)[0]; }
size_t alignment() const { return static_cast<const uint32_t *>(_ptr)[1]; }
- uint32_t threadId() const { return static_cast<uint32_t *>(_ptr)[2]; }
+ uint32_t threadId() const { return static_cast<uint32_t *>(_ptr)[2]; }
Stack * callStack() { return reinterpret_cast<Stack *>((char *)_ptr + size() + alignment()); }
const Stack * callStack() const { return reinterpret_cast<const Stack *>((const char *)_ptr + size() + alignment()); }
void fillMemory(size_t sz) {
diff --git a/vespamalloc/src/vespamalloc/malloc/threadpool.h b/vespamalloc/src/vespamalloc/malloc/threadpool.h
index 0d7baa85781..f49e6cf24af 100644
--- a/vespamalloc/src/vespamalloc/malloc/threadpool.h
+++ b/vespamalloc/src/vespamalloc/malloc/threadpool.h
@@ -15,7 +15,7 @@ class ThreadPoolT
public:
using ChunkSList = AFList<MemBlockPtrT>;
using AllocPool = AllocPoolT<MemBlockPtrT>;
- using DataSegment = segment::DataSegment<MemBlockPtrT>;
+ using DataSegment = segment::DataSegment;
ThreadPoolT();
~ThreadPoolT();
void setPool(AllocPool & allocPool, MMapPool & mmapPool) {
diff --git a/vespamalloc/src/vespamalloc/util/callgraph.h b/vespamalloc/src/vespamalloc/util/callgraph.h
index 5dffe7f6a31..2d66fc8b717 100644
--- a/vespamalloc/src/vespamalloc/util/callgraph.h
+++ b/vespamalloc/src/vespamalloc/util/callgraph.h
@@ -21,7 +21,7 @@ public:
size_t count() const { return _count; }
void content(const T & v) { _content = v; }
template <typename Store>
- bool addStack(T * stack, size_t nelem, Store & store);
+ bool addStack(const T * stack, size_t nelem, Store & store);
template<typename Object>
void traverseDepth(size_t depth, size_t width, Object func);
template<typename Object>
@@ -38,7 +38,7 @@ private:
template<typename T, typename AddSub>
template <typename Store>
-bool CallGraphNode<T, AddSub>::addStack(T * stack, size_t nelem, Store & store) {
+bool CallGraphNode<T, AddSub>::addStack(const T * stack, size_t nelem, Store & store) {
bool retval(false);
if (nelem == 0) {
retval = true;
@@ -125,7 +125,7 @@ public:
{
checkOrSetRoot(root);
}
- bool addStack(Content * stack, size_t nelem) {
+ bool addStack(const Content * stack, size_t nelem) {
checkOrSetRoot(stack[0]);
return _root->addStack(stack, nelem, *_nodeStore);
}