diff options
Diffstat (limited to 'vespamalloc/src')
123 files changed, 6806 insertions, 0 deletions
diff --git a/vespamalloc/src/.gitignore b/vespamalloc/src/.gitignore new file mode 100644 index 00000000000..dd21c2da121 --- /dev/null +++ b/vespamalloc/src/.gitignore @@ -0,0 +1,4 @@ +Makefile.ini +config_command.sh +project.dsw +vespamalloc.mak diff --git a/vespamalloc/src/testlist.txt b/vespamalloc/src/testlist.txt new file mode 100644 index 00000000000..5cd876dde19 --- /dev/null +++ b/vespamalloc/src/testlist.txt @@ -0,0 +1,7 @@ +tests/test1 +tests/test2 +tests/allocfree +tests/doubledelete +tests/overwrite +tests/stacktrace +tests/thread diff --git a/vespamalloc/src/tests/.gitignore b/vespamalloc/src/tests/.gitignore new file mode 100644 index 00000000000..fdebd8b7f2c --- /dev/null +++ b/vespamalloc/src/tests/.gitignore @@ -0,0 +1,4 @@ +.depend +Makefile +test +vespamalloc_test_app diff --git a/vespamalloc/src/tests/CMakeLists.txt b/vespamalloc/src/tests/CMakeLists.txt new file mode 100644 index 00000000000..4d566371bd7 --- /dev/null +++ b/vespamalloc/src/tests/CMakeLists.txt @@ -0,0 +1,6 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+vespa_add_executable(vespamalloc_test_app + SOURCES + test.cpp + DEPENDS +) diff --git a/vespamalloc/src/tests/allocfree/.gitignore b/vespamalloc/src/tests/allocfree/.gitignore new file mode 100644 index 00000000000..a80a52d808a --- /dev/null +++ b/vespamalloc/src/tests/allocfree/.gitignore @@ -0,0 +1,14 @@ +.depend +Makefile +allocfree_shared_test +allocfree_static_test +allocfree_static_testd +allocfree_test +linklist_test +realloc_test +realloc_testd +/creatingmanythreads_test +vespamalloc_allocfree_shared_test_app +vespamalloc_creatingmanythreads_test_app +vespamalloc_linklist_test_app +vespamalloc_realloc_test_app diff --git a/vespamalloc/src/tests/allocfree/CMakeLists.txt b/vespamalloc/src/tests/allocfree/CMakeLists.txt new file mode 100644 index 00000000000..1dc36f8dec7 --- /dev/null +++ b/vespamalloc/src/tests/allocfree/CMakeLists.txt @@ -0,0 +1,23 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +vespa_add_executable(vespamalloc_creatingmanythreads_test_app + SOURCES + creatingmanythreads.cpp +) +vespa_add_executable(vespamalloc_allocfree_shared_test_app + SOURCES + allocfree.cpp + producerconsumer.cpp +) +vespa_add_test(NAME vespamalloc_allocfree_shared_test_app NO_VALGRIND COMMAND sh allocfree_test.sh BENCHMARK) +vespa_add_executable(vespamalloc_realloc_test_app + SOURCES + realloc.cpp +) +vespa_add_executable(vespamalloc_linklist_test_app + SOURCES + linklist.cpp + producerconsumer.cpp + ../../vespamalloc/malloc/allocchunk.cpp + ../../vespamalloc/malloc/common.cpp + $<TARGET_OBJECTS:vespamalloc_util> +) diff --git a/vespamalloc/src/tests/allocfree/DESC b/vespamalloc/src/tests/allocfree/DESC new file mode 100644 index 00000000000..4f3ca4d4d97 --- /dev/null +++ b/vespamalloc/src/tests/allocfree/DESC @@ -0,0 +1 @@ +This is a unittest of vespamalloc. 
diff --git a/vespamalloc/src/tests/allocfree/FILES b/vespamalloc/src/tests/allocfree/FILES new file mode 100644 index 00000000000..4b14c586dd4 --- /dev/null +++ b/vespamalloc/src/tests/allocfree/FILES @@ -0,0 +1 @@ +testatomic.cpp diff --git a/vespamalloc/src/tests/allocfree/allocfree.cpp b/vespamalloc/src/tests/allocfree/allocfree.cpp new file mode 100644 index 00000000000..0f7b4d53c6f --- /dev/null +++ b/vespamalloc/src/tests/allocfree/allocfree.cpp @@ -0,0 +1,115 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#include <vespa/log/log.h> +#include <vespa/fastos/fastos.h> +#include <vespa/vespalib/testkit/testapp.h> +#include "producerconsumer.h" +#include <map> + +using vespalib::Consumer; +using vespalib::Producer; +using vespalib::ProducerConsumer; + +LOG_SETUP("allocfree_test"); + +TEST_SETUP(Test); + +//----------------------------------------------------------------------------- + +class FreeWorker : public Consumer { +public: + FreeWorker(uint32_t maxQueue, bool inverse) + : Consumer (maxQueue, inverse) {} +private: + virtual void consume(void * p) { free(p); } +}; + +//----------------------------------------------------------------------------- + +class MallocWorker : public Producer { +public: + MallocWorker(uint32_t size, uint32_t cnt, FreeWorker &target) + : Producer(cnt, target), _size(size) {} +private: + uint32_t _size; + virtual void * produce() { return malloc(_size); } +}; + +//----------------------------------------------------------------------------- + +class MallocFreeWorker : public ProducerConsumer { +public: + MallocFreeWorker(uint32_t size, uint32_t cnt, bool inverse) + : ProducerConsumer(cnt, inverse), _size(size) { } +private: + uint32_t _size; + virtual void * produce() { return malloc(_size); } + virtual void consume(void * p) { free(p); } +}; + +//----------------------------------------------------------------------------- + +int Test::Main() { + int duration = 
10; + int numCrossThreadAlloc(2); + int numSameThreadAlloc(2); + if (_argc > 1) { + duration = atoi(_argv[1]); + } + if (_argc > 2) { + numCrossThreadAlloc = atoi(_argv[2]); + } + if (_argc > 3) { + numSameThreadAlloc = atoi(_argv[3]); + } + TEST_INIT("allocfree_test"); + + FastOS_ThreadPool pool(128000); + + std::map<int, std::shared_ptr<FreeWorker> > freeWorkers; + std::map<int, std::shared_ptr<MallocWorker> > mallocWorkers; + std::map<int, std::shared_ptr<MallocFreeWorker> > mallocFreeWorkers; + for (int i(0); i < numCrossThreadAlloc; i++) { + freeWorkers[i] = std::shared_ptr<FreeWorker>(new FreeWorker(1024, (i%2) ? true : false)); + mallocWorkers[i] = std::shared_ptr<MallocWorker>(new MallocWorker(400, 256, *freeWorkers[i])); + } + for(int i(0); i < numSameThreadAlloc; i++) { + mallocFreeWorkers[i] = std::shared_ptr<MallocFreeWorker>(new MallocFreeWorker(200, 16, (i%2) ? true : false)); + } + + + for(std::map<int, std::shared_ptr<FreeWorker> >::iterator it(freeWorkers.begin()), mt(freeWorkers.end()); it != mt; it++) { + ASSERT_TRUE(pool.NewThread(it->second.get(), NULL) != NULL); + } + for(std::map<int, std::shared_ptr<MallocWorker> >::iterator it(mallocWorkers.begin()), mt(mallocWorkers.end()); it != mt; it++) { + ASSERT_TRUE(pool.NewThread(it->second.get(), NULL) != NULL); + } + for(std::map<int, std::shared_ptr<MallocFreeWorker> >::iterator it(mallocFreeWorkers.begin()), mt(mallocFreeWorkers.end()); it != mt; it++) { + ASSERT_TRUE(pool.NewThread(it->second.get(), NULL) != NULL); + } + + for (; duration > 0; --duration) { + LOG(info, "%d seconds left...", duration); + FastOS_Thread::Sleep(1000); + } + pool.Close(); + size_t numFreeOperations(0); + size_t numMallocOperations(0); + size_t numSameThreadMallocFreeOperations(0); + for(std::map<int, std::shared_ptr<FreeWorker> >::iterator it(freeWorkers.begin()), mt(freeWorkers.end()); it != mt; it++) { + numFreeOperations += it->second->operations(); + } + for(std::map<int, std::shared_ptr<MallocWorker> 
>::iterator it(mallocWorkers.begin()), mt(mallocWorkers.end()); it != mt; it++) { + numMallocOperations += it->second->operations(); + } + for(std::map<int, std::shared_ptr<MallocFreeWorker> >::iterator it(mallocFreeWorkers.begin()), mt(mallocFreeWorkers.end()); it != mt; it++) { + numSameThreadMallocFreeOperations += it->second->operationsConsumed(); + } + EXPECT_EQUAL(numFreeOperations, numMallocOperations); + const size_t numCrossThreadMallocFreeOperations(numMallocOperations); + + fprintf(stderr, "Did %" PRIu64 " Cross thread malloc/free operations\n", numCrossThreadMallocFreeOperations); + fprintf(stderr, "Did %" PRIu64 " Same thread malloc/free operations\n", numSameThreadMallocFreeOperations); + fprintf(stderr, "Did %" PRIu64 " Total operations\n", numCrossThreadMallocFreeOperations + numSameThreadMallocFreeOperations); + + TEST_DONE(); +} diff --git a/vespamalloc/src/tests/allocfree/allocfree_benchmark.sh b/vespamalloc/src/tests/allocfree/allocfree_benchmark.sh new file mode 100755 index 00000000000..51165dfce71 --- /dev/null +++ b/vespamalloc/src/tests/allocfree/allocfree_benchmark.sh @@ -0,0 +1,151 @@ +#!/bin/bash + +TIME=/usr/bin/time + +VESPA_MALLOC_SO=../../../src/vespamalloc/libvespamalloc.so +LIBDIR=$LIBDIR + +$TIME ./vespamalloc_allocfree_shared_test_app 5 1 0 +$TIME ./vespamalloc_allocfree_shared_test_app 5 2 0 +$TIME ./vespamalloc_allocfree_shared_test_app 5 4 0 +$TIME ./vespamalloc_allocfree_shared_test_app 5 8 0 +$TIME ./vespamalloc_allocfree_shared_test_app 5 16 0 +$TIME ./vespamalloc_allocfree_shared_test_app 5 32 0 +$TIME ./vespamalloc_allocfree_shared_test_app 5 0 1 +$TIME ./vespamalloc_allocfree_shared_test_app 5 0 2 +$TIME ./vespamalloc_allocfree_shared_test_app 5 0 4 +$TIME ./vespamalloc_allocfree_shared_test_app 5 0 8 +$TIME ./vespamalloc_allocfree_shared_test_app 5 0 16 +$TIME ./vespamalloc_allocfree_shared_test_app 5 0 32 +$TIME ./vespamalloc_allocfree_shared_test_app 5 1 1 +$TIME ./vespamalloc_allocfree_shared_test_app 5 2 2 +$TIME 
./vespamalloc_allocfree_shared_test_app 5 4 4 +$TIME ./vespamalloc_allocfree_shared_test_app 5 8 8 +$TIME ./vespamalloc_allocfree_shared_test_app 5 16 16 +$TIME ./vespamalloc_allocfree_shared_test_app 5 32 32 +LD_PRELOAD=$VESPA_MALLOC_SO $TIME ./vespamalloc_allocfree_shared_test_app 5 1 0 +LD_PRELOAD=$VESPA_MALLOC_SO $TIME ./vespamalloc_allocfree_shared_test_app 5 2 0 +LD_PRELOAD=$VESPA_MALLOC_SO $TIME ./vespamalloc_allocfree_shared_test_app 5 4 0 +LD_PRELOAD=$VESPA_MALLOC_SO $TIME ./vespamalloc_allocfree_shared_test_app 5 8 0 +LD_PRELOAD=$VESPA_MALLOC_SO $TIME ./vespamalloc_allocfree_shared_test_app 5 16 0 +LD_PRELOAD=$VESPA_MALLOC_SO $TIME ./vespamalloc_allocfree_shared_test_app 5 32 0 +LD_PRELOAD=$VESPA_MALLOC_SO $TIME ./vespamalloc_allocfree_shared_test_app 5 0 1 +LD_PRELOAD=$VESPA_MALLOC_SO $TIME ./vespamalloc_allocfree_shared_test_app 5 0 2 +LD_PRELOAD=$VESPA_MALLOC_SO $TIME ./vespamalloc_allocfree_shared_test_app 5 0 4 +LD_PRELOAD=$VESPA_MALLOC_SO $TIME ./vespamalloc_allocfree_shared_test_app 5 0 8 +LD_PRELOAD=$VESPA_MALLOC_SO $TIME ./vespamalloc_allocfree_shared_test_app 5 0 16 +LD_PRELOAD=$VESPA_MALLOC_SO $TIME ./vespamalloc_allocfree_shared_test_app 5 0 32 +LD_PRELOAD=$VESPA_MALLOC_SO $TIME ./vespamalloc_allocfree_shared_test_app 5 1 1 +LD_PRELOAD=$VESPA_MALLOC_SO $TIME ./vespamalloc_allocfree_shared_test_app 5 2 2 +LD_PRELOAD=$VESPA_MALLOC_SO $TIME ./vespamalloc_allocfree_shared_test_app 5 4 4 +LD_PRELOAD=$VESPA_MALLOC_SO $TIME ./vespamalloc_allocfree_shared_test_app 5 8 8 +LD_PRELOAD=$VESPA_MALLOC_SO $TIME ./vespamalloc_allocfree_shared_test_app 5 16 16 +LD_PRELOAD=$VESPA_MALLOC_SO $TIME ./vespamalloc_allocfree_shared_test_app 5 32 32 +LD_PRELOAD=$LIBDIR/libtcmalloc_minimal.so $TIME ./vespamalloc_allocfree_shared_test_app 5 1 0 +LD_PRELOAD=$LIBDIR/libtcmalloc_minimal.so $TIME ./vespamalloc_allocfree_shared_test_app 5 2 0 +LD_PRELOAD=$LIBDIR/libtcmalloc_minimal.so $TIME ./vespamalloc_allocfree_shared_test_app 5 4 0 
+LD_PRELOAD=$LIBDIR/libtcmalloc_minimal.so $TIME ./vespamalloc_allocfree_shared_test_app 5 8 0 +LD_PRELOAD=$LIBDIR/libtcmalloc_minimal.so $TIME ./vespamalloc_allocfree_shared_test_app 5 16 0 +LD_PRELOAD=$LIBDIR/libtcmalloc_minimal.so $TIME ./vespamalloc_allocfree_shared_test_app 5 32 0 +LD_PRELOAD=$LIBDIR/libtcmalloc_minimal.so $TIME ./vespamalloc_allocfree_shared_test_app 5 0 1 +LD_PRELOAD=$LIBDIR/libtcmalloc_minimal.so $TIME ./vespamalloc_allocfree_shared_test_app 5 0 2 +LD_PRELOAD=$LIBDIR/libtcmalloc_minimal.so $TIME ./vespamalloc_allocfree_shared_test_app 5 0 4 +LD_PRELOAD=$LIBDIR/libtcmalloc_minimal.so $TIME ./vespamalloc_allocfree_shared_test_app 5 0 8 +LD_PRELOAD=$LIBDIR/libtcmalloc_minimal.so $TIME ./vespamalloc_allocfree_shared_test_app 5 0 16 +LD_PRELOAD=$LIBDIR/libtcmalloc_minimal.so $TIME ./vespamalloc_allocfree_shared_test_app 5 0 32 +LD_PRELOAD=$LIBDIR/libtcmalloc_minimal.so $TIME ./vespamalloc_allocfree_shared_test_app 5 1 1 +LD_PRELOAD=$LIBDIR/libtcmalloc_minimal.so $TIME ./vespamalloc_allocfree_shared_test_app 5 2 2 +LD_PRELOAD=$LIBDIR/libtcmalloc_minimal.so $TIME ./vespamalloc_allocfree_shared_test_app 5 4 4 +LD_PRELOAD=$LIBDIR/libtcmalloc_minimal.so $TIME ./vespamalloc_allocfree_shared_test_app 5 8 8 +LD_PRELOAD=$LIBDIR/libtcmalloc_minimal.so $TIME ./vespamalloc_allocfree_shared_test_app 5 16 16 +LD_PRELOAD=$LIBDIR/libtcmalloc_minimal.so $TIME ./vespamalloc_allocfree_shared_test_app 5 32 32 +LD_PRELOAD=$LIBDIR/libjemalloc_mt.so $TIME ./vespamalloc_allocfree_shared_test_app 5 1 0 +LD_PRELOAD=$LIBDIR/libjemalloc_mt.so $TIME ./vespamalloc_allocfree_shared_test_app 5 2 0 +LD_PRELOAD=$LIBDIR/libjemalloc_mt.so $TIME ./vespamalloc_allocfree_shared_test_app 5 4 0 +LD_PRELOAD=$LIBDIR/libjemalloc_mt.so $TIME ./vespamalloc_allocfree_shared_test_app 5 8 0 +LD_PRELOAD=$LIBDIR/libjemalloc_mt.so $TIME ./vespamalloc_allocfree_shared_test_app 5 16 0 +LD_PRELOAD=$LIBDIR/libjemalloc_mt.so $TIME ./vespamalloc_allocfree_shared_test_app 5 32 0 
+LD_PRELOAD=$LIBDIR/libjemalloc_mt.so $TIME ./vespamalloc_allocfree_shared_test_app 5 0 1 +LD_PRELOAD=$LIBDIR/libjemalloc_mt.so $TIME ./vespamalloc_allocfree_shared_test_app 5 0 2 +LD_PRELOAD=$LIBDIR/libjemalloc_mt.so $TIME ./vespamalloc_allocfree_shared_test_app 5 0 4 +LD_PRELOAD=$LIBDIR/libjemalloc_mt.so $TIME ./vespamalloc_allocfree_shared_test_app 5 0 8 +LD_PRELOAD=$LIBDIR/libjemalloc_mt.so $TIME ./vespamalloc_allocfree_shared_test_app 5 0 16 +LD_PRELOAD=$LIBDIR/libjemalloc_mt.so $TIME ./vespamalloc_allocfree_shared_test_app 5 0 32 +LD_PRELOAD=$LIBDIR/libjemalloc_mt.so $TIME ./vespamalloc_allocfree_shared_test_app 5 1 1 +LD_PRELOAD=$LIBDIR/libjemalloc_mt.so $TIME ./vespamalloc_allocfree_shared_test_app 5 2 2 +LD_PRELOAD=$LIBDIR/libjemalloc_mt.so $TIME ./vespamalloc_allocfree_shared_test_app 5 4 4 +LD_PRELOAD=$LIBDIR/libjemalloc_mt.so $TIME ./vespamalloc_allocfree_shared_test_app 5 8 8 +LD_PRELOAD=$LIBDIR/libjemalloc_mt.so $TIME ./vespamalloc_allocfree_shared_test_app 5 16 16 +LD_PRELOAD=$LIBDIR/libjemalloc_mt.so $TIME ./vespamalloc_allocfree_shared_test_app 5 32 32 +LD_PRELOAD=$LIBDIR/libptmalloc3.so $TIME ./vespamalloc_allocfree_shared_test_app 5 1 0 +LD_PRELOAD=$LIBDIR/libptmalloc3.so $TIME ./vespamalloc_allocfree_shared_test_app 5 2 0 +LD_PRELOAD=$LIBDIR/libptmalloc3.so $TIME ./vespamalloc_allocfree_shared_test_app 5 4 0 +LD_PRELOAD=$LIBDIR/libptmalloc3.so $TIME ./vespamalloc_allocfree_shared_test_app 5 8 0 +LD_PRELOAD=$LIBDIR/libptmalloc3.so $TIME ./vespamalloc_allocfree_shared_test_app 5 16 0 +LD_PRELOAD=$LIBDIR/libptmalloc3.so $TIME ./vespamalloc_allocfree_shared_test_app 5 32 0 +LD_PRELOAD=$LIBDIR/libptmalloc3.so $TIME ./vespamalloc_allocfree_shared_test_app 5 0 1 +LD_PRELOAD=$LIBDIR/libptmalloc3.so $TIME ./vespamalloc_allocfree_shared_test_app 5 0 2 +LD_PRELOAD=$LIBDIR/libptmalloc3.so $TIME ./vespamalloc_allocfree_shared_test_app 5 0 4 +LD_PRELOAD=$LIBDIR/libptmalloc3.so $TIME ./vespamalloc_allocfree_shared_test_app 5 0 8 
+LD_PRELOAD=$LIBDIR/libptmalloc3.so $TIME ./vespamalloc_allocfree_shared_test_app 5 0 16 +LD_PRELOAD=$LIBDIR/libptmalloc3.so $TIME ./vespamalloc_allocfree_shared_test_app 5 0 32 +LD_PRELOAD=$LIBDIR/libptmalloc3.so $TIME ./vespamalloc_allocfree_shared_test_app 5 1 1 +LD_PRELOAD=$LIBDIR/libptmalloc3.so $TIME ./vespamalloc_allocfree_shared_test_app 5 2 2 +LD_PRELOAD=$LIBDIR/libptmalloc3.so $TIME ./vespamalloc_allocfree_shared_test_app 5 4 4 +LD_PRELOAD=$LIBDIR/libptmalloc3.so $TIME ./vespamalloc_allocfree_shared_test_app 5 8 8 +LD_PRELOAD=$LIBDIR/libptmalloc3.so $TIME ./vespamalloc_allocfree_shared_test_app 5 16 16 +LD_PRELOAD=$LIBDIR/libptmalloc3.so $TIME ./vespamalloc_allocfree_shared_test_app 5 32 32 +LD_PRELOAD=$LIBDIR/libnedmalloc.so $TIME ./vespamalloc_allocfree_shared_test_app 5 1 0 +LD_PRELOAD=$LIBDIR/libnedmalloc.so $TIME ./vespamalloc_allocfree_shared_test_app 5 2 0 +LD_PRELOAD=$LIBDIR/libnedmalloc.so $TIME ./vespamalloc_allocfree_shared_test_app 5 4 0 +LD_PRELOAD=$LIBDIR/libnedmalloc.so $TIME ./vespamalloc_allocfree_shared_test_app 5 8 0 +LD_PRELOAD=$LIBDIR/libnedmalloc.so $TIME ./vespamalloc_allocfree_shared_test_app 5 16 0 +LD_PRELOAD=$LIBDIR/libnedmalloc.so $TIME ./vespamalloc_allocfree_shared_test_app 5 32 0 +LD_PRELOAD=$LIBDIR/libnedmalloc.so $TIME ./vespamalloc_allocfree_shared_test_app 5 0 1 +LD_PRELOAD=$LIBDIR/libnedmalloc.so $TIME ./vespamalloc_allocfree_shared_test_app 5 0 2 +LD_PRELOAD=$LIBDIR/libnedmalloc.so $TIME ./vespamalloc_allocfree_shared_test_app 5 0 4 +LD_PRELOAD=$LIBDIR/libnedmalloc.so $TIME ./vespamalloc_allocfree_shared_test_app 5 0 8 +LD_PRELOAD=$LIBDIR/libnedmalloc.so $TIME ./vespamalloc_allocfree_shared_test_app 5 0 16 +LD_PRELOAD=$LIBDIR/libnedmalloc.so $TIME ./vespamalloc_allocfree_shared_test_app 5 0 32 +LD_PRELOAD=$LIBDIR/libnedmalloc.so $TIME ./vespamalloc_allocfree_shared_test_app 5 1 1 +LD_PRELOAD=$LIBDIR/libnedmalloc.so $TIME ./vespamalloc_allocfree_shared_test_app 5 2 2 +LD_PRELOAD=$LIBDIR/libnedmalloc.so $TIME 
./vespamalloc_allocfree_shared_test_app 5 4 4 +LD_PRELOAD=$LIBDIR/libnedmalloc.so $TIME ./vespamalloc_allocfree_shared_test_app 5 8 8 +LD_PRELOAD=$LIBDIR/libnedmalloc.so $TIME ./vespamalloc_allocfree_shared_test_app 5 16 16 +LD_PRELOAD=$LIBDIR/libnedmalloc.so $TIME ./vespamalloc_allocfree_shared_test_app 5 32 32 +LD_PRELOAD=$LIBDIR/libhoard.so $TIME ./vespamalloc_allocfree_shared_test_app 5 1 0 +LD_PRELOAD=$LIBDIR/libhoard.so $TIME ./vespamalloc_allocfree_shared_test_app 5 2 0 +LD_PRELOAD=$LIBDIR/libhoard.so $TIME ./vespamalloc_allocfree_shared_test_app 5 4 0 +LD_PRELOAD=$LIBDIR/libhoard.so $TIME ./vespamalloc_allocfree_shared_test_app 5 8 0 +LD_PRELOAD=$LIBDIR/libhoard.so $TIME ./vespamalloc_allocfree_shared_test_app 5 16 0 +LD_PRELOAD=$LIBDIR/libhoard.so $TIME ./vespamalloc_allocfree_shared_test_app 5 32 0 +LD_PRELOAD=$LIBDIR/libhoard.so $TIME ./vespamalloc_allocfree_shared_test_app 5 0 1 +LD_PRELOAD=$LIBDIR/libhoard.so $TIME ./vespamalloc_allocfree_shared_test_app 5 0 2 +LD_PRELOAD=$LIBDIR/libhoard.so $TIME ./vespamalloc_allocfree_shared_test_app 5 0 4 +LD_PRELOAD=$LIBDIR/libhoard.so $TIME ./vespamalloc_allocfree_shared_test_app 5 0 8 +LD_PRELOAD=$LIBDIR/libhoard.so $TIME ./vespamalloc_allocfree_shared_test_app 5 0 16 +LD_PRELOAD=$LIBDIR/libhoard.so $TIME ./vespamalloc_allocfree_shared_test_app 5 0 32 +LD_PRELOAD=$LIBDIR/libhoard.so $TIME ./vespamalloc_allocfree_shared_test_app 5 1 1 +LD_PRELOAD=$LIBDIR/libhoard.so $TIME ./vespamalloc_allocfree_shared_test_app 5 2 2 +LD_PRELOAD=$LIBDIR/libhoard.so $TIME ./vespamalloc_allocfree_shared_test_app 5 4 4 +LD_PRELOAD=$LIBDIR/libhoard.so $TIME ./vespamalloc_allocfree_shared_test_app 5 8 8 +LD_PRELOAD=$LIBDIR/libhoard.so $TIME ./vespamalloc_allocfree_shared_test_app 5 16 16 +LD_PRELOAD=$LIBDIR/libhoard.so $TIME ./vespamalloc_allocfree_shared_test_app 5 32 32 +LD_PRELOAD=$LIBDIR/libtlsf.so $TIME ./vespamalloc_allocfree_shared_test_app 5 1 0 +LD_PRELOAD=$LIBDIR/libtlsf.so $TIME ./vespamalloc_allocfree_shared_test_app 5 2 0 
+LD_PRELOAD=$LIBDIR/libtlsf.so $TIME ./vespamalloc_allocfree_shared_test_app 5 4 0 +LD_PRELOAD=$LIBDIR/libtlsf.so $TIME ./vespamalloc_allocfree_shared_test_app 5 8 0 +LD_PRELOAD=$LIBDIR/libtlsf.so $TIME ./vespamalloc_allocfree_shared_test_app 5 16 0 +LD_PRELOAD=$LIBDIR/libtlsf.so $TIME ./vespamalloc_allocfree_shared_test_app 5 32 0 +LD_PRELOAD=$LIBDIR/libtlsf.so $TIME ./vespamalloc_allocfree_shared_test_app 5 0 1 +LD_PRELOAD=$LIBDIR/libtlsf.so $TIME ./vespamalloc_allocfree_shared_test_app 5 0 2 +LD_PRELOAD=$LIBDIR/libtlsf.so $TIME ./vespamalloc_allocfree_shared_test_app 5 0 4 +LD_PRELOAD=$LIBDIR/libtlsf.so $TIME ./vespamalloc_allocfree_shared_test_app 5 0 8 +LD_PRELOAD=$LIBDIR/libtlsf.so $TIME ./vespamalloc_allocfree_shared_test_app 5 0 16 +LD_PRELOAD=$LIBDIR/libtlsf.so $TIME ./vespamalloc_allocfree_shared_test_app 5 0 32 +LD_PRELOAD=$LIBDIR/libtlsf.so $TIME ./vespamalloc_allocfree_shared_test_app 5 1 1 +LD_PRELOAD=$LIBDIR/libtlsf.so $TIME ./vespamalloc_allocfree_shared_test_app 5 2 2 +LD_PRELOAD=$LIBDIR/libtlsf.so $TIME ./vespamalloc_allocfree_shared_test_app 5 4 4 +LD_PRELOAD=$LIBDIR/libtlsf.so $TIME ./vespamalloc_allocfree_shared_test_app 5 8 8 +LD_PRELOAD=$LIBDIR/libtlsf.so $TIME ./vespamalloc_allocfree_shared_test_app 5 16 16 +LD_PRELOAD=$LIBDIR/libtlsf.so $TIME ./vespamalloc_allocfree_shared_test_app 5 32 32 diff --git a/vespamalloc/src/tests/allocfree/allocfree_test.sh b/vespamalloc/src/tests/allocfree/allocfree_test.sh new file mode 100755 index 00000000000..ac864dc891b --- /dev/null +++ b/vespamalloc/src/tests/allocfree/allocfree_test.sh @@ -0,0 +1,17 @@ +#!/bin/bash + +TIME=/usr/bin/time + +VESPA_MALLOC_SO=../../../src/vespamalloc/libvespamalloc.so +VESPA_MALLOC_SO_D=../../../src/vespamalloc/libvespamalloc_vespamallocd.so + +LD_PRELOAD=$VESPA_MALLOC_SO ./vespamalloc_realloc_test_app +LD_PRELOAD=$VESPA_MALLOC_SO_D ./vespamalloc_realloc_test_app +$TIME ./vespamalloc_linklist_test_app 3 +LD_PRELOAD=$VESPA_MALLOC_SO $TIME 
./vespamalloc_allocfree_shared_test_app 3 +LD_PRELOAD=$VESPA_MALLOC_SO_D $TIME ./vespamalloc_allocfree_shared_test_app 3 +$TIME ./vespamalloc_allocfree_shared_test_app 3 +VESPA_MALLOC_MADVISE_LIMIT=0x200000 LD_PRELOAD=$VESPA_MALLOC_SO_D $TIME ./vespamalloc_allocfree_shared_test_app 3 +LD_PRELOAD=$VESPA_MALLOC_SO_D $TIME ./vespamalloc_allocfree_shared_test_app 3 +VESPA_MALLOC_MADVISE_LIMIT=0x200000 VESPA_MALLOC_HUGEPAGES=on LD_PRELOAD=$VESPA_MALLOC_SO_D $TIME ./vespamalloc_allocfree_shared_test_app 3 +VESPA_MALLOC_HUGEPAGES=on LD_PRELOAD=$VESPA_MALLOC_SO_D $TIME ./vespamalloc_allocfree_shared_test_app 3 diff --git a/vespamalloc/src/tests/allocfree/creatingmanythreads.cpp b/vespamalloc/src/tests/allocfree/creatingmanythreads.cpp new file mode 100644 index 00000000000..53de3f274cc --- /dev/null +++ b/vespamalloc/src/tests/allocfree/creatingmanythreads.cpp @@ -0,0 +1,39 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#include <vespa/log/log.h> +#include <vespa/fastos/fastos.h> +#include <vespa/vespalib/testkit/testapp.h> + +LOG_SETUP("creatingmanythreads_test"); + +TEST_SETUP(Test); + +void * thread_alloc(void * arg) +{ + char * v = new char [*static_cast<int *>(arg)]; + delete [] v; + return NULL; +} + +int Test::Main() { + int numThreads(10000); + int allocSize(256); + if (_argc > 1) { + numThreads = atoi(_argv[1]); + } + if (_argc > 2) { + allocSize = atoi(_argv[2]); + } + TEST_INIT("creatingmanythreads_test"); + + LOG(info, "Will create and run %d threads each allocating a single block of memory of %d size\n", numThreads, allocSize); + for (int i(0); i < numThreads; ) { + for (int j(0); (i < numThreads) && j < 10000; i++, j++) { + pthread_t thread; + ASSERT_EQUAL(0, pthread_create(&thread, NULL, thread_alloc, &allocSize)); + ASSERT_EQUAL(0, pthread_join(thread, NULL)); + } + LOG(info, "Completed %d tests", i); + } + + TEST_DONE(); +} diff --git 
a/vespamalloc/src/tests/allocfree/generate_testtable.sh b/vespamalloc/src/tests/allocfree/generate_testtable.sh new file mode 100755 index 00000000000..5763024c086 --- /dev/null +++ b/vespamalloc/src/tests/allocfree/generate_testtable.sh @@ -0,0 +1,22 @@ +#!/bin/sh +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +base=$1 + +echo "No threads test. Test difference of static linkage, shared and debug versus glibc" +cat $base | ./timeusage.sh > t1 +cat $base | grep time | grep allocfree_ | cut -d'5' -f2 | awk '{print $1*2 + $2 ";"}' > t2 +cat $base | grep "Total" | awk '{print $2}' > t3 +paste testnames.all testtype.all t2 t1 t3 > t4 + +for t in "cross thread" "same thread" "same + cross" +do + echo $t + + for f in "glibc" "vespamallostatic" "vespamalloc" "tcmalloc" "jemalloc" "ptmalloc3" "nedmalloc" "hoard" "tlsf" + do + grep "$t" t4 | grep "$f" | cut -d';' -f7 | xargs echo $f | sed "s/ /;/g" + done +done + +cat t4 + diff --git a/vespamalloc/src/tests/allocfree/linklist.cpp b/vespamalloc/src/tests/allocfree/linklist.cpp new file mode 100644 index 00000000000..5ad31d481f9 --- /dev/null +++ b/vespamalloc/src/tests/allocfree/linklist.cpp @@ -0,0 +1,188 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+#include <vespa/log/log.h> +#include <vespa/fastos/fastos.h> +#include <vespa/vespalib/testkit/testapp.h> +#include "producerconsumer.h" +#include <vespamalloc/malloc/allocchunk.h> +#include <vespamalloc/util/callstack.h> + +using vespalib::Consumer; +using vespalib::Producer; +using vespalib::ProducerConsumer; + +LOG_SETUP("linklist_test"); + +TEST_SETUP(Test); + +//----------------------------------------------------------------------------- + +template <size_t MinSizeClassC, size_t MaxSizeClassMultiAllocC> +class MemBlockT : public vespamalloc::CommonT<MinSizeClassC> +{ +public: + typedef vespamalloc::StackEntry<vespamalloc::StackReturnEntry> Stack; + enum { + MaxSizeClassMultiAlloc = MaxSizeClassMultiAllocC, + SizeClassSpan = (MaxSizeClassMultiAllocC-MinSizeClassC) + }; + MemBlockT() : _ptr(NULL) { } + MemBlockT(void * p) : _ptr(p) { } + MemBlockT(void * p, size_t /*sz*/) : _ptr(p) { } + void *ptr() { return _ptr; } + const void *ptr() const { return _ptr; } + bool validAlloc() const { return _ptr != NULL; } + bool validFree() const { return _ptr != NULL; } + void setExact(size_t ) { } + void alloc(bool ) { } + void threadId(int ) { } + void free() { } + size_t size() const { return 0; } + bool allocated() const { return false; } + int threadId() const { return 0; } + void info(FILE *, unsigned level=0) const { level = 0; } + Stack * callStack() { return NULL; } + size_t callStackLen() const { return 0; } + + static size_t adjustSize(size_t sz) { return sz; } + static size_t unAdjustSize(size_t sz) { return sz; } + static void dumpInfo(size_t level); +private: + void * _ptr; +}; + +typedef MemBlockT<5, 20> DummyMemBlock; + +typedef vespamalloc::AFList<DummyMemBlock> List; + +const size_t NumBlocks((64*(32+2)+16)*2); + +List globalList[NumBlocks]; + +class LinkIn : public Consumer { +public: + LinkIn(List::HeadPtr & list, uint32_t maxQueue, bool inverse); +private: + List::HeadPtr & _head; + virtual void consume(void * p) { + List * l((List *) p); + if ( ! 
((l >= &globalList[0]) && (l < &globalList[NumBlocks]))) { abort(); } + List::linkIn(_head, l, l); + } +}; + +LinkIn::LinkIn(List::HeadPtr & list, uint32_t maxQueue, bool inverse) : + Consumer (maxQueue, inverse), + _head(list) +{ +} + +//----------------------------------------------------------------------------- + +class LinkOut : public Producer { +public: + LinkOut(List::HeadPtr & list, uint32_t cnt, LinkIn &target) + : Producer(cnt, target), _head(list) {} +private: + List::HeadPtr & _head; + virtual void * produce() { + void *p = List::linkOut(_head); + List *l((List *)p); + if ( ! ((l >= &globalList[0]) && (l < &globalList[NumBlocks]))) { abort(); } + return p; + } +}; + +//----------------------------------------------------------------------------- + +class LinkInOutAndIn : public ProducerConsumer { +public: + LinkInOutAndIn(List::HeadPtr & list, uint32_t cnt, bool inverse) + : ProducerConsumer(cnt, inverse), _head(list) { } +private: + List::HeadPtr & _head; + virtual void * produce() { + void *p = List::linkOut(_head); + List *l((List *)p); + if ( !((l >= &globalList[0]) && (l < &globalList[NumBlocks]))) { abort(); } + return p; + } + virtual void consume(void * p) { + List * l((List *) p); + if ( !((l >= &globalList[0]) && (l < &globalList[NumBlocks]))) { abort(); } + List::linkIn(_head, l, l); + } +}; + +//----------------------------------------------------------------------------- + +int Test::Main() { + int duration = 10; + if (_argc > 1) { + duration = atoi(_argv[1]); + } + TEST_INIT("allocfree_test"); + + ASSERT_EQUAL(1024ul, sizeof(List)); + + FastOS_ThreadPool pool(128000); + List::HeadPtr sharedList; + sharedList._tag = 1; + List::init(); + List::enableThreadSupport(); + fprintf(stderr, "Start populating list\n"); + for (size_t i=0; i < NumBlocks; i++) { + List * l(&globalList[i]); + List::linkIn(sharedList, l, l); + } + fprintf(stderr, "Finished populating list with %ld elements\n", NumBlocks); + fprintf(stderr, "Start verifying result 
1.\n"); + for (size_t i=0; i < NumBlocks; i++) { + List *l = List::linkOut(sharedList); + ASSERT_TRUE((l >= &globalList[0]) && (l < &globalList[NumBlocks])); + } + List *n = List::linkOut(sharedList); + ASSERT_TRUE(n == NULL); + + sharedList._tag = 1; + fprintf(stderr, "Start populating list\n"); + for (size_t i=0; i < NumBlocks; i++) { + List * l(&globalList[i]); + List::linkIn(sharedList, l, l); + } + fprintf(stderr, "Finished populating list with %ld elements\n", NumBlocks); + LinkIn c1(sharedList, 64, false); + LinkIn c2(sharedList, 64, true); + LinkOut p1(sharedList, 32, c1); + LinkOut p2(sharedList, 32, c2); + LinkInOutAndIn pc1(sharedList, 16, false); + LinkInOutAndIn pc2(sharedList, 16, true); + + ASSERT_TRUE(pool.NewThread(&c1, NULL) != NULL); + ASSERT_TRUE(pool.NewThread(&c2, NULL) != NULL); + ASSERT_TRUE(pool.NewThread(&p1, NULL) != NULL); + ASSERT_TRUE(pool.NewThread(&p2, NULL) != NULL); + ASSERT_TRUE(pool.NewThread(&pc1, NULL) != NULL); + ASSERT_TRUE(pool.NewThread(&pc2, NULL) != NULL); + + for (; duration > 0; --duration) { + LOG(info, "%d seconds left...", duration); + FastOS_Thread::Sleep(1000); + } + pool.Close(); + fprintf(stderr, "Did (%" PRIu64 " + %" PRIu64 ") = %" PRIu64 " linkIn operations\n", + c1.operations(), c2.operations(), c1.operations() + c2.operations()); + fprintf(stderr, "Did (%" PRIu64 " + %" PRIu64 ") = %" PRIu64 " linkOut operations\n", + p1.operations(), p2.operations(), p1.operations() + p2.operations()); + fprintf(stderr, "Did (%" PRIu64 " + %" PRIu64 ") = %" PRIu64 " linkInOut operations\n", + pc1.operationsConsumed(), pc2.operationsConsumed(), pc1.operationsConsumed() + pc2.operationsConsumed()); + fprintf(stderr, "Did %" PRIu64 " Total operations\n", + c1.operations() + c2.operations() + p1.operations() + p2.operations() + pc1.operationsConsumed() + pc2.operationsConsumed()); + fprintf(stderr, "Start verifying result 2.\n"); + for (size_t i=0; i < NumBlocks; i++) { + List *l = List::linkOut(sharedList); + ASSERT_TRUE((l >= 
&globalList[0]) && (l < &globalList[NumBlocks]));
+    }
+    n = List::linkOut(sharedList);
+    ASSERT_TRUE(n == NULL);
+    TEST_DONE();
+}
diff --git a/vespamalloc/src/tests/allocfree/producerconsumer.cpp b/vespamalloc/src/tests/allocfree/producerconsumer.cpp
new file mode 100644
index 00000000000..38c52993762
--- /dev/null
+++ b/vespamalloc/src/tests/allocfree/producerconsumer.cpp
@@ -0,0 +1,95 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include "producerconsumer.h"
+
+namespace vespalib {
+
+Consumer::Consumer(uint32_t maxQueue, bool inverse) :
+    _queue(NULL, maxQueue), // NULL is the sentinel dequeue() returns once the queue is closed and drained
+    _inverse(inverse),
+    _operations(0)
+{
+}
+
+Consumer::~Consumer()
+{
+}
+
+Producer::Producer(uint32_t cnt, Consumer &target) :
+    _target(target),
+    _cnt(cnt), // number of produce() calls batched into each MemList
+    _operations(0)
+{
+}
+
+Producer::~Producer()
+{
+}
+
+ProducerConsumer::ProducerConsumer(uint32_t cnt, bool inverse) :
+    _cnt(cnt),
+    _inverse(inverse),
+    _operationsConsumed(0),
+    _operationsProduced(0)
+{
+}
+
+ProducerConsumer::~ProducerConsumer()
+{
+}
+
+
+// Consumer thread body: drain batches until the queue is closed (NULL sentinel).
+void Consumer::Run(FastOS_ThreadInterface *, void *) {
+    for (;;) {
+        MemList ml = _queue.dequeue(); // blocks until a batch arrives or close() was called
+        if (ml == NULL) {
+            return;
+        }
+        if (_inverse) { // back-to-front consumption to vary the free() ordering seen by the allocator
+            for (uint32_t i = ml->size(); i > 0; --i) {
+                consume((*ml)[i - 1]);
+                _operations++;
+            }
+        } else {
+            for (uint32_t i = 0; i < ml->size(); ++i) {
+                consume((*ml)[i]);
+                _operations++;
+            }
+        }
+        delete ml;
+    }
+}
+
+// Producer thread body: build batches of _cnt allocations and hand them to the consumer
+// until the thread pool raises the break flag; then close the queue so the consumer exits.
+void Producer::Run(FastOS_ThreadInterface *t, void *) {
+    while (!t->GetBreakFlag()) {
+        MemList ml = new MemListImpl();
+        for (uint32_t i = 0; i < _cnt; ++i) {
+            ml->push_back(produce());
+            _operations++;
+        }
+        _target.enqueue(ml);
+    }
+    _target.close(); // wakes the consumer so it can drain remaining batches and return
+}
+
+// Same-thread variant: produce and consume each batch locally (no queue hand-off).
+void ProducerConsumer::Run(FastOS_ThreadInterface *t, void *) {
+    while (!t->GetBreakFlag()) {
+        MemListImpl ml;
+        for (uint32_t i = 0; i < _cnt; ++i) {
+            ml.push_back(produce());
+            _operationsProduced++;
+        }
+        if (_inverse) {
+            for (uint32_t i = ml.size(); i > 0; --i) {
+                consume(ml[i - 1]);
+                _operationsConsumed++;
+            }
+        } else {
+            for (uint32_t i = 0; i < ml.size(); ++i) {
+                consume(ml[i]);
+                _operationsConsumed++;
+            }
+        }
+    }
+}
+
+}
diff --git a/vespamalloc/src/tests/allocfree/producerconsumer.h b/vespamalloc/src/tests/allocfree/producerconsumer.h
new file mode 100644
index 00000000000..daa0173af98
--- /dev/null
+++ b/vespamalloc/src/tests/allocfree/producerconsumer.h
@@ -0,0 +1,58 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+#include <vector>
+#include "queue.h"
+
+namespace vespalib {
+
+typedef std::vector<void *> MemListImpl; // one batch of raw allocations
+typedef MemListImpl * MemList;
+typedef vespalib::Queue<MemList> MemQueue;
+
+// Runnable that frees batches of memory produced by another thread.
+// Subclasses define consume() (how each pointer is released).
+class Consumer : public FastOS_Runnable {
+private:
+    MemQueue _queue;
+    bool _inverse; // when true, each batch is consumed back-to-front
+    uint64_t _operations; // total pointers consumed; read after Run() finishes
+    virtual void consume(void *) = 0;
+public:
+    Consumer(uint32_t maxQueue, bool inverse);
+    virtual ~Consumer();
+    void enqueue(const MemList &mem) { _queue.enqueue(mem); }
+    void close() { _queue.close(); }
+    void Run(FastOS_ThreadInterface *t, void *);
+    uint64_t operations() const { return _operations; }
+};
+
+// Runnable that allocates batches of _cnt pointers and hands them to a Consumer.
+// Subclasses define produce() (how each pointer is allocated).
+class Producer : public FastOS_Runnable {
+private:
+    Consumer & _target;
+    uint32_t _cnt; // allocations per batch
+    uint64_t _operations; // total pointers produced; read after Run() finishes
+    virtual void * produce() = 0;
+public:
+    Producer(uint32_t cnt, Consumer &target);
+    virtual ~Producer();
+    void Run(FastOS_ThreadInterface *t, void *);
+    uint64_t operations() const { return _operations; }
+};
+
+// Runnable that both produces and consumes on the same thread (no cross-thread hand-off).
+class ProducerConsumer : public FastOS_Runnable {
+private:
+    uint32_t _cnt; // allocations per batch
+    bool _inverse; // when true, each batch is consumed back-to-front
+    uint64_t _operationsConsumed;
+    uint64_t _operationsProduced;
+    virtual void * produce() = 0;
+    virtual void consume(void *) = 0;
+public:
+    ProducerConsumer(uint32_t cnt, bool inverse);
+    virtual ~ProducerConsumer();
+    void Run(FastOS_ThreadInterface *t, void *);
+    uint64_t operationsConsumed() const { return _operationsConsumed; }
+    uint64_t operationsProduced() const { return _operationsProduced; }
+};
+
+}
+
diff --git a/vespamalloc/src/tests/allocfree/queue.h b/vespamalloc/src/tests/allocfree/queue.h
new file mode 100644
index 00000000000..ce5c18d33f2
--- /dev/null
+++ b/vespamalloc/src/tests/allocfree/queue.h
@@ -0,0 +1,86 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+#include <vespa/vespalib/util/guard.h>
+#include <vespa/vespalib/util/sync.h>
+#include <queue>
+
+namespace vespalib {
+
+template <typename T>
+class Queue { // bounded, closable FIFO guarded by one Monitor; dequeue() yields _nil once closed and drained
+private:
+    std::queue<T> _q;
+    Monitor _cond;
+    int _waitRead; // number of readers currently blocked in dequeue()
+    int _waitWrite; // number of writers currently blocked in enqueue()
+    uint32_t _maxSize; // capacity bound enforced by enqueue()
+    bool _closed;
+    T _nil; // sentinel value returned by dequeue() after close
+    Queue(const Queue &); // non-copyable (declared, never defined)
+    Queue &operator=(const Queue &);
+public:
+    Queue(const T &nil, uint32_t maxSize);
+    ~Queue();
+    void enqueue(const T &entry);
+    void close();
+    T dequeue();
+};
+
+template <typename T>
+Queue<T>::Queue(const T &nil, uint32_t maxSize) :
+    _q(),
+    _cond(),
+    _waitRead(0),
+    _waitWrite(0),
+    _maxSize(maxSize),
+    _closed(false),
+    _nil(nil)
+{
+}
+
+template <typename T>
+Queue<T>::~Queue()
+{
+}
+
+// Blocks while the queue is full. NOTE(review): close() does not wake blocked
+// writers, so producers must stop enqueueing before/at close — confirm callers do.
+template <typename T>
+void Queue<T>::enqueue(const T &entry) {
+    MonitorGuard guard(_cond);
+    while (_q.size() >= _maxSize) {
+        CounterGuard cntGuard(_waitWrite); // counts this writer as waiting for the scope of the wait
+        guard.wait();
+    }
+    _q.push(entry);
+    if (_waitRead > 0) {
+        guard.signal();
+    }
+}
+template <typename T>
+void Queue<T>::close() {
+    MonitorGuard guard(_cond);
+    _closed = true;
+    if (_waitRead > 0) {
+        guard.signal(); // wakes one blocked reader; presumably one close() per producer covers all readers — confirm
+    }
+}
+// Returns the next element, or _nil once the queue is closed and empty.
+template <typename T>
+T Queue<T>::dequeue() {
+    MonitorGuard guard(_cond);
+    while (_q.empty() && !_closed) {
+        CounterGuard cntGuard(_waitRead);
+        guard.wait();
+    }
+    if (_q.empty()) {
+        return _nil; // closed and drained
+    }
+    T tmp = _q.front();
+    _q.pop();
+    if (_waitWrite > 0) {
+        guard.signal(); // a slot opened up; wake one blocked writer
+    }
+    return tmp;
+}
+
+}
+
diff --git a/vespamalloc/src/tests/allocfree/realloc.cpp b/vespamalloc/src/tests/allocfree/realloc.cpp
new file mode 100644
index 00000000000..8cfd50d0132
--- /dev/null
+++ b/vespamalloc/src/tests/allocfree/realloc.cpp
@@
-0,0 +1,23 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#include <vespa/log/log.h> +#include <vespa/fastos/fastos.h> +#include <vespa/vespalib/testkit/testapp.h> + +LOG_SETUP("realloc_test"); + +TEST_SETUP(Test); + +int Test::Main() { + char * v = static_cast<char *>(malloc(0x400001)); + char * nv = static_cast<char *>(realloc(v, 0x500001)); + ASSERT_TRUE(v == nv); + v = static_cast<char *>(realloc(nv, 0x600001)); + ASSERT_TRUE(v != nv); + free(v); + + char *t = static_cast<char *>(malloc(70)); + free (t+7); + t = static_cast<char *>(malloc(0x400001)); + free (t+7); + return 0; +} diff --git a/vespamalloc/src/tests/allocfree/testnames.all b/vespamalloc/src/tests/allocfree/testnames.all new file mode 100644 index 00000000000..eb51eeefa31 --- /dev/null +++ b/vespamalloc/src/tests/allocfree/testnames.all @@ -0,0 +1,162 @@ +glibc; +glibc; +glibc; +glibc; +glibc; +glibc; +glibc; +glibc; +glibc; +glibc; +glibc; +glibc; +glibc; +glibc; +glibc; +glibc; +glibc; +glibc; +vespamallostatic; +vespamallostatic; +vespamallostatic; +vespamallostatic; +vespamallostatic; +vespamallostatic; +vespamallostatic; +vespamallostatic; +vespamallostatic; +vespamallostatic; +vespamallostatic; +vespamallostatic; +vespamallostatic; +vespamallostatic; +vespamallostatic; +vespamallostatic; +vespamallostatic; +vespamallostatic; +vespamalloc; +vespamalloc; +vespamalloc; +vespamalloc; +vespamalloc; +vespamalloc; +vespamalloc; +vespamalloc; +vespamalloc; +vespamalloc; +vespamalloc; +vespamalloc; +vespamalloc; +vespamalloc; +vespamalloc; +vespamalloc; +vespamalloc; +vespamalloc; +tcmalloc; +tcmalloc; +tcmalloc; +tcmalloc; +tcmalloc; +tcmalloc; +tcmalloc; +tcmalloc; +tcmalloc; +tcmalloc; +tcmalloc; +tcmalloc; +tcmalloc; +tcmalloc; +tcmalloc; +tcmalloc; +tcmalloc; +tcmalloc; +jemalloc; +jemalloc; +jemalloc; +jemalloc; +jemalloc; +jemalloc; +jemalloc; +jemalloc; +jemalloc; +jemalloc; +jemalloc; +jemalloc; +jemalloc; +jemalloc; 
+jemalloc; +jemalloc; +jemalloc; +jemalloc; +ptmalloc3; +ptmalloc3; +ptmalloc3; +ptmalloc3; +ptmalloc3; +ptmalloc3; +ptmalloc3; +ptmalloc3; +ptmalloc3; +ptmalloc3; +ptmalloc3; +ptmalloc3; +ptmalloc3; +ptmalloc3; +ptmalloc3; +ptmalloc3; +ptmalloc3; +ptmalloc3; +nedmalloc; +nedmalloc; +nedmalloc; +nedmalloc; +nedmalloc; +nedmalloc; +nedmalloc; +nedmalloc; +nedmalloc; +nedmalloc; +nedmalloc; +nedmalloc; +nedmalloc; +nedmalloc; +nedmalloc; +nedmalloc; +nedmalloc; +nedmalloc; +hoard; +hoard; +hoard; +hoard; +hoard; +hoard; +hoard; +hoard; +hoard; +hoard; +hoard; +hoard; +hoard; +hoard; +hoard; +hoard; +hoard; +hoard; +tlsf; +tlsf; +tlsf; +tlsf; +tlsf; +tlsf; +tlsf; +tlsf; +tlsf; +tlsf; +tlsf; +tlsf; +tlsf; +tlsf; +tlsf; +tlsf; +tlsf; +tlsf; diff --git a/vespamalloc/src/tests/allocfree/testtype.all b/vespamalloc/src/tests/allocfree/testtype.all new file mode 100644 index 00000000000..a70eb05bde7 --- /dev/null +++ b/vespamalloc/src/tests/allocfree/testtype.all @@ -0,0 +1,162 @@ +cross thread; +cross thread; +cross thread; +cross thread; +cross thread; +cross thread; +same thread; +same thread; +same thread; +same thread; +same thread; +same thread; +same + cross; +same + cross; +same + cross; +same + cross; +same + cross; +same + cross; +cross thread; +cross thread; +cross thread; +cross thread; +cross thread; +cross thread; +same thread; +same thread; +same thread; +same thread; +same thread; +same thread; +same + cross; +same + cross; +same + cross; +same + cross; +same + cross; +same + cross; +cross thread; +cross thread; +cross thread; +cross thread; +cross thread; +cross thread; +same thread; +same thread; +same thread; +same thread; +same thread; +same thread; +same + cross; +same + cross; +same + cross; +same + cross; +same + cross; +same + cross; +cross thread; +cross thread; +cross thread; +cross thread; +cross thread; +cross thread; +same thread; +same thread; +same thread; +same thread; +same thread; +same thread; +same + cross; +same + cross; +same + cross; 
+same + cross; +same + cross; +same + cross; +cross thread; +cross thread; +cross thread; +cross thread; +cross thread; +cross thread; +same thread; +same thread; +same thread; +same thread; +same thread; +same thread; +same + cross; +same + cross; +same + cross; +same + cross; +same + cross; +same + cross; +cross thread; +cross thread; +cross thread; +cross thread; +cross thread; +cross thread; +same thread; +same thread; +same thread; +same thread; +same thread; +same thread; +same + cross; +same + cross; +same + cross; +same + cross; +same + cross; +same + cross; +cross thread; +cross thread; +cross thread; +cross thread; +cross thread; +cross thread; +same thread; +same thread; +same thread; +same thread; +same thread; +same thread; +same + cross; +same + cross; +same + cross; +same + cross; +same + cross; +same + cross; +cross thread; +cross thread; +cross thread; +cross thread; +cross thread; +cross thread; +same thread; +same thread; +same thread; +same thread; +same thread; +same thread; +same + cross; +same + cross; +same + cross; +same + cross; +same + cross; +same + cross; +cross thread; +cross thread; +cross thread; +cross thread; +cross thread; +cross thread; +same thread; +same thread; +same thread; +same thread; +same thread; +same thread; +same + cross; +same + cross; +same + cross; +same + cross; +same + cross; +same + cross; diff --git a/vespamalloc/src/tests/allocfree/timeusage.sh b/vespamalloc/src/tests/allocfree/timeusage.sh new file mode 100755 index 00000000000..58fdc47104a --- /dev/null +++ b/vespamalloc/src/tests/allocfree/timeusage.sh @@ -0,0 +1,3 @@ +#!/bin/sh +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+grep "CPU" |grep "elapsed" | sed "s/user / user /g" | sed "s/system / system /g" | sed "s/elapsed / elapsed /g" | sed "s/CPU / CPU /g" | awk '{print $1 ";\t" $3 ";\t" $7 ";"}' diff --git a/vespamalloc/src/tests/allocfree/vespamalloc.conf b/vespamalloc/src/tests/allocfree/vespamalloc.conf new file mode 100644 index 00000000000..5c82d46d94b --- /dev/null +++ b/vespamalloc/src/tests/allocfree/vespamalloc.conf @@ -0,0 +1,13 @@ +#Config file for vespa malloc +#loglevel = 0 should mean no logging. Only level 1 is implemented. +# logfile vespamalloc.log # default(stderr) This is the file to where log is written (stderr, stdout, filename) +sigprof_loglevel 2 # default(0) Loglevel used at SIGPROF signal. +atend_loglevel 2 # default(1) Loglevel used when application stops. +atnomem_loglevel 2 # default(1) Loglevel used when datasegment is exhausted. +atdoubledelete_loglevel 2 # default(1) Loglevel used when vespa_malloc discovers a double delete. +atinvalid_loglevel 2 # default(1) Loglevel used when vespa_malloc discovers logical error. +bigsegment_loglevel 0 # default(1) Loglevel used when datasegment passes a boundary. +bigsegment_limit 0x80000000 # default(0x20000000) First level the datasegment must reach before logging is started +bigsegment_increment 0x10000000 # default(0x4000000) At what increment it will log next time. +bigblocklimit 0x800000 # default(0x800000) Limit for when to log new/deletes wuth stack trace. 
Only mallocdst.so +allocs2show 8 diff --git a/vespamalloc/src/tests/doubledelete/.gitignore b/vespamalloc/src/tests/doubledelete/.gitignore new file mode 100644 index 00000000000..0ac32be10fc --- /dev/null +++ b/vespamalloc/src/tests/doubledelete/.gitignore @@ -0,0 +1,7 @@ +.depend +Makefile +doubledelete_test +doubledelete_testd +expectsignal +vespamalloc_doubledelete_test_app +vespamalloc_expectsignal_app diff --git a/vespamalloc/src/tests/doubledelete/CMakeLists.txt b/vespamalloc/src/tests/doubledelete/CMakeLists.txt new file mode 100644 index 00000000000..e38f163148a --- /dev/null +++ b/vespamalloc/src/tests/doubledelete/CMakeLists.txt @@ -0,0 +1,12 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +vespa_add_executable(vespamalloc_doubledelete_test_app + SOURCES + doubledelete.cpp + DEPENDS +) +vespa_add_test(NAME vespamalloc_doubledelete_test_app NO_VALGRIND COMMAND sh doubledelete_test.sh) +vespa_add_executable(vespamalloc_expectsignal_app + SOURCES + expectsignal.cpp + DEPENDS +) diff --git a/vespamalloc/src/tests/doubledelete/DESC b/vespamalloc/src/tests/doubledelete/DESC new file mode 100644 index 00000000000..004492d6b82 --- /dev/null +++ b/vespamalloc/src/tests/doubledelete/DESC @@ -0,0 +1 @@ +Test that double delete is detected by vespamallocdxxxx. diff --git a/vespamalloc/src/tests/doubledelete/FILES b/vespamalloc/src/tests/doubledelete/FILES new file mode 100644 index 00000000000..3beebbcb132 --- /dev/null +++ b/vespamalloc/src/tests/doubledelete/FILES @@ -0,0 +1 @@ +doubledelete.cpp diff --git a/vespamalloc/src/tests/doubledelete/doubledelete.cpp b/vespamalloc/src/tests/doubledelete/doubledelete.cpp new file mode 100644 index 00000000000..954e2d90bd2 --- /dev/null +++ b/vespamalloc/src/tests/doubledelete/doubledelete.cpp @@ -0,0 +1,11 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+#include <stdlib.h>
+
+int main(int argc, char *argv[])
+{
+    (void) argc;
+    (void) argv;
+    char * a = new char [100]; // NOTE(review): scalar delete on array-new below — presumably deliberate for the detector; confirm
+    delete a;
+    delete a; // intentional double delete: the vespamallocd build is expected to detect this and abort
+}
diff --git a/vespamalloc/src/tests/doubledelete/doubledelete_test.sh b/vespamalloc/src/tests/doubledelete/doubledelete_test.sh
new file mode 100755
index 00000000000..a43b5e3a406
--- /dev/null
+++ b/vespamalloc/src/tests/doubledelete/doubledelete_test.sh
@@ -0,0 +1,6 @@
+#!/bin/bash
+
+LD_PRELOAD=../../../src/vespamalloc/libvespamalloc.so ./vespamalloc_doubledelete_test_app
+
+ulimit -c 0
+./vespamalloc_expectsignal_app 6 "LD_PRELOAD=../../../src/vespamalloc/libvespamalloc_vespamallocd.so ./vespamalloc_doubledelete_test_app"
diff --git a/vespamalloc/src/tests/doubledelete/expectsignal.cpp b/vespamalloc/src/tests/doubledelete/expectsignal.cpp
new file mode 100644
index 00000000000..0b2d5e154c4
--- /dev/null
+++ b/vespamalloc/src/tests/doubledelete/expectsignal.cpp
@@ -0,0 +1,57 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/log/log.h> +#include <vespa/fastos/fastos.h> +#include <vespa/vespalib/testkit/testapp.h> +#include <vespa/vespalib/util/slaveproc.h> + +LOG_SETUP("expectsignal_test"); + +using namespace vespalib; + +class Test : public TestApp +{ +public: + int Main(); +private: + virtual bool useProcessStarter() const { return true; } +}; + +int Test::Main() +{ + TEST_INIT("expectsignal_test"); + + EXPECT_EQUAL(_argc, 3); + ASSERT_TRUE(_argc == 3); + + int retval = strtol(_argv[1], NULL, 0); + + fprintf(stderr, "argc=%d : Running '%s' expecting signal %d\n", _argc, _argv[2], retval); + + SlaveProc cmd(_argv[2]); + for(std::string line; cmd.readLine(line, 60000);) { + fprintf(stdout, "%s\n", line.c_str()); + } + + ASSERT_TRUE(cmd.wait(60000)); + + int exitCode = cmd.getExitCode(); + + if (exitCode == 65535) { + fprintf(stderr, "[ERROR] child killed (timeout)\n"); + } else if (WIFEXITED(exitCode)) { + fprintf(stderr, "child terminated normally with exit code %u\n", WEXITSTATUS(exitCode)); + } else if (WIFSIGNALED(exitCode)) { + fprintf(stderr, "child terminated by signal %u\n", WTERMSIG(exitCode)); + if (WCOREDUMP(exitCode)) { + fprintf(stderr, "[WARNING] child dumped core\n"); + } + } else { + fprintf(stderr, "[WARNING] strange exit code: %u\n", exitCode); + } + + EXPECT_EQUAL(exitCode & 0x7f, retval); + + TEST_DONE(); +} + +TEST_APPHOOK(Test) diff --git a/vespamalloc/src/tests/overwrite/.gitignore b/vespamalloc/src/tests/overwrite/.gitignore new file mode 100644 index 00000000000..5a8760f913d --- /dev/null +++ b/vespamalloc/src/tests/overwrite/.gitignore @@ -0,0 +1,8 @@ +.depend +Makefile +expectsignal +overwrite_test +overwrite_testd +/expectsignal-overwrite +vespamalloc_overwrite_test_app +vespamalloc_expectsignal-overwrite_app diff --git a/vespamalloc/src/tests/overwrite/CMakeLists.txt b/vespamalloc/src/tests/overwrite/CMakeLists.txt new file mode 100644 index 00000000000..f3625bd396c --- /dev/null +++ b/vespamalloc/src/tests/overwrite/CMakeLists.txt @@ -0,0 
+1,12 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +vespa_add_executable(vespamalloc_overwrite_test_app + SOURCES + overwrite.cpp + DEPENDS +) +vespa_add_test(NAME vespamalloc_overwrite_test_app NO_VALGRIND COMMAND sh overwrite_test.sh) +vespa_add_executable(vespamalloc_expectsignal-overwrite_app + SOURCES + expectsignal.cpp + DEPENDS +) diff --git a/vespamalloc/src/tests/overwrite/DESC b/vespamalloc/src/tests/overwrite/DESC new file mode 100644 index 00000000000..5d5e1d01ba4 --- /dev/null +++ b/vespamalloc/src/tests/overwrite/DESC @@ -0,0 +1 @@ +This is a test of using memory after delete detection. diff --git a/vespamalloc/src/tests/overwrite/FILES b/vespamalloc/src/tests/overwrite/FILES new file mode 100644 index 00000000000..d8b6a578e9d --- /dev/null +++ b/vespamalloc/src/tests/overwrite/FILES @@ -0,0 +1 @@ +overwrite.cpp diff --git a/vespamalloc/src/tests/overwrite/expectsignal.cpp b/vespamalloc/src/tests/overwrite/expectsignal.cpp new file mode 100644 index 00000000000..0b2d5e154c4 --- /dev/null +++ b/vespamalloc/src/tests/overwrite/expectsignal.cpp @@ -0,0 +1,57 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+#include <vespa/log/log.h> +#include <vespa/fastos/fastos.h> +#include <vespa/vespalib/testkit/testapp.h> +#include <vespa/vespalib/util/slaveproc.h> + +LOG_SETUP("expectsignal_test"); + +using namespace vespalib; + +class Test : public TestApp +{ +public: + int Main(); +private: + virtual bool useProcessStarter() const { return true; } +}; + +int Test::Main() +{ + TEST_INIT("expectsignal_test"); + + EXPECT_EQUAL(_argc, 3); + ASSERT_TRUE(_argc == 3); + + int retval = strtol(_argv[1], NULL, 0); + + fprintf(stderr, "argc=%d : Running '%s' expecting signal %d\n", _argc, _argv[2], retval); + + SlaveProc cmd(_argv[2]); + for(std::string line; cmd.readLine(line, 60000);) { + fprintf(stdout, "%s\n", line.c_str()); + } + + ASSERT_TRUE(cmd.wait(60000)); + + int exitCode = cmd.getExitCode(); + + if (exitCode == 65535) { + fprintf(stderr, "[ERROR] child killed (timeout)\n"); + } else if (WIFEXITED(exitCode)) { + fprintf(stderr, "child terminated normally with exit code %u\n", WEXITSTATUS(exitCode)); + } else if (WIFSIGNALED(exitCode)) { + fprintf(stderr, "child terminated by signal %u\n", WTERMSIG(exitCode)); + if (WCOREDUMP(exitCode)) { + fprintf(stderr, "[WARNING] child dumped core\n"); + } + } else { + fprintf(stderr, "[WARNING] strange exit code: %u\n", exitCode); + } + + EXPECT_EQUAL(exitCode & 0x7f, retval); + + TEST_DONE(); +} + +TEST_APPHOOK(Test) diff --git a/vespamalloc/src/tests/overwrite/overwrite.cpp b/vespamalloc/src/tests/overwrite/overwrite.cpp new file mode 100644 index 00000000000..d7057444505 --- /dev/null +++ b/vespamalloc/src/tests/overwrite/overwrite.cpp @@ -0,0 +1,135 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+#include <vespa/log/log.h> +#include <vespa/fastos/fastos.h> +#include <vespa/vespalib/testkit/testapp.h> + +LOG_SETUP("overwrite_test"); + +using namespace vespalib; + +class Test : public TestApp +{ +public: + int Main(); + ~Test(); +private: + void testFillValue(char *a); + void verifyPreWriteDetection(); // Should abort + void verifyPostWriteDetection(); // Should abort + void verifyWriteAfterFreeDetection(); // Should abort +}; + +Test::~Test() +{ +} + +void Test::testFillValue(char *a) +{ + // Verify fillvalue + EXPECT_EQUAL((int)a[0], 0x66); + EXPECT_EQUAL((int)a[1], 0x66); + EXPECT_EQUAL((int)a[255], 0x66); + + // Make sure that enough blocks of memory is allocated and freed. + for (size_t i(0); i < 100; i++) { + char *d = new char[256]; + memset(d, 0x77, 256); + delete [] d; + EXPECT_EQUAL((int)d[0], 0x66); + EXPECT_EQUAL((int)d[1], 0x66); + EXPECT_EQUAL((int)d[255], 0x66); + } + + // Make sure we trigger vespamallocd detection of memory written after delete. + char *aa[1024]; + for (size_t i(0); i < sizeof(aa)/sizeof(aa[0]); i++) { + aa[i] = new char[256]; + } + + // Verify overwrite detection in place after cleaning up. + for (size_t i(0); i < sizeof(aa)/sizeof(aa[0]); i++) { + delete [] aa[i]; + EXPECT_EQUAL((int)a[0], 0x66); + EXPECT_EQUAL((int)a[1], 0x66); + EXPECT_EQUAL((int)a[255], 0x66); + } +} + +void Test::verifyPreWriteDetection() +{ + char * a = new char[8]; + *(a-1) = 0; + delete [] a; +} + +void Test::verifyPostWriteDetection() +{ + char * a = new char[8]; + a[8] = 0; + delete [] a; +} + +void Test::verifyWriteAfterFreeDetection() +{ + // Make sure that enough blocks of memory is allocated and freed. + char * a = new char[256]; + delete [] a; + for (size_t i(0); i < 100; i++) { + char *d = new char[256]; + delete [] d; + } + // Write freed memory. + a[0] = 0; + + // Make sure we trigger vespamallocd detection of memory written after delete. 
+ char *aa[1024]; + for (size_t i(0); i < sizeof(aa)/sizeof(aa[0]); i++) { + aa[i] = new char[256]; + } + + // Clean up. + for (size_t i(0); i < sizeof(aa)/sizeof(aa[0]); i++) { + delete [] aa[i]; + } +} + +int Test::Main() +{ + TEST_INIT("overwrite_test"); + + char * a = new char[256]; + memset(a, 0x77, 256); + a[0] = 0; + EXPECT_EQUAL((int)a[0], 0); + EXPECT_EQUAL((int)a[1], 0x77); + EXPECT_EQUAL((int)a[255], 0x77); + char * b = a; + EXPECT_EQUAL(a, b); + delete [] a; + EXPECT_EQUAL(a, b); + + if (_argc > 1) { + testFillValue(a); + if (strcmp(_argv[1], "prewrite") == 0) { + verifyPreWriteDetection(); + return 0; + } else if (strcmp(_argv[1], "postwrite") == 0) { + verifyPostWriteDetection(); + return 0; + } else if (strcmp(_argv[1], "writeafterfree") == 0) { + verifyWriteAfterFreeDetection(); + return 0; + } + + } else { + // Verify that nothing is done when not expected too. + EXPECT_EQUAL((int)a[0], 0); + EXPECT_EQUAL((int)a[1], 0x77); + EXPECT_EQUAL((int)a[255], 0x77); + } + + TEST_DONE(); + return 42; +} + +TEST_APPHOOK(Test) diff --git a/vespamalloc/src/tests/overwrite/overwrite_test.sh b/vespamalloc/src/tests/overwrite/overwrite_test.sh new file mode 100755 index 00000000000..8ccac33aecc --- /dev/null +++ b/vespamalloc/src/tests/overwrite/overwrite_test.sh @@ -0,0 +1,8 @@ +#!/bin/bash + +LD_PRELOAD=../../../src/vespamalloc/libvespamalloc.so ./vespamalloc_overwrite_test_app +LD_PRELOAD=../../../src/vespamalloc/libvespamalloc_vespamallocd.so ./vespamalloc_overwrite_test_app testmemoryfill +ulimit -c 0; +./vespamalloc_expectsignal-overwrite_app 6 "LD_PRELOAD=../../../src/vespamalloc/libvespamalloc_vespamallocd.so ./vespamalloc_overwrite_test_app prewrite" +./vespamalloc_expectsignal-overwrite_app 6 "LD_PRELOAD=../../../src/vespamalloc/libvespamalloc_vespamallocd.so ./vespamalloc_overwrite_test_app postwrite" +./vespamalloc_expectsignal-overwrite_app 6 "LD_PRELOAD=../../../src/vespamalloc/libvespamalloc_vespamallocd.so ./vespamalloc_overwrite_test_app 
writeafterfree" diff --git a/vespamalloc/src/tests/overwrite/vespamalloc.conf b/vespamalloc/src/tests/overwrite/vespamalloc.conf new file mode 100644 index 00000000000..f371e36204a --- /dev/null +++ b/vespamalloc/src/tests/overwrite/vespamalloc.conf @@ -0,0 +1,15 @@ +#Config file for vespa malloc +#loglevel = 0 should mean no logging. Only level 1 is implemented. +# logfile vespamalloc.log # default(stderr) This is the file to where log is written (stderr, stdout, filename) +sigprof_loglevel 0 # default(0) Loglevel used at SIGPROF signal. +atend_loglevel 2 # default(1) Loglevel used when application stops. +atnomem_loglevel 2 # default(1) Loglevel used when datasegment is exhausted. +pralloc_loglimit 1 # What to log pr alloc. default(0) except mallocdst(1). mallocdst_nl(0), but has effect og SIGHUP. +atdoubledelete_loglevel 2 # default(1) Loglevel used when vespa_malloc discovers a double delete. +atinvalid_loglevel 2 # default(1) Loglevel used when vespa_malloc discovers logical error. +bigsegment_loglevel 0 # default(1) Loglevel used when datasegment passes a boundary. +bigsegment_limit 0x80000000 # default(0x20000000) First level the datasegment must reach before logging is started +bigsegment_increment 0x10000000 # default(0x4000000) At what increment it will log next time. +bigblocklimit 0x800000 # default(0x800000) Limit for when to log new/deletes wuth stack trace. Only mallocdst.so +allocs2show 8 +fillvalue 0x66 diff --git a/vespamalloc/src/tests/stacktrace/.gitignore b/vespamalloc/src/tests/stacktrace/.gitignore new file mode 100644 index 00000000000..669d726db1e --- /dev/null +++ b/vespamalloc/src/tests/stacktrace/.gitignore @@ -0,0 +1,3 @@ +*_test* +.depend +Makefile diff --git a/vespamalloc/src/tests/stacktrace/CMakeLists.txt b/vespamalloc/src/tests/stacktrace/CMakeLists.txt new file mode 100644 index 00000000000..6d8fbfcbaa1 --- /dev/null +++ b/vespamalloc/src/tests/stacktrace/CMakeLists.txt @@ -0,0 +1,12 @@ +# Copyright 2016 Yahoo Inc. 
Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +vespa_add_executable(vespamalloc_stacktrace_test_app + SOURCES + stacktrace.cpp + backtrace.c + DEPENDS +) +vespa_add_test( + NAME vespamalloc_stacktrace_test_app + NO_VALGRIND COMMAND vespamalloc_stacktrace_test_app + ENVIRONMENT "LD_PRELOAD=${CMAKE_CURRENT_BINARY_DIR}/../../vespamalloc/libvespamalloc_vespamallocdst16.so" + NO_VALGRIND) diff --git a/vespamalloc/src/tests/stacktrace/DESC b/vespamalloc/src/tests/stacktrace/DESC new file mode 100644 index 00000000000..5f30c916321 --- /dev/null +++ b/vespamalloc/src/tests/stacktrace/DESC @@ -0,0 +1 @@ +Test that the stacktrace functionality works as expected. diff --git a/vespamalloc/src/tests/stacktrace/FILES b/vespamalloc/src/tests/stacktrace/FILES new file mode 100644 index 00000000000..24b43aa0f89 --- /dev/null +++ b/vespamalloc/src/tests/stacktrace/FILES @@ -0,0 +1 @@ +stacktrace.cpp diff --git a/vespamalloc/src/tests/stacktrace/backtrace.c b/vespamalloc/src/tests/stacktrace/backtrace.c new file mode 100644 index 00000000000..d594caa3368 --- /dev/null +++ b/vespamalloc/src/tests/stacktrace/backtrace.c @@ -0,0 +1,84 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#include "backtrace.h" + +#if defined(__i386__) +// use GLIBC version, hope it works +extern int backtrace(void **buffer, int size); +#define HAVE_BACKTRACE +#endif + +#if defined(__x86_64__) + +/** + Written by Arne H. J. 
based on docs: + + http://www.kernel.org/pub/linux/devel/gcc/unwind/ + http://www.codesourcery.com/public/cxx-abi/abi-eh.html + http://refspecs.freestandards.org/LSB_3.1.0/LSB-Core-generic/LSB-Core-generic/libgcc-s-ddefs.html +**/ + +#include <unwind.h> + +struct trace_context { + void **array; + int size; + int index; +}; + +static _Unwind_Reason_Code +trace_fn(struct _Unwind_Context *ctxt, void *arg) +{ + struct trace_context *tp = (struct trace_context *)arg; + void *ip = (void *)_Unwind_GetIP(ctxt); + + if (ip == 0) { + return _URC_END_OF_STACK; + } + if (tp->index <= tp->size) { + // there's no point filling in the address of the backtrace() + // function itself, that doesn't provide any extra information, + // so skip one level + if (tp->index > 0) { + tp->array[tp->index - 1] = ip; + } + tp->index++; + } else { + return _URC_NORMAL_STOP; + } + return _URC_NO_REASON; // "This is not the destination frame" -> try next frame +} + +#define HAVE_BACKTRACE +int +backtrace (void **array, int size) +{ + struct trace_context t; + t.array = array; + t.size = size; + t.index = 0; + _Unwind_Backtrace(trace_fn, &t); + return t.index - 1; +} +#endif // x86_64 + + +#ifdef HAVE_BACKTRACE + +int +FastOS_backtrace (void **array, int size) +{ + return backtrace(array, size); +} + +#else + +# warning "backtrace not supported on this CPU" +int +FastOS_backtrace (void **array, int size) +{ + (void) array; + (void) size; + return 0; +} + +#endif diff --git a/vespamalloc/src/tests/stacktrace/backtrace.h b/vespamalloc/src/tests/stacktrace/backtrace.h new file mode 100644 index 00000000000..45c1ef1378d --- /dev/null +++ b/vespamalloc/src/tests/stacktrace/backtrace.h @@ -0,0 +1,17 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+#pragma once
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+int FastOS_backtrace (void **array, int size);
+
+#if defined(__x86_64__)
+int backtrace (void **array, int size);
+#endif
+
+#ifdef __cplusplus
+}
+#endif
+
diff --git a/vespamalloc/src/tests/stacktrace/stacktrace.cpp b/vespamalloc/src/tests/stacktrace/stacktrace.cpp
new file mode 100644
index 00000000000..0fb0c9759a2
--- /dev/null
+++ b/vespamalloc/src/tests/stacktrace/stacktrace.cpp
@@ -0,0 +1,35 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <stdlib.h>
+#include <stdio.h>
+#include <pthread.h>
+
+// Thread body: mirrors main()'s alloc/free pattern so the preloaded stack-trace
+// allocator sees the same intentional leak from a secondary thread.
+void * run(void * arg)
+{
+    (void) arg;
+    char * a = new char [100]; // a should not remain in stacktrace
+    char * b = new char [1]; // but b should, as it is not deleted (intentional leak)
+    (void) b;
+    delete [] a;
+    return NULL;
+}
+
+int main(int argc, char *argv[])
+{
+    (void) argc;
+    (void) argv;
+    char * a = new char [100]; // a should not remain in stacktrace
+    char * b = new char [1]; // but b should, as it is not deleted (intentional leak)
+    (void) b;
+    delete [] a;
+    pthread_t tid;
+    int retval = pthread_create(&tid, NULL, run, NULL);
+    if (retval != 0) {
+        perror("pthread_create failed");
+        abort();
+    }
+    retval = pthread_join(tid, NULL);
+    if (retval != 0) {
+        perror("pthread_join failed");
+        abort();
+    }
+}
diff --git a/vespamalloc/src/tests/test.cpp b/vespamalloc/src/tests/test.cpp
new file mode 100644
index 00000000000..24acb3368d8
--- /dev/null
+++ b/vespamalloc/src/tests/test.cpp
@@ -0,0 +1,62 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <stdio.h>
+#include <stdlib.h>
+#include <vespa/fastos/fastos.h>
+
+namespace vespamalloc {
+void info();
+}
+
+// Allocates and frees `count` pairs of large blocks sized just above/below `sz`
+// to exercise the allocator's large-block code paths.
+void testbigblocks(size_t count, size_t sz)
+{
+    for (size_t i=0; i < count; i++) {
+        char * a = new char[sz];
+        delete [] a;
+        a = new char [sz-1];
+        delete [] a;
+    }
+}
+
+// Single malloc/free pair with an odd size — presumably a smoke test for the
+// mixed malloc/new paths; confirm intent against the allocator sources.
+void testdd()
+{
+    char * a = (char *)malloc(0x1003);
+    free(a);
+}
+
+class Thread : public FastOS_Runnable
+{
+private:
+    void Run(FastOS_ThreadInterface * ti, void * arg);
+};
+
+// Spawns four worker threads running Thread::Run and joins them all.
+int main(int, char *[])
+{
+    FastOS_ThreadPool threadPool(512*1024);
+    printf("Main stack(%p)\n", &threadPool);
+    Thread context;
+
+    FastOS_ThreadInterface * th[4];
+    for (size_t i=0; i<sizeof(th)/sizeof(th[0]); i++) {
+        th[i] = threadPool.NewThread(&context);
+    }
+    for (size_t i=0; i<sizeof(th)/sizeof(th[0]); i++) {
+        th[i]->Join();
+        delete th[i];
+    }
+
+    return 0;
+}
+
+// Per-thread workload. NOTE(review): the 100 x new char[400] and the trailing
+// new char[200] are never freed — presumably intentional leaks to exercise the
+// allocator's at-exit reporting; confirm before "fixing".
+void Thread::Run(FastOS_ThreadInterface *, void *)
+{
+    char * a = new char [100];
+    delete [] a;
+    char * b;
+
+    testbigblocks(1, 0x800003);
+    testbigblocks(64000, 0x200003);
+    for (size_t i=0; i<100;i++) a = new char[400];
+    testdd();
+    b = new char[200];
+    (void)b;
+}
diff --git a/vespamalloc/src/tests/test1/.gitignore b/vespamalloc/src/tests/test1/.gitignore
new file mode 100644
index 00000000000..b7fab5d205c
--- /dev/null
+++ b/vespamalloc/src/tests/test1/.gitignore
@@ -0,0 +1,4 @@
+.depend
+Makefile
+testatomic
+vespamalloc_testatomic_app
diff --git a/vespamalloc/src/tests/test1/CMakeLists.txt b/vespamalloc/src/tests/test1/CMakeLists.txt
new file mode 100644
index 00000000000..dc0217b139a
--- /dev/null
+++ b/vespamalloc/src/tests/test1/CMakeLists.txt
@@ -0,0 +1,7 @@
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+vespa_add_executable(vespamalloc_testatomic_app + SOURCES + testatomic.cpp + DEPENDS +) +vespa_add_test(NAME vespamalloc_testatomic_app NO_VALGRIND COMMAND vespamalloc_testatomic_app) diff --git a/vespamalloc/src/tests/test1/DESC b/vespamalloc/src/tests/test1/DESC new file mode 100644 index 00000000000..4f3ca4d4d97 --- /dev/null +++ b/vespamalloc/src/tests/test1/DESC @@ -0,0 +1 @@ +This is a unittest of vespamalloc. diff --git a/vespamalloc/src/tests/test1/FILES b/vespamalloc/src/tests/test1/FILES new file mode 100644 index 00000000000..4b14c586dd4 --- /dev/null +++ b/vespamalloc/src/tests/test1/FILES @@ -0,0 +1 @@ +testatomic.cpp diff --git a/vespamalloc/src/tests/test1/testatomic.cpp b/vespamalloc/src/tests/test1/testatomic.cpp new file mode 100644 index 00000000000..1222493446c --- /dev/null +++ b/vespamalloc/src/tests/test1/testatomic.cpp @@ -0,0 +1,118 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#include <vespa/fastos/fastos.h> +#include <vespa/vespalib/testkit/testapp.h> +#include <vespa/vespalib/util/atomic.h> +#include <vector> + +using vespalib::Atomic; + +class Test : public vespalib::TestApp +{ +public: + int Main(); +private: + template<typename T> + void testSwap(T initial); + template<typename T> + void testSwapStress(T v, int numThreads); +}; + +template <typename T> +class Stress : public FastOS_Runnable +{ +private: + void Run(FastOS_ThreadInterface * ti, void * arg); + void stressSwap(T & value); +public: + Stress(T * value) : _value(value), _successCount(0), _failedCount(0) { } + void wait() { _wait.Lock(); _wait.Unlock(); } + FastOS_Mutex _wait; + T * _value; + size_t _successCount; + size_t _failedCount; +}; + +TEST_APPHOOK(Test); + +template<typename T> +void Test::testSwap(T initial) +{ + T value(initial); + + ASSERT_TRUE(Atomic::cmpSwap(&value, initial+1, initial)); + ASSERT_TRUE(value == initial+1); + + ASSERT_TRUE(!Atomic::cmpSwap(&value, initial+2, initial)); + 
ASSERT_TRUE(value == initial+1); +} + +template<typename T> +void Test::testSwapStress(T v, int numThreads) +{ + T old(v); + std::vector<Stress<T> *> contexts; + std::vector<FastOS_ThreadInterface *> threads; + FastOS_ThreadPool threadPool(512*1024); + + for(int i=0; i < numThreads; i++) { + contexts.push_back(new Stress<T>(&v)); + } + + for(size_t i = 0; i < contexts.size(); i++) { + threads.push_back(threadPool.NewThread(contexts[i])); + } + FastOS_Thread::Sleep(1000); + size_t succesCount(0); + size_t failedCount(0); + for(size_t i = 0; i < contexts.size(); i++) { + Stress<T> * s = contexts[i]; + s->wait(); + succesCount += s->_successCount; + failedCount += s->_failedCount; + } + ASSERT_TRUE(v == 0); + ASSERT_TRUE(old == succesCount); + fprintf(stderr, "%ld threads counting down from %" PRIu64 " had %ld succesfull and %ld unsuccessful attempts\n", + contexts.size(), uint64_t(old), succesCount, failedCount); + for(size_t i = 0; i < contexts.size(); i++) { + delete contexts[i]; + } +} + +template <typename T> +void Stress<T>::Run(FastOS_ThreadInterface *, void *) +{ + _wait.Lock(); + stressSwap(*_value); + _wait.Unlock(); +} + +template <typename T> +void Stress<T>::stressSwap(T & value) +{ + for (T old = value; old > 0; old = value) { + if (Atomic::cmpSwap(&value, old-1, old)) { + _successCount++; + } else { + _failedCount++; + } + } +} + +int Test::Main() +{ + TEST_INIT("atomic"); + + testSwap<uint32_t>(6); + testSwap<uint32_t>(7); + testSwap<uint32_t>(uint32_t(-6)); + testSwap<uint32_t>(uint32_t(-7)); + testSwap<uint64_t>(6); + testSwap<uint64_t>(7); + testSwap<uint64_t>(uint64_t(-6)); + testSwap<uint64_t>(uint64_t(-7)); + testSwapStress<uint64_t>(0x1000000, 4); + testSwapStress<uint32_t>(0x1000000, 4); + + TEST_DONE(); +} diff --git a/vespamalloc/src/tests/test2/.gitignore b/vespamalloc/src/tests/test2/.gitignore new file mode 100644 index 00000000000..1c719511e5b --- /dev/null +++ b/vespamalloc/src/tests/test2/.gitignore @@ -0,0 +1,4 @@ +.depend +Makefile 
+testgraph +vespamalloc_testgraph_app diff --git a/vespamalloc/src/tests/test2/CMakeLists.txt b/vespamalloc/src/tests/test2/CMakeLists.txt new file mode 100644 index 00000000000..668c09feb03 --- /dev/null +++ b/vespamalloc/src/tests/test2/CMakeLists.txt @@ -0,0 +1,10 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +vespa_add_executable(vespamalloc_testgraph_app + SOURCES + testgraph.cpp + ../../vespamalloc/util/stream.cpp + ../../vespamalloc/util/traceutil.cpp + ../../vespamalloc/util/callstack.cpp + DEPENDS +) +vespa_add_test(NAME vespamalloc_testgraph_app NO_VALGRIND COMMAND vespamalloc_testgraph_app) diff --git a/vespamalloc/src/tests/test2/DESC b/vespamalloc/src/tests/test2/DESC new file mode 100644 index 00000000000..4f3ca4d4d97 --- /dev/null +++ b/vespamalloc/src/tests/test2/DESC @@ -0,0 +1 @@ +This is a unittest of vespamalloc. diff --git a/vespamalloc/src/tests/test2/FILES b/vespamalloc/src/tests/test2/FILES new file mode 100644 index 00000000000..44b3d9f7c51 --- /dev/null +++ b/vespamalloc/src/tests/test2/FILES @@ -0,0 +1 @@ +testgraph.cpp diff --git a/vespamalloc/src/tests/test2/testgraph.cpp b/vespamalloc/src/tests/test2/testgraph.cpp new file mode 100644 index 00000000000..a9cf2c07b61 --- /dev/null +++ b/vespamalloc/src/tests/test2/testgraph.cpp @@ -0,0 +1,91 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+#include <vespamalloc/util/index.h> +#include <vespamalloc/util/callgraph.h> +#include <vespamalloc/util/callstack.h> +#include <vespamalloc/util/traceutil.h> + +using namespace vespamalloc; + +//typedef StackEntry<StackFrameReturnEntry> StackElem; +typedef CallGraph<int, 0x1000, Index> CallGraphIntT; +typedef CallGraph<StackElem, 0x1000, Index> CallGraphStackEntryT; + +namespace vespalibtest { + +template <typename T> +class DumpGraph +{ +public: + DumpGraph(const char * s="") : _string(s) { } + void handle(const T & node) + { + asciistream os; + os << ' ' << node; + _string += os.c_str(); + if (node.callers() == NULL) { + printf("%s\n", _string.c_str()); + } + } + const std::string & str() const { return _string; } +private: + std::string _string; +}; + +} +void testint() { + CallGraphIntT callGraph; + vespalibtest::DumpGraph<CallGraphIntT::Node> dump("int: "); + int s1[3] = { 1, 2, 3 }; + int s2[3] = { 1, 2, 4 }; + int s3[1] = { 1 }; + int s4[3] = { 1, 3, 4 }; + callGraph.addStack(s1, 3); + callGraph.addStack(s2, 3); + callGraph.addStack(s3, 1); + callGraph.addStack(s4, 3); + callGraph.traverseDepth(dump); + printf("%s\n", dump.str().c_str()); +} + +void teststackentry() { + CallGraphStackEntryT callGraph; + vespalibtest::DumpGraph<CallGraphStackEntryT::Node> dump("callstack: "); + StackElem s1[3] = { StackElem((void *)1), StackElem((void *)2), StackElem((void *)3) }; + StackElem s2[3] = { StackElem((void *)1), StackElem((void *)2), StackElem((void *)4) }; + StackElem s3[1] = { StackElem((void *)1) }; + StackElem s4[3] = { StackElem((void *)1), StackElem((void *)3), StackElem((void *)4) }; + callGraph.addStack(s1, 3); + callGraph.addStack(s2, 3); + callGraph.addStack(s3, 1); + callGraph.addStack(s4, 3); + callGraph.traverseDepth(dump); + printf("%s\n", dump.str().c_str()); +} + +void testaggregator() { + CallGraphStackEntryT callGraph; + StackElem s1[3] = { StackElem((void *)1), StackElem((void *)2), StackElem((void *)3) }; + StackElem s2[3] = { StackElem((void 
*)1), StackElem((void *)2), StackElem((void *)4) }; + StackElem s3[1] = { StackElem((void *)1) }; + StackElem s4[3] = { StackElem((void *)1), StackElem((void *)3), StackElem((void *)4) }; + callGraph.addStack(s1, 3); + callGraph.addStack(s2, 3); + callGraph.addStack(s3, 1); + callGraph.addStack(s4, 3); + Aggregator agg; + DumpGraph<CallGraphT::Node> dump(&agg, "{ ", " }"); + callGraph.traverseDepth(dump);; + asciistream ost; + ost << agg; + printf("%s\n", ost.c_str()); +} +int main (int argc, char *argv[]) +{ + (void) argc; + (void) argv; + testint(); + teststackentry(); + testaggregator(); + return 0; +} + diff --git a/vespamalloc/src/tests/thread/.gitignore b/vespamalloc/src/tests/thread/.gitignore new file mode 100644 index 00000000000..e342e6aaea7 --- /dev/null +++ b/vespamalloc/src/tests/thread/.gitignore @@ -0,0 +1,3 @@ +/*_test +vespamalloc_racemanythreads_test_app +vespamalloc_thread_test_app diff --git a/vespamalloc/src/tests/thread/CMakeLists.txt b/vespamalloc/src/tests/thread/CMakeLists.txt new file mode 100644 index 00000000000..67539a68f77 --- /dev/null +++ b/vespamalloc/src/tests/thread/CMakeLists.txt @@ -0,0 +1,10 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +vespa_add_executable(vespamalloc_thread_test_app + SOURCES + thread.cpp +) +vespa_add_test(NAME vespamalloc_thread_test_app NO_VALGRIND COMMAND sh thread_test.sh) +vespa_add_executable(vespamalloc_racemanythreads_test_app + SOURCES + racemanythreads.cpp +) diff --git a/vespamalloc/src/tests/thread/racemanythreads.cpp b/vespamalloc/src/tests/thread/racemanythreads.cpp new file mode 100644 index 00000000000..ba5cc8b7a1c --- /dev/null +++ b/vespamalloc/src/tests/thread/racemanythreads.cpp @@ -0,0 +1,82 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+#include <stdlib.h> +#include <stdio.h> +#include <string.h> +#include <unistd.h> +#include <pthread.h> +#include <stdint.h> +#include <errno.h> +#include <vespa/vespalib/util/atomic.h> +#include <sys/resource.h> +#include <vespa/vespalib/testkit/testapp.h> +#include <vespa/log/log.h> + +LOG_SETUP("thread_test"); + +using namespace vespalib; + +class Test : public TestApp +{ +public: + ~Test(); + int Main(); +}; + +Test::~Test() +{ +} + +void * hammer(void * arg) +{ + usleep(4000000); + long seconds = * static_cast<const long *>(arg); + long stopTime(time(NULL) + seconds); + pthread_t id = pthread_self(); + while (time(NULL) < stopTime) { + std::vector<pthread_t *> allocations; + for (size_t i(0); i < 2000; i++) { + pthread_t *t = new pthread_t[20]; + allocations.push_back(t); + for (size_t j(0); j < 20; j++) { + t[j] = id; + } + } + + for (size_t i(0); i < allocations.size(); i++) { + for (size_t j(0); j < 20; j++) { + assert(allocations[i][j] == id); + } + delete [] allocations[i]; + } + } + return arg; +} + +int Test::Main() +{ + TEST_INIT("racemanythreads_test"); + size_t threadCount(1024); + long seconds(10); + if (_argc >= 2) { + threadCount = strtoul(_argv[1], NULL, 0); + if (_argc >= 3) { + seconds = strtoul(_argv[2], NULL, 0); + } + } + + pthread_attr_t attr; + EXPECT_EQUAL(pthread_attr_init(&attr), 0); + EXPECT_EQUAL(pthread_attr_setstacksize(&attr, 64*1024), 0); + std::vector<pthread_t> threads(threadCount); + for (size_t i(0); i < threadCount; i++) { + EXPECT_EQUAL( pthread_create(&threads[i], &attr, hammer, &seconds), 0); + } + for (size_t i(0); i < threadCount; i++) { + void *retval; + EXPECT_EQUAL(pthread_join(threads[i], &retval), 0); + } + + TEST_DONE(); +} + +TEST_APPHOOK(Test); diff --git a/vespamalloc/src/tests/thread/thread.cpp b/vespamalloc/src/tests/thread/thread.cpp new file mode 100644 index 00000000000..49631c0f7e5 --- /dev/null +++ b/vespamalloc/src/tests/thread/thread.cpp @@ -0,0 +1,131 @@ +// Copyright 2016 Yahoo Inc. 
Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#include <stdlib.h> +#include <stdio.h> +#include <string.h> +#include <unistd.h> +#include <pthread.h> +#include <stdint.h> +#include <errno.h> +#include <vespa/vespalib/util/atomic.h> +#include <sys/resource.h> +#include <vespa/vespalib/testkit/testapp.h> +#include <vespa/log/log.h> + +LOG_SETUP("thread_test"); + +using namespace vespalib; + +class Test : public TestApp +{ +public: + ~Test(); + int Main(); +private: + virtual bool useIPCHelper() const { return true; } +}; + +Test::~Test() +{ +} + +void * just_return(void * arg) +{ + return arg; +} + +void * just_exit(void * arg) +{ + pthread_exit(arg); +} + +void * just_cancel(void * arg) +{ + sleep(60); + return arg; +} + +struct wait_info { + wait_info() : + _count(0) + { + if (pthread_mutex_init(&_mutex, NULL) != 0) { abort(); } + if (pthread_cond_init(&_cond, NULL) != 0) { abort(); } + } + ~wait_info() { + if (pthread_mutex_destroy(&_mutex) != 0) { abort(); } + if (pthread_cond_destroy(&_cond) != 0) { abort(); } + } + pthread_cond_t _cond; + pthread_mutex_t _mutex; + volatile uint64_t _count; +}; + +void * just_wait(void * arg) +{ + wait_info * info = (wait_info *) arg; + pthread_mutex_lock(&info->_mutex); + vespalib::Atomic::postInc(&info->_count); + pthread_cond_wait(&info->_cond, &info->_mutex); + pthread_mutex_unlock(&info->_mutex); + pthread_cond_signal(&info->_cond); + vespalib::Atomic::postDec(&info->_count); + return arg; +} + +int Test::Main() +{ + TEST_INIT("thread_test"); + size_t threadCount(102400); + if (_argc >= 3) { + threadCount = strtoul(_argv[2], NULL, 0); + } + + const char * testType = _argv[1]; + + for (size_t i(0); i < threadCount; i++) { + pthread_t th; + void *retval; + if (strcmp(testType, "exit") == 0) { + EXPECT_EQUAL( pthread_create(&th, NULL, just_exit, NULL), 0); + } else if (strcmp(testType, "cancel") == 0) { + EXPECT_EQUAL( pthread_create(&th, NULL, just_cancel, NULL), 0); + EXPECT_EQUAL( 
pthread_cancel(th), 0); + } else { + EXPECT_EQUAL( pthread_create(&th, NULL, just_return, NULL), 0); + } + EXPECT_EQUAL(pthread_join(th, &retval), 0); + } + + wait_info info; + pthread_attr_t attr; + EXPECT_EQUAL(pthread_attr_init(&attr), 0); + EXPECT_EQUAL(pthread_attr_setstacksize(&attr, 64*1024), 0); + EXPECT_EQUAL(info._count, 0ul); + const size_t NUM_THREADS(16382); // +1 for main thread, +1 for testsystem = 16384 + pthread_t tl[NUM_THREADS]; + for (size_t j=0;j < NUM_THREADS;j++) { + int e = pthread_create(&tl[j], &attr, just_wait, &info); + if (e != 0) { + fprintf(stderr, "pthread_create failed at index '%ld'. with errno='%d'", j, e); + perror("pthread_create failed"); + abort(); + } + } + pthread_t th; + EXPECT_EQUAL( pthread_create(&th, &attr, just_wait, &info), EAGAIN); // Verify that you have reached upper limit of threads with vespamalloc. + while (info._count != NUM_THREADS) { + usleep(1); + } + pthread_mutex_lock(&info._mutex); + pthread_cond_signal(&info._cond); + pthread_mutex_unlock(&info._mutex); + for (size_t j=0;j < NUM_THREADS;j++) { + void *retval; + EXPECT_EQUAL(pthread_join(tl[j], &retval), 0); + } + EXPECT_EQUAL(pthread_attr_destroy(&attr), 0); + EXPECT_EQUAL(info._count, 0ul); + TEST_DONE(); +} + +TEST_APPHOOK(Test); diff --git a/vespamalloc/src/tests/thread/thread_test.sh b/vespamalloc/src/tests/thread/thread_test.sh new file mode 100755 index 00000000000..edcd7a41a17 --- /dev/null +++ b/vespamalloc/src/tests/thread/thread_test.sh @@ -0,0 +1,10 @@ +#!/bin/bash + +VESPA_MALLOC_SO=../../../src/vespamalloc/libvespamalloc.so +VESPA_MALLOC_SO_D=../../../src/vespamalloc/libvespamalloc_vespamallocd.so + +LD_PRELOAD=$VESPA_MALLOC_SO ./vespamalloc_thread_test_app return 20 +LD_PRELOAD=$VESPA_MALLOC_SO ./vespamalloc_thread_test_app exit 20 +LD_PRELOAD=$VESPA_MALLOC_SO ./vespamalloc_thread_test_app cancel 20 +#LD_PRELOAD=$VESPA_MALLOC_SO ./vespamalloc_racemanythreads_test_app 4000 20 +#LD_PRELOAD=$VESPA_MALLOC_SO_D 
./vespamalloc_racemanythreads_test_app 4000 20 diff --git a/vespamalloc/src/vespamalloc/.gitignore b/vespamalloc/src/vespamalloc/.gitignore new file mode 100644 index 00000000000..5dae353d999 --- /dev/null +++ b/vespamalloc/src/vespamalloc/.gitignore @@ -0,0 +1,2 @@ +.depend +Makefile diff --git a/vespamalloc/src/vespamalloc/CMakeLists.txt b/vespamalloc/src/vespamalloc/CMakeLists.txt new file mode 100644 index 00000000000..ece7f9f61a0 --- /dev/null +++ b/vespamalloc/src/vespamalloc/CMakeLists.txt @@ -0,0 +1,35 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +vespa_add_library(vespamalloc + SOURCES + $<TARGET_OBJECTS:vespamalloc_malloc> + $<TARGET_OBJECTS:vespamalloc_util> + INSTALL lib64 + DEPENDS +) +vespa_add_library(vespamalloc_vespamallocd + SOURCES + $<TARGET_OBJECTS:vespamalloc_mallocd> + $<TARGET_OBJECTS:vespamalloc_util> + INSTALL lib64 + DEPENDS +) +vespa_add_library(vespamalloc_vespamallocdst16 + SOURCES + $<TARGET_OBJECTS:vespamalloc_mallocdst16> + $<TARGET_OBJECTS:vespamalloc_util> + INSTALL lib64 + DEPENDS +) +vespa_add_library(vespamalloc_vespamallocdst16_nl + SOURCES + $<TARGET_OBJECTS:vespamalloc_mallocdst16_nl> + $<TARGET_OBJECTS:vespamalloc_util> + INSTALL lib64 + DEPENDS +) +vespa_add_library(vespamalloc_vespammap + SOURCES + $<TARGET_OBJECTS:vespamalloc_mmap> + INSTALL lib64 + DEPENDS +) diff --git a/vespamalloc/src/vespamalloc/malloc/.gitignore b/vespamalloc/src/vespamalloc/malloc/.gitignore new file mode 100644 index 00000000000..5dae353d999 --- /dev/null +++ b/vespamalloc/src/vespamalloc/malloc/.gitignore @@ -0,0 +1,2 @@ +.depend +Makefile diff --git a/vespamalloc/src/vespamalloc/malloc/CMakeLists.txt b/vespamalloc/src/vespamalloc/malloc/CMakeLists.txt new file mode 100644 index 00000000000..4534ca2f632 --- /dev/null +++ b/vespamalloc/src/vespamalloc/malloc/CMakeLists.txt @@ -0,0 +1,63 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. 
See LICENSE in the project root. +vespa_add_library(vespamalloc_malloc OBJECT + SOURCES + malloc.cpp + allocchunk.cpp + common.cpp + threadproxy.cpp + memblock.cpp + datasegment.cpp + globalpool.cpp + threadpool.cpp + threadlist.cpp + DEPENDS +) +vespa_add_library(vespamalloc_mallocd OBJECT + SOURCES + mallocd.cpp + allocchunk.cpp + common.cpp + threadproxy.cpp + memblockboundscheck.cpp + memblockboundscheck_d.cpp + datasegmentd.cpp + globalpoold.cpp + threadpoold.cpp + threadlistd.cpp + DEPENDS +) +vespa_add_library(vespamalloc_mallocdst16 OBJECT + SOURCES + mallocdst16.cpp + allocchunk.cpp + common.cpp + threadproxy.cpp + memblockboundscheck.cpp + memblockboundscheck_dst.cpp + datasegmentdst.cpp + globalpooldst.cpp + threadpooldst.cpp + threadlistdst.cpp + DEPENDS +) +vespa_workaround_gcc_bug_67055(mallocdst16.cpp) +vespa_add_library(vespamalloc_mallocdst16_nl OBJECT + SOURCES + mallocdst16_nl.cpp + allocchunk.cpp + common.cpp + threadproxy.cpp + memblockboundscheck.cpp + memblockboundscheck_dst.cpp + datasegmentdst.cpp + globalpooldst.cpp + threadpooldst.cpp + threadlistdst.cpp + DEPENDS +) +vespa_workaround_gcc_bug_67055(mallocdst16_nl.cpp) +vespa_add_library(vespamalloc_mmap OBJECT + SOURCES + mmap.cpp + DEPENDS +) diff --git a/vespamalloc/src/vespamalloc/malloc/allocchunk.cpp b/vespamalloc/src/vespamalloc/malloc/allocchunk.cpp new file mode 100644 index 00000000000..1a21e6f1c14 --- /dev/null +++ b/vespamalloc/src/vespamalloc/malloc/allocchunk.cpp @@ -0,0 +1,81 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+#include <vespamalloc/malloc/allocchunk.h> + +namespace vespamalloc { + +char AFListBase::_atomicLinkSpace[sizeof(AFListBase::AtomicLink)]; +char AFListBase::_lockedLinkSpace[sizeof(AFListBase::LockedLink)]; +AFListBase::LinkI *AFListBase::_link = NULL; + +void AFListBase::init() +{ + _link = new (_atomicLinkSpace)AtomicLink(); +} + +AFListBase::LinkI::~LinkI() +{ +} + +void AFListBase::linkInList(HeadPtr & head, AFListBase * list) +{ + AFListBase * tail; + for (tail = list; tail->_next != NULL ;tail = tail->_next) { } + linkIn(head, list, tail); +} + +void AFListBase::AtomicLink::linkIn(HeadPtr & head, AFListBase * csl, AFListBase * tail) +{ + HeadPtr oldHead = head; + HeadPtr newHead(csl, oldHead._tag + 1); + tail->_next = static_cast<AFListBase *>(oldHead._ptr); + while ( ! Atomic::cmpSwap(&head, newHead, oldHead) ) { + oldHead = head; + newHead._tag = oldHead._tag + 1; + tail->_next = static_cast<AFListBase *>(oldHead._ptr); + } +} + +void AFListBase::LockedLink::linkIn(HeadPtr & head, AFListBase * csl, AFListBase * tail) +{ + Guard guard(_mutex); + HeadPtr newHead(csl, head._tag + 1); + tail->_next = static_cast<AFListBase *>(head._ptr); + head = newHead; +} + +AFListBase * AFListBase::LockedLink::linkOut(HeadPtr & head) +{ + Guard guard(_mutex); + HeadPtr oldHead = head; + AFListBase *csl = static_cast<AFListBase *>(oldHead._ptr); + if (csl == NULL) { + return NULL; + } + HeadPtr newHead(csl->_next, oldHead._tag + 1); + head = newHead; + csl->_next = NULL; + return csl; +} + +AFListBase * AFListBase::AtomicLink::linkOut(HeadPtr & head) +{ + HeadPtr oldHead = head; + AFListBase *csl = static_cast<AFListBase *>(oldHead._ptr); + if (csl == NULL) { + return NULL; + } + HeadPtr newHead(csl->_next, oldHead._tag + 1); + while ( ! 
Atomic::cmpSwap(&head, newHead, oldHead) ) { + oldHead = head; + csl = static_cast<AFListBase *>(oldHead._ptr); + if (csl == NULL) { + return NULL; + } + newHead._ptr = csl->_next; + newHead._tag = oldHead._tag + 1; + } + csl->_next = NULL; + return csl; +} + +} diff --git a/vespamalloc/src/vespamalloc/malloc/allocchunk.h b/vespamalloc/src/vespamalloc/malloc/allocchunk.h new file mode 100644 index 00000000000..48128e12687 --- /dev/null +++ b/vespamalloc/src/vespamalloc/malloc/allocchunk.h @@ -0,0 +1,100 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#pragma once + +#include <vespamalloc/malloc/common.h> +#include <algorithm> + +namespace vespamalloc { + +class AFListBase +{ +public: + typedef Atomic::TaggedPtr HeadPtr; + AFListBase() : _next(NULL) { } + void setNext(AFListBase * csl) { _next = csl; } + static void init(); + static void enableThreadSupport() { _link->enableThreadSupport(); } + static void linkInList(HeadPtr & head, AFListBase * list); + static void linkIn(HeadPtr & head, AFListBase * csl, AFListBase * tail) { + _link->linkIn(head, csl, tail); + } +protected: + AFListBase * getNext() { return _next; } + static AFListBase * linkOut(HeadPtr & head) { return _link->linkOut(head); } +private: + class LinkI + { + public: + virtual ~LinkI(); + virtual void enableThreadSupport() { } + virtual void linkIn(HeadPtr & head, AFListBase * csl, AFListBase * tail) = 0; + virtual AFListBase * linkOut(HeadPtr & head) = 0; + }; + class AtomicLink : public LinkI + { + private: + virtual void linkIn(HeadPtr & head, AFListBase * csl, AFListBase * tail); + virtual AFListBase * linkOut(HeadPtr & head); + }; + class LockedLink : public LinkI + { + public: + virtual void enableThreadSupport() { _mutex.init(); } + private: + virtual void linkIn(HeadPtr & head, AFListBase * csl, AFListBase * tail); + virtual AFListBase * linkOut(HeadPtr & head); + Mutex _mutex; + }; + static char 
_atomicLinkSpace[sizeof(AtomicLink)]; + static char _lockedLinkSpace[sizeof(LockedLink)]; + static LinkI *_link; + AFListBase *_next; +}; + +template <typename MemBlockPtrT> +class AFList : public AFListBase +{ +public: + typedef size_t CountT; + enum { NumBlocks = 126 }; + AFList() : _count(0) { } + CountT count() const { return _count; } + void add(MemBlockPtrT & ptr) { + ptr.free(); + PARANOID_CHECK2( if (full()) { *(int*)0=0; }); + _memBlockList[_count++] = ptr; + } + void sub(MemBlockPtrT & mem) { + if (empty()) { + return; + } + mem = _memBlockList[--_count]; + } + bool empty() const { return (_count == 0); } + bool full() const { return (_count == NumBlocks); } + size_t fill(void * mem, SizeClassT sc, size_t blocksPerChunk = NumBlocks); + AFList * getNext() { return static_cast<AFList *>(AFListBase::getNext()); } + static AFList * linkOut(HeadPtr & head) { + return static_cast<AFList *>(AFListBase::linkOut(head)); + } +private: + CountT _count; + MemBlockPtrT _memBlockList[NumBlocks]; +}; + + +template <typename MemBlockPtrT> +size_t AFList<MemBlockPtrT>::fill(void * mem, SizeClassT sc, size_t blocksPerChunk) +{ + size_t sz = MemBlockPtrT::classSize(sc); + int retval(std::max(0, int(blocksPerChunk-_count))); + char * first = (char *) mem; + for(int i=0; i < retval; i++) { + _memBlockList[_count] = MemBlockPtrT(first + i*sz, MemBlockPtrT::unAdjustSize(sz)); + _memBlockList[_count].free(); + _count++; + } + return retval; +} + +} diff --git a/vespamalloc/src/vespamalloc/malloc/common.cpp b/vespamalloc/src/vespamalloc/malloc/common.cpp new file mode 100644 index 00000000000..68181c664a4 --- /dev/null +++ b/vespamalloc/src/vespamalloc/malloc/common.cpp @@ -0,0 +1,51 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+#include <vespamalloc/malloc/common.h> + +namespace vespamalloc { + +uint32_t Mutex::_threadCount = 0; +bool Mutex::_stopRecursion = true; + +void Mutex::lock() +{ + if (_use) { + pthread_mutex_lock(&_mutex); + } +} +void Mutex::unlock() +{ + if (_use) { + pthread_mutex_unlock(&_mutex); + } +} + +void Mutex::quit() +{ + if (_use) { + _use = false; + pthread_mutex_destroy(&_mutex); + } +} + +void Mutex::init() { + if (!_use && ! _stopRecursion) { + pthread_mutex_init(&_mutex, NULL); + _use = true; + } +} + +Guard::Guard(Mutex & m) : + _mutex(&m) +{ + MallocRecurseOnSuspend(false); + _mutex->lock(); + MallocRecurseOnSuspend(true); +} + + +} + +extern "C" void MallocRecurseOnSuspend(bool recurse) +{ + (void) recurse; +} diff --git a/vespamalloc/src/vespamalloc/malloc/common.h b/vespamalloc/src/vespamalloc/malloc/common.h new file mode 100644 index 00000000000..a065bc43a0a --- /dev/null +++ b/vespamalloc/src/vespamalloc/malloc/common.h @@ -0,0 +1,122 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+#pragma once + +#include <vespa/fastos/fastos.h> +#include <vespa/vespalib/util/atomic.h> +#include <vespa/vespalib/util/optimized.h> +#include <new> +#include <vespamalloc/util/osmem.h> + +using vespalib::Atomic; + +extern "C" void MallocRecurseOnSuspend(bool recurse) __attribute__ ((noinline)); + +namespace vespamalloc { + +#define NELEMS(a) sizeof(a)/sizeof(a[0]) + +#define NUM_SIZE_CLASSES 32 // Max 64G + +#define NUM_THREADS 16384 +#define UNUSED(a) +#ifdef ENABLE_DEBUG +#define DEBUG(a) a +#else +#define DEBUG(a) +#endif + +#ifndef PARANOID_LEVEL +#define PARANOID_LEVEL 0 +#endif + +#if (PARANOID_LEVEL >= 0) +#define PARANOID_CHECK0(a) a +#else +#define PARANOID_CHECK0(a) +#endif + +#if (PARANOID_LEVEL >= 1) +#define PARANOID_CHECK1(a) a +#else +#define PARANOID_CHECK1(a) +#endif + +#if (PARANOID_LEVEL >= 2) +#define PARANOID_CHECK2(a) a +#else +#define PARANOID_CHECK2(a) +#endif + +#if (PARANOID_LEVEL >= 3) +#define PARANOID_CHECK3(a) a +#else +#define PARANOID_CHECK3(a) +#endif + +typedef MmapMemory OSMemory; + +typedef int SizeClassT; + +template <size_t MinClassSizeC> +class CommonT +{ +public: + enum {MinClassSize = MinClassSizeC}; + static inline SizeClassT sizeClass(size_t sz) { + SizeClassT tmp(vespalib::Optimized::msbIdx(sz - 1) - (MinClassSizeC - 1)); + return (sz <= (1 << MinClassSizeC )) ? 
0 : tmp; + } + static inline size_t classSize(SizeClassT sc) { return (size_t(1) << (sc + MinClassSizeC)); } +}; + +inline void crash() { *((unsigned *) NULL) = 0; } + +template <typename T> +inline void swap(T & a, T & b) { T tmp(a); a = b; b = tmp; } + +class Mutex +{ +public: + Mutex() : _mutex(), _use(false) { } + ~Mutex() { quit(); } + void lock(); + void unlock(); + static void addThread() { Atomic::postInc(&_threadCount); } + static void subThread() { Atomic::postDec(&_threadCount); } + static void stopRecursion() { _stopRecursion = true; } + static void allowRecursion() { _stopRecursion = false; } + void init(); + void quit(); +private: + static uint32_t _threadCount; + static bool _stopRecursion; + Mutex(const Mutex & org); + Mutex & operator = (const Mutex & org); + pthread_mutex_t _mutex; + bool _use; +}; + +class Guard +{ +public: + Guard(Mutex & m); + ~Guard() { _mutex->unlock(); } +private: + Mutex * _mutex; +}; + +class IAllocator +{ +public: + virtual ~IAllocator() {} + virtual bool initThisThread() = 0; + virtual bool quitThisThread() = 0; + virtual void enableThreadSupport() = 0; + virtual void setReturnAddressStop(const void * returnAddressStop) = 0; + virtual size_t getMaxNumThreads() const = 0; +}; + +void info(); + +} + diff --git a/vespamalloc/src/vespamalloc/malloc/datasegment.cpp b/vespamalloc/src/vespamalloc/malloc/datasegment.cpp new file mode 100644 index 00000000000..24fae26d5f4 --- /dev/null +++ b/vespamalloc/src/vespamalloc/malloc/datasegment.cpp @@ -0,0 +1,9 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+#include <vespamalloc/malloc/datasegment.hpp> +#include <vespamalloc/malloc/memblock.h> + +namespace vespamalloc { + +template class DataSegment<MemBlock>; + +} diff --git a/vespamalloc/src/vespamalloc/malloc/datasegment.h b/vespamalloc/src/vespamalloc/malloc/datasegment.h new file mode 100644 index 00000000000..c50d43dc1d8 --- /dev/null +++ b/vespamalloc/src/vespamalloc/malloc/datasegment.h @@ -0,0 +1,131 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#pragma once + +#include <limits.h> +#include <memory> +#include <vespamalloc/malloc/common.h> +#include <vespamalloc/util/traceutil.h> +#include <vespamalloc/util/stream.h> + +namespace vespamalloc { + +template<typename MemBlockPtrT> +class DataSegment +{ +public: + typedef unsigned FreeCountT; + enum { UNMAPPED_BLOCK=-4, UNUSED_BLOCK=-3, FREE_BLOCK=-2, SYSTEM_BLOCK=-1, NUM_ADMIN_CLASSES=4 }; + DataSegment() __attribute__((noinline)); + ~DataSegment() __attribute__((noinline)); + + void * getBlock(size_t & oldBlockSize, SizeClassT sc) __attribute__((noinline)); + void returnBlock(void *ptr) __attribute__((noinline)); + SizeClassT sizeClass(const void * ptr) const { return _blockList[blockId(ptr)].sizeClass(); } + size_t getMaxSize(const void * ptr) const { return _blockList[blockId(ptr)].getMaxSize(); } + const void * start() const { return _osMemory.getStart(); } + const void * end() const { return _osMemory.getEnd(); } + static SizeClassT adjustedSizeClass(size_t sz) { return (sz >> 16) + 0x400; } + static size_t adjustedClassSize(SizeClassT sc) { return (sc > 0x400) ? 
(sc - 0x400) << 16 : sc; } + size_t dataSize() const { return (const char*)end() - (const char*)start(); } + size_t textSize() const { return size_t(start()); } + size_t infoThread(FILE * os, int level, int thread, SizeClassT sct) const __attribute__((noinline)); + void info(FILE * os, size_t level) __attribute__((noinline)); + void setupLog(size_t noMemLogLevel, size_t bigMemLogLevel, + size_t bigLimit, size_t bigIncrement, + size_t allocs2Show) + { + _noMemLogLevel = noMemLogLevel; + _bigSegmentLogLevel = bigMemLogLevel; + if ((size_t(end()) < _nextLogLimit) || (size_t(end()) < (size_t(start()) + bigLimit))) { + _nextLogLimit = size_t(start()) + bigLimit; + } + _bigIncrement = bigIncrement; + _allocs2Show = allocs2Show; + checkAndLogBigSegment(); + } + void enableThreadSupport() { _mutex.init(); } + static size_t blockId(const void * ptr) { + return (size_t(ptr) - Memory::getMinPreferredStartAddress())/BlockSize; + } + static void * fromBlockId(size_t id) { + return reinterpret_cast<void *>(id*BlockSize + Memory::getMinPreferredStartAddress()); + } +private: + const char * getAdminClassName(int id) { + switch (id) { + case UNMAPPED_BLOCK: return "UNMAPPED"; + case UNUSED_BLOCK: return "UNUSED"; + case FREE_BLOCK: return "FREE"; + case SYSTEM_BLOCK: return "SYSTEM"; + default: return "UNKNOWN"; + } + } + DataSegment(const DataSegment & rhs); + DataSegment & operator = (const DataSegment & rhs); + + enum { BlockSize=0x200000, BlockCount=0x80000 }; //1T + + class BlockT + { + public: + BlockT(SizeClassT szClass = UNUSED_BLOCK, FreeCountT numBlocks = 0) + : _sizeClass(szClass), _freeChainLength(0), _realNumBlocks(numBlocks) + { } + SizeClassT sizeClass() const { return _sizeClass; } + FreeCountT realNumBlocks() const { return _realNumBlocks; } + FreeCountT freeChainLength() const { return _freeChainLength; } + void sizeClass(SizeClassT sc) { _sizeClass = sc; } + void realNumBlocks(FreeCountT fc) { _realNumBlocks = fc; } + void freeChainLength(FreeCountT fc) { 
_freeChainLength = fc; } + size_t getMaxSize() const { + return MemBlockPtrT::unAdjustSize(std::min(MemBlockPtrT::classSize(_sizeClass), + size_t(_realNumBlocks) * BlockSize)); + } + private: + SizeClassT _sizeClass; + /// Number of blocks free from here and on. For memory reuse, big blocks only. + FreeCountT _freeChainLength; + /// Real number of blocks used. Used to avoid rounding for big blocks. + FreeCountT _realNumBlocks; + }; + + template <int MaxCount> + class FreeListT { + public: + FreeListT(BlockT * blockList) __attribute__((noinline)); + void add(size_t startIndex) __attribute__((noinline)); + void * sub(size_t numBlocks) __attribute__((noinline)); + size_t lastBlock(size_t nextBlock) __attribute__((noinline)); + void removeLastBlock() { + if (_count > 0) { + _count--; + } + } + size_t info(FILE * os, int level) __attribute__((noinline)); + private: + void * linkOut(size_t findex, size_t left) __attribute__((noinline)); + BlockT *_blockList; + size_t _count; + size_t _freeStartIndex[MaxCount]; + }; + + void checkAndLogBigSegment() __attribute__((noinline)); + + typedef BlockT BlockList[BlockCount]; + typedef FreeListT<BlockCount/2> FreeList; + OSMemory _osMemory; + size_t _noMemLogLevel; + size_t _bigSegmentLogLevel; + size_t _bigIncrement; + size_t _allocs2Show; + size_t _unmapSize; + + size_t _nextLogLimit; + size_t _partialExtension; + Mutex _mutex; + BlockList _blockList; + FreeList _freeList; + FreeList _unMappedList; +}; + +} diff --git a/vespamalloc/src/vespamalloc/malloc/datasegment.hpp b/vespamalloc/src/vespamalloc/malloc/datasegment.hpp new file mode 100644 index 00000000000..dd2db0d42b2 --- /dev/null +++ b/vespamalloc/src/vespamalloc/malloc/datasegment.hpp @@ -0,0 +1,418 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+#pragma once + +#include <vespamalloc/malloc/datasegment.h> + +namespace vespamalloc { + +template<typename MemBlockPtrT> +DataSegment<MemBlockPtrT>::~DataSegment() +{ +} + +#define INIT_LOG_LIMIT 0x400000000ul // 16G + +template<typename MemBlockPtrT> +DataSegment<MemBlockPtrT>::DataSegment() : + _osMemory(BlockSize), + _noMemLogLevel(1), + _bigSegmentLogLevel(0), + _bigIncrement (0x4000000), + _allocs2Show (8), + _unmapSize(0x100000), + _nextLogLimit(INIT_LOG_LIMIT), + _partialExtension(0), + _mutex(), + _freeList(_blockList), + _unMappedList(_blockList) +{ + size_t wanted(0x1000000000ul); //64G + void * everything = _osMemory.reserve(wanted); + if (everything) { + for (size_t i = blockId(everything), m = blockId(everything)+(wanted/BlockSize); i < m; i++) { + if (i > BlockCount) { + abort(); + } + _blockList[i].sizeClass(UNUSED_BLOCK); + _blockList[i].freeChainLength(m-i); + } + _freeList.add(blockId(everything)); + } + _nextLogLimit = std::max(size_t(end()) + _nextLogLimit, _nextLogLimit); +} + +template<typename MemBlockPtrT> +void * DataSegment<MemBlockPtrT>::getBlock(size_t & oldBlockSize, SizeClassT sc) +{ + const size_t minBlockSize = std::max(size_t(BlockSize), _osMemory.getMinBlockSize()); + oldBlockSize = ((oldBlockSize + (minBlockSize-1))/minBlockSize)*minBlockSize; + size_t numBlocks((oldBlockSize + (BlockSize-1))/BlockSize); + size_t blockSize = BlockSize * numBlocks; + void * newBlock(NULL); + { + Guard sync(_mutex); + newBlock = _freeList.sub(numBlocks); + if ( newBlock == NULL ) { + newBlock = _unMappedList.sub(numBlocks); + if ( newBlock == NULL ) { + size_t nextBlock(blockId(end())); + size_t startBlock = _freeList.lastBlock(nextBlock); + if (startBlock) { + size_t adjustedBlockSize = blockSize - BlockSize*(nextBlock-startBlock); + newBlock = _osMemory.get(adjustedBlockSize); + if (newBlock != NULL) { + assert (newBlock == fromBlockId(nextBlock)); + _freeList.removeLastBlock(); + newBlock = fromBlockId(startBlock); + _partialExtension++; + } + 
} else { + newBlock = _osMemory.get(blockSize); + } + } else { + bool result(_osMemory.reclaim(newBlock, blockSize)); + assert (result); + (void) result; + } + } else { + DEBUG(fprintf(stderr, "Reuse segment %p(%d, %d)\n", newBlock, sc, numBlocks)); + } + } + if (newBlock == (void *) -1) { + newBlock = NULL; + blockSize = 0; + } else if (newBlock == NULL) { + blockSize = 0; + } else { + assert(blockId(newBlock)+numBlocks < BlockCount); + // assumes _osMemory.get will always return a value that does not make + // "i" overflow the _blockList array; this will break when hitting the + // 2T address space boundary. + for (size_t i = blockId(newBlock), m = blockId(newBlock)+numBlocks; i < m; i++) { + _blockList[i].sizeClass(sc); + _blockList[i].freeChainLength(m-i); + _blockList[i].realNumBlocks(m-i); + } + } + oldBlockSize = blockSize; + if (newBlock == NULL) { + static int recurse = 0; + if (recurse++ == 0) { + perror("Failed extending datasegment: "); + assert(false); + MemBlockPtrT::dumpInfo(_noMemLogLevel); + sleep(2); + } + return NULL; + } + checkAndLogBigSegment(); + return newBlock; +} + +template<typename MemBlockPtrT> +void DataSegment<MemBlockPtrT>::checkAndLogBigSegment() +{ + if (size_t(end()) >= _nextLogLimit) { + fprintf(stderr, "Datasegment is growing ! 
Start:%p - End:%p : nextLogLimit = %lx\n", start(), end(), _nextLogLimit); + _nextLogLimit = ((size_t(end()) + _bigIncrement)/_bigIncrement)*_bigIncrement; + static int recurse = 0; + if (recurse++ == 0) { + if (_bigSegmentLogLevel > 0) { + MemBlockPtrT::dumpInfo(_bigSegmentLogLevel); + } + } + recurse--; + } +} + +template<typename MemBlockPtrT> +void DataSegment<MemBlockPtrT>::returnBlock(void *ptr) +{ + size_t bId(blockId(ptr)); + SizeClassT sc = _blockList[bId].sizeClass(); + size_t bsz = MemBlockPtrT::classSize(sc); + if (bsz >= BlockSize) { + size_t numBlocks = bsz / BlockSize; + if (numBlocks > _blockList[bId].realNumBlocks()) { + numBlocks = _blockList[bId].realNumBlocks(); + } + assert(_blockList[bId].freeChainLength() >= numBlocks); + if ((_unmapSize < bsz) && _osMemory.release(ptr, numBlocks*BlockSize)) { + for(size_t i=0; i < numBlocks; i++) { + BlockT & b = _blockList[bId + i]; + b.sizeClass(UNMAPPED_BLOCK); + b.freeChainLength(numBlocks - i); + } + { + Guard sync(_mutex); + _unMappedList.add(bId); + } + } else { + for(size_t i=0; i < numBlocks; i++) { + BlockT & b = _blockList[bId + i]; + b.sizeClass(FREE_BLOCK); + b.freeChainLength(numBlocks - i); + } + { + Guard sync(_mutex); + _freeList.add(bId); + } + } + } +} + +template<typename MemBlockPtrT> +size_t DataSegment<MemBlockPtrT>::infoThread(FILE * os, int level, int thread, SizeClassT sct) const +{ + typedef CallGraph<typename MemBlockPtrT::Stack, 0x10000, Index> CallGraphLT; + size_t usedCount(0); + size_t checkedCount(0); + size_t allocatedCount(0); + size_t notAccounted(0); + std::unique_ptr<CallGraphLT> callGraph(new CallGraphLT); + for(size_t i=0; i < NELEMS(_blockList); ) { + const BlockT & b = _blockList[i]; + SizeClassT sc = b.sizeClass(); + if (sc == sct) { + size_t sz(MemBlockPtrT::classSize(sc)); + size_t numB(b.freeChainLength()); + for(char *m((char *)(fromBlockId(i))), *em((char*)(fromBlockId(i+numB))); (m + sz) <= em; m += sz) { + MemBlockPtrT mem(m,0,false); + checkedCount++; + if 
(mem.allocated()) { + allocatedCount++; + if (mem.threadId() == thread) { + usedCount++; + if (usedCount < _allocs2Show) { + mem.info(os, level); + } + if (mem.callStackLen() && mem.callStack()[0].valid()) { + size_t csl(mem.callStackLen()); + for (size_t j(0); j < csl; j++) { + if ( ! mem.callStack()[j].valid()) { + csl = j; + } + } + if ( ! callGraph->addStack(mem.callStack(), csl)) { + notAccounted++; + } + } + } + } + } + i += numB; + } else { + i++; + } + } + fprintf(os, "\nCallTree(Checked=%ld, GlobalAlloc=%ld(%ld%%)," "ByMeAlloc=%ld(%2.2f%%) NotAccountedDue2FullGraph=%ld:\n", + checkedCount, allocatedCount, checkedCount ? allocatedCount*100/checkedCount : 0, + usedCount, checkedCount ? static_cast<double>(usedCount*100)/checkedCount : 0.0, notAccounted); + if ( ! callGraph->empty()) { + Aggregator agg; + DumpGraph<typename CallGraphLT::Node> dump(&agg, "{ ", " }"); + callGraph->traverseDepth(dump);; + asciistream ost; + ost << agg; + fprintf(os, "%s\n", ost.c_str()); + } + fprintf(os, " count(%ld)", usedCount); + return usedCount; +} + +template<typename MemBlockPtrT> +void DataSegment<MemBlockPtrT>::info(FILE * os, size_t level) +{ + fprintf(os, "Start at %p, End at %p(%p) size(%ld) partialExtension(%ld) NextLogLimit(%lx) logLevel(%ld)\n", + _osMemory.getStart(), _osMemory.getEnd(), sbrk(0), dataSize(), _partialExtension, _nextLogLimit, level); + size_t numFreeBlocks(0), numAllocatedBlocks(0); + { + // Guard sync(_mutex); + numFreeBlocks = _freeList.info(os, level); + _unMappedList.info(os, level); + } + if (level >= 1) { +#ifdef PRINT_ALOT + SizeClassT oldSc(-17); + size_t oldChainLength(0); +#endif + size_t scTable[32+NUM_ADMIN_CLASSES]; + memset(scTable, 0, sizeof(scTable)); + for (size_t i=0; (i < NELEMS(_blockList)) && ((i*BlockSize) < dataSize()); i++) { + BlockT & b = _blockList[i]; +#ifdef PRINT_ALOT + if ((b.sizeClass() != oldSc) + || ((oldChainLength < (b.freeChainLength()+1)) + && b.freeChainLength())) + { + 
scTable[b.sizeClass()+NUM_ADMIN_CLASSES] += b.freeChainLength(); + oldSc = b.sizeClass(); + if (level & 0x2) { + fprintf(os, "Block %d at address %p with chainLength %d " + "freeCount %d sizeClass %d and size %d\n", + i, fromBlockId(i), b.freeChainLength(), b.freeCount(), + b.sizeClass(), classSize(b.sizeClass())); + } + } + oldChainLength = b.freeChainLength(); +#else + scTable[b.sizeClass()+NUM_ADMIN_CLASSES]++; +#endif + } + size_t numAdminBlocks(0); + for(size_t i=0; i < NUM_ADMIN_CLASSES; i++) { + if (scTable[i] != 0ul) { + numAllocatedBlocks += scTable[i]; + numAdminBlocks += scTable[i]; + fprintf(os, "SizeClass %2ld(%s) has %5ld blocks with %10lu bytes\n", + i-NUM_ADMIN_CLASSES, getAdminClassName(i-NUM_ADMIN_CLASSES), scTable[i], scTable[i]*BlockSize); + } + } + for(size_t i=NUM_ADMIN_CLASSES; i < NELEMS(scTable); i++) { + if (scTable[i] != 0ul) { + numAllocatedBlocks += scTable[i]; + fprintf(os, "SizeClass %2ld has %5ld blocks with %10lu bytes\n", + i-NUM_ADMIN_CLASSES, scTable[i], scTable[i]*BlockSize); + } + } + size_t total(dataSize()/BlockSize); + fprintf(os, "Usage: Total=%ld(100%%), admin=%ld(%ld%%), unused=%ld(%ld%%), allocated=%ld(%ld%%)\n", + total*BlockSize, + numAdminBlocks*BlockSize, numAdminBlocks*100/total, + numFreeBlocks*BlockSize, numFreeBlocks*100/total, + (numAllocatedBlocks-numAdminBlocks)*BlockSize, (numAllocatedBlocks-numAdminBlocks)*100/total); + } +} + +template<typename MemBlockPtrT> +template <int MaxCount> +DataSegment<MemBlockPtrT>::FreeListT<MaxCount>::FreeListT(BlockT * blockList) : + _blockList(blockList), + _count(0) +{ + for (size_t i = 0; i < NELEMS(_freeStartIndex); i++) { + _freeStartIndex[i] = -1; + } +} + +template<typename MemBlockPtrT> +template <int MaxCount> +void DataSegment<MemBlockPtrT>::FreeListT<MaxCount>::add(size_t startIndex) +{ + size_t i(0); + size_t numBlocks(_blockList[startIndex].freeChainLength()); + for (i=0; (i < _count) && (_freeStartIndex[i] < startIndex); i++) { } + size_t prevIndex(0), 
nextIndex(0); + BlockT * prev(NULL), * next(NULL); + if (i > 0) { + prevIndex = _freeStartIndex[i-1]; + prev = & _blockList[prevIndex]; + } + if (i < _count) { + nextIndex = _freeStartIndex[i]; + next = & _blockList[nextIndex]; + } + + if (prev && (prevIndex + prev->freeChainLength() == startIndex)) { + // Join with freeChain ahead. + prev->freeChainLength(prev->freeChainLength() + numBlocks); + startIndex = prevIndex; + } else if (next && (startIndex + numBlocks == nextIndex)) { + // Join with freeChain that follows. + _freeStartIndex[i] = startIndex; + nextIndex = startIndex; + size_t oldNextCount = next->freeChainLength(); + next = & _blockList[startIndex]; + next->freeChainLength(oldNextCount + numBlocks); + } else { + // Insert. + for(size_t j=0; j < (_count-i); j++) { + _freeStartIndex[_count-j] = _freeStartIndex[_count-j-1]; + } + _count++; + _freeStartIndex[i] = startIndex; + } + + if (prev && next && (prevIndex + prev->freeChainLength() == nextIndex)) { + prev->freeChainLength(prev->freeChainLength() + next->freeChainLength()); + _count--; + for(size_t j=i; j < _count; j++) { + _freeStartIndex[j] = _freeStartIndex[j+1]; + } + _freeStartIndex[_count] = -1; + } +} + +template<typename MemBlockPtrT> +template <int MaxCount> +void * DataSegment<MemBlockPtrT>::FreeListT<MaxCount>::sub(size_t numBlocks) +{ + void * block(NULL); + size_t bestFitIndex(_count); + int bestLeft(INT_MAX); + for(size_t i=0; i < _count; i++) { + size_t index(_freeStartIndex[i]); + BlockT & b = _blockList[index]; + int left = b.freeChainLength() - numBlocks; + if ((left >= 0) && (left < bestLeft)) { + bestLeft = left; + bestFitIndex = i; + } + } + if (bestLeft != INT_MAX) { + block = linkOut(bestFitIndex, bestLeft); + } + return block; +} + +template<typename MemBlockPtrT> +template <int MaxCount> +size_t DataSegment<MemBlockPtrT>::FreeListT<MaxCount>::lastBlock(size_t nextBlock) +{ + size_t lastIndex(0); + if (_count > 0) { + size_t index(_freeStartIndex[_count-1]); + BlockT & b = 
_blockList[index]; + if (index + b.freeChainLength() == nextBlock) { + lastIndex = index; + } + } + return lastIndex; +} + +template<typename MemBlockPtrT> +template <int MaxCount> +size_t DataSegment<MemBlockPtrT>::FreeListT<MaxCount>::info(FILE * os, int UNUSED(level)) +{ + size_t freeBlockCount(0); + for (size_t i=0; i < _count; i++) { + size_t index(_freeStartIndex[i]); + const BlockT & b = _blockList[index]; + freeBlockCount += b.freeChainLength(); + fprintf(os, "Free #%3ld block #%5ld chainlength %5d size %10lu\n", + i, index, b.freeChainLength(), size_t(b.freeChainLength())*BlockSize); + } + return freeBlockCount; +} + +template<typename MemBlockPtrT> +template <int MaxCount> +void * DataSegment<MemBlockPtrT>::FreeListT<MaxCount>::linkOut(size_t findex, size_t left) +{ + size_t index(_freeStartIndex[findex]); + BlockT & b = _blockList[index]; + size_t startIndex = index + left; + void *block = fromBlockId(startIndex); + if (left > 0) { + b.freeChainLength(left); + } else { + _count--; + for(size_t j=findex; j < (_count); j++) { + _freeStartIndex[j] = _freeStartIndex[j+1]; + } + _freeStartIndex[_count] = -1; + } + return block; +} + +} diff --git a/vespamalloc/src/vespamalloc/malloc/datasegmentd.cpp b/vespamalloc/src/vespamalloc/malloc/datasegmentd.cpp new file mode 100644 index 00000000000..6fe34e530ed --- /dev/null +++ b/vespamalloc/src/vespamalloc/malloc/datasegmentd.cpp @@ -0,0 +1,9 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
// datasegmentd.cpp: explicit instantiation of DataSegment for the
// bounds-checking debug memblock type.
#include <vespamalloc/malloc/datasegment.hpp>
#include <vespamalloc/malloc/memblockboundscheck_d.h>

namespace vespamalloc {

template class DataSegment<MemBlockBoundsCheck>;

}
diff --git a/vespamalloc/src/vespamalloc/malloc/datasegmentdst.cpp b/vespamalloc/src/vespamalloc/malloc/datasegmentdst.cpp new file mode 100644 index 00000000000..e41faa56dfa --- /dev/null +++ b/vespamalloc/src/vespamalloc/malloc/datasegmentdst.cpp @@ -0,0 +1,9 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
// datasegmentdst.cpp: same instantiation, built against the
// stack-trace-enabled bounds-check header.
#include <vespamalloc/malloc/datasegment.hpp>
#include <vespamalloc/malloc/memblockboundscheck_dst.h>

namespace vespamalloc {

template class DataSegment<MemBlockBoundsCheck>;

}
diff --git a/vespamalloc/src/vespamalloc/malloc/globalpool.cpp b/vespamalloc/src/vespamalloc/malloc/globalpool.cpp new file mode 100644 index 00000000000..1bae5c76920 --- /dev/null +++ b/vespamalloc/src/vespamalloc/malloc/globalpool.cpp @@ -0,0 +1,9 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
// globalpool.cpp: explicit instantiation of the global allocation pool for
// the production (non-checking) MemBlock type.
#include <vespamalloc/malloc/globalpool.hpp>
#include <vespamalloc/malloc/memblock.h>

namespace vespamalloc {

template class AllocPoolT<MemBlock>;

}
diff --git a/vespamalloc/src/vespamalloc/malloc/globalpool.h b/vespamalloc/src/vespamalloc/malloc/globalpool.h new file mode 100644 index 00000000000..0669780b796 --- /dev/null +++ b/vespamalloc/src/vespamalloc/malloc/globalpool.h @@ -0,0 +1,86 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
#pragma once

#include <vespamalloc/malloc/common.h>
#include <vespamalloc/malloc/allocchunk.h>
#include <vespamalloc/malloc/datasegment.h>
#include <algorithm>

#define USE_STAT2(a) a

namespace vespamalloc {

// AllocPoolT is the process-global middle layer between the per-thread
// pools and the DataSegment.  For every size class it keeps two lock-free
// lists of chunk descriptors: _full (chunks holding free memory blocks)
// and _empty (drained descriptors ready for reuse).
template <typename MemBlockPtrT>
class AllocPoolT
{
public:
    typedef AFList<MemBlockPtrT> ChunkSList;
    AllocPoolT(DataSegment<MemBlockPtrT> & ds);
    ~AllocPoolT();

    // Thread-facing exchange operations: hand in a chunk, get one back.
    ChunkSList *getFree(SizeClassT sc, size_t minBlocks);
    ChunkSList *exchangeFree(SizeClassT sc, ChunkSList * csl);
    ChunkSList *exchangeAlloc(SizeClassT sc, ChunkSList * csl);
    // Allocation of an exact (over-sized) block outside the class ladder.
    ChunkSList *exactAlloc(size_t exactSize, SizeClassT sc, ChunkSList * csl) __attribute__((noinline));
    // Drain a chunk and give its blocks back to the data segment.
    ChunkSList *returnMemory(SizeClassT sc, ChunkSList * csl) __attribute__((noinline));

    DataSegment<MemBlockPtrT> & dataSegment() { return _dataSegment; }
    void enableThreadSupport() __attribute__((noinline));

    // Tuning knobs shared by all instances (hidden statics, set pre-threads).
    static void setParams(size_t alwaysReuseLimit, size_t threadCacheLimit) {
        _alwaysReuseLimit = alwaysReuseLimit;
        _threadCacheLimit = threadCacheLimit;
    }

    void info(FILE * os, size_t level=0) __attribute__((noinline));
private:
    ChunkSList * getFree(SizeClassT sc) __attribute__((noinline));
    ChunkSList * getAlloc(SizeClassT sc) __attribute__((noinline));
    // Pull fresh memory from the data segment and slice it into chunks.
    ChunkSList * malloc(const Guard & guard, SizeClassT sc) __attribute__((noinline));
    ChunkSList * getChunks(const Guard & guard, size_t numChunks) __attribute__((noinline));
    ChunkSList * allocChunkList(const Guard & guard) __attribute__((noinline));
    // Non-copyable (declared, never defined).
    AllocPoolT(const AllocPoolT & ap);
    AllocPoolT & operator = (const AllocPoolT & ap);

    // Per-size-class pair of lock-free list heads.
    class AllocFree
    {
    public:
        AllocFree() : _full(), _empty() { }
        typename ChunkSList::HeadPtr _full;
        typename ChunkSList::HeadPtr _empty;
    };
    // Per-size-class operation counters (written with atomic increments).
    class Stat
    {
    public:
        Stat() : _getAlloc(0),
                 _getFree(0),
                 _exchangeAlloc(0),
                 _exchangeFree(0),
                 _exactAlloc(0),
                 _return(0),_malloc(0) { }
        size_t _getAlloc;
        size_t _getFree;
        size_t _exchangeAlloc;
        size_t _exchangeFree;
        size_t _exactAlloc;
        size_t _return;
        size_t _malloc;
        bool isUsed() const {
            // Do not count _getFree.
            return (_getAlloc || _exchangeAlloc || _exchangeFree || _exactAlloc || _return || _malloc);
        }
    };

    Mutex _mutex;
    ChunkSList * _chunkPool;
    AllocFree _scList[NUM_SIZE_CLASSES] VESPALIB_ATOMIC_TAGGEDPTR_ALIGNMENT;
    DataSegment<MemBlockPtrT> & _dataSegment;
    size_t _getChunks;
    size_t _getChunksSum;
    size_t _allocChunkList;
    Stat _stat[NUM_SIZE_CLASSES];
    static size_t _threadCacheLimit __attribute__((visibility("hidden")));
    static size_t _alwaysReuseLimit __attribute__((visibility("hidden")));
};

}
diff --git a/vespamalloc/src/vespamalloc/malloc/globalpool.hpp b/vespamalloc/src/vespamalloc/malloc/globalpool.hpp new file mode 100644 index 00000000000..b620c388fb6 --- /dev/null +++ b/vespamalloc/src/vespamalloc/malloc/globalpool.hpp @@ -0,0 +1,272 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once + +#include <vespamalloc/malloc/globalpool.h> + +#define USE_STAT2(a) a + +using vespalib::Atomic; + +namespace vespamalloc { + +template <typename MemBlockPtrT> +size_t AllocPoolT<MemBlockPtrT>::_threadCacheLimit __attribute__((visibility("hidden"))) = 0x10000; +template <typename MemBlockPtrT> +size_t AllocPoolT<MemBlockPtrT>::_alwaysReuseLimit __attribute__((visibility("hidden"))) = 0x200000; + +template <typename MemBlockPtrT> +AllocPoolT<MemBlockPtrT>::AllocPoolT(DataSegment<MemBlockPtrT> & ds) + : _chunkPool(NULL), + _dataSegment(ds), + _getChunks(0), + _getChunksSum(0), + _allocChunkList(0) +{ + ChunkSList::init(); + memset(_scList, 0, sizeof(_scList)); +} + +template <typename MemBlockPtrT> +AllocPoolT<MemBlockPtrT>::~AllocPoolT() +{ +} + +template <typename MemBlockPtrT> +void AllocPoolT<MemBlockPtrT>::enableThreadSupport() +{ + ChunkSList::enableThreadSupport(); + _mutex.init(); +} + +template <typename MemBlockPtrT> +typename AllocPoolT<MemBlockPtrT>::ChunkSList * +AllocPoolT<MemBlockPtrT>::getFree(SizeClassT sc) +{ + typename ChunkSList::HeadPtr & empty = _scList[sc]._empty; + ChunkSList * csl(NULL); + while ((csl = ChunkSList::linkOut(empty)) == NULL) { + Guard sync(_mutex); + if (empty._ptr == NULL) { + ChunkSList * ncsl(getChunks(sync, 1)); + if (ncsl) { + ChunkSList::linkInList(empty, ncsl); + } else { + assert(ncsl != NULL); + return NULL; + } + } + } + PARANOID_CHECK1( if ( !csl->empty()) { *(int*)0 = 0; } ); + return csl; +} + +template <typename MemBlockPtrT> +typename AllocPoolT<MemBlockPtrT>::ChunkSList * +AllocPoolT<MemBlockPtrT>::getAlloc(SizeClassT sc) +{ + ChunkSList * csl(NULL); + typename ChunkSList::HeadPtr & full = _scList[sc]._full; + while ((csl = ChunkSList::linkOut(full)) == NULL) { + Guard sync(_mutex); + if (full._ptr == NULL) { + ChunkSList * ncsl(malloc(sync, sc)); + if (ncsl) { + ChunkSList::linkInList(full, ncsl); + } else { + return NULL; + } + } + USE_STAT2(Atomic::postInc(&_stat[sc]._getAlloc)); + } + 
PARANOID_CHECK1( if (csl->empty() || (csl->count() > ChunkSList::NumBlocks)) { *(int*)0 = 0; } ); + return csl; +} + +template <typename MemBlockPtrT> +typename AllocPoolT<MemBlockPtrT>::ChunkSList * +AllocPoolT<MemBlockPtrT>::getFree(SizeClassT sc, size_t UNUSED(minBlocks)) +{ + ChunkSList * csl = getFree(sc); + USE_STAT2(Atomic::postInc(&_stat[sc]._getFree)); + return csl; +} + +template <typename MemBlockPtrT> +typename AllocPoolT<MemBlockPtrT>::ChunkSList * +AllocPoolT<MemBlockPtrT>::exchangeFree(SizeClassT sc, typename AllocPoolT<MemBlockPtrT>::ChunkSList * csl) +{ + PARANOID_CHECK1( if (csl->empty() || (csl->count() > ChunkSList::NumBlocks)) { *(int*)0 = 0; } ); + AllocFree & af = _scList[sc]; + ChunkSList::linkIn(af._full, csl, csl); + ChunkSList *ncsl = getFree(sc); + USE_STAT2(Atomic::postInc(&_stat[sc]._exchangeFree)); + return ncsl; +} + +template <typename MemBlockPtrT> +typename AllocPoolT<MemBlockPtrT>::ChunkSList * +AllocPoolT<MemBlockPtrT>::exchangeAlloc(SizeClassT sc, typename AllocPoolT<MemBlockPtrT>::ChunkSList * csl) +{ + PARANOID_CHECK1( if ( ! 
csl->empty()) { *(int*)0 = 0; } ); + AllocFree & af = _scList[sc]; + ChunkSList::linkIn(af._empty, csl, csl); + ChunkSList * ncsl = getAlloc(sc); + USE_STAT2(Atomic::postInc(&_stat[sc]._exchangeAlloc)); + PARANOID_CHECK1( if (ncsl->empty() || (ncsl->count() > ChunkSList::NumBlocks)) { *(int*)0 = 0; } ); + return ncsl; +} + +template <typename MemBlockPtrT> +typename AllocPoolT<MemBlockPtrT>::ChunkSList * +AllocPoolT<MemBlockPtrT>::exactAlloc(size_t exactSize, SizeClassT sc, + typename AllocPoolT<MemBlockPtrT>::ChunkSList * csl) +{ + size_t adjustedSize((( exactSize + (_alwaysReuseLimit - 1))/_alwaysReuseLimit)*_alwaysReuseLimit); + void *exactBlock = _dataSegment.getBlock(adjustedSize, sc); + MemBlockPtrT mem(exactBlock, MemBlockPtrT::unAdjustSize(adjustedSize)); + csl->add(mem); + ChunkSList * ncsl = csl; + USE_STAT2(Atomic::postInc(&_stat[sc]._exactAlloc)); + mem.logBigBlock(exactSize, mem.adjustSize(exactSize), MemBlockPtrT::classSize(sc)); + PARANOID_CHECK1( if (ncsl->empty() || (ncsl->count() > ChunkSList::NumBlocks)) { *(int*)0 = 0; } ); + return ncsl; +} + +template <typename MemBlockPtrT> +typename AllocPoolT<MemBlockPtrT>::ChunkSList * +AllocPoolT<MemBlockPtrT>::returnMemory(SizeClassT sc, + typename AllocPoolT<MemBlockPtrT>::ChunkSList * csl) +{ + ChunkSList * completelyEmpty(NULL); +#if 0 + completelyEmpty = exchangeFree(sc, csl); +#else + for(; !csl->empty(); ) { + MemBlockPtrT mem; + csl->sub(mem); + mem.logBigBlock(mem.size(), mem.adjustSize(mem.size()), MemBlockPtrT::classSize(sc)); + _dataSegment.returnBlock(mem.rawPtr()); + } + completelyEmpty = csl; +#endif + USE_STAT2(Atomic::postInc(&_stat[sc]._return)); + return completelyEmpty; +} + +template <typename MemBlockPtrT> +typename AllocPoolT<MemBlockPtrT>::ChunkSList * +AllocPoolT<MemBlockPtrT>::malloc(const Guard & guard, SizeClassT sc) +{ + const size_t numShifts = + (sc <= MemBlockPtrT::SizeClassSpan) ? 
(MemBlockPtrT::SizeClassSpan - sc) : 0; + size_t numBlocks = 1 << numShifts; + const size_t cs(MemBlockPtrT::classSize(sc)); + size_t blockSize = cs * numBlocks; + void * block = _dataSegment.getBlock(blockSize, sc); + ChunkSList * csl(NULL); + if (block != NULL) { + numBlocks = (blockSize + cs - 1)/cs; + const size_t blocksPerChunk(std::max(1, std::min(int(ChunkSList::NumBlocks), + int(_threadCacheLimit >> (MemBlockPtrT::MinClassSize + sc))))); + + const size_t numChunks = (numBlocks+(blocksPerChunk-1))/blocksPerChunk; + csl = getChunks(guard, numChunks); + if (csl != NULL) { + char *first = (char *) block; + const size_t itemSize = cs; + size_t numItems(0); + const size_t maxItems(blockSize/itemSize); + ChunkSList * curr = csl; + for ( ; curr->getNext() && (numItems < maxItems); curr = curr->getNext()) { + PARANOID_CHECK1( if ( ! curr->empty()) { *(int*)0 = 0; } ); + numItems += curr->fill(first + numItems*itemSize, sc, blocksPerChunk); + } + if (numItems < maxItems) { + PARANOID_CHECK1( if ( ! curr->empty()) { *(int*)0 = 0; } ); + PARANOID_CHECK1( if (numItems + blocksPerChunk < maxItems) { *(int*)1 = 1; } ); + numItems += curr->fill(first + numItems*itemSize, sc, maxItems - numItems); + } + // Can not add empty objects to list + PARANOID_CHECK1( if (curr->empty()) { *(int*)0 = 0; } ); + // There must not be empty objects in list. 
+ PARANOID_CHECK1( if (curr->getNext()) { *(int*)1 = 1; } ); + } + } + PARANOID_CHECK1( for (ChunkSList * c(csl); c; c = c->getNext()) { if (c->empty()) { *(int*)1 = 1; } } ); + USE_STAT2(Atomic::postInc(&_stat[sc]._malloc)); + return csl; +} + +template <typename MemBlockPtrT> +typename AllocPoolT<MemBlockPtrT>::ChunkSList * +AllocPoolT<MemBlockPtrT>::getChunks(const Guard & guard, size_t numChunks) +{ + ChunkSList * csl(_chunkPool); + ChunkSList * prev(csl); + bool enough(true); + for (size_t i=0; enough && (i < numChunks); i++, csl = csl->getNext()) { + if (csl == NULL) { + csl = allocChunkList(guard); + enough = (csl != NULL); + if (prev) { + prev->setNext(csl); + } else { + _chunkPool = csl; + } + } + prev = csl; + } + if (enough) { + csl = _chunkPool; + _chunkPool = prev->getNext(); + prev->setNext(NULL); + } else { + csl = NULL; + } + USE_STAT2(Atomic::postInc(&_getChunks)); + USE_STAT2(_getChunksSum+=numChunks); + PARANOID_CHECK1( for (ChunkSList * c(csl); c; c = c->getNext()) { if ( ! 
c->empty()) { *(int*)1 = 1; } } ); + return csl; +} + +template <typename MemBlockPtrT> +typename AllocPoolT<MemBlockPtrT>::ChunkSList * +AllocPoolT<MemBlockPtrT>::allocChunkList(const Guard & guard) +{ + (void) guard; + size_t blockSize(sizeof(ChunkSList)*0x2000); + void * block = _dataSegment.getBlock(blockSize, _dataSegment.SYSTEM_BLOCK); + ChunkSList * newList(NULL); + if (block != NULL) { + size_t chunksInBlock(blockSize/sizeof(ChunkSList)); + newList = new (block) ChunkSList[chunksInBlock]; + for (size_t j=0; j < (chunksInBlock-1); j++) { + newList[j].setNext(newList+j+1); + } + newList[chunksInBlock-1].setNext(NULL); + } + USE_STAT2(Atomic::postInc(&_allocChunkList)); + return newList; +} + +template <typename MemBlockPtrT> +void AllocPoolT<MemBlockPtrT>::info(FILE * os, size_t level) +{ + if (level > 0) { + fprintf(os, "GlobalPool getChunks(%ld, %ld) allocChunksList(%ld):\n", + _getChunks, _getChunksSum, _allocChunkList); + for (size_t i = 0; i < NELEMS(_stat); i++) { + const Stat & s = _stat[i]; + if (s.isUsed()) { + fprintf(os, "SC %2ld(%10ld) GetAlloc(%6ld) GetFree(%6ld) " + "ExChangeAlloc(%6ld) ExChangeFree(%6ld) ExactAlloc(%6ld) " + "Returned(%6ld) Malloc(%6ld)\n", + i, MemBlockPtrT::classSize(i), s._getAlloc, s._getFree, + s._exchangeAlloc, s._exchangeFree, s._exactAlloc, + s._return, s._malloc); + } + } + } +} + +} diff --git a/vespamalloc/src/vespamalloc/malloc/globalpoold.cpp b/vespamalloc/src/vespamalloc/malloc/globalpoold.cpp new file mode 100644 index 00000000000..e6b3afc0ca6 --- /dev/null +++ b/vespamalloc/src/vespamalloc/malloc/globalpoold.cpp @@ -0,0 +1,9 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
// globalpoold.cpp: explicit instantiation of AllocPoolT for the
// bounds-checking debug memblock type.
#include <vespamalloc/malloc/globalpool.hpp>
#include <vespamalloc/malloc/memblockboundscheck_d.h>

namespace vespamalloc {

template class AllocPoolT<MemBlockBoundsCheck>;

}
diff --git a/vespamalloc/src/vespamalloc/malloc/globalpooldst.cpp b/vespamalloc/src/vespamalloc/malloc/globalpooldst.cpp new file mode 100644 index 00000000000..e61ea65a0ed --- /dev/null +++ b/vespamalloc/src/vespamalloc/malloc/globalpooldst.cpp @@ -0,0 +1,9 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
// globalpooldst.cpp: same instantiation, stack-trace-enabled variant.
#include <vespamalloc/malloc/globalpool.hpp>
#include <vespamalloc/malloc/memblockboundscheck_dst.h>

namespace vespamalloc {

template class AllocPoolT<MemBlockBoundsCheck>;

}
diff --git a/vespamalloc/src/vespamalloc/malloc/malloc.cpp b/vespamalloc/src/vespamalloc/malloc/malloc.cpp new file mode 100644 index 00000000000..89d7fdd5937 --- /dev/null +++ b/vespamalloc/src/vespamalloc/malloc/malloc.cpp @@ -0,0 +1,33 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
// malloc.cpp: bootstrap of the production allocator.  The allocator object
// is constructed by placement-new into a static buffer (_Gmem) because no
// heap exists yet at this point.
#include <vespamalloc/malloc/malloc.h>
#include <vespamalloc/malloc/memorywatcher.h>
#include <vespamalloc/malloc/memblock.h>
#include <vespamalloc/malloc/stat.h>

namespace vespamalloc {

typedef ThreadListT<MemBlock, NoStat> ThreadList;
typedef MemoryWatcher<MemBlock, ThreadList> Allocator;

static char _Gmem[sizeof(Allocator)];
static Allocator *_GmemP = NULL;

static Allocator * createAllocator()
{
    if (_GmemP == NULL) {
        // Mark construction in progress with a non-NULL sentinel first --
        // presumably so allocations made while the Allocator constructor
        // runs do not re-enter this branch and construct twice.
        // NOTE(review): not thread-safe; assumes first call happens before
        // any threads are spawned -- TODO confirm.
        _GmemP = (Allocator *)1;
        _GmemP = new (_Gmem) Allocator(-1, 0x7fffffffffffffffl);
    }
    return _GmemP;
}

// Out-of-line definition used by the headers: route info dumps to the
// global allocator instance.
template <>
void MemBlock::
dumpInfo(size_t level)
{
    _GmemP->info(_logFile, level);
}

}

#include <vespamalloc/malloc/overload.h>
diff --git a/vespamalloc/src/vespamalloc/malloc/malloc.h b/vespamalloc/src/vespamalloc/malloc/malloc.h new file mode 100644 index 00000000000..07e14217304 --- /dev/null +++ b/vespamalloc/src/vespamalloc/malloc/malloc.h @@ -0,0 +1,227 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once + +#include <vespamalloc/malloc/common.h> +#include <vespamalloc/malloc/datasegment.h> +#include <vespamalloc/malloc/allocchunk.h> +#include <vespamalloc/malloc/globalpool.h> +#include <vespamalloc/malloc/threadpool.h> +#include <vespamalloc/malloc/threadlist.h> +#include <vespamalloc/malloc/threadproxy.h> + +namespace vespamalloc { + +template <typename MemBlockPtrT, typename ThreadListT> +class MemoryManager : public IAllocator +{ +public: + MemoryManager(size_t logLimitAtStart); + virtual ~MemoryManager(); + virtual bool initThisThread(); + virtual bool quitThisThread(); + virtual void enableThreadSupport(); + virtual void setReturnAddressStop(const void * returnAddressStop) { MemBlockPtrT::Stack::setStopAddress(returnAddressStop); } + virtual size_t getMaxNumThreads() const { return _threadList.getMaxNumThreads(); } + + void *malloc(size_t sz); + void *realloc(void *oldPtr, size_t sz); + void free(void *ptr) { freeSC(ptr, _segment.sizeClass(ptr)); } + void free(void *ptr, size_t sz) { freeSC(ptr, MemBlockPtrT::sizeClass(sz)); } + size_t getMinSizeForAlignment(size_t align, size_t sz) const { return MemBlockPtrT::getMinSizeForAlignment(align, sz); } + size_t sizeClass(const void *ptr) const { return _segment.sizeClass(ptr); } + + void *calloc(size_t nelm, size_t esz) { + void * ptr = malloc(nelm * esz); + if (ptr) { + memset(ptr, 0, nelm * esz); + } + return ptr; + } + + void info(FILE * os, size_t level=0) __attribute__ ((noinline)); + + void setupSegmentLog(size_t noMemLogLevel, + size_t bigMemLogLevel, + size_t bigLimit, + size_t bigIncrement, + size_t allocs2Show) + { + _segment.setupLog(noMemLogLevel, bigMemLogLevel, bigLimit, bigIncrement, allocs2Show); + } + void setupLog(size_t doubleDelete, size_t invalidMem, size_t prAllocLimit) { + _doubleDeleteLogLevel = doubleDelete; + _invalidMemLogLevel = invalidMem; + _prAllocLimit = prAllocLimit; + } + void setParams(size_t alwayReuseLimit, size_t threadCacheLimit) { + 
_threadList.setParams(alwayReuseLimit, threadCacheLimit); + _allocPool.setParams(alwayReuseLimit, threadCacheLimit); + } +private: + void freeSC(void *ptr, SizeClassT sc); + void crash() __attribute__((noinline));; + typedef AllocPoolT<MemBlockPtrT> AllocPool; + typedef typename ThreadListT::ThreadPool ThreadPool; + size_t _doubleDeleteLogLevel; + size_t _invalidMemLogLevel; + size_t _prAllocLimit; + DataSegment<MemBlockPtrT> _segment; + AllocPool _allocPool; + ThreadListT _threadList; +}; + +template <typename MemBlockPtrT, typename ThreadListT> +MemoryManager<MemBlockPtrT, ThreadListT>::MemoryManager(size_t logLimitAtStart) : + IAllocator(), + _doubleDeleteLogLevel(1), + _invalidMemLogLevel(1), + _prAllocLimit(logLimitAtStart), + _segment(), + _allocPool(_segment), + _threadList(_allocPool) +{ + setAllocatorForThreads(this); + initThisThread(); + Mutex::allowRecursion(); +} + +template <typename MemBlockPtrT, typename ThreadListT> +MemoryManager<MemBlockPtrT, ThreadListT>::~MemoryManager() +{ +} + +template <typename MemBlockPtrT, typename ThreadListT> +bool MemoryManager<MemBlockPtrT, ThreadListT>::initThisThread() +{ + bool retval(_threadList.initThisThread()); + if ( retval ) { + // ThreadPool & tp = _threadList.getCurrent(); + // tp.init(_threadList.getThreadId()); + } else { + abort(); + } + return retval; +} + +template <typename MemBlockPtrT, typename ThreadListT> +bool MemoryManager<MemBlockPtrT, ThreadListT>::quitThisThread() +{ + return _threadList.quitThisThread(); +} + +template <typename MemBlockPtrT, typename ThreadListT> +void MemoryManager<MemBlockPtrT, ThreadListT>::enableThreadSupport() +{ + _segment.enableThreadSupport(); + _allocPool.enableThreadSupport(); + _threadList.enableThreadSupport(); +} + +template <typename MemBlockPtrT, typename ThreadListT> +void MemoryManager<MemBlockPtrT, ThreadListT>::crash() +{ + fprintf(stderr, "vespamalloc detected unrecoverable error.\n"); +#if 0 + if (_invalidMemLogLevel > 0) { + static size_t numRecurse=0; 
+ if (numRecurse++ == 0) { + MemBlockPtrT::dumpInfo(_invalidMemLogLevel); + } + numRecurse--; + } + sleep(1); +#else + abort(); +#endif +} + +template <typename MemBlockPtrT, typename ThreadListT> +void MemoryManager<MemBlockPtrT, ThreadListT>::info(FILE * os, size_t level) +{ + fprintf(os, "DataSegment at %p(%ld), AllocPool at %p(%ld), ThreadList at %p(%ld)\n", + &_segment, sizeof(_segment), &_allocPool, sizeof(_allocPool), + &_threadList, sizeof(_threadList)); + _segment.info(os, level); + _allocPool.info(os, level); + _threadList.info(os, level); + fflush(os); +} + +template <typename MemBlockPtrT, typename ThreadListT> +void * MemoryManager<MemBlockPtrT, ThreadListT>::malloc(size_t sz) +{ + MemBlockPtrT mem; + ThreadPool & tp = _threadList.getCurrent(); + tp.malloc(mem.adjustSize(sz), mem); + if (!mem.validFree()) { + fprintf(stderr, "Memory %p(%ld) has been tampered with after free.\n", mem.ptr(), mem.size()); + crash(); + } + PARANOID_CHECK2(if (!mem.validFree() && mem.ptr()) { crash(); } ); + mem.setExact(sz); + mem.alloc(_prAllocLimit<=sz); + return mem.ptr(); +} + +template <typename MemBlockPtrT, typename ThreadListT> +void MemoryManager<MemBlockPtrT, ThreadListT>::freeSC(void *ptr, SizeClassT sc) +{ + if (MemBlockPtrT::verifySizeClass(sc)) { + ThreadPool & tp = _threadList.getCurrent(); + MemBlockPtrT mem(ptr); + mem.readjustAlignment(_segment); + if (mem.validAlloc()) { + mem.free(); + tp.free(mem, sc); + } else if (mem.validFree()) { + fprintf(stderr, "Already deleted %p(%ld).\n", mem.ptr(), mem.size()); + // MemBlockPtrT::dumpInfo(_doubleDeleteLogLevel); + crash(); + } else { + fprintf(stderr, "Someone has tamper with my pre/post signatures of my memoryblock %p(%ld).\n", mem.ptr(), mem.size()); + crash(); + } + } else { + fprintf(stderr, "%p not allocated here, can not be freed\n", ptr); + crash(); + } +} + +template <typename MemBlockPtrT, typename ThreadListT> +void * MemoryManager<MemBlockPtrT, ThreadListT>::realloc(void *oldPtr, size_t sz) +{ + 
void *ptr(NULL); + if (oldPtr) { + MemBlockPtrT mem(oldPtr); + mem.readjustAlignment(_segment); + if (! mem.validAlloc()) { + fprintf(stderr, "Someone has tamper with my pre/post signatures of my memoryblock %p(%ld).\n", mem.ptr(), mem.size()); + crash(); + } + SizeClassT sc(_segment.sizeClass(oldPtr)); + if (sc >= 0) { + size_t oldSz(_segment.getMaxSize(oldPtr)); + if (sz > oldSz) { + ptr = malloc(sz); + if (ptr) { + memcpy(ptr, oldPtr, oldSz); + free(oldPtr); + } + } else { + mem.setExact(sz); + ptr = oldPtr; + } + } else { + ptr = malloc(sz); + if (ptr) { + memcpy(ptr, oldPtr, sz); + } + } + } else { + ptr = malloc(sz); + } + PARANOID_CHECK2( { MemBlockPtrT mem(ptr); mem.readjustAlignment(_segment); if (! mem.validAlloc()) { crash(); } }); + return ptr; +} + +} diff --git a/vespamalloc/src/vespamalloc/malloc/mallocd.cpp b/vespamalloc/src/vespamalloc/malloc/mallocd.cpp new file mode 100644 index 00000000000..8357ff8711d --- /dev/null +++ b/vespamalloc/src/vespamalloc/malloc/mallocd.cpp @@ -0,0 +1,32 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+#include <vespamalloc/malloc/mallocd.h> +#include <vespamalloc/malloc/memblockboundscheck_d.h> + +namespace vespamalloc { + +typedef ThreadListT<MemBlockBoundsCheck, Stat> ThreadList; +typedef MemoryWatcher<MemBlockBoundsCheck, ThreadList> Allocator; + +static char _Gmem[sizeof(Allocator)]; +static Allocator *_GmemP = NULL; + +static Allocator * createAllocator() +{ + if (_GmemP == NULL) { + _GmemP = new (_Gmem) Allocator(-1, 0x7fffffffffffffffl); + } + return _GmemP; +} + +template <size_t MaxSizeClassMultiAllocC, size_t StackTraceLen> +void MemBlockBoundsCheckBaseT<MaxSizeClassMultiAllocC, StackTraceLen>:: +dumpInfo(size_t level) +{ + _GmemP->info(_logFile, level); +} + +template void MemBlockBoundsCheckBaseT<20, 0>::dumpInfo(size_t); + +} + +#include <vespamalloc/malloc/overload.h> diff --git a/vespamalloc/src/vespamalloc/malloc/mallocd.h b/vespamalloc/src/vespamalloc/malloc/mallocd.h new file mode 100644 index 00000000000..d716225c798 --- /dev/null +++ b/vespamalloc/src/vespamalloc/malloc/mallocd.h @@ -0,0 +1,13 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#pragma once + +#include <vespamalloc/malloc/memblockboundscheck.h> +#include <vespamalloc/malloc/malloc.h> +#include <vespamalloc/malloc/memorywatcher.h> +#include <vespamalloc/util/callstack.h> +#include <vespamalloc/malloc/stat.h> + +namespace vespamalloc { + +} // namespace vespamalloc + diff --git a/vespamalloc/src/vespamalloc/malloc/mallocdst.h b/vespamalloc/src/vespamalloc/malloc/mallocdst.h new file mode 100644 index 00000000000..a78b6aac104 --- /dev/null +++ b/vespamalloc/src/vespamalloc/malloc/mallocdst.h @@ -0,0 +1,23 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+#pragma once + +#include <vespamalloc/malloc/mallocd.h> +#include <vespamalloc/malloc/memblockboundscheck_dst.h> + +namespace vespamalloc { + +typedef ThreadListT<MemBlockBoundsCheck, Stat> ThreadList; +typedef MemoryWatcher<MemBlockBoundsCheck, ThreadList> Allocator; + +static char _Gmem[sizeof(Allocator)]; +static Allocator *_GmemP = NULL; + +template <size_t MaxSizeClassMultiAllocC, size_t StackTraceLen> +void MemBlockBoundsCheckBaseT<MaxSizeClassMultiAllocC, StackTraceLen>::dumpInfo(size_t level) +{ + fprintf(_logFile, "mallocdst dumping at level %ld\n", level); + _GmemP->info(_logFile, level); +} + +} + diff --git a/vespamalloc/src/vespamalloc/malloc/mallocdst16.cpp b/vespamalloc/src/vespamalloc/malloc/mallocdst16.cpp new file mode 100644 index 00000000000..f512e4d4c27 --- /dev/null +++ b/vespamalloc/src/vespamalloc/malloc/mallocdst16.cpp @@ -0,0 +1,32 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#include <vespamalloc/malloc/mallocdst.h> + +namespace vespamalloc { + +static Allocator * createAllocator() +{ + if (_GmemP == NULL) { + _GmemP = new (_Gmem) Allocator(1, 0x200000); + } + return _GmemP; +} + +class DumpAtEnd +{ +public: + ~DumpAtEnd(); +}; + +DumpAtEnd::~DumpAtEnd() +{ + fprintf(stderr, "mallocdst dumping at end\n"); + _GmemP->info(stderr, 2); +} + +static DumpAtEnd _Gdumper; + +template void MemBlockBoundsCheckBaseT<20, 16>::dumpInfo(size_t); + +} + +#include <vespamalloc/malloc/overload.h> diff --git a/vespamalloc/src/vespamalloc/malloc/mallocdst16_nl.cpp b/vespamalloc/src/vespamalloc/malloc/mallocdst16_nl.cpp new file mode 100644 index 00000000000..24a04ed5b25 --- /dev/null +++ b/vespamalloc/src/vespamalloc/malloc/mallocdst16_nl.cpp @@ -0,0 +1,18 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+#include <vespamalloc/malloc/mallocdst.h> + +namespace vespamalloc { + +static Allocator * createAllocator() +{ + if (_GmemP == NULL) { + _GmemP = new (_Gmem) Allocator(1, 0x7fffffffffffffffl); + } + return _GmemP; +} + +template void MemBlockBoundsCheckBaseT<20, 16>::dumpInfo(size_t); + +} + +#include <vespamalloc/malloc/overload.h> diff --git a/vespamalloc/src/vespamalloc/malloc/memblock.cpp b/vespamalloc/src/vespamalloc/malloc/memblock.cpp new file mode 100644 index 00000000000..499e0821294 --- /dev/null +++ b/vespamalloc/src/vespamalloc/malloc/memblock.cpp @@ -0,0 +1,8 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#include <vespamalloc/malloc/memblock.hpp> + +namespace vespamalloc { + +template class MemBlockT<5, 20>; + +} diff --git a/vespamalloc/src/vespamalloc/malloc/memblock.h b/vespamalloc/src/vespamalloc/malloc/memblock.h new file mode 100644 index 00000000000..0779ec1a865 --- /dev/null +++ b/vespamalloc/src/vespamalloc/malloc/memblock.h @@ -0,0 +1,65 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+#pragma once + +#include <vespamalloc/util/callstack.h> +#include <vespamalloc/malloc/common.h> +#include <stdio.h> + +namespace vespamalloc { + +template <size_t MinSizeClassC, size_t MaxSizeClassMultiAllocC> +class MemBlockT : public CommonT<MinSizeClassC> +{ + static const size_t MAX_ALIGN= 0x200000ul; +public: + typedef StackEntry<StackReturnEntry> Stack; + enum { + MaxSizeClassMultiAlloc = MaxSizeClassMultiAllocC, + SizeClassSpan = (MaxSizeClassMultiAllocC-MinSizeClassC) + }; + MemBlockT() : _ptr(NULL) { } + MemBlockT(void * p) : _ptr(p) { } + MemBlockT(void * p, size_t /*sz*/) : _ptr(p) { } + MemBlockT(void * p, size_t, bool) : _ptr(p) { } + template<typename T> + void readjustAlignment(const T & segment) { (void) segment; } + void *rawPtr() { return _ptr; } + void *ptr() { return _ptr; } + const void *ptr() const { return _ptr; } + bool validAlloc() const { return true; } + bool validFree() const { return true; } + void setExact(size_t ) { } + void alloc(bool ) { } + void setThreadId(int ) { } + void free() { } + size_t size() const { return 0; } + bool allocated() const { return false; } + int threadId() const { return 0; } + void info(FILE *, unsigned level=0) const { (void) level; } + Stack * callStack() { return NULL; } + size_t callStackLen() const { return 0; } + void fillMemory(size_t) { } + void logBigBlock(size_t exact, size_t adjusted, size_t gross) const __attribute__((noinline)); + + static size_t adjustSize(size_t sz) { return sz; } + static size_t unAdjustSize(size_t sz) { return sz; } + static void dumpInfo(size_t level); + static void dumpFile(FILE * fp) { _logFile = fp; } + static void bigBlockLimit(size_t lim) { _bigBlockLimit = lim; } + static void setFill(uint8_t ) { } + static bool verifySizeClass(int sc) { (void) sc; return true; } + static size_t getMinSizeForAlignment(size_t align, size_t sz) { + return (sz < MAX_ALIGN) + ? std::max(sz, align) + : (align < MAX_ALIGN) ? 
sz : sz + align; + } +private: + void * _ptr; + static FILE *_logFile; + static size_t _bigBlockLimit; +}; + +typedef MemBlockT<5, 20> MemBlock; + +} + diff --git a/vespamalloc/src/vespamalloc/malloc/memblock.hpp b/vespamalloc/src/vespamalloc/malloc/memblock.hpp new file mode 100644 index 00000000000..50cf3356cc0 --- /dev/null +++ b/vespamalloc/src/vespamalloc/malloc/memblock.hpp @@ -0,0 +1,37 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#pragma once + +#include <vespamalloc/malloc/memblock.h> + +namespace vespamalloc { + +template <size_t MinSizeClassC, size_t MaxSizeClassMultiAllocC> +void +MemBlockT<MinSizeClassC, MaxSizeClassMultiAllocC>::logBigBlock(size_t exact, size_t adjusted, size_t gross) const +{ + size_t sz(exact); + if (std::max(std::max(sz, adjusted), gross) > _bigBlockLimit) { + Stack st[32]; + size_t count = Stack::fillStack(st, NELEMS(st)); + fprintf(_logFile, "validating %p(%ld, %ld, %ld)", + ptr(), sz, adjusted, gross); + st[3].info(_logFile); + fprintf(_logFile, "\n"); + for(size_t i=1; (i < count) && (i < NELEMS(st)); i++) { + const Stack & s = st[i]; + if (s.valid()) { + s.info(_logFile); + fprintf(_logFile, " from "); + } + } + fprintf(_logFile, "\n"); + } +} + +template <size_t MinSizeClassC, size_t MaxSizeClassMultiAllocC> +FILE * MemBlockT<MinSizeClassC, MaxSizeClassMultiAllocC>::_logFile = stderr; +template <size_t MinSizeClassC, size_t MaxSizeClassMultiAllocC> +size_t MemBlockT<MinSizeClassC, MaxSizeClassMultiAllocC>::_bigBlockLimit = 0x80000000; + +} + diff --git a/vespamalloc/src/vespamalloc/malloc/memblockboundscheck.cpp b/vespamalloc/src/vespamalloc/malloc/memblockboundscheck.cpp new file mode 100644 index 00000000000..8dcf14ee705 --- /dev/null +++ b/vespamalloc/src/vespamalloc/malloc/memblockboundscheck.cpp @@ -0,0 +1,47 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+#ifndef _VESPAMALLOC_MALLOC_MEMBLOCKBOUNDSCHECK_HPP_ +#define _VESPAMALLOC_MALLOC_MEMBLOCKBOUNDSCHECK_HPP_ + +#include <vespamalloc/malloc/memblockboundscheck.h> + +namespace vespamalloc { + +FILE * MemBlockBoundsCheckBaseTBase::_logFile = stderr; +size_t MemBlockBoundsCheckBaseTBase::_bigBlockLimit = 0x80000000; +uint8_t MemBlockBoundsCheckBaseTBase::_fillValue = MemBlockBoundsCheckBaseTBase::NO_FILL; + +void MemBlockBoundsCheckBaseTBase::verifyFill() const +{ + const uint8_t *c(static_cast<const uint8_t *>(ptr())), *e(c+size()); + for(;(c < e) && (*c == _fillValue); c++) { } + if (c != e) { + fprintf(_logFile, "Incorrect fillvalue (%2x) instead of (%2x) at position %ld of %ld\n", *c, _fillValue, c - static_cast<const uint8_t *>(ptr()), size()); + abort(); + } +} + +void MemBlockBoundsCheckBaseTBase::logBigBlock(size_t exact, size_t adjusted, size_t gross) const +{ + size_t sz(exact); + if (sz > _bigBlockLimit) { + Stack st[32]; + size_t count = Stack::fillStack(st, NELEMS(st)); + fprintf(_logFile, "validating %p(%ld, %ld, %ld)", + ptr(), sz, adjusted, gross); + st[3].info(_logFile); + fprintf(_logFile, "\n"); + for(size_t i=1; (i < count) && (i < NELEMS(st)); i++) { + const Stack & s = st[i]; + if (s.valid()) { + s.info(_logFile); + fprintf(_logFile, " from "); + } + } + fprintf(_logFile, "\n"); + } +} + + +} // namespace vespamalloc + +#endif diff --git a/vespamalloc/src/vespamalloc/malloc/memblockboundscheck.h b/vespamalloc/src/vespamalloc/malloc/memblockboundscheck.h new file mode 100644 index 00000000000..61f1f1fde5d --- /dev/null +++ b/vespamalloc/src/vespamalloc/malloc/memblockboundscheck.h @@ -0,0 +1,141 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+#pragma once + +#include <vespamalloc/malloc/common.h> +#include <vespamalloc/util/callstack.h> + +namespace vespamalloc { + +class MemBlockBoundsCheckBaseTBase : public CommonT<5> +{ +public: + typedef StackEntry<StackReturnEntry> Stack; + void * rawPtr() { return _ptr; } + void *ptr() { unsigned *p((unsigned*)_ptr); return p ? (p+4) : NULL; } + const void *ptr() const { unsigned *p((unsigned*)_ptr); return p ? (p+4) : NULL; } + + void setThreadId(int th) { if (_ptr) { static_cast<uint32_t*>(_ptr)[2] = th; } } + bool allocated() const { return (static_cast<unsigned*>(_ptr)[3] == ALLOC_MAGIC); } + size_t size() const { return static_cast<const uint64_t *>(_ptr)[0]; } + int threadId() const { return static_cast<int*>(_ptr)[2]; } + Stack * callStack() { return reinterpret_cast<Stack *>((char *)_ptr + size() + 4*sizeof(unsigned)); } + const Stack * callStack() const { return reinterpret_cast<const Stack *>((const char *)_ptr + size() + 4*sizeof(unsigned)); } + void fillMemory(size_t sz) { + if (_fillValue != NO_FILL) { + memset(ptr(), _fillValue, sz); + } + } + static void bigBlockLimit(size_t lim) { _bigBlockLimit = lim; } + static void dumpFile(FILE * fp) { _logFile = fp; } + static void setFill(uint8_t pattern) { _fillValue = pattern; } + static bool verifySizeClass(int sc) { return sc >= 0; } + + template<typename T> + void readjustAlignment(const T & segment) { + size_t ptr_class_size = this->classSize(T::adjustedClassSize(segment.sizeClass(_ptr))); + size_t clamped_class_size = std::min(size_t(0x10000), ptr_class_size); + size_t bitmask = ~(clamped_class_size - 1); + size_t tmp = reinterpret_cast<size_t>(_ptr); + tmp &= bitmask; + _ptr = reinterpret_cast<void *>(tmp); + } + void logBigBlock(size_t exact, size_t adjusted, size_t gross) const __attribute__((noinline)); +protected: + MemBlockBoundsCheckBaseTBase(void * p) : _ptr(p) { } + void verifyFill() const __attribute__((noinline)); + + void setSize(size_t sz) { static_cast<uint64_t *>(_ptr)[0] = sz; } + + 
enum { + ALLOC_MAGIC = 0xF1E2D3C4, + FREE_MAGIC = 0x63242367, + HEAD_MAGIC3 = 0x5BF29BC7, + TAIL_MAGIC = 0x1A2B3C4D + }; + enum { NO_FILL = 0xa8}; + + void * _ptr; + + static FILE *_logFile; + static size_t _bigBlockLimit; + static uint8_t _fillValue; +}; + +template <size_t MaxSizeClassMultiAllocC, size_t StackTraceLen> +class MemBlockBoundsCheckBaseT : public MemBlockBoundsCheckBaseTBase +{ +public: + enum { + MaxSizeClassMultiAlloc = MaxSizeClassMultiAllocC, + SizeClassSpan = (MaxSizeClassMultiAllocC-5) + }; + MemBlockBoundsCheckBaseT() : MemBlockBoundsCheckBaseTBase(NULL) { } + MemBlockBoundsCheckBaseT(void * p) : MemBlockBoundsCheckBaseTBase(p ? (unsigned *)p-4 : NULL) { } + MemBlockBoundsCheckBaseT(void * p, size_t sz) : MemBlockBoundsCheckBaseTBase(p) { setSize(sz); } + MemBlockBoundsCheckBaseT(void * p, size_t, bool) : MemBlockBoundsCheckBaseTBase(p) { } + bool validCommon() const { + const unsigned *p(reinterpret_cast<const unsigned*>(_ptr)); + return p + && ((p[3] == ALLOC_MAGIC) || (p[3] == FREE_MAGIC)) + && *(reinterpret_cast<const unsigned *> ((const char*)_ptr + size() + 4*sizeof(unsigned) + StackTraceLen*sizeof(void *))) == TAIL_MAGIC; + } + bool validAlloc1() const { + unsigned *p((unsigned*)_ptr); + return validCommon() && (p[3] == ALLOC_MAGIC); + } + bool validFree1() const { + unsigned *p((unsigned*)_ptr); + if (_fillValue != NO_FILL) { + verifyFill(); + } + return validCommon() && (p[3] == FREE_MAGIC); + } + void alloc(bool log) { + unsigned *p((unsigned*)_ptr); + if (p) { + p[3] = ALLOC_MAGIC; + if (StackTraceLen) { + Stack * cStack = callStack(); + if (log) { + Stack::fillStack(cStack, StackTraceLen); + } else { + cStack[0] = Stack(); + } + } + } + } + + void free() __attribute__((noinline)) { + static_cast<unsigned*>(_ptr)[3] = FREE_MAGIC; + fillMemory(size()); + setTailMagic(); + } + void setExact(size_t sz) { init(sz); } + size_t callStackLen() const { + const Stack * stack = callStack(); + // Use int to avoid compiler warning about always 
true. + for (int i(0); i < (int)StackTraceLen; i++) { + if (! stack[i].valid()) { + return i+1; + } + } + return StackTraceLen; + } + static size_t adjustSize(size_t sz) { return sz + ((4+1)*sizeof(unsigned) + StackTraceLen*sizeof(void *)); } + static size_t unAdjustSize(size_t sz) { return sz - ((4+1)*sizeof(unsigned) + StackTraceLen*sizeof(void *)); } + static void dumpInfo(size_t level) __attribute__((noinline)); + static size_t getMinSizeForAlignment(size_t align, size_t sz) { return sz + align; } + void info(FILE * os, unsigned level=0) const __attribute__((noinline)); + +protected: + void setTailMagic() { *(reinterpret_cast<unsigned *> ((char*)_ptr + size() + 4*sizeof(unsigned) + StackTraceLen*sizeof(void *))) = TAIL_MAGIC; } + void init(size_t sz) { + if (_ptr) { + setSize(sz); + setTailMagic(); + } + } +}; + +} // namespace vespamalloc + diff --git a/vespamalloc/src/vespamalloc/malloc/memblockboundscheck.hpp b/vespamalloc/src/vespamalloc/malloc/memblockboundscheck.hpp new file mode 100644 index 00000000000..93fceab199a --- /dev/null +++ b/vespamalloc/src/vespamalloc/malloc/memblockboundscheck.hpp @@ -0,0 +1,33 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+#pragma once + +#include <vespamalloc/malloc/memblockboundscheck.h> + +namespace vespamalloc { + +template <size_t MaxSizeClassMultiAllocC, size_t StackTraceLen> +void MemBlockBoundsCheckBaseT<MaxSizeClassMultiAllocC, StackTraceLen>::info(FILE * os, unsigned level) const +{ + if (validCommon()) { + if (level & 0x02) { + fprintf(os, "{ %8p(%ld, %d) ", ptr(), size(), threadId()); + const Stack * cStack = callStack(); + for (int i=0; i<int(StackTraceLen);i++) { + if (cStack[i].valid()) { + cStack[i].info(os); + fprintf(os, " "); + } + } + fprintf(os, " }"); + } + if (level & 0x01) { + fprintf(os, " %8p(%ld, %d)", ptr(), size(), threadId()); + } + if (level == 0) { + fprintf(os, " %8p(%ld)", ptr(), size()); + } + } +} + +} // namespace vespamalloc + diff --git a/vespamalloc/src/vespamalloc/malloc/memblockboundscheck_d.cpp b/vespamalloc/src/vespamalloc/malloc/memblockboundscheck_d.cpp new file mode 100644 index 00000000000..8f1b1f728d7 --- /dev/null +++ b/vespamalloc/src/vespamalloc/malloc/memblockboundscheck_d.cpp @@ -0,0 +1,9 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#include <vespamalloc/malloc/memblockboundscheck_d.h> +#include <vespamalloc/malloc/memblockboundscheck.hpp> + +namespace vespamalloc { + +template class MemBlockBoundsCheckBaseT<20, 0>; + +} diff --git a/vespamalloc/src/vespamalloc/malloc/memblockboundscheck_d.h b/vespamalloc/src/vespamalloc/malloc/memblockboundscheck_d.h new file mode 100644 index 00000000000..ad70a0c56f9 --- /dev/null +++ b/vespamalloc/src/vespamalloc/malloc/memblockboundscheck_d.h @@ -0,0 +1,22 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+#pragma once + +#include <vespamalloc/malloc/memblockboundscheck.h> + +namespace vespamalloc { + +typedef MemBlockBoundsCheckBaseT<20, 0> MemBlockBoundsCheckBase; + +class MemBlockBoundsCheck : public MemBlockBoundsCheckBase +{ +public: + MemBlockBoundsCheck() : MemBlockBoundsCheckBase() { } + MemBlockBoundsCheck(void * p) : MemBlockBoundsCheckBase(p) { } + MemBlockBoundsCheck(void * p, size_t sz) : MemBlockBoundsCheckBase(p, sz) { } + MemBlockBoundsCheck(void * p, size_t sz, bool dummy) : MemBlockBoundsCheckBase(p, sz, dummy) { } + bool validAlloc() const { return validAlloc1(); } + bool validFree() const { return validFree1(); } +}; + +} + diff --git a/vespamalloc/src/vespamalloc/malloc/memblockboundscheck_dst.cpp b/vespamalloc/src/vespamalloc/malloc/memblockboundscheck_dst.cpp new file mode 100644 index 00000000000..76ef2fced7e --- /dev/null +++ b/vespamalloc/src/vespamalloc/malloc/memblockboundscheck_dst.cpp @@ -0,0 +1,9 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#include <vespamalloc/malloc/memblockboundscheck_dst.h> +#include <vespamalloc/malloc/memblockboundscheck.hpp> + +namespace vespamalloc { + +template class MemBlockBoundsCheckBaseT<20, MALLOC_STACK_SAVE_LEN>; + +} diff --git a/vespamalloc/src/vespamalloc/malloc/memblockboundscheck_dst.h b/vespamalloc/src/vespamalloc/malloc/memblockboundscheck_dst.h new file mode 100644 index 00000000000..d97a059b8e8 --- /dev/null +++ b/vespamalloc/src/vespamalloc/malloc/memblockboundscheck_dst.h @@ -0,0 +1,24 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+#pragma once + +#include <vespamalloc/malloc/memblockboundscheck.h> + +#define MALLOC_STACK_SAVE_LEN 16 + +namespace vespamalloc { + +typedef MemBlockBoundsCheckBaseT<20, MALLOC_STACK_SAVE_LEN> MemBlockBoundsCheckBase; + +class MemBlockBoundsCheck : public MemBlockBoundsCheckBase +{ +public: + MemBlockBoundsCheck() : MemBlockBoundsCheckBase() { } + MemBlockBoundsCheck(void * p) : MemBlockBoundsCheckBase(p) { } + MemBlockBoundsCheck(void * p, size_t sz) : MemBlockBoundsCheckBase(p, sz) { } + MemBlockBoundsCheck(void * p, size_t sz, bool dummy) : MemBlockBoundsCheckBase(p, sz, dummy) { } + bool validAlloc() const { return validAlloc1(); } + bool validFree() const { return validFree1(); } +}; + +} + diff --git a/vespamalloc/src/vespamalloc/malloc/memorywatcher.h b/vespamalloc/src/vespamalloc/malloc/memorywatcher.h new file mode 100644 index 00000000000..2a3f6ea42f4 --- /dev/null +++ b/vespamalloc/src/vespamalloc/malloc/memorywatcher.h @@ -0,0 +1,378 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+#pragma once + +#include <stdio.h> +#include <signal.h> +#include <limits.h> +#include <sys/stat.h> +#include <ctype.h> +#include <vespa/defaults.h> +#include <vespamalloc/malloc/malloc.h> +#include <vespamalloc/util/callstack.h> + +namespace vespamalloc { + + +template <typename T, typename S> +class MemoryWatcher : public MemoryManager<T, S> +{ +public: + MemoryWatcher(int infoAtEnd, size_t prAllocAtStart) __attribute__((noinline)); + virtual ~MemoryWatcher() __attribute__((noinline)); +private: + void installMonitor(); + int getDumpSignal() const { return _params[Params::dumpsignal].valueAsLong(); } + static int getReconfigSignal() { return SIGHUP; } + bool activateLogFile(const char *logfile); + void activateOptions(); + void getOptions() __attribute__ ((noinline)); + void parseOptions(char * options) __attribute__ ((noinline)); + virtual void signalHandler(int signum, siginfo_t *sig, void * arg); + static MemoryWatcher<T, S> * _manager; + static void ssignalHandler(int signum, siginfo_t *info, void * arg); + static MemoryWatcher<T, S> *manager() { return _manager; } + bool signal(int signum) __attribute__ ((noinline)); + class NameValuePair { + public: + NameValuePair() : _valueName("") { _value[0] = '\0'; } + NameValuePair(const char *vName, const char *v) + : _valueName(vName) + { + value(v); + } + const char * valueName() const { return _valueName; } + const char * value() const { return _value; } + void value(const char * v) __attribute__((noinline)); + long valueAsLong() const __attribute__((noinline)) { return strtol(_value, NULL, 0); } + void info(FILE * os) __attribute__ ((noinline)) { + fprintf(os, "%s = %s %ld", valueName(), value(), valueAsLong()); + } + private: + const char * _valueName; + char _value[256]; + }; + class Params { + public: + enum { + alwaysreuselimit = 0, + threadcachelimit, + logfile, + sigprof_loglevel, + atend_loglevel, + pralloc_loglimit, + atnomem_loglevel, + atdoubledelete_loglevel, + atinvalid_loglevel, + 
bigsegment_loglevel, + bigsegment_limit, + bigsegment_increment, + allocs2show, + bigblocklimit, + fillvalue, + dumpsignal, + numberofentries // Must be the last one + }; + Params() __attribute__ ((noinline)); + ~Params() __attribute__ ((noinline)); + NameValuePair & operator[] (unsigned index) { return _params[index]; } + const NameValuePair & operator[] (unsigned index) const { return _params[index]; } + bool update(const char *vName, const char *v) { + int index(find(vName)); + if (index >= 0) { + _params[index].value(v); + } + return (index >= 0); + } + bool getAsChar(const char *vName, const char * & v) { + int index(find(vName)); + if (index >= 0) { + v = _params[index].value(); + } + return (index >= 0); + } + bool getAsLong(const char *vName, long & v) { + int index(find(vName)); + if (index >= 0) { + v = _params[index].valueAsLong(); + } + return (index >= 0); + } + void info(FILE * os) { + for (size_t i=0; i < NELEMS(_params); i++) { + fprintf(os, "%2ld ", i); + _params[i].info(os); + fprintf(os, "\n"); + } + } + private: + int find(const char *vName) __attribute__ ((noinline)); + NameValuePair _params[numberofentries]; + }; + FILE * _logFile; + int _infoAtAbort; + int _infoAtNOMEM; + + Params _params; + struct sigaction _oldSig; +}; + +template <typename T, typename S> +MemoryWatcher<T, S>::Params::Params() +{ + _params[ alwaysreuselimit] = NameValuePair("alwaysreuselimit", "0x200000"); // 2M for allignment with hugepage size. 
+ _params[ threadcachelimit] = NameValuePair("threadcachelimit", "0x10000"); // 64K
+ _params[ logfile] = NameValuePair("logfile", "stderr");
+ _params[ sigprof_loglevel] = NameValuePair("sigprof_loglevel", "1");
+ _params[ atend_loglevel] = NameValuePair("atend_loglevel", "1");
+ _params[ pralloc_loglimit] = NameValuePair("pralloc_loglimit", "0x2000000");
+ _params[ atnomem_loglevel] = NameValuePair("atnomem_loglevel", "1");
+ _params[atdoubledelete_loglevel] = NameValuePair("atdoubledelete_loglevel", "1");
+ _params[ atinvalid_loglevel] = NameValuePair("atinvalid_loglevel", "1");
+ _params[ bigsegment_loglevel] = NameValuePair("bigsegment_loglevel", "1");
+ _params[ bigsegment_limit] = NameValuePair("bigsegment_limit", "0x1000000000"); // 64G
+ _params[ bigsegment_increment] = NameValuePair("bigsegment_increment", "0x100000000"); // 4G
+ _params[ allocs2show] = NameValuePair("allocs2show", "8");
+ _params[ bigblocklimit] = NameValuePair("bigblocklimit", "0x80000000"); // 2G
+ _params[ fillvalue] = NameValuePair("fillvalue", "0xa8"); // Means NO fill.
+ _params[ dumpsignal] = NameValuePair("dumpsignal", "27"); // SIGPROF +} + +template <typename T, typename S> +MemoryWatcher<T, S>::Params::~Params() +{ +} + +template <typename T, typename S> +int MemoryWatcher<T, S>::Params::find(const char *vName) +{ + int index(-1); + for (size_t i=0; (index < 0) && (i < NELEMS(_params)); i++) { + if (strcmp(vName, _params[i].valueName()) == 0) { + index = i; + } + } + return index; +} + +template <typename T, typename S> +void MemoryWatcher<T, S>::NameValuePair::value(const char * v) { + strncpy(_value, v, sizeof(_value)-1); + _value[sizeof(_value)-1] = '\0'; +} + +template <typename T, typename S> +MemoryWatcher<T, S>::MemoryWatcher(int infoAtEnd, size_t prAllocAtStart) : + MemoryManager<T, S>(prAllocAtStart), + _logFile(stderr), + _infoAtAbort(-1), + _infoAtNOMEM(1) +{ + _manager = this; + char tmp[16]; + sprintf(tmp, "%d", infoAtEnd); + _params[Params::atend_loglevel].value(tmp); + installMonitor(); +} + +template <typename T, typename S> +void MemoryWatcher<T, S>::installMonitor() +{ + getOptions(); + + signal(getDumpSignal()); + signal(getReconfigSignal()); +} + +template <typename T, typename S> +bool MemoryWatcher<T, S>::activateLogFile(const char *logfile) +{ + FILE * oldFp(_logFile); + if (strcmp(logfile, "stderr") == 0) { + _logFile = stderr; + } else if (strcmp(logfile, "stdout") == 0) { + _logFile = stdout; + } else { + char logFileName[1024]; + snprintf(logFileName, sizeof(logFileName), "%s.%d", logfile, getpid()); + _logFile = fopen(logFileName, "a"); + } + if ((oldFp != stderr) && (oldFp != stdout)) { + fclose(oldFp); + } + return (_logFile != NULL); +} + +template <typename T, typename S> +void MemoryWatcher<T, S>::activateOptions() +{ + activateLogFile(_params[Params::logfile].value()); + T::dumpFile(_logFile); + this->setupSegmentLog(_params[Params::atnomem_loglevel].valueAsLong(), + _params[Params::bigsegment_loglevel].valueAsLong(), + _params[Params::bigsegment_limit].valueAsLong(), + 
_params[Params::bigsegment_increment].valueAsLong(), + _params[Params::allocs2show].valueAsLong()); + this->setupLog(_params[Params::atdoubledelete_loglevel].valueAsLong(), + _params[Params::atinvalid_loglevel].valueAsLong(), + _params[Params::pralloc_loglimit].valueAsLong()); + this->setParams(_params[Params::alwaysreuselimit].valueAsLong(), + _params[Params::threadcachelimit].valueAsLong()); + T::bigBlockLimit(_params[Params::bigblocklimit].valueAsLong()); + T::setFill(_params[Params::fillvalue].valueAsLong()); + +} + +namespace { + +const char *vespaHomeConf(char pathName[]) +{ + const char *home = "/opt/vespa"; + const char *env = getenv("VESPA_HOME"); + if (env != NULL) { + home = env; + } + strncpy(pathName, home, PATH_MAX); + strncat(pathName, "/etc/vespamalloc.conf", PATH_MAX); + pathName[PATH_MAX - 1] = '\0'; + return pathName; +} + +} // namespace <unnamed> + +template <typename T, typename S> +void MemoryWatcher<T, S>::getOptions() +{ + char homeConf[PATH_MAX]; + const char * searchOrder[3] = { + "vespamalloc.conf", + vespaHomeConf(homeConf), + "/etc/vespamalloc.conf" + }; + struct stat st; + int retval(-1); + unsigned index(0); + for (unsigned i=0; (retval == -1) && (i < NELEMS(searchOrder)); i++) { + retval = stat(searchOrder[i], & st); + index = i; + } + if (retval == 0) { + int fd = open(searchOrder[index], O_RDONLY); + char buffer[4096]; + assert(st.st_size+1 < int(sizeof(buffer))); + retval = read(fd, buffer, st.st_size); + if (retval == st.st_size) { + buffer[st.st_size] = 0; + parseOptions(buffer); + activateOptions(); + } + close (fd); + } +} + +template <typename T, typename S> +void MemoryWatcher<T, S>::parseOptions(char * options) +{ + bool isComment(false); + const char ignore('\0'); + const char *valueName(NULL); + const char *value(NULL); + bool isWhite(true); + for(char *p=options; *p; p++) { + char c(*p); + if (c == '\n') { + if ((valueName != NULL) && (value != NULL)) { + if (_params.update(valueName, value) == false) { + 
fprintf(stderr, "Invalid parameter %s", valueName); + } + } + isComment = false; + isWhite = true; + valueName = NULL; + value = NULL; + } else if (isComment) { + *p = ignore; + } else if (c == '#') { + isComment = true; + *p = ignore; + } else { + if (isWhite) { + if (!isspace(c)) { + if (valueName == NULL) { + valueName = p; + } else { + value = p; + } + isWhite = false; + } else { + *p = ignore; + } + } else { + if (isspace(c)) { + isWhite = true; + *p = ignore; + } + } + } + } +} + +template <typename T, typename S> +MemoryWatcher<T, S>::~MemoryWatcher() { + int infoAtEnd(_params[Params::atend_loglevel].valueAsLong()); + if (infoAtEnd >= 0) { + this->info(_logFile, infoAtEnd); + } + fclose(_logFile); +} + +template <typename T, typename S> +void MemoryWatcher<T, S>::signalHandler(int signum, siginfo_t * sig, void * arg) +{ + if (_params[Params::sigprof_loglevel].valueAsLong() > 1) { + fprintf(_logFile, "SignalHandler %d caught\n", signum); + } + if (signum == getDumpSignal()) { + this->info(_logFile, _params[Params::sigprof_loglevel].valueAsLong()); + } else if (signum == getReconfigSignal()) { + getOptions(); + if (_params[Params::sigprof_loglevel].valueAsLong() > 1) { + _params.info(_logFile); + } + } + if (_params[Params::sigprof_loglevel].valueAsLong() > 1) { + fprintf(_logFile, "SignalHandler %d done\n", signum); + } + if ((_oldSig.sa_handler != SIG_IGN) && (_oldSig.sa_handler != SIG_DFL) && (_oldSig.sa_handler != NULL)) { + (_oldSig.sa_sigaction)(signum, sig, arg); + } +} + +template <typename T, typename S> +void MemoryWatcher<T, S>::ssignalHandler(int signum, siginfo_t *info, void * arg) +{ + if (_manager) { + _manager->signalHandler(signum, info, arg); + } else { + fprintf(stderr, "Manager not initialized when signal arrives"); + } +} + +template <typename T, typename S> +bool MemoryWatcher<T, S>::signal(int signum) +{ + bool retval(true); + struct sigaction sig; + sig.sa_sigaction = ssignalHandler; + sigemptyset(& sig.sa_mask); + sig.sa_flags = 
SA_SIGINFO; + if (!(retval = (sigaction(signum, &sig, &_oldSig) == 0))) { + fprintf(stderr, "Signal handler for %d FAILED to install!\n", signum); + } + return retval; +} + +template <typename T, typename S> +MemoryWatcher<T, S> * MemoryWatcher<T, S>::_manager = NULL; + +} // namespace vespamalloc + diff --git a/vespamalloc/src/vespamalloc/malloc/mmap.cpp b/vespamalloc/src/vespamalloc/malloc/mmap.cpp new file mode 100644 index 00000000000..6da13d9ac92 --- /dev/null +++ b/vespamalloc/src/vespamalloc/malloc/mmap.cpp @@ -0,0 +1,97 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#include <sys/types.h> +#include <sys/mman.h> +#include <dlfcn.h> +#include <stdlib.h> +#include <stdio.h> +#include <vespa/vespalib/util/backtrace.h> + +extern "C" { + +typedef void * (*mmap_function) (void *addr, size_t length, int prot, int flags, int fd, off_t offset); +typedef void * (*mmap64_function) (void *addr, size_t length, int prot, int flags, int fd, off64_t offset); +typedef int (*munmap_function) (void *addr, size_t length); + +void * local_mmap(void *addr, size_t length, int prot, int flags, int fd, off_t offset) __asm__("mmap"); +void * local_mmap64(void *addr, size_t length, int prot, int flags, int fd, off64_t offset) __asm__("mmap64"); +int munmap(void *addr, size_t length) __asm__("munmap"); + +// This is a dirty prototype of an internal, yet visible method in libc that avoids +// allocations as they will cause a loop when used with vespamalloc. 
+void *_dl_sym (void *handle, const char *name, void *who); + +static size_t getLogLimit() +{ + static size_t LogLimit = -2l; + if (LogLimit == static_cast<size_t>(-2l)) { + const char * s = getenv("VESPA_MMAP_BIGBLOCK_LOGLIMIT"); + if (s) { + LogLimit = strtoul(s, NULL, 0); + } else { + LogLimit = -1l; + } + } + return LogLimit; +} + +const size_t MagicVespaMallocStartOfHeap = 0x100000000; +const size_t MagicVespaMallocStartOfHeapFilter = 0xffffffff00000000ul; + + +static bool isFromVespaMalloc(const void * addr) +{ + size_t v(reinterpret_cast<size_t>(addr)); + return (v & MagicVespaMallocStartOfHeapFilter) == MagicVespaMallocStartOfHeap; +} + +void * local_mmap(void *addr, size_t length, int prot, int flags, int fd, off_t offset) +{ + static mmap_function real_func = NULL; + // This is a dirty trick for use with vespamalloc as there can be + // no allocations before the initial mmap from vespamalloc has succeded. + if (real_func == NULL) { + real_func = (mmap_function) _dl_sym (RTLD_NEXT, "mmap", __builtin_return_address (0)); + if (real_func == NULL) { + fprintf (stderr, "Could not find the mmap function!\n"); + abort(); + } + } + if ((length >= getLogLimit()) && !isFromVespaMalloc(addr)) { + fprintf (stderr, "mmap requesting block of size %ld from %s\n", length, vespalib::getStackTrace(0).c_str()); + } + return (*real_func)(addr, length, prot, flags, fd, offset); +} + +void * local_mmap64(void *addr, size_t length, int prot, int flags, int fd, off64_t offset) +{ + static mmap64_function real_func = NULL; + if (real_func == NULL) { + real_func = (mmap64_function) dlsym (RTLD_NEXT, "mmap64"); + if (real_func == NULL) { + fprintf (stderr, "Could not find the mmap64 function!\n"); + abort(); + } + } + if (length >= getLogLimit() && !isFromVespaMalloc(addr)) { + fprintf (stderr, "mmap requesting block of size %ld from %s\n", length, vespalib::getStackTrace(0).c_str()); + } + return (*real_func)(addr, length, prot, flags, fd, offset); +} + +int local_munmap(void 
*addr, size_t length) +{ + static munmap_function real_func = NULL; + if (real_func == NULL) { + real_func = (munmap_function) dlsym (RTLD_NEXT, "munmap"); + if (real_func == NULL) { + fprintf (stderr, "Could not find the munmap function!\n"); + abort(); + } + } + if ((length >= getLogLimit()) && !isFromVespaMalloc(addr)) { + fprintf (stderr, "munmap releasing block of size %ld from %s\n", length, vespalib::getStackTrace(0).c_str()); + } + return (*real_func)(addr, length); +} + +} diff --git a/vespamalloc/src/vespamalloc/malloc/overload.h b/vespamalloc/src/vespamalloc/malloc/overload.h new file mode 100644 index 00000000000..caf8184acd0 --- /dev/null +++ b/vespamalloc/src/vespamalloc/malloc/overload.h @@ -0,0 +1,229 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#pragma once + +#include <dlfcn.h> +#include <errno.h> +#include <new> +#include <stdlib.h> + +class CreateAllocator +{ +public: + CreateAllocator() : _initialized(0x192A3B4C) { + vespamalloc::createAllocator(); + } +private: + unsigned _initialized; +}; + +static CreateAllocator _CreateAllocator __attribute__ ((init_priority (543))); + +#if 1 // Only until we get on to a new C++14 compiler +void operator delete(void* ptr, std::size_t sz) noexcept __attribute__((visibility ("default"))); +void operator delete[](void* ptr, std::size_t sz) noexcept __attribute__((visibility ("default"))); +void operator delete(void* ptr, std::size_t sz, const std::nothrow_t&) noexcept __attribute__((visibility ("default"))); +void operator delete[](void* ptr, std::size_t sz, const std::nothrow_t&) noexcept __attribute__((visibility ("default"))); +#endif + +void* operator new(std::size_t sz) throw (std::bad_alloc) +{ + void * ptr(vespamalloc::createAllocator()->malloc(sz)); + if (ptr == NULL) { + throw std::bad_alloc(); + } + return ptr; +} + +void* operator new[](std::size_t sz) throw (std::bad_alloc) +{ + return ::operator new(sz); +} + +void* operator 
new(std::size_t sz, const std::nothrow_t&) noexcept { + return vespamalloc::_GmemP->malloc(sz); +} +void* operator new[](std::size_t sz, const std::nothrow_t&) noexcept { + return vespamalloc::_GmemP->malloc(sz); +} + +void operator delete(void* ptr) noexcept { + if (ptr) { vespamalloc::_GmemP->free(ptr); } +} +void operator delete[](void* ptr) noexcept { + if (ptr) { vespamalloc::_GmemP->free(ptr); } +} +void operator delete(void* ptr, const std::nothrow_t&) noexcept { + if (ptr) { vespamalloc::_GmemP->free(ptr); } +} +void operator delete[](void* ptr, const std::nothrow_t&) noexcept { + if (ptr) { vespamalloc::_GmemP->free(ptr); } +} +void operator delete(void* ptr, std::size_t sz) noexcept { + if (ptr) { vespamalloc::_GmemP->free(ptr, sz); } +} +void operator delete[](void* ptr, std::size_t sz) noexcept { + if (ptr) { vespamalloc::_GmemP->free(ptr, sz); } +} +void operator delete(void* ptr, std::size_t sz, const std::nothrow_t&) noexcept { + if (ptr) { vespamalloc::_GmemP->free(ptr, sz); } +} +void operator delete[](void* ptr, std::size_t sz, const std::nothrow_t&) noexcept { + if (ptr) { vespamalloc::_GmemP->free(ptr, sz); } +} + +extern "C" { + +void * malloc(size_t sz) { + return vespamalloc::createAllocator()->malloc(sz); +} + +void * calloc(size_t nelem, size_t esz) +{ + return vespamalloc::createAllocator()->calloc(nelem, esz); +} + +void * realloc(void * ptr, size_t sz) +{ + return vespamalloc::createAllocator()->realloc(ptr, sz); +} + +void* memalign(size_t align, size_t sz) __attribute__((visibility ("default"))); +void* memalign(size_t align, size_t sz) +{ + void *ptr(NULL); + size_t align_1(align - 1); + if ((align & (align_1)) == 0) { + ptr = vespamalloc::_GmemP->malloc(vespamalloc::_GmemP->getMinSizeForAlignment(align, sz)); + ptr = (void *) ((size_t(ptr) + align_1) & ~align_1); + } + return ptr; +} + +int posix_memalign(void** ptr, size_t align, size_t sz) __THROW __attribute__((visibility ("default"))); + +int posix_memalign(void** ptr, size_t 
align, size_t sz) __THROW +{ + int retval(0); + if (((align % sizeof(void*)) != 0) || + ((align & (align - 1)) != 0) || + (align == 0)) { + retval = EINVAL; + } else { + void* result = memalign(align, sz); + if (result) { + *ptr = result; + } else { + retval = ENOMEM; + } + } + return retval; +} + +void *valloc(size_t size) __attribute__((visibility ("default"))); +void *valloc(size_t size) +{ + return memalign(sysconf(_SC_PAGESIZE),size); +} + + +void free(void * ptr) { + if (ptr) { vespamalloc::_GmemP->free(ptr); } +} + +#define ALIAS(x) __attribute__ ((weak, alias (x), visibility ("default"))) +void cfree(void *) ALIAS("free"); +void* __libc_malloc(size_t sz) ALIAS("malloc"); +void __libc_free(void* ptr) ALIAS("free"); +void* __libc_realloc(void* ptr, size_t sz) ALIAS("realloc"); +void* __libc_calloc(size_t n, size_t sz) ALIAS("calloc"); +void __libc_cfree(void* ptr) ALIAS("cfree"); +void* __libc_memalign(size_t align, size_t s) ALIAS("memalign"); +int __posix_memalign(void** r, size_t a, size_t s) ALIAS("posix_memalign"); +#undef ALIAS + +#if 0 +#include <dlfcn.h> + +typedef void * (*dlopen_function) (const char *filename, int flag); + +extern "C" VESPA_DLL_EXPORT void * local_dlopen(const char *filename, int flag) __asm__("dlopen"); + +VESPA_DLL_EXPORT void * local_dlopen(const char *filename, int flag) +{ + // A pointer to the library version of dlopen. 
+ static dlopen_function real_dlopen = NULL; + + const char * dlopenName = "dlopen"; + + if (real_dlopen == NULL) { + real_dlopen = (dlopen_function) dlsym (RTLD_NEXT, dlopenName); + if (real_dlopen == NULL) { + fprintf (stderr, "Could not find the dlopen function!\n"); + abort(); + } + } + //flag = (flag & ~RTLD_DEEPBIND & ~RTLD_NOW) | RTLD_LAZY; + //fprintf(stderr, "modified dlopen('%s', %0x)\n", filename, flag); + void * handle = real_dlopen(filename, flag); + fprintf(stderr, "dlopen('%s', %0x) = %p\n", filename, flag, handle); + return handle; +} + +typedef int (*dlclose_function) (void * handle); +extern "C" VESPA_DLL_EXPORT int local_dlclose(void * handle) __asm__("dlclose"); +VESPA_DLL_EXPORT int local_dlclose(void * handle) +{ + // A pointer to the library version of dlclose. + static dlclose_function real_dlclose = NULL; + + const char * dlcloseName = "dlclose"; + + if (real_dlclose == NULL) { + real_dlclose = (dlclose_function) dlsym (RTLD_NEXT, dlcloseName); + if (real_dlclose == NULL) { + fprintf (stderr, "Could not find the dlclose function!\n"); + abort(); + } + } + int retval = real_dlclose(handle); + fprintf(stderr, "dlclose(%p) = %d\n", handle, retval); + return retval; +} + +typedef void * (*dlsym_function) (void * handle, const char * symbol); +extern "C" VESPA_DLL_EXPORT void * local_dlsym(void * handle, const char * symbol) __asm__("dlsym"); +VESPA_DLL_EXPORT void * local_dlsym(void * handle, const char * symbol) +{ + // A pointer to the library version of dlsym. 
+ static dlsym_function real_dlsym = NULL; + + const char * dlsymName = "dlsym"; + + if (real_dlsym == NULL) { + real_dlsym = (dlsym_function) dlvsym (RTLD_NEXT, dlsymName, "GLIBC_2.2.5"); + if (real_dlsym == NULL) { + fprintf (stderr, "Could not find the dlsym function!\n"); + abort(); + } + } + if (handle == RTLD_NEXT) { + fprintf(stderr, "dlsym(RTLD_NEXT, %s)\n", symbol); + } else if (handle == RTLD_DEFAULT) { + fprintf(stderr, "dlsym(RTLD_DEFAULT, %s)\n", symbol); + } else { + fprintf(stderr, "dlsym(%p, %s)\n", handle, symbol); + } + void * retval = real_dlsym(handle, symbol); + if (handle == RTLD_NEXT) { + fprintf(stderr, "dlsym(RTLD_NEXT, %s) = %p\n", symbol, retval); + } else if (handle == RTLD_DEFAULT) { + fprintf(stderr, "dlsym(RTLD_DEFAULT, %s) = %p\n", symbol, retval); + } else { + fprintf(stderr, "dlsym(%p, %s) = %p\n", handle, symbol, retval); + } + return retval; +} + +#endif + +} diff --git a/vespamalloc/src/vespamalloc/malloc/stat.h b/vespamalloc/src/vespamalloc/malloc/stat.h new file mode 100644 index 00000000000..d3771fcbe85 --- /dev/null +++ b/vespamalloc/src/vespamalloc/malloc/stat.h @@ -0,0 +1,63 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+#pragma once + +namespace vespamalloc { + +class NoStat +{ +public: + void incAlloc() { } + void incExchangeFree() { } + void incReturnFree() { } + void incFree() { } + void incExchangeAlloc() { } + void incExactAlloc() { } + + static bool isDummy() { return true; } + size_t alloc() const { return 0; } + size_t free() const { return 0; } + size_t exchangeAlloc() const { return 0; } + size_t exchangeFree() const { return 0; } + size_t returnFree() const { return 0; } + size_t exactAlloc() const { return 0; } + bool isUsed() const { return false; } +}; + +class Stat +{ +public: + Stat() + : _free(0), + _alloc(0), + _exchangeAlloc(0), + _exchangeFree(0), + _exactAlloc(0), + _return(0) + { } + void incAlloc() { _alloc++; } + void incExchangeFree() { _exchangeFree++; } + void incReturnFree() { _return++; } + void incFree() { _free++; } + void incExchangeAlloc() { _exchangeAlloc++; } + void incExactAlloc() { _exactAlloc++; } + + bool isUsed() const { + return (_alloc || _free || _exchangeAlloc || _exchangeFree || _exactAlloc || _return); + } + static bool isDummy() { return false; } + size_t alloc() const { return _alloc; } + size_t free() const { return _free; } + size_t exchangeAlloc() const { return _exchangeAlloc; } + size_t exchangeFree() const { return _exchangeFree; } + size_t exactAlloc() const { return _exactAlloc; } + size_t returnFree() const { return _return; } +private: + size_t _free; + size_t _alloc; + size_t _exchangeAlloc; + size_t _exchangeFree; + size_t _exactAlloc; + size_t _return; +}; + +} diff --git a/vespamalloc/src/vespamalloc/malloc/threadlist.cpp b/vespamalloc/src/vespamalloc/malloc/threadlist.cpp new file mode 100644 index 00000000000..dd68fab500e --- /dev/null +++ b/vespamalloc/src/vespamalloc/malloc/threadlist.cpp @@ -0,0 +1,10 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+#include <vespamalloc/malloc/threadlist.hpp> +#include <vespamalloc/malloc/memblock.h> +#include <vespamalloc/malloc/stat.h> + +namespace vespamalloc { + +template class ThreadListT<MemBlock, NoStat>; + +} diff --git a/vespamalloc/src/vespamalloc/malloc/threadlist.h b/vespamalloc/src/vespamalloc/malloc/threadlist.h new file mode 100644 index 00000000000..9901c9f6960 --- /dev/null +++ b/vespamalloc/src/vespamalloc/malloc/threadlist.h @@ -0,0 +1,51 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#pragma once + +#include <vespamalloc/malloc/threadpool.h> + +namespace vespamalloc { + +#ifdef __PIC__ + #define TLS_LINKAGE __attribute__((visibility("hidden"), tls_model("initial-exec"))) +#else + #define TLS_LINKAGE __attribute__((visibility("hidden"), tls_model("local-exec"))) +#endif + +template <typename MemBlockPtrT, typename ThreadStatT> +class ThreadListT +{ +public: + typedef ThreadPoolT<MemBlockPtrT, ThreadStatT > ThreadPool; + typedef AllocPoolT<MemBlockPtrT> AllocPool; + ThreadListT(AllocPool & pool); + ~ThreadListT(); + void setParams(size_t alwayReuseLimit, size_t threadCacheLimit) { + ThreadPool::setParams(alwayReuseLimit, threadCacheLimit); + } + bool quitThisThread(); + bool initThisThread(); + ThreadPool & getCurrent() { return *_myPool; } + size_t getThreadId() const { return (_myPool - _threadVector); } + void enableThreadSupport() { + if ( ! 
_isThreaded ) { + _isThreaded = true; + } + } + + void info(FILE * os, size_t level=0); + size_t getMaxNumThreads() const { return NELEMS(_threadVector); } +private: + size_t getThreadCount() const { return _threadCount; } + size_t getThreadCountAccum() const { return _threadCountAccum; } + ThreadListT(const ThreadListT & tl); + ThreadListT & operator = (const ThreadListT & tl); + enum {ThreadStackSize=2048*1024}; + volatile bool _isThreaded; + volatile size_t _threadCount; + volatile size_t _threadCountAccum; + ThreadPool _threadVector[NUM_THREADS]; + AllocPoolT<MemBlockPtrT> & _allocPool; + static __thread ThreadPool * _myPool TLS_LINKAGE; +}; + +} diff --git a/vespamalloc/src/vespamalloc/malloc/threadlist.hpp b/vespamalloc/src/vespamalloc/malloc/threadlist.hpp new file mode 100644 index 00000000000..a1ea517beed --- /dev/null +++ b/vespamalloc/src/vespamalloc/malloc/threadlist.hpp @@ -0,0 +1,79 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#pragma once + +#include <vespamalloc/malloc/threadlist.h> + +namespace vespamalloc { + +template <typename MemBlockPtrT, typename ThreadStatT> +ThreadListT<MemBlockPtrT, ThreadStatT>::ThreadListT(AllocPool & pool) : + _isThreaded(false), + _threadCount(0), + _threadCountAccum(0), + _allocPool(pool) +{ + for (size_t i = 0; i < getMaxNumThreads(); i++) { + _threadVector[i].setPool(_allocPool); + } +} + +template <typename MemBlockPtrT, typename ThreadStatT> +ThreadListT<MemBlockPtrT, ThreadStatT>::~ThreadListT() +{ +} + +template <typename MemBlockPtrT, typename ThreadStatT> +void ThreadListT<MemBlockPtrT, ThreadStatT>::info(FILE * os, size_t level) +{ + size_t peakThreads(0); + size_t activeThreads(0); + for (size_t i(0); i < getMaxNumThreads(); i++) { + const ThreadPool & thread = _threadVector[i]; + if (thread.isActive()) { + activeThreads++; + if ( ! 
ThreadStatT::isDummy()) { + fprintf(os, "Thread #%ld = pid # %d\n", i, thread.osThreadId()); + if (thread.isUsed()) { + thread.info(os, level, _allocPool.dataSegment()); + } + } + peakThreads = i; + } + } + fprintf(os, "#%ld active threads. Peak threads #%ld\n", activeThreads, peakThreads); +} + +template <typename MemBlockPtrT, typename ThreadStatT> +bool ThreadListT<MemBlockPtrT, ThreadStatT>::quitThisThread() +{ + ThreadPool & tp = getCurrent(); + tp.quit(); + Atomic::postDec(&_threadCount); + return true; +} + +template <typename MemBlockPtrT, typename ThreadStatT> +bool ThreadListT<MemBlockPtrT, ThreadStatT>::initThisThread() +{ + bool retval(true); + Atomic::postInc(&_threadCount); + size_t lidAccum = Atomic::postInc(&_threadCountAccum); + long localId(-1); + for(size_t i = 0; (localId < 0) && (i < getMaxNumThreads()); i++) { + ThreadPool & tp = _threadVector[i]; + if (tp.grabAvailable()) { + localId = i; + } + } + assert(localId >= 0); + _myPool = &_threadVector[localId]; + assert(getThreadId() == size_t(localId)); + + getCurrent().init(lidAccum); + + return retval; +} +template <typename MemBlockPtrT, typename ThreadStatT> +__thread ThreadPoolT<MemBlockPtrT, ThreadStatT> * ThreadListT<MemBlockPtrT, ThreadStatT>::_myPool TLS_LINKAGE = NULL; + +} diff --git a/vespamalloc/src/vespamalloc/malloc/threadlistd.cpp b/vespamalloc/src/vespamalloc/malloc/threadlistd.cpp new file mode 100644 index 00000000000..262eefdf73d --- /dev/null +++ b/vespamalloc/src/vespamalloc/malloc/threadlistd.cpp @@ -0,0 +1,10 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+#include <vespamalloc/malloc/threadlist.hpp> +#include <vespamalloc/malloc/memblockboundscheck_d.h> +#include <vespamalloc/malloc/stat.h> + +namespace vespamalloc { + +template class ThreadListT<MemBlockBoundsCheck, Stat>; + +} diff --git a/vespamalloc/src/vespamalloc/malloc/threadlistdst.cpp b/vespamalloc/src/vespamalloc/malloc/threadlistdst.cpp new file mode 100644 index 00000000000..757573066da --- /dev/null +++ b/vespamalloc/src/vespamalloc/malloc/threadlistdst.cpp @@ -0,0 +1,10 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#include <vespamalloc/malloc/threadlist.hpp> +#include <vespamalloc/malloc/memblockboundscheck_dst.h> +#include <vespamalloc/malloc/stat.h> + +namespace vespamalloc { + +template class ThreadListT<MemBlockBoundsCheck, Stat>; + +} diff --git a/vespamalloc/src/vespamalloc/malloc/threadpool.cpp b/vespamalloc/src/vespamalloc/malloc/threadpool.cpp new file mode 100644 index 00000000000..0e021421d5d --- /dev/null +++ b/vespamalloc/src/vespamalloc/malloc/threadpool.cpp @@ -0,0 +1,10 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#include <vespamalloc/malloc/threadpool.hpp> +#include <vespamalloc/malloc/memblock.h> +#include <vespamalloc/malloc/stat.h> + +namespace vespamalloc { + +template class ThreadPoolT<MemBlock, NoStat>; + +} diff --git a/vespamalloc/src/vespamalloc/malloc/threadpool.h b/vespamalloc/src/vespamalloc/malloc/threadpool.h new file mode 100644 index 00000000000..45f6a0aef6d --- /dev/null +++ b/vespamalloc/src/vespamalloc/malloc/threadpool.h @@ -0,0 +1,76 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+#pragma once + +#include <atomic> +#include <vespamalloc/malloc/common.h> +#include <vespamalloc/malloc/allocchunk.h> +#include <vespamalloc/malloc/globalpool.h> + +namespace vespamalloc { + +template <typename MemBlockPtrT, typename ThreadStatT > +class ThreadPoolT +{ +public: + typedef AFList<MemBlockPtrT> ChunkSList; + typedef AllocPoolT<MemBlockPtrT> AllocPool; + ThreadPoolT(); + ~ThreadPoolT(); + void setPool(AllocPool & pool) { + _allocPool = & pool; + } + void malloc(size_t sz, MemBlockPtrT & mem) __attribute__((noinline)); + void free(MemBlockPtrT mem, SizeClassT sc) __attribute__((noinline)); + + void info(FILE * os, size_t level, const DataSegment<MemBlockPtrT> & ds) const __attribute__((noinline)); + /** + * Indicates if it represents an active thread. + * @return true if this represents an active thread. + */ + bool isActive() const; + /** + * Indicates if it represents an active thread that actually has done any allocations/deallocations. + * @return true if this represents an active used thread. 
+ */ + bool isUsed() const; + int osThreadId() const { return _osThreadId; } + void quit() { _osThreadId = 0; } // Implicit memory barrier + void init(int thrId); + static void setParams(size_t alwayReuseLimit, size_t threadCacheLimit); + bool grabAvailable(); +private: + bool hasActuallyBeenUsed() const; + ThreadPoolT(const ThreadPoolT & rhs); + ThreadPoolT & operator =(const ThreadPoolT & rhs); + unsigned threadId() const { return _threadId; } + void setThreadId(unsigned th) { _threadId = th; } + class AllocFree { + public: + AllocFree() : _allocFrom(NULL), _freeTo(NULL) { } + void init(AllocPool & allocPool, SizeClassT sc) { + if (_allocFrom == NULL) { + _allocFrom = allocPool.getFree(sc, 1); + assert(_allocFrom != NULL); + _freeTo = allocPool.getFree(sc, 1); + assert(_freeTo != NULL); + } + } + void swap() { + std::swap(_allocFrom, _freeTo); + } + ChunkSList *_allocFrom; + ChunkSList *_freeTo; + }; + void mallocHelper(size_t exactSize, SizeClassT sc, AllocFree & af, MemBlockPtrT & mem) __attribute__ ((noinline)); + bool alwaysReuse(SizeClassT sc) { return sc > _alwaysReuseSCLimit; } + + AllocPool * _allocPool; + AllocFree _memList[NUM_SIZE_CLASSES]; + ThreadStatT _stat[NUM_SIZE_CLASSES]; + unsigned _threadId; + std::atomic<ssize_t> _osThreadId; + static SizeClassT _alwaysReuseSCLimit __attribute__((visibility("hidden"))); + static size_t _threadCacheLimit __attribute__((visibility("hidden"))); +}; + +} diff --git a/vespamalloc/src/vespamalloc/malloc/threadpool.hpp b/vespamalloc/src/vespamalloc/malloc/threadpool.hpp new file mode 100644 index 00000000000..e14d4bccaa5 --- /dev/null +++ b/vespamalloc/src/vespamalloc/malloc/threadpool.hpp @@ -0,0 +1,212 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+#pragma once + +#include <vespamalloc/malloc/threadpool.h> + +namespace vespamalloc { + +template <typename MemBlockPtrT, typename ThreadStatT> +SizeClassT ThreadPoolT<MemBlockPtrT, ThreadStatT>::_alwaysReuseSCLimit __attribute__((visibility("hidden"))) = MemBlockPtrT::sizeClass(0x200000); +template <typename MemBlockPtrT, typename ThreadStatT> +size_t ThreadPoolT<MemBlockPtrT, ThreadStatT>::_threadCacheLimit __attribute__((visibility("hidden"))) = 0x10000; + +template <typename MemBlockPtrT, typename ThreadStatT> +void ThreadPoolT<MemBlockPtrT, ThreadStatT>::info(FILE * os, size_t level, const DataSegment<MemBlockPtrT> & ds) const { + if (level > 0) { + for (size_t i=0; i < NELEMS(_stat); i++) { + const ThreadStatT & s = _stat[i]; + const AllocFree & af = _memList[i]; + if (s.isUsed()) { + size_t localAvailCount((af._freeTo ? af._freeTo->count() : 0) + + (af._allocFrom ? af._allocFrom->count() : 0)); + fprintf(os, "SC %2ld(%10ld) Local(%3ld) Alloc(%10ld), " + "Free(%10ld) ExchangeAlloc(%8ld), ExChangeFree(%8ld) " + "Returned(%8ld) ExactAlloc(%8ld)\n", + i, MemBlockPtrT::classSize(i), localAvailCount, + s.alloc(), s.free(), s.exchangeAlloc(), + s.exchangeFree(), s.returnFree(), s.exactAlloc()); + } + } + } + if (level > 1) { + fprintf(os, "BlockList:%ld,%ld,%ld\n", NELEMS(_stat), sizeof(_stat), sizeof(_stat[0])); + size_t sum(0), sumLocal(0); + for (size_t i=0; i < NELEMS(_stat); i++) { + const ThreadStatT & s = _stat[i]; + if (s.isUsed()) { + fprintf(os, "Allocated Blocks SC %2ld(%10ld): ", i, MemBlockPtrT::classSize(i)); + size_t allocCount = ds.infoThread(os, level, threadId(), i); + const AllocFree & af = _memList[i]; + size_t localAvailCount((af._freeTo ? af._freeTo->count() : 0) + + (af._allocFrom ? 
af._allocFrom->count() : 0)); + sum += allocCount*MemBlockPtrT::classSize(i); + sumLocal += localAvailCount*MemBlockPtrT::classSize(i); + fprintf(os, " Total used(%ld + %ld = %ld(%ld)).\n", + allocCount, localAvailCount, localAvailCount+allocCount, + (localAvailCount+allocCount)*MemBlockPtrT::classSize(i)); + } + } + fprintf(os, "Sum = (%ld + %ld) = %ld\n", sum, sumLocal, sum+sumLocal); + } +} + +template <typename MemBlockPtrT, typename ThreadStatT > +void ThreadPoolT<MemBlockPtrT, ThreadStatT>:: +mallocHelper(size_t exactSize, + SizeClassT sc, + typename ThreadPoolT<MemBlockPtrT, ThreadStatT>::AllocFree & af, + MemBlockPtrT & mem) +{ + if (!af._freeTo->empty()) { + af.swap(); + af._allocFrom->sub(mem); + PARANOID_CHECK2( if (!mem.ptr()) { *(int *)0 = 0; } ); + } else { + if ( ! this->alwaysReuse(sc) ) { + af._allocFrom = _allocPool->exchangeAlloc(sc, af._allocFrom); + _stat[sc].incExchangeAlloc(); + if (af._allocFrom) { + af._allocFrom->sub(mem); + PARANOID_CHECK2( if (!mem.ptr()) { *(int *)1 = 1; } ); + } else { + PARANOID_CHECK2( *(int *)2 = 2; ); + } + } else { + af._allocFrom = _allocPool->exactAlloc(exactSize, sc, af._allocFrom); + _stat[sc].incExactAlloc(); + if (af._allocFrom) { + af._allocFrom->sub(mem); + PARANOID_CHECK2( if (!mem.ptr()) { *(int *)3 = 3; } ); + } else { + PARANOID_CHECK2( *(int *)4 = 4; ); + } + } + } +} + +template <typename MemBlockPtrT, typename ThreadStatT > +ThreadPoolT<MemBlockPtrT, ThreadStatT>::ThreadPoolT() : + _allocPool(NULL), + _threadId(0), + _osThreadId(0) +{ +} + +template <typename MemBlockPtrT, typename ThreadStatT > +ThreadPoolT<MemBlockPtrT, ThreadStatT>::~ThreadPoolT() +{ +} + +template <typename MemBlockPtrT, typename ThreadStatT > +void ThreadPoolT<MemBlockPtrT, ThreadStatT>::malloc(size_t sz, MemBlockPtrT & mem) +{ + SizeClassT sc = MemBlockPtrT::sizeClass(sz); + AllocFree & af = _memList[sc]; + af._allocFrom->sub(mem); + if ( !mem.ptr()) { + mallocHelper(sz, sc, af, mem); + } + PARANOID_CHECK2(if 
(!mem.validFree()) { *(int *)1 = 1; } ); + _stat[sc].incAlloc(); + mem.setThreadId(_threadId); + PARANOID_CHECK2(if (af._allocFrom->count() > ChunkSList::NumBlocks) { *(int *)1 = 1; } ); + PARANOID_CHECK2(if (af._freeTo->count() > ChunkSList::NumBlocks) { *(int *)1 = 1; } ); + PARANOID_CHECK2(if (af._freeTo->full()) { *(int *)1 = 1; } ); + PARANOID_CHECK2(if (af._allocFrom->full()) { *(int *)1 = 1; } ); +} + +template <typename MemBlockPtrT, typename ThreadStatT > +void ThreadPoolT<MemBlockPtrT, ThreadStatT>::free(MemBlockPtrT mem, SizeClassT sc) +{ + PARANOID_CHECK2(if (!mem.validFree()) { *(int *)1 = 1; } ); + AllocFree & af = _memList[sc]; + const size_t cs(MemBlockPtrT::classSize(sc)); + if ((af._allocFrom->count()+1)*cs < _threadCacheLimit) { + if ( ! af._allocFrom->full() ) { + af._allocFrom->add(mem); + } else { + af._freeTo->add(mem); + if (af._freeTo->full()) { + af._freeTo = _allocPool->exchangeFree(sc, af._freeTo); + _stat[sc].incExchangeFree(); + } + } + } else if (cs < _threadCacheLimit) { + af._freeTo->add(mem); + if (af._freeTo->count()*cs > _threadCacheLimit) { + af._freeTo = _allocPool->exchangeFree(sc, af._freeTo); + _stat[sc].incExchangeFree(); + } + } else if ( !alwaysReuse(sc) ) { + af._freeTo->add(mem); + af._freeTo = _allocPool->exchangeFree(sc, af._freeTo); + _stat[sc].incExchangeFree(); + } else { + af._freeTo->add(mem); + af._freeTo = _allocPool->returnMemory(sc, af._freeTo); + _stat[sc].incReturnFree(); + } + + _stat[sc].incFree(); + PARANOID_CHECK2(if (af._allocFrom->count() > ChunkSList::NumBlocks) { *(int *)1 = 1; } ); + PARANOID_CHECK2(if (af._freeTo->count() > ChunkSList::NumBlocks) { *(int *)1 = 1; } ); + PARANOID_CHECK2(if (af._freeTo->full()) { *(int *)1 = 1; } ); +} + +template <typename MemBlockPtrT, typename ThreadStatT > +bool ThreadPoolT<MemBlockPtrT, ThreadStatT>::isActive() const +{ + return (_osThreadId != 0); +} + +template <typename MemBlockPtrT, typename ThreadStatT > +bool ThreadPoolT<MemBlockPtrT, 
ThreadStatT>::isUsed() const +{ + return isActive() && hasActuallyBeenUsed(); +} + +template <typename MemBlockPtrT, typename ThreadStatT > +bool ThreadPoolT<MemBlockPtrT, ThreadStatT>::hasActuallyBeenUsed() const +{ + bool used(false); + for (size_t i=0; !used && (i < NELEMS(_memList)); i++) { + used = (_memList[i]._allocFrom != NULL + && !_memList[i]._allocFrom->empty() + && !_memList[i]._freeTo->full()); + } + return used; +} + +template <typename MemBlockPtrT, typename ThreadStatT > +void ThreadPoolT<MemBlockPtrT, ThreadStatT>::init(int thrId) +{ + setThreadId(thrId); + assert(_osThreadId.load(std::memory_order_relaxed) == -1); + _osThreadId = pthread_self(); + for (size_t i=0; (i < NELEMS(_memList)); i++) { + _memList[i].init(*_allocPool, i); + } + // printf("OsThreadId = %lx, threadId = %x\n", _osThreadId, _threadId); +} + +template <typename MemBlockPtrT, typename ThreadStatT > +void ThreadPoolT<MemBlockPtrT, ThreadStatT>::setParams(size_t alwaysReuseLimit, size_t threadCacheLimit) +{ + _alwaysReuseSCLimit = std::max(MemBlockPtrT::sizeClass(alwaysReuseLimit), + SizeClassT(MemBlockPtrT::SizeClassSpan)); + _threadCacheLimit = threadCacheLimit; +} + +template <typename MemBlockPtrT, typename ThreadStatT > +bool ThreadPoolT<MemBlockPtrT, ThreadStatT>::grabAvailable() +{ + if (_osThreadId.load(std::memory_order_relaxed) == 0) { + ssize_t expected = 0; + if (_osThreadId.compare_exchange_strong(expected, -1)) { + return true; + } + } + return false; +} + +} diff --git a/vespamalloc/src/vespamalloc/malloc/threadpoold.cpp b/vespamalloc/src/vespamalloc/malloc/threadpoold.cpp new file mode 100644 index 00000000000..cbea18cdf43 --- /dev/null +++ b/vespamalloc/src/vespamalloc/malloc/threadpoold.cpp @@ -0,0 +1,10 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+#include <vespamalloc/malloc/threadpool.hpp> +#include <vespamalloc/malloc/memblockboundscheck_d.h> +#include <vespamalloc/malloc/stat.h> + +namespace vespamalloc { + +template class ThreadPoolT<MemBlockBoundsCheck, Stat>; + +} diff --git a/vespamalloc/src/vespamalloc/malloc/threadpooldst.cpp b/vespamalloc/src/vespamalloc/malloc/threadpooldst.cpp new file mode 100644 index 00000000000..a011e4f9ad1 --- /dev/null +++ b/vespamalloc/src/vespamalloc/malloc/threadpooldst.cpp @@ -0,0 +1,10 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#include <vespamalloc/malloc/threadpool.hpp> +#include <vespamalloc/malloc/memblockboundscheck_dst.h> +#include <vespamalloc/malloc/stat.h> + +namespace vespamalloc { + +template class ThreadPoolT<MemBlockBoundsCheck, Stat>; + +} diff --git a/vespamalloc/src/vespamalloc/malloc/threadproxy.cpp b/vespamalloc/src/vespamalloc/malloc/threadproxy.cpp new file mode 100644 index 00000000000..7bbe2da345d --- /dev/null +++ b/vespamalloc/src/vespamalloc/malloc/threadproxy.cpp @@ -0,0 +1,112 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+#include <vespamalloc/malloc/threadproxy.h> +#include <dlfcn.h> + +namespace vespamalloc { + +IAllocator * _G_myMemP = NULL; + +void setAllocatorForThreads(IAllocator * allocator) +{ + _G_myMemP = allocator; +} + +} +extern "C" { + +typedef void * (*VoidpFunctionVoidp) (void *); +class ThreadArg +{ +public: + ThreadArg(VoidpFunctionVoidp func, void * arg) : _func(func), _arg(arg) { } + VoidpFunctionVoidp _func; + void * _arg; +}; + +typedef int (*pthread_create_function) (pthread_t *thread, + const pthread_attr_t *attr, + VoidpFunctionVoidp start_routine, + void *arg); + +int linuxthreads_pthread_getattr_np(pthread_t pid, pthread_attr_t *dst); + +static void * _G_mallocThreadProxyReturnAddress = NULL; +static volatile size_t _G_threadCount = 1; // You always have the main thread. + +static void cleanupThread(void * arg) +{ + ThreadArg * ta = (ThreadArg *) arg; + delete ta; + vespamalloc::_G_myMemP->quitThisThread(); + vespamalloc::Mutex::subThread(); + vespalib::Atomic::postDec(&_G_threadCount); +} + +void * mallocThreadProxy (void * arg) +{ + ThreadArg * ta = (ThreadArg *) arg; + + void * tempReturnAddress = __builtin_return_address(0); + assert((_G_mallocThreadProxyReturnAddress == NULL) || (_G_mallocThreadProxyReturnAddress == tempReturnAddress)); + _G_mallocThreadProxyReturnAddress = tempReturnAddress; + vespamalloc::_G_myMemP->setReturnAddressStop(tempReturnAddress); + + vespamalloc::Mutex::addThread(); + vespamalloc::_G_myMemP->initThisThread(); + void * result = NULL; + DEBUG(fprintf(stderr, "arg(%p=%p), local(%p=%p)\n", &arg, arg, &ta, ta)); + + pthread_cleanup_push(cleanupThread, ta); + result = (*ta->_func)(ta->_arg); + pthread_cleanup_pop(1); + + return result; +} + + +extern "C" VESPA_DLL_EXPORT int local_pthread_create (pthread_t *thread, + const pthread_attr_t *attrOrg, + void * (*start_routine) (void *), + void * arg) __asm__("pthread_create"); + +VESPA_DLL_EXPORT int local_pthread_create (pthread_t *thread, + const pthread_attr_t *attrOrg, + void * 
(*start_routine) (void *), + void * arg) +{ + size_t numThreads; + for (numThreads = _G_threadCount + ;(numThreads < vespamalloc::_G_myMemP->getMaxNumThreads()) && ! vespalib::Atomic::cmpSwap(&_G_threadCount, numThreads+1, numThreads) + ; numThreads = _G_threadCount) { + } + if (numThreads >= vespamalloc::_G_myMemP->getMaxNumThreads()) { + return EAGAIN; + } + // A pointer to the library version of pthread_create. + static pthread_create_function real_pthread_create = NULL; + + const char * pthread_createName = "pthread_create"; + + if (real_pthread_create == NULL) { + real_pthread_create = (pthread_create_function) dlsym (RTLD_NEXT, pthread_createName); + if (real_pthread_create == NULL) { + fprintf (stderr, "Could not find the pthread_create function!\n"); + abort(); + } + } + + ThreadArg * args = new ThreadArg(start_routine, arg); + pthread_attr_t locAttr; + pthread_attr_t *attr(const_cast<pthread_attr_t *>(attrOrg)); + if (attr == NULL) { + pthread_attr_init(&locAttr); + attr = &locAttr; + } + + vespamalloc::_G_myMemP->enableThreadSupport(); + int retval = (*real_pthread_create)(thread, attr, mallocThreadProxy, args); + + return retval; +} + +} diff --git a/vespamalloc/src/vespamalloc/malloc/threadproxy.h b/vespamalloc/src/vespamalloc/malloc/threadproxy.h new file mode 100644 index 00000000000..4865e5fbd5f --- /dev/null +++ b/vespamalloc/src/vespamalloc/malloc/threadproxy.h @@ -0,0 +1,11 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+#pragma once + +#include <vespamalloc/malloc/common.h> + +namespace vespamalloc { + +void setAllocatorForThreads(IAllocator * allocator); + +} + diff --git a/vespamalloc/src/vespamalloc/util/.gitignore b/vespamalloc/src/vespamalloc/util/.gitignore new file mode 100644 index 00000000000..ca84dea06d6 --- /dev/null +++ b/vespamalloc/src/vespamalloc/util/.gitignore @@ -0,0 +1,4 @@ +.depend +Makefile +test +test_cpu diff --git a/vespamalloc/src/vespamalloc/util/CMakeLists.txt b/vespamalloc/src/vespamalloc/util/CMakeLists.txt new file mode 100644 index 00000000000..5d06264f1df --- /dev/null +++ b/vespamalloc/src/vespamalloc/util/CMakeLists.txt @@ -0,0 +1,9 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +vespa_add_library(vespamalloc_util OBJECT + SOURCES + callstack.cpp + traceutil.cpp + osmem.cpp + stream.cpp + DEPENDS +) diff --git a/vespamalloc/src/vespamalloc/util/callgraph.h b/vespamalloc/src/vespamalloc/util/callgraph.h new file mode 100644 index 00000000000..0cf472c04c5 --- /dev/null +++ b/vespamalloc/src/vespamalloc/util/callgraph.h @@ -0,0 +1,158 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+#pragma once + +#include <stdio.h> +#include <vespamalloc/util/stream.h> +#include <memory> + +namespace vespamalloc { + +template<typename T, typename AddSub> +class CallGraphNode +{ +public: + CallGraphNode() : _callers(NULL), _next(NULL), _content(), _count(0) { } + const CallGraphNode *next() const { return _next; } + const CallGraphNode *callers() const { return _callers; } + const T & content() const { return _content; } + CallGraphNode *next() { return _next; } + CallGraphNode *callers() { return _callers; } + T & content() { return _content; } + size_t count() const { return _count; } + void content(const T & v) { _content = v; } + template <typename Store> + bool addStack(T * stack, size_t nelem, Store & store); + template<typename Object> + void traverseDepth(size_t depth, size_t width, Object func); + template<typename Object> + void traverseWidth(size_t depth, size_t width, Object & func); + friend asciistream & operator << (asciistream & os, const CallGraphNode & v) { + return os << v._content << '(' << v._count << ')'; + } +private: + CallGraphNode * _callers; + CallGraphNode * _next; + T _content; + AddSub _count; +}; + +template<typename T, typename AddSub> +template <typename Store> +bool CallGraphNode<T, AddSub>::addStack(T * stack, size_t nelem, Store & store) { + bool retval(false); + if (nelem == 0) { + retval = true; + } else if (_content == stack[0]) { + _count++; + if (nelem > 1) { + if (_callers == NULL) { + _callers = store.alloc(); + if (_callers != NULL) { + _callers->content(stack[1]); + } + } + if (_callers) { + retval = _callers->addStack(stack+1, nelem-1, store); + } + } else { + retval = true; + } + } else { + if (_next == NULL) { + _next = store.alloc(); + if (_next != NULL) { + _next->content(stack[0]); + } + } + if (_next) { + retval = _next->addStack(stack, nelem, store); + } + } + return retval; +} + +template<typename T, typename AddSub> +template<typename Object> +void CallGraphNode<T, AddSub>::traverseDepth(size_t depth, 
size_t width, Object func) { + Object newFunc(func); + newFunc.handle(*this); + if (_callers) { + _callers->traverseDepth(depth+1, width, newFunc); + } + if (_next) { + _next->traverseDepth(depth, width+1, func); + } +} + +template<typename T, typename AddSub> +template<typename Object> +void CallGraphNode<T, AddSub>::traverseWidth(size_t depth, size_t width, Object & func) { + Object newFunc(func); + newFunc.handle(*this); + if (_next) { + _next->traverseWidth(depth, width+1, newFunc); + } + if (_callers) { + _callers->traverseWidth(depth+1, width, func); + } +} + +template<typename T, size_t MaxElem, typename AddSub> +class ArrayStore +{ +public: + ArrayStore() : _used(0) { } + T * alloc() { return (_used < MaxElem) ? &_array[_used++] : NULL; } + AddSub size() const { return _used; } +private: + AddSub _used; + T _array[MaxElem]; +}; + +template <typename Content, size_t MaxElems, typename AddSub> +class CallGraph +{ +public: + typedef CallGraphNode<Content, AddSub> Node; + + CallGraph() : + _root(NULL), + _nodeStore(new NodeStore()) + { } + CallGraph(Content root) : + _root(NULL), + _nodeStore(new NodeStore()) + { + checkOrSetRoot(root); + } + bool addStack(Content * stack, size_t nelem) { + checkOrSetRoot(stack[0]); + return _root->addStack(stack, nelem, *_nodeStore); + } + template<typename Object> + void traverseDepth(Object func) { + if (_root) { _root->traverseDepth(0, 0, func); } + } + template<typename Object> + void traverseWidth(Object func) { + if (_root) {_root->traverseWidth(0, 0, func); } + } + size_t size() const { return _nodeStore->size(); } + bool empty() const { return size()==0; } +private: + CallGraph(const CallGraph &); + CallGraph & operator = (const CallGraph &); + bool checkOrSetRoot(const Content & root) { + if (_root == NULL) { + _root = _nodeStore->alloc(); + _root->content(root); + } + return (_root != NULL); + } + typedef ArrayStore<Node, MaxElems, AddSub> NodeStore; + Node * _root; + std::unique_ptr<NodeStore> _nodeStore; +}; + +} + 
diff --git a/vespamalloc/src/vespamalloc/util/callstack.cpp b/vespamalloc/src/vespamalloc/util/callstack.cpp new file mode 100644 index 00000000000..7780701a1a9 --- /dev/null +++ b/vespamalloc/src/vespamalloc/util/callstack.cpp @@ -0,0 +1,78 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#include <dlfcn.h> +#include <ctype.h> +#include <vespamalloc/util/callstack.h> + +namespace vespamalloc { + +const char * dlAddr(const void * func) { + static const char * _unknown = "UNKNOWN"; + const char * funcName = _unknown; + Dl_info info; + int ret = dladdr(func, &info); + if (ret != 0) { + funcName = info.dli_sname; + } + return funcName; +} + +const void * dlNextSym(const void * func) +{ + const char * f = static_cast<const char *>(func); + size_t i(0); + bool done(false); + for( i = 0; !done; i++) { + Dl_info info; + int ret = dladdr(f+i, &info); + if (ret == 0) { + fprintf(stderr, "dladdr failed for %p\n", f+i); + } + done = (f != info.dli_saddr); + } + return f+i; +} + +static void verifyAndCopy(const void * addr, char *v, size_t sz) +{ + size_t pos(0); + const char * sym = dlAddr(addr); + for (;sym && (sym[pos] != '\0') && (pos < sz-1); pos++) { + char c(sym[pos]); + v[pos] = isprint(c) ? 
c : '.'; + } + v[pos] = '\0'; +} + +void StackReturnEntry::info(FILE * os) const +{ + static char tmp[0x400]; + verifyAndCopy(_return, tmp, sizeof(tmp)); + fprintf(os, "%s(%p)", tmp, _return); +} + +asciistream & operator << (asciistream & os, const StackReturnEntry & v) +{ + static char tmp[0x100]; + static char t[0x200]; + verifyAndCopy(v._return, tmp, sizeof(tmp)); + snprintf(t, sizeof(t), "%s(%p)", tmp, v._return); + return os << t; +} + +void StackFrameReturnEntry::info(FILE * os) const +{ + static char tmp[0x400]; + verifyAndCopy(_return, tmp, sizeof(tmp)); + fprintf(os, "%s(%p, %p)", tmp, _return, _stack); +} + +asciistream & operator << (asciistream & os, const StackFrameReturnEntry & v) +{ + static char tmp[0x100]; + static char t[0x200]; + verifyAndCopy(v._return, tmp, sizeof(tmp)); + snprintf(t, sizeof(t), "%s(%p, %p)", tmp, v._return, v._stack); + return os << t; +} + +} diff --git a/vespamalloc/src/vespamalloc/util/callstack.h b/vespamalloc/src/vespamalloc/util/callstack.h new file mode 100644 index 00000000000..ebed00da581 --- /dev/null +++ b/vespamalloc/src/vespamalloc/util/callstack.h @@ -0,0 +1,173 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+#pragma once + +#include <stdio.h> +#include <dlfcn.h> +#include <limits> +#include </usr/include/execinfo.h> +#include <vespamalloc/util/stream.h> + +namespace vespamalloc { + +const void * dlNextSym(const void * f); +const char * dlAddr(const void * addr); + +class StackReturnEntry { +public: + StackReturnEntry(const void * returnAddress = NULL, + const void * stack=NULL) + : _return(returnAddress) + { + (void) stack; + } + int cmp(const StackReturnEntry & b) const { + return (size_t(_return) - size_t(b._return)); + } + void info(FILE * os) const; + bool valid() const { return _return != NULL; } + bool valid(const void * stopAddr) const { return valid() && (_return != stopAddr); } + bool valid(const void * stopAddrMin, const void * stopAddrMax) const { return valid() && ! ((stopAddrMin <= _return) && (_return < stopAddrMax)); } +private: + friend asciistream & operator << (asciistream & os, const StackReturnEntry & v); + const void * _return; +}; + +class StackFrameReturnEntry { +public: + StackFrameReturnEntry(const void * returnAddress = NULL, + const void * stack = NULL) + : _return(returnAddress), + _stack(stack) + { } + int cmp(const StackFrameReturnEntry & b) const { + int diff (size_t(_return) - size_t(b._return)); + if (diff == 0) { + diff = size_t(_stack) - size_t(b._stack); + } + return diff; + } + friend asciistream & operator << (asciistream & os, const StackFrameReturnEntry & v); + void info(FILE * os) const; +private: + const void * _return; + const void * _stack; +}; + +template <typename StackRep> +class StackEntry { +public: + StackEntry(const void * returnAddress = NULL, + const void * stack = NULL) + : _stackRep(returnAddress, stack) + { } + bool operator == (const StackEntry & b) const { return cmp(b) == 0; } + bool operator < (const StackEntry & b) const { return cmp(b) < 0; } + bool operator > (const StackEntry & b) const { return cmp(b) > 0; } + void info(FILE * os) const { _stackRep.info(os); } + bool valid() const { return 
_stackRep.valid(_stopAddr); } + static size_t fillStack2(StackEntry *stack, size_t nelems); + static size_t fillStack(StackEntry *stack, size_t nelems); + static void setStopAddress(const void * stopAddr) { _stopAddr = stopAddr; } +private: + int cmp(const StackEntry & b) const { return _stackRep.cmp(b._stackRep); } + friend asciistream & operator << (asciistream & os, const StackEntry<StackRep> & v) { + return os << v._stackRep; + } + StackRep _stackRep; + static const void * _stopAddr; +}; + +template <typename S, int N> +inline bool generateStackEntry(S & stack) +{ + void * s = __builtin_frame_address(N); + void * r(NULL); + if (s && (s > (void*)0x1000000) && (s < (void *)(std::numeric_limits<long>::max()))) { + r = __builtin_return_address(N); + stack = S(r, s); + } else { + stack = S(0, 0); + } + return (r == NULL) || (s == NULL); +} + +#define CASESTACK(n) \ + case n: { \ + done = generateStackEntry< StackEntry<StackRep>, n >(stack[n]); \ + break; \ + } + +template <typename StackRep> +const void * StackEntry<StackRep>::_stopAddr = NULL; + +template <typename StackRep> +size_t StackEntry<StackRep>::fillStack(StackEntry<StackRep> *stack, size_t nelems) +{ + void * retAddr[nelems]; + int sz = backtrace(retAddr, nelems); + if ((sz > 0) && (size_t(sz) <= nelems)) { + for(int i(1); i < sz; i++) { + StackEntry<StackRep> entry(retAddr[i], NULL); + if (entry.valid()) { + stack[i-1] = entry; + } else { + sz = i; + } + } + sz -= 1; // Do not count self + } else { + sz = 0; + } + return sz; +} + +template <typename StackRep> +size_t StackEntry<StackRep>::fillStack2(StackEntry<StackRep> *stack, size_t nelems) +{ + bool done(false); + size_t i(0); + for ( i=0; !done && (i < nelems); i++) { + switch (i) { + CASESTACK(31); + CASESTACK(30); + CASESTACK(29); + CASESTACK(28); + CASESTACK(27); + CASESTACK(26); + CASESTACK(25); + CASESTACK(24); + CASESTACK(23); + CASESTACK(22); + CASESTACK(21); + CASESTACK(20); + CASESTACK(19); + CASESTACK(18); + CASESTACK(17); + CASESTACK(16); 
+ CASESTACK(15); + CASESTACK(14); + CASESTACK(13); + CASESTACK(12); + CASESTACK(11); + CASESTACK(10); + CASESTACK(9); + CASESTACK(8); + CASESTACK(7); + CASESTACK(6); + CASESTACK(5); + CASESTACK(4); + CASESTACK(3); + CASESTACK(2); + CASESTACK(1); + CASESTACK(0); + default: + break; + } + } + return i-1; +} + +#undef CASESTACK + +} + diff --git a/vespamalloc/src/vespamalloc/util/index.h b/vespamalloc/src/vespamalloc/util/index.h new file mode 100644 index 00000000000..f7513114edc --- /dev/null +++ b/vespamalloc/src/vespamalloc/util/index.h @@ -0,0 +1,38 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#pragma once + +#include <vespa/vespalib/util/atomic.h> +#include <stdio.h> + +namespace vespamalloc { + +class Index +{ +public: + typedef size_t index_t; + Index(index_t index = 0) : _index(index) { } + operator index_t () const { return _index; } + index_t operator ++ (int) { return _index++; } + index_t operator -- (int) { return _index--; } + index_t operator += (index_t v) { return _index += v; } + index_t operator -= (index_t v) { return _index -= v; } +private: + index_t _index; +}; + +class AtomicIndex +{ +public: + typedef size_t index_t; + AtomicIndex(index_t index = 0) : _index(index) { } + operator index_t () const { return _index; } + index_t operator ++ (int) { return vespalib::Atomic::postInc(&_index); } + index_t operator -- (int) { return vespalib::Atomic::postDec(&_index); } + index_t operator += (index_t v) { return _index += v; } + index_t operator -= (index_t v) { return _index -= v; } +private: + index_t _index; +}; + +} + diff --git a/vespamalloc/src/vespamalloc/util/osmem.cpp b/vespamalloc/src/vespamalloc/util/osmem.cpp new file mode 100644 index 00000000000..c5f44342c75 --- /dev/null +++ b/vespamalloc/src/vespamalloc/util/osmem.cpp @@ -0,0 +1,240 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+#include <vespamalloc/util/osmem.h> +#include <stdio.h> +#include <fcntl.h> +#include <sys/statfs.h> +#include <sys/mman.h> +#include <linux/mman.h> +#include <algorithm> + +namespace vespamalloc { + +void * MmapMemory::reserve(size_t & len) +{ + len = 0; + const size_t wLen(0x1000); + void * wanted = get(wLen); + int test = munmap(wanted, wLen); + assert( test == 0 ); + (void) test; + setStart(wanted); + setEnd(getStart()); + return NULL; +} + +size_t findInMemInfo(const char * wanted) +{ + size_t value(0); + char memInfo[8192]; + int fd(open("/proc/meminfo", O_RDONLY)); + assert(fd >= 0); + if (fd >= 0) { + int sz(read(fd, memInfo, sizeof(memInfo))); + assert((sz < int(sizeof(memInfo))) && (sz >= 0)); + memInfo[sz] = '\0'; + const char * found(strstr(memInfo, wanted)); + if (found != NULL) { + found += strlen(wanted); + value = strtoul(found, NULL, 0); + } + close(fd); + } + return value; +} + +const char * getToken(const char * & s, const char * e) +{ + for (; (s < e) && isspace(s[0]); s++) { } + const char * c = s; + for (; (s < e) && ! 
isspace(s[0]); s++) { } + return c; +} + +bool verifyHugePagesMount(const char * mount) +{ + const unsigned int HUGETLBFS_MAGIC(0x958458f6); + struct statfs64 st; + int ret(statfs64(mount, &st)); + return (ret == 0) && (st.f_type == HUGETLBFS_MAGIC); +} + +MmapMemory::MmapMemory(size_t blockSize) : + Memory(blockSize), + _useMAdvLimit(getBlockAlignment()*32), + _hugePagesFd(-1), + _hugePagesOffset(0), + _hugePageSize(0) +{ + setupFAdvise(); + setupHugePages(); +} + +void MmapMemory::setupFAdvise() +{ + const char * madv = getenv("VESPA_MALLOC_MADVISE_LIMIT"); + if (madv) { + _useMAdvLimit = strtoul(madv, NULL, 0); + } +} + +void MmapMemory::setupHugePages() +{ + _hugePagesFileName[0] = '\0'; + const char * vespaHugePages = getenv("VESPA_MALLOC_HUGEPAGES"); + if (vespaHugePages && strcmp(vespaHugePages , "no")) { + int pid(getpid()); + _hugePageSize = findInMemInfo("Hugepagesize:"); + size_t pagesTotal = findInMemInfo("HugePages_Total:"); + if ((_hugePageSize > 0) && (pagesTotal > 0)) { + if (verifyHugePagesMount(vespaHugePages)) { + snprintf(_hugePagesFileName, sizeof(_hugePagesFileName), "%s/%d.mem", vespaHugePages, pid); + } else { + int fd(open("/proc/mounts", O_RDONLY)); + if (fd >= 0) { + char mounts[8192]; + int sz(read(fd, mounts, sizeof(mounts))); + assert((sz < int(sizeof(mounts))) && (sz >= 0)); + (void) sz; + const char * c = mounts; + for (size_t lineNo(0); *c; lineNo++) { + const char *e = c; + for (; e[0] && (e[0] != '\n'); e++) { } + const char *dev = getToken(c, e); + (void) dev; + const char *mount = getToken(c, e); + size_t mountLen(c - mount); + const char *fstype = getToken(c, e); + if (strstr(fstype, "hugetlbfs") == fstype) { + char mountCopy[1024]; + assert(mountLen < sizeof(mountCopy)); + strncpy(mountCopy, mount, mountLen); + mountCopy[mountLen] = '\0'; + if (verifyHugePagesMount(mountCopy)) { + snprintf(_hugePagesFileName, sizeof(_hugePagesFileName), "%s/%d.mem", mountCopy, pid); + break; + } + } + c = e[0] ? 
e + 1 : e; + } + close(fd); + } + } + if (_hugePagesFileName[0] != '\0') { + _blockSize = std::max(_blockSize, _hugePageSize); + _hugePagesFd = open(_hugePagesFileName, O_CREAT | O_RDWR, 0755); + assert(_hugePagesFd >= 0); + int retval(unlink(_hugePagesFileName)); + assert(retval == 0); + (void) retval; + } + } + } +} + +MmapMemory::~MmapMemory() +{ + if (_hugePagesFd >= 0) { + close(_hugePagesFd); + _hugePagesOffset = 0; + } +} + +void * MmapMemory::get(size_t len) +{ + void * memory(NULL); + memory = getHugePages(len); + if (memory ==NULL) { + memory = getNormalPages(len); + } + return memory; +} + +void * MmapMemory::getHugePages(size_t len) +{ + void * memory(NULL); + if ( ((len & 0x1fffff) == 0) && len) { + memory = getBasePages(len, MAP_ANON | MAP_PRIVATE | MAP_HUGETLB, -1, 0); + if (memory == NULL) { + if (_hugePagesFd >= 0) { + memory = getBasePages(len, MAP_SHARED, _hugePagesFd, _hugePagesOffset); + if (memory) { + _hugePagesOffset += len; + } + } + } + } + return memory; +} + +void * MmapMemory::getNormalPages(size_t len) +{ + return getBasePages(len, MAP_ANON | MAP_PRIVATE, -1, 0); +} + +void * MmapMemory::getBasePages(size_t len, int mmapOpt, int fd, size_t offset) +{ + char * wanted = reinterpret_cast<char *>(std::max(reinterpret_cast<size_t>(getEnd()), getMinPreferredStartAddress())); + void * mem(NULL); + for (bool ok(false) ; !ok && (mem != MAP_FAILED); wanted += getBlockAlignment()) { + if (mem != NULL) { + int tmp(munmap(mem, len)); + assert(tmp == 0); + (void) tmp; + mem = NULL; + } + // no alignment to _blockSize needed? + // both 0x10000000000ul*4 and 0x200000 are multiples of the current block size. + mem = mmap(wanted, len, PROT_READ | PROT_WRITE, mmapOpt, fd, offset); + ok = (mem == wanted); + } + if (mem != MAP_FAILED) { + if (getStart() == NULL) { + setStart(mem); + // assumes len parameter is always multiple of the current block size. 
+ setEnd(static_cast<char *>(mem)+len); + } else if (getEnd() < static_cast<char *>(mem)+len) { + setEnd(static_cast<char *>(mem)+len); + } + return mem; + } + return NULL; +} + +bool MmapMemory::release(void * mem, size_t len) +{ + int ret(0); + if (_useMAdvLimit <= len) { + ret = madvise(mem, len, MADV_DONTNEED); + if (ret != 0) { + char tmp[256]; + fprintf(stderr, "madvise(%p, %0lx, MADV_DONTNEED) = %d errno=%s\n", mem, len, ret, strerror_r(errno, tmp, sizeof(tmp))); + } + } + return true; +} + +bool MmapMemory::freeTail(void * mem, size_t len) +{ + int ret(0); + if ((_useMAdvLimit <= len) && (static_cast<char *>(mem) + len) == getEnd()) { + ret = munmap(mem, len); + assert(ret == 0); + setEnd(mem); + } + return (ret == 0); +} + +bool MmapMemory::reclaim(void * mem, size_t len) +{ + int ret(0); + if (_useMAdvLimit <= len) { + ret = madvise(mem, len, MADV_NORMAL); + if (ret != 0) { + char tmp[256]; + fprintf(stderr, "madvise(%p, %0lx, MADV_NORMAL) = %d errno=%s\n", mem, len, ret, strerror_r(errno, tmp, sizeof(tmp))); + } + } + return true; +} + +} diff --git a/vespamalloc/src/vespamalloc/util/osmem.h b/vespamalloc/src/vespamalloc/util/osmem.h new file mode 100644 index 00000000000..f5c51c2000d --- /dev/null +++ b/vespamalloc/src/vespamalloc/util/osmem.h @@ -0,0 +1,54 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
#pragma once

#include <ctype.h>
#include <stdlib.h>
#include <unistd.h>
#include <assert.h>
#include <string.h>
#include <algorithm>

namespace vespamalloc {

/**
 * Base bookkeeping for address space obtained from the OS.
 * _start/_end bracket the lowest/highest addresses handed out so far;
 * _blockSize is the request granularity, never below one page.
 */
class Memory
{
public:
    // blockSize is rounded up to at least the system page size.
    Memory(size_t blockSize) : _blockSize(std::max(blockSize, size_t(getpagesize()))), _start(NULL), _end(NULL) { }
    virtual ~Memory() { }
    void * getStart() const { return _start; }
    void * getEnd() const { return _end; }
    size_t getMinBlockSize() const { return _blockSize; }
    // Preferred lowest mapping address (0x10000000000 = 1 TiB).
    static size_t getMinPreferredStartAddress() { return 0x10000000000; } // 1T
    // Step used when probing mmap addresses (0x200000 = 2 MiB, huge-page sized).
    static size_t getBlockAlignment() { return 0x200000; } //2M
protected:
    void setStart(void * v) { _start = v; }
    void setEnd(void * v) { _end = v; }
    size_t _blockSize;  // allocation granularity
    void * _start;      // lowest address obtained, NULL until first mapping
    void * _end;        // one past the highest address obtained
};

/**
 * mmap(2)-backed Memory with optional hugetlbfs support; behavior is tuned by
 * the VESPA_MALLOC_HUGEPAGES and VESPA_MALLOC_MADVISE_LIMIT environment
 * variables (see osmem.cpp).
 */
class MmapMemory : public Memory
{
public:
    MmapMemory(size_t blockSize);
    virtual ~MmapMemory();
    void *reserve(size_t & len);           // probe/record a start address; returns NULL
    void *get(size_t len);                 // map len bytes, huge pages preferred
    bool release(void * mem, size_t len);  // madvise(MADV_DONTNEED) blocks >= _useMAdvLimit
    bool reclaim(void * mem, size_t len);  // madvise(MADV_NORMAL) before reuse
    bool freeTail(void * mem, size_t len); // munmap only when mem+len == getEnd()
private:
    void * getHugePages(size_t len);
    void * getNormalPages(size_t len);
    void * getBasePages(size_t len, int mmapOpt, int fd, size_t offset);
    void setupFAdvise();
    void setupHugePages();
    size_t _useMAdvLimit;          // minimum size madvise() hints are applied to
    int _hugePagesFd;              // fd of unlinked hugetlbfs backing file, or -1
    size_t _hugePagesOffset;       // next unused offset within that file
    size_t _hugePageSize;          // "Hugepagesize:" from /proc/meminfo, 0 if unset
    char _hugePagesFileName[256];  // path used to create the backing file
};

}
// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
//
// vespamalloc::asciistream — a minimal, malloc()-based text stream used inside
// the allocator (where the C++ iostream machinery cannot be used because it
// would itself allocate through us).
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdint.h>
#include <inttypes.h>
#include <assert.h>
#include <algorithm>

namespace vespamalloc {

class asciistream
{
public:
    asciistream();
    ~asciistream();
    asciistream(const asciistream & rhs);
    asciistream & operator = (const asciistream & rhs);
    void swap(asciistream & rhs);
    asciistream & operator << (char v) { write(&v, 1); return *this; }
    asciistream & operator << (unsigned char v) { write(&v, 1); return *this; }
    asciistream & operator << (const char * v) { if (v != NULL) { write(v, strlen(v)); } return *this; }
    asciistream & operator << (int32_t v);
    asciistream & operator << (uint32_t v);
    asciistream & operator << (int64_t v);
    asciistream & operator << (uint64_t v);
    asciistream & operator << (float v);
    asciistream & operator << (double v);
    // Unconsumed content; always NUL-terminated.
    const char * c_str() const { return _buffer + _rPos; }
    size_t size() const { return _wPos - _rPos; }
    size_t capacity() const { return _sz; }
private:
    void write(const void * buf, size_t len);
    size_t read(void * buf, size_t len);
    size_t _rPos;    // read cursor
    size_t _wPos;    // write cursor (== end of content)
    char * _buffer;  // malloc'ed, _sz bytes, content always NUL-terminated
    size_t _sz;      // allocated capacity in bytes
};

// Tiny string built on asciistream (append-only, C-string access via c_str()).
class string : public asciistream
{
public:
    string(const char * v = NULL) : asciistream() { *this << v; }
    string & operator += (const char * v) { *this << v; return *this; }
    string & operator += (const asciistream & v) { *this << v.c_str(); return *this; }
};

asciistream::asciistream() :
    _rPos(0),
    _wPos(0),
    _buffer(static_cast<char *>(malloc(1024))),
    _sz(1024)
{
}

asciistream::~asciistream()
{
    free(_buffer);
    _buffer = NULL;
}

// Copies only the unconsumed part of rhs; the copy starts with _rPos == 0.
asciistream::asciistream(const asciistream & rhs) :
    _rPos(0),
    _wPos(rhs._wPos - rhs._rPos),
    _buffer(static_cast<char *>(malloc(_wPos+1))),
    _sz(_wPos)
{
    memcpy(_buffer, (rhs._buffer + rhs._rPos), _sz);
    _buffer[_wPos] = 0;
}

// Copy-and-swap assignment.
asciistream & asciistream::operator = (const asciistream & rhs)
{
    if (this != &rhs) {
        asciistream newStream(rhs);
        swap(newStream);
    }
    return *this;
}

void asciistream::swap(asciistream & rhs)
{
    std::swap(_rPos, rhs._rPos);
    std::swap(_wPos, rhs._wPos);
    std::swap(_buffer, rhs._buffer);
    std::swap(_sz, rhs._sz);
}

asciistream & asciistream::operator << (int32_t v)
{
    char tmp[16];
    int len = snprintf(tmp, sizeof(tmp), "%d", v);
    write(tmp, len);
    return *this;
}

asciistream & asciistream::operator << (uint32_t v)
{
    char tmp[16];
    int len = snprintf(tmp, sizeof(tmp), "%u", v);
    write(tmp, len);
    return *this;
}

asciistream & asciistream::operator << (int64_t v)
{
    char tmp[32];
    int len = snprintf(tmp, sizeof(tmp), "%" PRId64, v);
    write(tmp, len);
    return *this;
}

asciistream & asciistream::operator << (uint64_t v)
{
    char tmp[32];
    int len = snprintf(tmp, sizeof(tmp), "%" PRIu64, v);
    write(tmp, len);
    return *this;
}

asciistream & asciistream::operator << (float v)
{
    char tmp[64];
    int len = snprintf(tmp, sizeof(tmp), "%g", v);
    write(tmp, len);
    return *this;
}

asciistream & asciistream::operator << (double v)
{
    char tmp[64];
    int len = snprintf(tmp, sizeof(tmp), "%g", v);
    write(tmp, len);
    return *this;
}

// Appends len bytes, growing the buffer as needed; always keeps room for and
// writes a trailing NUL so c_str() stays valid.
void asciistream::write(const void * buf, size_t len)
{
    if (_rPos == _wPos) {
        _rPos = _wPos = 0;  // everything consumed: rewind and reuse the buffer
    }
    if ((_sz - _wPos) < len + 1) {
        // FIX: allocate exactly what we record in _sz. The old code realloc'ed
        // _sz*2+len bytes but recorded _sz*2+len+1, so the NUL terminator could
        // be written one byte past the end of the allocation.
        size_t newSz = _sz * 2 + len + 1;
        char * newBuf = static_cast<char *>(realloc(_buffer, newSz));
        assert(newBuf != NULL);  // FIX: realloc result was previously unchecked
        _buffer = newBuf;
        _sz = newSz;
    }
    memcpy(_buffer + _wPos, buf, len);
    _wPos += len;
    _buffer[_wPos] = 0;
}

// Consumes up to len bytes into buf; returns the number of bytes copied.
size_t asciistream::read(void * buf, size_t len)
{
    size_t available = _wPos - _rPos;
    size_t toRead(std::min(len, available));
    memcpy(buf, _buffer+_rPos, toRead);
    _rPos += toRead;
    return toRead;
}

}
operator += (const asciistream & v) { *this << v.c_str(); return *this; } +}; + +} + diff --git a/vespamalloc/src/vespamalloc/util/traceutil.cpp b/vespamalloc/src/vespamalloc/util/traceutil.cpp new file mode 100644 index 00000000000..094e1632228 --- /dev/null +++ b/vespamalloc/src/vespamalloc/util/traceutil.cpp @@ -0,0 +1,32 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#include <vespamalloc/util/traceutil.h> +#include <algorithm> + +namespace vespamalloc { + +Aggregator::Aggregator() +{ +} + +Aggregator::~Aggregator() +{ +} + +struct CmpGraph +{ + bool operator () (const std::pair<size_t, string> & a, const std::pair<size_t, string> & b) const { + return a.first < b.first; + } +}; + +asciistream & operator << (asciistream & os, const Aggregator & v) +{ + Aggregator::Map map(v._map); + std::sort(map.begin(), map.end(), CmpGraph()); + for (Aggregator::Map::const_iterator it=map.begin(); it != map.end(); it++) { + os << it->first << " : " << it->second.c_str() << '\n'; + } + return os; +} + +} diff --git a/vespamalloc/src/vespamalloc/util/traceutil.h b/vespamalloc/src/vespamalloc/util/traceutil.h new file mode 100644 index 00000000000..23bb7036e72 --- /dev/null +++ b/vespamalloc/src/vespamalloc/util/traceutil.h @@ -0,0 +1,81 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
#pragma once

#include <dlfcn.h>
#include <signal.h>
#include <stdlib.h>
#include <assert.h>
#include <vector>
#include <vespamalloc/util/index.h>
#include <vespamalloc/util/callstack.h>
#include <vespamalloc/util/callgraph.h>


namespace vespamalloc {

// Call graph keyed on return-address stack entries, capacity 0x10000 nodes.
typedef StackEntry<StackReturnEntry> StackElem;
typedef CallGraph<StackElem, 0x10000, Index> CallGraphT;

/**
 * Collects (count, rendered-trace) pairs; streaming an Aggregator prints them
 * sorted by ascending count (see traceutil.cpp).
 */
class Aggregator
{
public:
    Aggregator();
    ~Aggregator();
    void push_back(size_t num, const string & s) { _map.push_back(Map::value_type(num, s)); }
    friend asciistream & operator << (asciistream & os, const Aggregator & v);
private:
    typedef std::vector< std::pair<size_t, string> > Map;
    Map _map;
};


/**
 * Visitor for CallGraphNode::traverseDepth/traverseWidth. Each node on the
 * current path is appended to _string; when a node with no callers is reached
 * the finished line (plus _endString) is pushed into the Aggregator, keyed by
 * the smallest count seen along the path. The noinline attributes keep these
 * frames from being folded into the (stack-sampling) callers.
 */
template<typename N>
class DumpGraph
{
public:
    DumpGraph(Aggregator * aggregator, const char * s="{ ", const char * end=" }") __attribute__ ((noinline));
    ~DumpGraph() __attribute__ ((noinline));
    void handle(const N & node) __attribute__ ((noinline));
private:
    string _string;            // line rendered so far for the current path
    string _endString;         // suffix appended when the path ends
    size_t _sum;               // running total of node counts
    size_t _min;               // smallest count seen on the path
    Aggregator * _aggregator;  // NOTE(review): dereferenced unchecked in handle() — confirm callers never pass NULL
};

asciistream & operator << (asciistream & os, const Aggregator & v);

template<typename N>
DumpGraph<N>::DumpGraph(Aggregator * aggregator, const char * start, const char * end) :
    _string(start),
    _endString(end),
    _sum(0),
    _min(-1),  // size_t(-1) == max value, so any real count becomes the minimum
    _aggregator(aggregator)
{
}

template<typename N>
DumpGraph<N>::~DumpGraph()
{
}

template<typename N>
void DumpGraph<N>::handle(const N & node)
{
    _sum += node.count();
    if (node.count() < _min) {
        _min = node.count();
    }
    asciistream os;
    os << ' ' << node;
    _string += os.c_str();
    if (node.callers() == NULL) {
        // End of the caller chain: emit the completed line.
        _string += _endString;
        _aggregator->push_back(_min, _string);
    }
}

}