diff options
author | Henning Baldersheim <balder@yahoo-inc.com> | 2023-01-10 11:28:48 +0000 |
---|---|---|
committer | Henning Baldersheim <balder@yahoo-inc.com> | 2023-01-10 16:14:24 +0000 |
commit | f5d18ea3a6dcbe815986cc9413eb93b6d0804ab1 (patch) | |
tree | b389b7bacdf3c6c04ffe8619913469fae41e31eb /vespalib | |
parent | 1402b8c5ac7cf608bcbdabb46873d6a3308ab69d (diff) |
Add a stress test for verifying that memory does not change under your feet.
It both verifies heap memory and stresses file-backed, swap-like memory.
Diffstat (limited to 'vespalib')
5 files changed, 349 insertions, 3 deletions
diff --git a/vespalib/CMakeLists.txt b/vespalib/CMakeLists.txt
index a3d5054973f..2ceb56bf226 100644
--- a/vespalib/CMakeLists.txt
+++ b/vespalib/CMakeLists.txt
@@ -21,6 +21,7 @@ vespa_define_module(
     src/apps/vespa-drop-file-from-cache
     src/apps/vespa-probe-io-uring
     src/apps/vespa-resource-limits
+    src/apps/vespa-stress-and-validate-memory
     src/apps/vespa-tsan-digest
     src/apps/vespa-validate-hostname
 
diff --git a/vespalib/src/apps/vespa-stress-and-validate-memory/.gitignore b/vespalib/src/apps/vespa-stress-and-validate-memory/.gitignore
new file mode 100644
index 00000000000..77cf05d77d5
--- /dev/null
+++ b/vespalib/src/apps/vespa-stress-and-validate-memory/.gitignore
@@ -0,0 +1 @@
+vespa-stress-and-validate-memory
diff --git a/vespalib/src/apps/vespa-stress-and-validate-memory/CMakeLists.txt b/vespalib/src/apps/vespa-stress-and-validate-memory/CMakeLists.txt
new file mode 100644
index 00000000000..17ea9d709df
--- /dev/null
+++ b/vespalib/src/apps/vespa-stress-and-validate-memory/CMakeLists.txt
@@ -0,0 +1,9 @@
+# Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+vespa_add_executable(vespalib_stress-and-validate-memory_app
+    SOURCES
+    stress_and_validate_memory.cpp
+    OUTPUT_NAME vespa-stress-and-validate-memory
+    INSTALL bin
+    DEPENDS
+    vespalib
+)
diff --git a/vespalib/src/apps/vespa-stress-and-validate-memory/stress_and_validate_memory.cpp b/vespalib/src/apps/vespa-stress-and-validate-memory/stress_and_validate_memory.cpp
new file mode 100644
index 00000000000..a1e7bbd040f
--- /dev/null
+++ b/vespalib/src/apps/vespa-stress-and-validate-memory/stress_and_validate_memory.cpp
@@ -0,0 +1,335 @@
+// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/vespalib/util/mmap_file_allocator.h>
+#include <vespa/vespalib/util/size_literals.h>
+#include <vespa/vespalib/util/time.h>
+#include <atomic>
+#include <cstdlib>
+#include <cstring>
+#include <filesystem>
+#include <iostream>
+#include <memory>
+#include <mutex>
+#include <string>
+#include <thread>
+#include <utility>
+#include <vector>
+
+// Set by main() once the run time has elapsed; polled by every worker thread.
+std::atomic<bool> stopped = false;
+// Keeps the multi-part std::cout messages of the worker threads from interleaving.
+std::mutex log_mutex;
+using namespace vespalib;
+
+const char * description =
+    "Runs stress test of memory by slowly growing a heap filled with 0.\n"
+    "Each core on the node will then continuously read back and verify random memory sections still being zero.\n"
+    "-h heap_in_GB(1) and -t run_time_in_seconds(10) are the options available.\n"
+    "Memory will grow slowly during the first half of the test and then stay put.\n"
+    "There is also the option to include stress testing of swap files by using -s <directory>.\n"
+    "The swap will grow to twice the heap size in the same manner.\n"
+    "Swap memory is stressed by constant random writing from all cores.\n";
+
+/**
+ * Immutable sizing parameters for one stress target.
+ * nprocs and allocs_per_thread must both be non-zero, as alloc_size() divides by them.
+ */
+class Config {
+public:
+    Config(size_t heap_size, size_t nprocs, size_t allocs_per_thread, duration alloc_time)
+        : _heap_size(heap_size),
+          _nprocs(nprocs),
+          _allocs_per_thread(allocs_per_thread),
+          _alloc_time(alloc_time)
+    {}
+    size_t allocs_per_thread() const { return _allocs_per_thread; }
+    // Time span over which each thread ramps up to allocs_per_thread() allocations.
+    duration alloc_time() const { return _alloc_time; }
+    // Size in bytes of one allocation: the heap split evenly over all threads and their allocations.
+    size_t alloc_size() const { return _heap_size / _nprocs / _allocs_per_thread; }
+    size_t nprocs() const { return _nprocs; }
+    size_t heap_size() const { return _heap_size; }
+private:
+    const size_t _heap_size;
+    const size_t _nprocs;
+    const size_t _allocs_per_thread;
+    const duration _alloc_time;
+};
+
+/**
+ * Zero-filled heap allocations shared by all heap validator threads.
+ * The allocation list and the error counter are guarded by an internal mutex.
+ */
+class Allocations {
+public:
+    Allocations(const Config & config);
+    ~Allocations();
+    // Allocates and zero-fills one more block. Returns the number of blocks added (always 1).
+    size_t make_and_load_alloc_per_thread();
+    // Scans one randomly chosen block and returns the number of non-zero bytes found in it.
+    // Requires that at least one block has been allocated.
+    size_t verify_random_allocation() const;
+    const Config & cfg() const { return _cfg; }
+    size_t num_errors() const {
+        std::lock_guard guard(_mutex);
+        return _total_errors;
+    }
+private:
+    const Config & _cfg;
+    mutable std::mutex _mutex;
+    mutable size_t _total_errors;
+    std::vector<std::unique_ptr<char[]>> _allocations;
+};
+
+Allocations::Allocations(const Config & config)
+    : _cfg(config),
+      _mutex(),
+      _total_errors(0),
+      _allocations()
+{
+    _allocations.reserve(config.nprocs() * config.allocs_per_thread());
+    std::cout << "Starting memory stress with " << config.nprocs() << " threads and heap size " << (config.heap_size()/1_Mi) << " mb. Allocation size = " << config.alloc_size() << std::endl;
+}
+
+Allocations::~Allocations() = default;
+
+size_t
+Allocations::make_and_load_alloc_per_thread() {
+    auto alloc = std::make_unique<char[]>(cfg().alloc_size());
+    // Filled outside the lock so that threads can load their blocks in parallel.
+    memset(alloc.get(), 0, cfg().alloc_size());
+    std::lock_guard guard(_mutex);
+    _allocations.push_back(std::move(alloc));
+    return 1;
+}
+
+size_t
+Allocations::verify_random_allocation() const {
+    const char * alloc;
+    {
+        // Only the pick needs the lock: blocks are never removed, so the pointer stays valid.
+        std::lock_guard guard(_mutex);
+        alloc = _allocations[std::rand() % _allocations.size()].get();
+    }
+    size_t error_count = 0;
+    for (size_t i = 0; i < cfg().alloc_size(); i++) {
+        if (alloc[i] != 0) {
+            error_count++;
+            std::lock_guard guard(log_mutex);
+            // The byte is printed unsigned and the stream is reset to decimal, as std::hex is sticky.
+            // The address is cast to void* so that it is printed as a pointer and not as a C string.
+            std::cout << "Thread " << std::this_thread::get_id() << ": Unexpected byte(" << std::hex
+                      << static_cast<unsigned int>(static_cast<unsigned char>(alloc[i])) << std::dec
+                      << ") at " << static_cast<const void *>(alloc + i) << std::endl;
+        }
+    }
+    std::lock_guard guard(_mutex);
+    _total_errors += error_count;
+    return error_count;
+}
+
+/**
+ * Allocations backed by files in a directory, obtained through alloc::MmapFileAllocator.
+ * Calls into the allocator and access to the allocation list are serialized by an internal mutex.
+ */
+class FileBackedMemory {
+public:
+    FileBackedMemory(const Config & config, std::string dir);
+    ~FileBackedMemory();
+    const Config & cfg() const { return _cfg; }
+    // Allocates and zero-fills one more block. Returns the number of blocks added (always 1).
+    size_t make_and_load_alloc_per_thread();
+    // Overwrites one randomly chosen block with a random byte value.
+    // Requires that at least one block has been allocated.
+    void random_write();
+private:
+    using PtrAndSize = std::pair<void *, size_t>;
+    const Config & _cfg;
+    mutable std::mutex _mutex;
+    alloc::MmapFileAllocator _allocator;
+    std::vector<PtrAndSize> _allocations;
+};
+
+FileBackedMemory::FileBackedMemory(const Config & config, std::string dir)
+    : _cfg(config),
+      _mutex(),
+      _allocator(dir),
+      _allocations()
+{
+    _allocations.reserve(config.nprocs() * config.allocs_per_thread());
+    std::cout << "Starting mmapped stress in '" << dir << "' with " << config.nprocs() << " threads and heap size " << (config.heap_size()/1_Mi) << " mb. Allocation size = " << config.alloc_size() << std::endl;
+}
+
+FileBackedMemory::~FileBackedMemory() {
+    std::lock_guard guard(_mutex);
+    // The allocator must not be destructed before all its allocations have been freed.
+    for (auto ptrAndSize : _allocations) {
+        _allocator.free(ptrAndSize);
+    }
+}
+
+size_t
+FileBackedMemory::make_and_load_alloc_per_thread() {
+    PtrAndSize alloc;
+    {
+        // MmapFileAllocator is not thread safe, so it is only called with the lock held.
+        std::lock_guard guard(_mutex);
+        alloc = _allocator.alloc(cfg().alloc_size());
+    }
+    memset(alloc.first, 0, cfg().alloc_size());
+    std::lock_guard guard(_mutex);
+    _allocations.push_back(std::move(alloc));
+    return 1;
+}
+
+void
+FileBackedMemory::random_write() {
+    PtrAndSize ptrAndSize;
+    {
+        std::lock_guard guard(_mutex);
+        ptrAndSize = _allocations[std::rand() % _allocations.size()];
+    }
+    memset(ptrAndSize.first, std::rand()%256, ptrAndSize.second);
+}
+
+/**
+ * Thread body for heap validation: verifies random blocks until stopped, while ramping the
+ * number of blocks made by this thread up to allocs_per_thread() over alloc_time().
+ */
+void
+stress_and_validate_heap(Allocations *allocs) {
+    size_t num_verifications = 0;
+    size_t num_errors = 0;
+    // The first block is made up front so that there is always something to verify.
+    size_t num_allocs = allocs->make_and_load_alloc_per_thread();
+    const size_t max_allocs = allocs->cfg().allocs_per_thread();
+    const double alloc_time = to_s(allocs->cfg().alloc_time());
+    steady_time start = steady_clock::now();
+    for (;!stopped; num_verifications++) {
+        num_errors += allocs->verify_random_allocation();
+        // Elapsed fraction of the ramp-up period. The comparison is written so that anything not
+        // below 1, including inf and NaN from a zero alloc_time, selects max_allocs.
+        const double ratio = to_s(steady_clock::now() - start) / alloc_time;
+        const size_t target_allocs = (ratio < 1.0) ? size_t(ratio*max_allocs) : max_allocs;
+        if (num_allocs < target_allocs) {
+            num_allocs += allocs->make_and_load_alloc_per_thread();
+        }
+    }
+    std::lock_guard guard(log_mutex);
+    std::cout << "Thread " << std::this_thread::get_id() << ": Completed " << num_verifications << " verifications with " << num_errors << " errors" << std::endl;
+}
+
+/**
+ * Thread body for file backed stress: overwrites random blocks until stopped, while ramping the
+ * number of blocks made by this thread up to allocs_per_thread() over alloc_time().
+ */
+void
+stress_file_backed_memory(FileBackedMemory * mmapped) {
+    size_t num_writes = 0;
+    // The first block is made up front so that there is always something to write to.
+    size_t num_allocs = mmapped->make_and_load_alloc_per_thread();
+    const size_t max_allocs = mmapped->cfg().allocs_per_thread();
+    const double alloc_time = to_s(mmapped->cfg().alloc_time());
+    steady_time start = steady_clock::now();
+    for (;!stopped; num_writes++) {
+        mmapped->random_write();
+        // Same clamped ramp-up computation as in stress_and_validate_heap().
+        const double ratio = to_s(steady_clock::now() - start) / alloc_time;
+        const size_t target_allocs = (ratio < 1.0) ? size_t(ratio*max_allocs) : max_allocs;
+        if (num_allocs < target_allocs) {
+            num_allocs += mmapped->make_and_load_alloc_per_thread();
+        }
+    }
+    std::lock_guard guard(log_mutex);
+    std::cout << "Thread " << std::this_thread::get_id() << ": Completed " << num_writes << " writes" << std::endl;
+}
+
+/**
+ * Parses the options, runs the stress threads for the requested time and reports the error count.
+ * Returns 1 on invalid option values and 0 otherwise.
+ */
+int
+main(int argc, char *argv[]) {
+    size_t heapSize = 1_Gi;
+    duration runTime = 10s;
+    std::string swap_dir;
+    std::cout << description << std::endl;
+    // Options come as "<option> <value>" pairs; an option is identified by its last character.
+    for (int i = 1; i+2 <= argc; i+=2) {
+        const size_t option_len = strlen(argv[i]);
+        // An empty argument has no last character; map it to a character outside the allowed set.
+        const char option = (option_len > 0) ? argv[i][option_len - 1] : '?';
+        char *arg = argv[i+1];
+        switch (option) {
+        case 'h': {
+            const double heap_gb = atof(arg);
+            // Converting a negative double to size_t is undefined, and a zero heap gives nothing to test.
+            if ( ! (heap_gb > 0.0)) {
+                std::cerr << "Heap size must be a positive number of GB, got '" << arg << "'" << std::endl;
+                return 1;
+            }
+            heapSize = static_cast<size_t>(heap_gb * 1_Gi);
+            break;
+        }
+        case 's':
+            swap_dir = arg;
+            break;
+        case 't': {
+            const double run_time_s = atof(arg);
+            if ( ! (run_time_s > 0.0)) {
+                std::cerr << "Run time must be a positive number of seconds, got '" << arg << "'" << std::endl;
+                return 1;
+            }
+            runTime = from_s(run_time_s);
+            break;
+        }
+        default:
+            std::cerr << "Option " << option << " not in allowed set [h,s,t]" << std::endl;
+            break;
+        }
+    }
+    if ((argc - 1) % 2 != 0) {
+        std::cerr << "Ignoring last argument '" << argv[argc - 1] << "' as it has no value" << std::endl;
+    }
+    size_t nprocs = std::thread::hardware_concurrency();
+    if (nprocs == 0) {
+        // hardware_concurrency() returns 0 when the value is not computable, and Config divides by nprocs.
+        nprocs = 1;
+    }
+    size_t allocations_per_thread = 1024;
+
+    Config cfgHeap(heapSize, nprocs, allocations_per_thread, runTime/2);
+    Config cfgFile(heapSize*2, nprocs, allocations_per_thread, runTime/2);
+    if (cfgHeap.alloc_size() == 0) {
+        std::cerr << "Heap size of " << heapSize << " bytes is too small to split into " << (nprocs * allocations_per_thread) << " allocations" << std::endl;
+        return 1;
+    }
+    Allocations allocations(cfgHeap);
+    std::unique_ptr<FileBackedMemory> filebackedMemory;
+
+    // Holds the heap validators and, when a swap directory is given, also the file backed writers.
+    std::vector<std::thread> heapValidators;
+    heapValidators.reserve(nprocs*2);
+    for (size_t i = 0; i < nprocs; i++) {
+        heapValidators.emplace_back(stress_and_validate_heap, &allocations);
+    }
+    if ( ! swap_dir.empty()) {
+        std::filesystem::create_directories(swap_dir);
+        filebackedMemory = std::make_unique<FileBackedMemory>(cfgFile, swap_dir);
+        for (size_t i = 0; i < nprocs; i++) {
+            heapValidators.emplace_back(stress_file_backed_memory, filebackedMemory.get());
+        }
+    }
+    std::cout << "Running memory stresstest for " << to_s(runTime) << " seconds" << std::endl;
+    steady_time eot = steady_clock::now() + runTime;
+    while (steady_clock::now() < eot) {
+        std::this_thread::sleep_for(1s);
+    }
+    stopped = true;
+    for (auto & th : heapValidators) {
+        th.join();
+    }
+    heapValidators.clear();
+    std::cout << "Completed stresstest with " << allocations.num_errors() << " errors" << std::endl;
+    return 0;
+}
diff --git a/vespalib/src/vespa/vespalib/util/mmap_file_allocator.h b/vespalib/src/vespa/vespalib/util/mmap_file_allocator.h
index 3b7b0039fab..0a83bfb4e60 100644
--- a/vespalib/src/vespa/vespalib/util/mmap_file_allocator.h
+++ b/vespalib/src/vespa/vespalib/util/mmap_file_allocator.h
@@ -13,7 +13,7 @@ namespace vespalib::alloc {
 
 /*
  * Class handling memory allocations backed by one or more files.
- * Not reentant. Should not be destructed before all allocations
+ * Not reentrant or thread safe. Should not be destructed before all allocations
  * have been freed.
  */
 class MmapFileAllocator : public MemoryAllocator {
@@ -30,8 +30,8 @@ class MmapFileAllocator : public MemoryAllocator {
         {
         }
     };
-    vespalib::string _dir_name;
-    mutable File     _file;
+    const vespalib::string _dir_name;
+    mutable File           _file;
     mutable uint64_t _end_offset;
     mutable hash_map<void *, SizeAndOffset> _allocations;
     mutable FileAreaFreeList _freelist;