diff options
author | Henning Baldersheim <balder@yahoo-inc.com> | 2022-05-12 15:15:06 +0000 |
---|---|---|
committer | Henning Baldersheim <balder@yahoo-inc.com> | 2022-05-13 10:59:35 +0000 |
commit | 84b48df485ad9af0c569c53aab7c78753efa5906 (patch) | |
tree | 9fb39e4a55747176547fdb40f7a8bf006de0d151 /vespamalloc | |
parent | 1df3b3c59251bd4fd1b099ae5cfb4c280313e76d (diff) |
Add experimental library for loading code into huge pages.
Diffstat (limited to 'vespamalloc')
-rw-r--r-- | vespamalloc/src/vespamalloc/CMakeLists.txt | 7 | ||||
-rw-r--r-- | vespamalloc/src/vespamalloc/malloc/CMakeLists.txt | 5 | ||||
-rw-r--r-- | vespamalloc/src/vespamalloc/malloc/load_as_huge.cpp | 129 |
3 files changed, 141 insertions, 0 deletions
diff --git a/vespamalloc/src/vespamalloc/CMakeLists.txt b/vespamalloc/src/vespamalloc/CMakeLists.txt index 8ee0f150fb1..2bbf9e59fec 100644 --- a/vespamalloc/src/vespamalloc/CMakeLists.txt +++ b/vespamalloc/src/vespamalloc/CMakeLists.txt @@ -42,3 +42,10 @@ vespa_add_library(vespammap DEPENDS dl ) +vespa_add_library(vespa_load_as_huge + SOURCES + $<TARGET_OBJECTS:vespamalloc_load_as_huge> + INSTALL lib64/vespa/malloc + DEPENDS + dl +) diff --git a/vespamalloc/src/vespamalloc/malloc/CMakeLists.txt b/vespamalloc/src/vespamalloc/malloc/CMakeLists.txt index 985cd9948ad..5f50cd97aee 100644 --- a/vespamalloc/src/vespamalloc/malloc/CMakeLists.txt +++ b/vespamalloc/src/vespamalloc/malloc/CMakeLists.txt @@ -69,3 +69,8 @@ vespa_add_library(vespamalloc_mmap OBJECT mmap.cpp DEPENDS ) +vespa_add_library(vespamalloc_load_as_huge OBJECT + SOURCES + load_as_huge.cpp + DEPENDS +) diff --git a/vespamalloc/src/vespamalloc/malloc/load_as_huge.cpp b/vespamalloc/src/vespamalloc/malloc/load_as_huge.cpp new file mode 100644 index 00000000000..d5e99b5c2dc --- /dev/null +++ b/vespamalloc/src/vespamalloc/malloc/load_as_huge.cpp @@ -0,0 +1,129 @@ +#include <cstdio> +#include <cerrno> +#include <cassert> +#include <cstring> +#include <link.h> +#include <sys/mman.h> +#include <unistd.h> + +/** + * This is experimental code that will map code segments in binary and dso into + * anonymous mappings prefering huge page mappings. 
+ */ +namespace { + +constexpr size_t HUGEPAGE_SIZE = 0x200000; + +void * +mmap_huge(size_t sz) { + assert ((sz % HUGEPAGE_SIZE) == 0); + void * mem = mmap(nullptr, sz, PROT_READ | PROT_WRITE, MAP_ANON | MAP_PRIVATE, -1, 0); + assert(mem != MAP_FAILED); + int retval = madvise(mem, sz, MADV_HUGEPAGE); + assert(retval == 0); + return mem; +} + +size_t round_huge_down(size_t v) { return v & ~(HUGEPAGE_SIZE - 1); } +size_t round_huge_up(size_t v) { return round_huge_down(v + (HUGEPAGE_SIZE - 1)); } + +void +non_optimized_non_inlined_memcpy(void *dest_in, const void *src_in, size_t n) __attribute__((noinline, optimize(1))) ; + +// Simple memcpy replacement to avoid calling code in other dso. +void +non_optimized_non_inlined_memcpy(void *dest_in, const void *src_in, size_t n) { + char *dest = static_cast<char *>(dest_in); + const char *src = static_cast<const char *>(src_in); + for (size_t i(0); i < n ; i++) { + dest[i] = src[i]; + } +} + +/** + * Make a large mapping if code is larger than HUGEPAGE_SIZE and copies the content of the various segments. + * Then remaps the areas back to its original location. 
+ */ +bool +remap_segments(size_t base_vaddr, const Elf64_Phdr * segments, size_t count) { + assert(count > 0); + const Elf64_Phdr & first = segments[0]; + const Elf64_Phdr & last = segments[count - 1]; + size_t start_vaddr = base_vaddr + first.p_vaddr; + size_t end_vaddr = base_vaddr + last.p_vaddr + last.p_memsz; + if (end_vaddr - start_vaddr < HUGEPAGE_SIZE) { + return false; + } + + size_t huge_start = round_huge_down(start_vaddr); + size_t huge_end = round_huge_up(end_vaddr); + size_t huge_size = huge_end - huge_start; + char * new_huge = static_cast<char *>(mmap_huge(huge_size)); + char * new_huge_end = new_huge + huge_size; + char * last_end = new_huge; + for (size_t i(0); i < count; i++) { + size_t vaddr = base_vaddr + segments[i].p_vaddr; + size_t huge_offset = vaddr - huge_start; + char * dest = new_huge + huge_offset; + assert(dest >= last_end); + if (dest > last_end) { + int munmap_retval = munmap(last_end, dest - last_end); + assert(munmap_retval == 0); + } + size_t sz = segments[i].p_memsz; + last_end = dest + sz; + + int madvise_retval = madvise(dest, sz, MADV_HUGEPAGE); + assert(madvise_retval == 0); + non_optimized_non_inlined_memcpy(dest, reinterpret_cast<void*>(vaddr), sz); + int prot = PROT_READ; + if (segments[i].p_flags & PF_X) prot|= PROT_EXEC; + if (segments[i].p_flags & PF_W) prot|= PROT_WRITE; + int mprotect_retval = mprotect(dest, sz, prot); + if (mprotect_retval != 0) { + fprintf(stderr, "mprotect(%p, %ld, %x) FAILED = %d, errno= %d = %s\n", dest, sz, prot, mprotect_retval, errno, strerror(errno)); + } + void * remapped = mremap(dest, sz, sz, MREMAP_FIXED | MREMAP_MAYMOVE, vaddr); + assert(remapped != MAP_FAILED); + assert(remapped == reinterpret_cast<void *>(vaddr)); + fprintf(stderr, "remapped dest=%p, size=%lu to %p\n", dest, sz, remapped); + } + assert(new_huge_end >= last_end); + if (new_huge_end > last_end) { + int munmap_retval = munmap(last_end, new_huge_end - last_end); + assert(munmap_retval); + } + return true; +} + +int 
+remapElfHeader(struct dl_phdr_info *info, size_t info_size, void *data) { + (void) info_size; + (void) data; + fprintf(stderr, "processing elf header '%s' with %d entries, start=%lx\n", + info->dlpi_name, info->dlpi_phnum, info->dlpi_addr); + for (int i = 0; i < info->dlpi_phnum; i++) { + const Elf64_Phdr &phdr = info->dlpi_phdr[i]; + //fprintf(stderr, "p_vaddr=%lx p_paddr=%lx, p_offset=%lx p_filesz=%lx, p_memsz=%lx, allign=%lu type=%d flags=%x\n", + // phdr.p_vaddr, phdr.p_paddr, phdr.p_offset, phdr.p_filesz, phdr.p_memsz, phdr.p_align, phdr.p_type, phdr.p_flags); + if ((phdr.p_type == PT_LOAD) && (phdr.p_flags == (PF_R | PF_X))) { + //void *vaddr = reinterpret_cast<void *>(info->dlpi_addr + phdr.p_vaddr); + //uint64_t size = phdr.p_filesz; + //fprintf(stderr, "LOAD_RX: vaddr=%lx p_filesz=%lu, p_memsz=%lu\n", phdr.p_vaddr, phdr.p_filesz, phdr.p_memsz); + remap_segments(info->dlpi_addr, &phdr, 1); + } + } + return 0; +} + +} + +extern "C" int remapTextWithHugePages(); + +int +remapTextWithHugePages() { + int retval = dl_iterate_phdr(remapElfHeader, nullptr); + fprintf(stderr, "dl_iterate_phdr() = %d\n", retval); + return retval; +} + +static long num_huge_code_pages = remapTextWithHugePages(); |