From f58d34fe284e063a3dee5e4bfc7f7be47b9551ab Mon Sep 17 00:00:00 2001 From: Jim Huang Date: Sun, 20 Jun 2021 21:05:11 +0800 Subject: [PATCH] Detect L1 cache size at compile time Common cache line sizes are 32, 64 and 128 bytes. On x86_64 the standard cache line size is 64B. Even though this is not architecturally required, all x86_64 implementations stick to it. Some AArch64 processors also follow the x86_64 style with 64B cache lines. However, on Apple M1 devices, the underlying hardware uses a 128B cache line size. Quote from Apple Developer documentation [1]: "Some features of Apple silicon are decidedly different than those of Intel-based Mac computers, and may impact your code if you don't fetch them dynamically. These features include: * Cache line sizes are different. Fetch the hw.cachelinesize setting using sysctl." M1 cache lines are twice the size commonly used by x86_64 and other Arm implementations. The cache line sizes for Arm depend on implementations, not architectures. For example, TI AM57x (Cortex-A15) uses a 64B cache line while TI AM437x (Cortex-A9) uses a 32B cache line. There are even Arm implementations with cache line sizes configurable at boot time. This patch attempts to detect the L1 cache line size at compile time. For AArch64 hosts, the build process collects system information and determines the L1 cache line size. At present, both macOS and Linux are supported. For Arm targets, the software packages are usually cross-compiled, and developers should specify the appropriate MI_CACHE_LINE setting in advance. 64B is the default cache line size if none of the above is able to set it. 
[1] https://developer.apple.com/documentation/apple-silicon/addressing-architectural-differences-in-your-macos-code
---
 CMakeLists.txt              | 32 ++++++++++++++++++++++++++++++++
 include/mimalloc-internal.h | 28 +++++++++++++++++++++++++++-
 2 files changed, 59 insertions(+), 1 deletion(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index b56953c43..69bd80398 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -203,6 +203,38 @@ else()
   endif()
 endif()
 
+# -----------------------------------------------------------------------------
+# Cache line size detection
+#
+# On native AArch64 builds, ask the build host for its L1 data cache line size
+# and append it to mi_defines as MI_CACHE_LINE. Cross builds skip the probe
+# because the host's value says nothing about the target.
+# -----------------------------------------------------------------------------
+if (CMAKE_CROSSCOMPILING)
+  message(STATUS "Cross-compiling - cache line size detection disabled")
+else()
+  if (CMAKE_SYSTEM_PROCESSOR MATCHES "^(aarch64.*|AARCH64.*|arm64.*)")
+    if (APPLE)
+      execute_process(COMMAND sysctl -n hw.cachelinesize
+                      OUTPUT_VARIABLE L1_DCACHE_LINE_SIZE
+                      OUTPUT_STRIP_TRAILING_WHITESPACE
+                      ERROR_QUIET)
+    else()
+      if (CMAKE_SYSTEM_NAME STREQUAL "Linux")
+        execute_process(COMMAND getconf LEVEL1_DCACHE_LINESIZE
+                        OUTPUT_VARIABLE L1_DCACHE_LINE_SIZE
+                        OUTPUT_STRIP_TRAILING_WHITESPACE
+                        ERROR_QUIET)
+      endif()
+    endif()
+  endif()
+endif()
+# Only forward a positive integer: the probe may produce an empty string, "0"
+# or non-numeric text, none of which may end up in a -DMI_CACHE_LINE=... flag.
+if (L1_DCACHE_LINE_SIZE MATCHES "^[1-9][0-9]*$")
+  list(APPEND mi_defines MI_CACHE_LINE=${L1_DCACHE_LINE_SIZE})
+endif()
+
 # -----------------------------------------------------------------------------
 # Install and output names
 # -----------------------------------------------------------------------------
diff --git a/include/mimalloc-internal.h b/include/mimalloc-internal.h
index 1e1a79665..f5d7e59a7 100644
--- a/include/mimalloc-internal.h
+++ b/include/mimalloc-internal.h
@@ -16,7 +16,33 @@ terms of the MIT license. A copy of the license can be found in the file
 #define mi_trace_message(...)
 #endif
 
-#define MI_CACHE_LINE 64
+// Determine system L1 cache line size at compile time for purposes of alignment.
+// A definition supplied from outside (e.g. -DMI_CACHE_LINE=128) takes precedence.
+#ifndef MI_CACHE_LINE
+#if defined(__i386__) || defined(__x86_64__)
+#define MI_CACHE_LINE 64
+#elif defined(__aarch64__)
+// FIXME: read special register ctr_el0 to get L1 dcache size.
+#define MI_CACHE_LINE 64
+#elif defined(__arm__)
+// The cache line sizes for Arm depend on implementations, not architectures.
+// There are even implementations with cache line sizes configurable at boot time.
+// The architecture version is tested through the ACLE macro __ARM_ARCH; when
+// the compiler does not provide it, the conservative 32-byte branch is used.
+#if defined(__ARM_ARCH) && (__ARM_ARCH == 7)
+#define MI_CACHE_LINE 64
+#else
+// TODO: list known Arm implementations
+#define MI_CACHE_LINE 32
+#endif
+#endif
+#endif
+
+#ifndef MI_CACHE_LINE
+// A reasonable default value
+#define MI_CACHE_LINE 64
+#endif
+
 #if defined(_MSC_VER)
 #pragma warning(disable:4127) // suppress constant conditional warning (due to MI_SECURE paths)
 #define mi_decl_noinline __declspec(noinline)