Skip to content

Commit

Permalink
Make: Inline ASM for detecting CPU features on ARM
Browse files Browse the repository at this point in the history
Closes #143
  • Loading branch information
GoWind authored Dec 7, 2024
1 parent 715ad10 commit 0ee549a
Show file tree
Hide file tree
Showing 2 changed files with 40 additions and 14 deletions.
49 changes: 38 additions & 11 deletions c/lib.c
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,43 @@ extern void *malloc(size_t length);
#endif
#endif

// On Apple Silicon, `mrs` is not allowed in user-space, so we need to use the `sysctl` API.
#if defined(__APPLE__) && defined(__MACH__)
#define SZ_APPLE 1
#include <sys/sysctl.h>
#endif

#if defined(__linux__)
#define SZ_LINUX 1
#endif

/**
 *  @brief  Detects, at run time, which SIMD capabilities the current Arm CPU offers.
 *
 *  The detection strategy depends on the host OS:
 *  - Apple:            queries `hw.optional.neon` through `sysctlbyname`.
 *  - Linux on AArch64: reads the `ID_AA64PFR0_EL1` system register with `mrs`.
 *  - Anything else:    reports only the serial backend.
 *
 *  @return Bitwise OR of `sz_cap_serial_k` with the `sz_cap_arm_*_k` flags that were detected.
 */
SZ_INTERNAL sz_capability_t sz_capabilities_arm(void) {
    // See
    // https://github.com/ashvardanian/SimSIMD/blob/28e536083602f85ad0c59456782c8864463ffb0e/include/simsimd/simsimd.h#L434
    // for documentation on how we detect capabilities across different ARM platforms.
#if defined(SZ_APPLE)

    // On Apple Silicon, `mrs` is not allowed in user-space, so we need to use the `sysctl` API.
    uint32_t supports_neon = 0;
    size_t size = sizeof(supports_neon);
    if (sysctlbyname("hw.optional.neon", &supports_neon, &size, NULL, 0) != 0) supports_neon = 0;

    // Normalize to 0/1 before multiplying, so an unexpected non-boolean value can't set unrelated bits.
    return (sz_capability_t)(                        //
        (sz_cap_arm_neon_k * (supports_neon != 0)) | //
        (sz_cap_serial_k));

#elif defined(SZ_LINUX) && defined(__aarch64__)

    // The `mrs` instruction below only exists on AArch64, hence the extra architecture guard:
    // `SZ_LINUX` alone is also defined when building for x86 Linux.
    // NOTE(review): user-space reads of ID registers rely on the kernel emulating them - confirm the
    // minimum supported kernel version for the targets we ship to.
    unsigned supports_neon = 1; // NEON is always supported on 64-bit Arm
    unsigned long long id_aa64pfr0_el1 = 0;
    __asm__ __volatile__("mrs %0, ID_AA64PFR0_EL1" : "=r"(id_aa64pfr0_el1));

    // Bits [35:32] of `ID_AA64PFR0_EL1` hold the SVE field; any non-zero value means SVE is implemented.
    unsigned supports_sve = ((id_aa64pfr0_el1 >> 32) & 0xF) >= 1;

    return (sz_capability_t)(                   //
        (sz_cap_arm_neon_k * (supports_neon)) | //
        (sz_cap_arm_sve_k * (supports_sve)) |   //
        (sz_cap_serial_k));

#else // Neither Apple nor Linux on AArch64

    return sz_cap_serial_k;

#endif
}

SZ_DYNAMIC sz_capability_t sz_capabilities(void) {

#if SZ_USE_X86_AVX512 || SZ_USE_X86_AVX2
Expand Down Expand Up @@ -96,22 +133,12 @@ SZ_DYNAMIC sz_capability_t sz_capabilities(void) {

#if SZ_USE_ARM_NEON || SZ_USE_ARM_SVE

// Every 64-bit Arm CPU supports NEON
unsigned supports_neon = 1;
unsigned supports_sve = 0;
unsigned supports_sve2 = 0;
sz_unused(supports_sve);
sz_unused(supports_sve2);

return (sz_capability_t)( //
(sz_cap_arm_neon_k * supports_neon) | //
(sz_cap_serial_k));
return sz_capabilities_arm();

#endif // SIMSIMD_TARGET_ARM

return sz_cap_serial_k;
}

typedef struct sz_implementations_t {
sz_equal_t equal;
sz_order_t order;
Expand Down
5 changes: 2 additions & 3 deletions include/stringzilla/stringzilla.h
Original file line number Diff line number Diff line change
Expand Up @@ -260,16 +260,15 @@ typedef enum sz_capability_t {

sz_cap_arm_neon_k = 1 << 10, /// ARM NEON capability
sz_cap_arm_sve_k = 1 << 11, /// ARM SVE capability TODO: Not yet supported or used

sz_cap_arm_sve2_k = 1 << 12,
sz_cap_arm_sve2p1_k = 1 << 13,
sz_cap_x86_avx2_k = 1 << 20, /// x86 AVX2 capability
sz_cap_x86_avx512f_k = 1 << 21, /// x86 AVX512 F capability
sz_cap_x86_avx512bw_k = 1 << 22, /// x86 AVX512 BW instruction capability
sz_cap_x86_avx512vl_k = 1 << 23, /// x86 AVX512 VL instruction capability
sz_cap_x86_avx512vbmi_k = 1 << 24, /// x86 AVX512 VBMI instruction capability
sz_cap_x86_gfni_k = 1 << 25, /// x86 AVX512 GFNI instruction capability

sz_cap_x86_avx512vbmi2_k = 1 << 26, /// x86 AVX512 VBMI 2 instruction capability

} sz_capability_t;

/**
Expand Down

0 comments on commit 0ee549a

Please sign in to comment.