From a92dc25fb3a257ad418da2d1227b092db49bc2b4 Mon Sep 17 00:00:00 2001 From: Chip-Kerchner Date: Fri, 29 Sep 2023 08:08:29 -0500 Subject: [PATCH 01/21] Fix Makefile.power for xlf --- Makefile.power | 4 ++++ f_check | 3 +++ 2 files changed, 7 insertions(+) diff --git a/Makefile.power b/Makefile.power index 33702c9326..46afb2d4aa 100644 --- a/Makefile.power +++ b/Makefile.power @@ -70,8 +70,12 @@ else FCOMMON_OPT += -O1 -frecursive -mcpu=power8 -mtune=power8 -fno-fast-math endif else +ifeq ($(F_COMPILER), IBM) +FCOMMON_OPT += -O2 -qrecur -qnosave +else FCOMMON_OPT += -O2 -frecursive -mcpu=power8 -mtune=power8 -fno-fast-math endif +endif else FCOMMON_OPT += -O2 -Mrecursive endif diff --git a/f_check b/f_check index f30231bc44..31f4376d0d 100755 --- a/f_check +++ b/f_check @@ -117,6 +117,9 @@ else vendor=PGI openmp='-mp' ;; + *xlf*) + vendor=IBM + ;; *) vendor=G77 openmp='' From a69367c43b28e2e0029d42092e791415565fe804 Mon Sep 17 00:00:00 2001 From: Chip-Kerchner Date: Fri, 29 Sep 2023 09:29:41 -0500 Subject: [PATCH 02/21] Fix Makefile.system for OpenXL. --- Makefile.system | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Makefile.system b/Makefile.system index ae6db40b0e..af840f029a 100644 --- a/Makefile.system +++ b/Makefile.system @@ -1170,6 +1170,8 @@ CCOMMON_OPT += -DF_INTERFACE_IBM FEXTRALIB += -lxlf90 ifeq ($(C_COMPILER), GCC) FCOMMON_OPT += -qextname +else ifeq ($(C_COMPILER), CLANG) +FCOMMON_OPT += -qextname endif # FCOMMON_OPT += -qarch=440 ifdef BINARY64 From 3b1150fcee164922ed932c7d46b28a8ffec744a8 Mon Sep 17 00:00:00 2001 From: Chip-Kerchner Date: Mon, 2 Oct 2023 12:00:48 -0500 Subject: [PATCH 03/21] Fix CPU identification to work on AIX. --- driver/others/dynamic_power.c | 216 +++++++++------------------------- 1 file changed, 58 insertions(+), 158 deletions(-) diff --git a/driver/others/dynamic_power.c b/driver/others/dynamic_power.c index 2847ea9ae5..7f8bfd5b98 100644 --- a/driver/others/dynamic_power.c +++ b/driver/others/dynamic_power.c @@ -20,12 +20,10 @@ static char *corename[] = { "POWER10" }; -#define NUM_CORETYPES 4 +#define NUM_CORETYPES 5 char *gotoblas_corename(void) { -#ifndef C_PGI if (gotoblas == &gotoblas_POWER6) return corename[1]; -#endif if (gotoblas == &gotoblas_POWER8) return corename[2]; #if (!defined __GNUC__) || ( __GNUC__ >= 6) if (gotoblas == &gotoblas_POWER9) return corename[3]; @@ -36,177 +34,81 @@ char *gotoblas_corename(void) { return corename[0]; } -#if defined(__clang__) -static int __builtin_cpu_supports(char* arg) -{ - return 0; -} -#endif - -#if defined(C_PGI) || defined(__clang__) -/* - * NV HPC compilers do not yet implement __builtin_cpu_is(). - * Fake a version here for use in the CPU detection code below. - * - * Strategy here is to first check the CPU to see what it actually is, - * and then test the input to see if what the CPU actually is matches - * what was requested. - */ +#ifdef _AIX +#include -#include - -/* - * Define POWER processor version table. - * - * NOTE NV HPC SDK compilers only support POWER8 and POWER9 at this time - */ - -#define CPU_UNKNOWN 0 -#define CPU_POWER5 5 -#define CPU_POWER6 6 -#define CPU_POWER8 8 -#define CPU_POWER9 9 +#define CPU_UNKNOWN 0 +#define CPU_POWER6 6 +#define CPU_POWER7 7 +#define CPU_POWER8 8 +#define CPU_POWER9 9 #define CPU_POWER10 10 -static struct { - uint32_t pvr_mask; - uint32_t pvr_value; - const char* cpu_name; - uint32_t cpu_type; -} pvrPOWER [] = { - - { /* POWER6 in P5+ mode; 2.04-compliant processor */ - .pvr_mask = 0xffffffff, - .pvr_value = 0x0f000001, - .cpu_name = "POWER5+", - .cpu_type = CPU_POWER5, - }, - - { /* Power6 aka POWER6X*/ - .pvr_mask = 0xffff0000, - .pvr_value = 0x003e0000, - .cpu_name = "POWER6 (raw)", - .cpu_type = CPU_POWER6, - }, - - { /* Power7 */ - .pvr_mask = 0xffff0000, - .pvr_value = 0x003f0000, - .cpu_name = "POWER7 (raw)", - .cpu_type = CPU_POWER6, - }, - - { /* Power7+ */ - .pvr_mask = 0xffff0000, - .pvr_value = 0x004A0000, - .cpu_name = "POWER7+ (raw)", - .cpu_type = CPU_POWER6, - }, - - { /* Power8E */ - .pvr_mask = 0xffff0000, - .pvr_value = 0x004b0000, - .cpu_name = "POWER8E (raw)", - .cpu_type = CPU_POWER8, - }, - - { /* Power8NVL */ - .pvr_mask = 0xffff0000, - .pvr_value = 0x004c0000, - .cpu_name = "POWER8NVL (raw)", - .cpu_type = CPU_POWER8, - }, - - { /* Power8 */ - .pvr_mask = 0xffff0000, - .pvr_value = 0x004d0000, - .cpu_name = "POWER8 (raw)", - .cpu_type = CPU_POWER8, - }, - - { /* Power9 DD2.0 */ - .pvr_mask = 0xffffefff, - .pvr_value = 0x004e0200, - .cpu_name = "POWER9 (raw)", - .cpu_type = CPU_POWER9, - }, - - { /* Power9 DD 2.1 */ - .pvr_mask = 0xffffefff, - .pvr_value = 0x004e0201, - .cpu_name = "POWER9 (raw)", - .cpu_type = CPU_POWER9, - }, - - { /* Power9 DD2.2 or later */ - .pvr_mask = 0xffff0000, - .pvr_value = 0x004e0000, - .cpu_name = "POWER9 (raw)", - .cpu_type = CPU_POWER9, - }, - - { /* Power10 */ - .pvr_mask = 0xffff0000, - .pvr_value = 0x00800000, - .cpu_name = "POWER10 (raw)", - .cpu_type = CPU_POWER10, - }, - - { /* End of table, pvr_mask and pvr_value must be zero */ - .pvr_mask = 0x0, - .pvr_value = 0x0, - .cpu_name = "Unknown", - .cpu_type = CPU_UNKNOWN, - }, -}; - -static int __builtin_cpu_is(const char *cpu) { - int i; - uint32_t pvr; - uint32_t cpu_type; - - asm("mfpvr %0" : "=r"(pvr)); - - for (i = 0 ; i < sizeof pvrPOWER / sizeof *pvrPOWER ; ++i) { - if ((pvr & pvrPOWER[i].pvr_mask) == pvrPOWER[i].pvr_value) { - break; - } - } - -#if defined(DEBUG) - printf("%s: returning CPU=%s, cpu_type=%p\n", __func__, - pvrPOWER[i].cpu_name, pvrPOWER[i].cpu_type); +int cpuid() +{ + int arch = _system_configuration.implementation; +#ifdef POWER_6 + if (arch == POWER_6) return CPU_POWER6; #endif - cpu_type = pvrPOWER[i].cpu_type; - - if (!strcmp(cpu, "power8")) - return cpu_type == CPU_POWER8; - if (!strcmp(cpu, "power9")) - return cpu_type == CPU_POWER9; - return 0; +#ifdef POWER_7 + else if (arch == POWER_7) return CPU_POWER7; +#endif +#ifdef POWER_8 + else if (arch == POWER_8) return CPU_POWER8; +#endif +#ifdef POWER_9 + else if (arch == POWER_9) return CPU_POWER9; +#endif +#ifdef POWER_10 + else if (arch == POWER_10) return CPU_POWER10; +#endif + return CPU_UNKNOWN; } -#endif /* C_PGI */ +#ifndef __BUILTIN_CPU_SUPPORTS__ +static int __builtin_cpu_supports(char* arg) +{ + static int ipinfo = -1; + if (ipinfo < 0) { + ipinfo = cpuid(); + } + if (ipinfo >= CPU_POWER10) { + if (!strcmp(arg, "power10")) return 1; + } + if (ipinfo >= CPU_POWER9) { + if (!strcmp(arg, "power9")) return 1; + } + if (ipinfo >= CPU_POWER8) { + if (!strcmp(arg, "power8")) return 1; + } + if (ipinfo >= CPU_POWER6) { + if (!strcmp(arg, "power6")) return 1; + } + return 0; +} +#endif static gotoblas_t *get_coretype(void) { -#ifndef C_PGI - if (__builtin_cpu_is("power6") || __builtin_cpu_is("power6x")) + if (__builtin_cpu_supports("power6")) return &gotoblas_POWER6; -#endif - if (__builtin_cpu_is("power8")) + if (__builtin_cpu_supports("power8")) return &gotoblas_POWER8; #if (!defined __GNUC__) || ( __GNUC__ >= 6) - if (__builtin_cpu_is("power9")) + if (__builtin_cpu_supports("power9")) return &gotoblas_POWER9; #endif #ifdef HAVE_P10_SUPPORT - if (__builtin_cpu_supports ("arch_3_1") && __builtin_cpu_supports ("mma")) +#ifdef _AIX + if (__builtin_cpu_supports("power10")) +#else + if (__builtin_cpu_supports("arch_3_1") && __builtin_cpu_supports("mma")) +#endif return &gotoblas_POWER10; #endif /* Fall back to the POWER9 implementation if the toolchain is too old or the MMA feature is not set */ -#if (!defined __GNUC__) || ( __GNUC__ >= 11) || (__GNUC__ == 10 && __GNUC_MINOR__ >= 2) - if (__builtin_cpu_is("power10")) +#if (!defined __GNUC__) || ( __GNUC__ < 11) || (__GNUC__ == 10 && __GNUC_MINOR__ < 2) + if (__builtin_cpu_supports("power10")) return &gotoblas_POWER9; #endif return NULL; @@ -229,9 +131,7 @@ static gotoblas_t *force_coretype(char * coretype) { switch (found) { -#ifndef C_PGI case 1: return (&gotoblas_POWER6); -#endif case 2: return (&gotoblas_POWER8); #if (!defined __GNUC__) || ( __GNUC__ >= 6) case 3: return (&gotoblas_POWER9); From eb738d99293dc658bd6941cc6c2b76cd6ece0c11 Mon Sep 17 00:00:00 2001 From: Chip-Kerchner Date: Mon, 2 Oct 2023 12:14:46 -0500 Subject: [PATCH 04/21] Minor changes. --- Makefile.system | 2 -- driver/others/dynamic_power.c | 2 +- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/Makefile.system b/Makefile.system index 1fd47e68ea..b1a357fdf2 100644 --- a/Makefile.system +++ b/Makefile.system @@ -1170,8 +1170,6 @@ CCOMMON_OPT += -DF_INTERFACE_IBM FEXTRALIB += -lxlf90 ifeq ($(C_COMPILER), $(filter $(C_COMPILER),GCC CLANG)) FCOMMON_OPT += -qextname -else ifeq ($(C_COMPILER), CLANG) -FCOMMON_OPT += -qextname endif # FCOMMON_OPT += -qarch=440 ifdef BINARY64 diff --git a/driver/others/dynamic_power.c b/driver/others/dynamic_power.c index 7f8bfd5b98..1d3f368757 100644 --- a/driver/others/dynamic_power.c +++ b/driver/others/dynamic_power.c @@ -44,7 +44,7 @@ char *gotoblas_corename(void) { #define CPU_POWER9 9 #define CPU_POWER10 10 -int cpuid() +static int cpuid(void) { int arch = _system_configuration.implementation; #ifdef POWER_6 From 12130ee9613936f2fa49fd58a7f6bf8210a65552 Mon Sep 17 00:00:00 2001 From: Chip-Kerchner Date: Mon, 2 Oct 2023 12:19:22 -0500 Subject: [PATCH 05/21] Remove tab. --- driver/others/dynamic_power.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/driver/others/dynamic_power.c b/driver/others/dynamic_power.c index 1d3f368757..3c5f1f3c10 100644 --- a/driver/others/dynamic_power.c +++ b/driver/others/dynamic_power.c @@ -102,7 +102,7 @@ static gotoblas_t *get_coretype(void) { #ifdef _AIX if (__builtin_cpu_supports("power10")) #else - if (__builtin_cpu_supports("arch_3_1") && __builtin_cpu_supports("mma")) + if (__builtin_cpu_supports("arch_3_1") && __builtin_cpu_supports("mma")) #endif return &gotoblas_POWER10; #endif From a922a07e610e0508e2f2f84ae158c46e2e3d7a0e Mon Sep 17 00:00:00 2001 From: Chip-Kerchner Date: Mon, 2 Oct 2023 12:24:30 -0500 Subject: [PATCH 06/21] Cleanup white spaces. --- driver/others/dynamic_power.c | 158 +++++++++++++++++----------------- 1 file changed, 79 insertions(+), 79 deletions(-) diff --git a/driver/others/dynamic_power.c b/driver/others/dynamic_power.c index 3c5f1f3c10..40f00a634f 100644 --- a/driver/others/dynamic_power.c +++ b/driver/others/dynamic_power.c @@ -13,25 +13,25 @@ extern gotoblas_t gotoblas_POWER10; extern void openblas_warning(int verbose, const char *msg); static char *corename[] = { - "unknown", - "POWER6", - "POWER8", - "POWER9", - "POWER10" + "unknown", + "POWER6", + "POWER8", + "POWER9", + "POWER10" }; #define NUM_CORETYPES 5 char *gotoblas_corename(void) { - if (gotoblas == &gotoblas_POWER6) return corename[1]; - if (gotoblas == &gotoblas_POWER8) return corename[2]; + if (gotoblas == &gotoblas_POWER6) return corename[1]; + if (gotoblas == &gotoblas_POWER8) return corename[2]; #if (!defined __GNUC__) || ( __GNUC__ >= 6) - if (gotoblas == &gotoblas_POWER9) return corename[3]; + if (gotoblas == &gotoblas_POWER9) return corename[3]; #endif #ifdef HAVE_P10_SUPPORT - if (gotoblas == &gotoblas_POWER10) return corename[4]; + if (gotoblas == &gotoblas_POWER10) return corename[4]; #endif - return corename[0]; + return corename[0]; } #ifdef _AIX @@ -90,13 +90,13 @@ static int __builtin_cpu_supports(char* arg) static gotoblas_t *get_coretype(void) { - if (__builtin_cpu_supports("power6")) - return &gotoblas_POWER6; - if (__builtin_cpu_supports("power8")) - return &gotoblas_POWER8; + if (__builtin_cpu_supports("power6")) + return &gotoblas_POWER6; + if (__builtin_cpu_supports("power8")) + return &gotoblas_POWER8; #if (!defined __GNUC__) || ( __GNUC__ >= 6) - if (__builtin_cpu_supports("power9")) - return &gotoblas_POWER9; + if (__builtin_cpu_supports("power9")) + return &gotoblas_POWER9; #endif #ifdef HAVE_P10_SUPPORT #ifdef _AIX @@ -104,84 +104,84 @@ static gotoblas_t *get_coretype(void) { #else if (__builtin_cpu_supports("arch_3_1") && __builtin_cpu_supports("mma")) #endif - return &gotoblas_POWER10; + return &gotoblas_POWER10; #endif - /* Fall back to the POWER9 implementation if the toolchain is too old or the MMA feature is not set */ + /* Fall back to the POWER9 implementation if the toolchain is too old or the MMA feature is not set */ #if (!defined __GNUC__) || ( __GNUC__ < 11) || (__GNUC__ == 10 && __GNUC_MINOR__ < 2) - if (__builtin_cpu_supports("power10")) - return &gotoblas_POWER9; -#endif - return NULL; + if (__builtin_cpu_supports("power10")) + return &gotoblas_POWER9; +#endif + return NULL; } static gotoblas_t *force_coretype(char * coretype) { - int i ; - int found = -1; - char message[128]; - - for ( i = 0 ; i < NUM_CORETYPES; i++) - { - if (!strncasecmp(coretype, corename[i], 20)) - { - found = i; - break; - } - } - - switch (found) - { - case 1: return (&gotoblas_POWER6); - case 2: return (&gotoblas_POWER8); + int i ; + int found = -1; + char message[128]; + + for ( i = 0 ; i < NUM_CORETYPES; i++) + { + if (!strncasecmp(coretype, corename[i], 20)) + { + found = i; + break; + } + } + + switch (found) + { + case 1: return (&gotoblas_POWER6); + case 2: return (&gotoblas_POWER8); #if (!defined __GNUC__) || ( __GNUC__ >= 6) - case 3: return (&gotoblas_POWER9); + case 3: return (&gotoblas_POWER9); #endif #ifdef HAVE_P10_SUPPORT - case 4: return (&gotoblas_POWER10); + case 4: return (&gotoblas_POWER10); #endif - default: return NULL; - } - snprintf(message, 128, "Core not found: %s\n", coretype); - openblas_warning(1, message); + default: return NULL; + } + snprintf(message, 128, "Core not found: %s\n", coretype); + openblas_warning(1, message); } void gotoblas_dynamic_init(void) { - char coremsg[128]; - char coren[22]; - char *p; - - - if (gotoblas) return; - - p = getenv("OPENBLAS_CORETYPE"); - if ( p ) - { - gotoblas = force_coretype(p); - } - else - { - gotoblas = get_coretype(); - } - - if (gotoblas == NULL) - { - snprintf(coremsg, 128, "Falling back to POWER8 core\n"); - openblas_warning(1, coremsg); - gotoblas = &gotoblas_POWER8; - } - - if (gotoblas && gotoblas -> init) { - strncpy(coren,gotoblas_corename(),20); - sprintf(coremsg, "Core: %s\n",coren); - openblas_warning(2, coremsg); - gotoblas -> init(); - } else { - openblas_warning(0, "OpenBLAS : Architecture Initialization failed. No initialization function found.\n"); - exit(1); - } + char coremsg[128]; + char coren[22]; + char *p; + + + if (gotoblas) return; + + p = getenv("OPENBLAS_CORETYPE"); + if ( p ) + { + gotoblas = force_coretype(p); + } + else + { + gotoblas = get_coretype(); + } + + if (gotoblas == NULL) + { + snprintf(coremsg, 128, "Falling back to POWER8 core\n"); + openblas_warning(1, coremsg); + gotoblas = &gotoblas_POWER8; + } + + if (gotoblas && gotoblas -> init) { + strncpy(coren,gotoblas_corename(),20); + sprintf(coremsg, "Core: %s\n",coren); + openblas_warning(2, coremsg); + gotoblas -> init(); + } else { + openblas_warning(0, "OpenBLAS : Architecture Initialization failed. No initialization function found.\n"); + exit(1); + } } void gotoblas_dynamic_quit(void) { - gotoblas = NULL; + gotoblas = NULL; } From 10210748de17a217fd67f6cb8501272b8bfa88c2 Mon Sep 17 00:00:00 2001 From: Chip-Kerchner Date: Mon, 2 Oct 2023 12:44:07 -0500 Subject: [PATCH 07/21] Revert PGI changes. --- driver/others/dynamic_power.c | 310 +++++++++++++++++++++++++--------- 1 file changed, 234 insertions(+), 76 deletions(-) diff --git a/driver/others/dynamic_power.c b/driver/others/dynamic_power.c index 40f00a634f..0f5b06be55 100644 --- a/driver/others/dynamic_power.c +++ b/driver/others/dynamic_power.c @@ -13,27 +13,181 @@ extern gotoblas_t gotoblas_POWER10; extern void openblas_warning(int verbose, const char *msg); static char *corename[] = { - "unknown", - "POWER6", - "POWER8", - "POWER9", - "POWER10" + "unknown", + "POWER6", + "POWER8", + "POWER9", + "POWER10" }; #define NUM_CORETYPES 5 char *gotoblas_corename(void) { - if (gotoblas == &gotoblas_POWER6) return corename[1]; - if (gotoblas == &gotoblas_POWER8) return corename[2]; +#ifndef C_PGI + if (gotoblas == &gotoblas_POWER6) return corename[1]; +#endif + if (gotoblas == &gotoblas_POWER8) return corename[2]; #if (!defined __GNUC__) || ( __GNUC__ >= 6) - if (gotoblas == &gotoblas_POWER9) return corename[3]; + if (gotoblas == &gotoblas_POWER9) return corename[3]; #endif #ifdef HAVE_P10_SUPPORT - if (gotoblas == &gotoblas_POWER10) return corename[4]; + if (gotoblas == &gotoblas_POWER10) return corename[4]; +#endif + return corename[0]; +} + +#if defined(__clang__) +static int __builtin_cpu_supports(char* arg) +{ + return 0; +} +#endif + +#if defined(C_PGI) || defined(__clang__) +/* + * NV HPC compilers do not yet implement __builtin_cpu_is(). + * Fake a version here for use in the CPU detection code below. + * + * Strategy here is to first check the CPU to see what it actually is, + * and then test the input to see if what the CPU actually is matches + * what was requested. + */ + +#include + +/* + * Define POWER processor version table. + * + * NOTE NV HPC SDK compilers only support POWER8 and POWER9 at this time + */ + +#define CPU_UNKNOWN 0 +#define CPU_POWER5 5 +#define CPU_POWER6 6 +#define CPU_POWER8 8 +#define CPU_POWER9 9 +#define CPU_POWER10 10 + +static struct { + uint32_t pvr_mask; + uint32_t pvr_value; + const char* cpu_name; + uint32_t cpu_type; +} pvrPOWER [] = { + + { /* POWER6 in P5+ mode; 2.04-compliant processor */ + .pvr_mask = 0xffffffff, + .pvr_value = 0x0f000001, + .cpu_name = "POWER5+", + .cpu_type = CPU_POWER5, + }, + + { /* Power6 aka POWER6X*/ + .pvr_mask = 0xffff0000, + .pvr_value = 0x003e0000, + .cpu_name = "POWER6 (raw)", + .cpu_type = CPU_POWER6, + }, + + { /* Power7 */ + .pvr_mask = 0xffff0000, + .pvr_value = 0x003f0000, + .cpu_name = "POWER7 (raw)", + .cpu_type = CPU_POWER6, + }, + + { /* Power7+ */ + .pvr_mask = 0xffff0000, + .pvr_value = 0x004A0000, + .cpu_name = "POWER7+ (raw)", + .cpu_type = CPU_POWER6, + }, + + { /* Power8E */ + .pvr_mask = 0xffff0000, + .pvr_value = 0x004b0000, + .cpu_name = "POWER8E (raw)", + .cpu_type = CPU_POWER8, + }, + + { /* Power8NVL */ + .pvr_mask = 0xffff0000, + .pvr_value = 0x004c0000, + .cpu_name = "POWER8NVL (raw)", + .cpu_type = CPU_POWER8, + }, + + { /* Power8 */ + .pvr_mask = 0xffff0000, + .pvr_value = 0x004d0000, + .cpu_name = "POWER8 (raw)", + .cpu_type = CPU_POWER8, + }, + + { /* Power9 DD2.0 */ + .pvr_mask = 0xffffefff, + .pvr_value = 0x004e0200, + .cpu_name = "POWER9 (raw)", + .cpu_type = CPU_POWER9, + }, + + { /* Power9 DD 2.1 */ + .pvr_mask = 0xffffefff, + .pvr_value = 0x004e0201, + .cpu_name = "POWER9 (raw)", + .cpu_type = CPU_POWER9, + }, + + { /* Power9 DD2.2 or later */ + .pvr_mask = 0xffff0000, + .pvr_value = 0x004e0000, + .cpu_name = "POWER9 (raw)", + .cpu_type = CPU_POWER9, + }, + + { /* Power10 */ + .pvr_mask = 0xffff0000, + .pvr_value = 0x00800000, + .cpu_name = "POWER10 (raw)", + .cpu_type = CPU_POWER10, + }, + + { /* End of table, pvr_mask and pvr_value must be zero */ + .pvr_mask = 0x0, + .pvr_value = 0x0, + .cpu_name = "Unknown", + .cpu_type = CPU_UNKNOWN, + }, +}; + +static int __builtin_cpu_is(const char *cpu) { + int i; + uint32_t pvr; + uint32_t cpu_type; + + asm("mfpvr %0" : "=r"(pvr)); + + for (i = 0 ; i < sizeof pvrPOWER / sizeof *pvrPOWER ; ++i) { + if ((pvr & pvrPOWER[i].pvr_mask) == pvrPOWER[i].pvr_value) { + break; + } + } + +#if defined(DEBUG) + printf("%s: returning CPU=%s, cpu_type=%p\n", __func__, + pvrPOWER[i].cpu_name, pvrPOWER[i].cpu_type); #endif - return corename[0]; + cpu_type = pvrPOWER[i].cpu_type; + + if (!strcmp(cpu, "power8")) + return cpu_type == CPU_POWER8; + if (!strcmp(cpu, "power9")) + return cpu_type == CPU_POWER9; + return 0; } +#endif /* C_PGI */ + #ifdef _AIX #include @@ -90,98 +244,102 @@ static int __builtin_cpu_supports(char* arg) static gotoblas_t *get_coretype(void) { - if (__builtin_cpu_supports("power6")) - return &gotoblas_POWER6; - if (__builtin_cpu_supports("power8")) - return &gotoblas_POWER8; +#ifndef C_PGI + if (__builtin_cpu_is("power6") || __builtin_cpu_is("power6x")) + return &gotoblas_POWER6; +#endif + if (__builtin_cpu_is("power8")) + return &gotoblas_POWER8; #if (!defined __GNUC__) || ( __GNUC__ >= 6) - if (__builtin_cpu_supports("power9")) - return &gotoblas_POWER9; + if (__builtin_cpu_is("power9")) + return &gotoblas_POWER9; #endif #ifdef HAVE_P10_SUPPORT #ifdef _AIX - if (__builtin_cpu_supports("power10")) + if (__builtin_cpu_supports("power10")) #else - if (__builtin_cpu_supports("arch_3_1") && __builtin_cpu_supports("mma")) -#endif - return &gotoblas_POWER10; + if (__builtin_cpu_supports ("arch_3_1") && __builtin_cpu_supports ("mma")) #endif - /* Fall back to the POWER9 implementation if the toolchain is too old or the MMA feature is not set */ -#if (!defined __GNUC__) || ( __GNUC__ < 11) || (__GNUC__ == 10 && __GNUC_MINOR__ < 2) - if (__builtin_cpu_supports("power10")) - return &gotoblas_POWER9; + return &gotoblas_POWER10; #endif - return NULL; + /* Fall back to the POWER9 implementation if the toolchain is too old or the MMA feature is not set */ +#if (!defined __GNUC__) || ( __GNUC__ >= 11) || (__GNUC__ == 10 && __GNUC_MINOR__ >= 2) + if (__builtin_cpu_is("power10")) + return &gotoblas_POWER9; +#endif + return NULL; } static gotoblas_t *force_coretype(char * coretype) { - int i ; - int found = -1; - char message[128]; - - for ( i = 0 ; i < NUM_CORETYPES; i++) - { - if (!strncasecmp(coretype, corename[i], 20)) - { - found = i; - break; - } - } + int i ; + int found = -1; + char message[128]; - switch (found) - { - case 1: return (&gotoblas_POWER6); - case 2: return (&gotoblas_POWER8); + for ( i = 0 ; i < NUM_CORETYPES; i++) + { + if (!strncasecmp(coretype, corename[i], 20)) + { + found = i; + break; + } + } + + switch (found) + { +#ifndef C_PGI + case 1: return (&gotoblas_POWER6); +#endif + case 2: return (&gotoblas_POWER8); #if (!defined __GNUC__) || ( __GNUC__ >= 6) - case 3: return (&gotoblas_POWER9); + case 3: return (&gotoblas_POWER9); #endif #ifdef HAVE_P10_SUPPORT - case 4: return (&gotoblas_POWER10); + case 4: return (&gotoblas_POWER10); #endif - default: return NULL; - } - snprintf(message, 128, "Core not found: %s\n", coretype); - openblas_warning(1, message); + default: return NULL; + } + snprintf(message, 128, "Core not found: %s\n", coretype); + openblas_warning(1, message); } void gotoblas_dynamic_init(void) { - char coremsg[128]; - char coren[22]; - char *p; + char coremsg[128]; + char coren[22]; + char *p; - if (gotoblas) return; + if (gotoblas) return; - p = getenv("OPENBLAS_CORETYPE"); - if ( p ) - { - gotoblas = force_coretype(p); - } - else - { - gotoblas = get_coretype(); - } + p = getenv("OPENBLAS_CORETYPE"); + if ( p ) + { + gotoblas = force_coretype(p); + } + else + { + gotoblas = get_coretype(); + } - if (gotoblas == NULL) - { - snprintf(coremsg, 128, "Falling back to POWER8 core\n"); - openblas_warning(1, coremsg); - gotoblas = &gotoblas_POWER8; - } + if (gotoblas == NULL) + { + snprintf(coremsg, 128, "Falling back to POWER8 core\n"); + openblas_warning(1, coremsg); + gotoblas = &gotoblas_POWER8; + } - if (gotoblas && gotoblas -> init) { - strncpy(coren,gotoblas_corename(),20); - sprintf(coremsg, "Core: %s\n",coren); - openblas_warning(2, coremsg); - gotoblas -> init(); - } else { - openblas_warning(0, "OpenBLAS : Architecture Initialization failed. No initialization function found.\n"); - exit(1); - } + if (gotoblas && gotoblas -> init) { + strncpy(coren,gotoblas_corename(),20); + sprintf(coremsg, "Core: %s\n",coren); + openblas_warning(2, coremsg); + gotoblas -> init(); + } else { + openblas_warning(0, "OpenBLAS : Architecture Initialization failed. No initialization function found.\n"); + exit(1); + } } void gotoblas_dynamic_quit(void) { - gotoblas = NULL; + gotoblas = NULL; } From e5dc376912dab278afdf677cb112008d36ead0fe Mon Sep 17 00:00:00 2001 From: Chip-Kerchner Date: Mon, 2 Oct 2023 12:48:47 -0500 Subject: [PATCH 08/21] Remove duplicate defines. --- driver/others/dynamic_power.c | 21 +++++++-------------- 1 file changed, 7 insertions(+), 14 deletions(-) diff --git a/driver/others/dynamic_power.c b/driver/others/dynamic_power.c index 0f5b06be55..7b0b4ea01e 100644 --- a/driver/others/dynamic_power.c +++ b/driver/others/dynamic_power.c @@ -43,6 +43,13 @@ static int __builtin_cpu_supports(char* arg) } #endif +#define CPU_UNKNOWN 0 +#define CPU_POWER6 6 +#define CPU_POWER7 7 +#define CPU_POWER8 8 +#define CPU_POWER9 9 +#define CPU_POWER10 10 + #if defined(C_PGI) || defined(__clang__) /* * NV HPC compilers do not yet implement __builtin_cpu_is(). @@ -61,13 +68,6 @@ static int __builtin_cpu_supports(char* arg) * NOTE NV HPC SDK compilers only support POWER8 and POWER9 at this time */ -#define CPU_UNKNOWN 0 -#define CPU_POWER5 5 -#define CPU_POWER6 6 -#define CPU_POWER8 8 -#define CPU_POWER9 9 -#define CPU_POWER10 10 - static struct { uint32_t pvr_mask; uint32_t pvr_value; @@ -191,13 +191,6 @@ static int __builtin_cpu_is(const char *cpu) { #ifdef _AIX #include -#define CPU_UNKNOWN 0 -#define CPU_POWER6 6 -#define CPU_POWER7 7 -#define CPU_POWER8 8 -#define CPU_POWER9 9 -#define CPU_POWER10 10 - static int cpuid(void) { int arch = _system_configuration.implementation; From b677d0d5fd175768e63d02253b12d1b0ccb2d242 Mon Sep 17 00:00:00 2001 From: Chip-Kerchner Date: Mon, 2 Oct 2023 13:09:12 -0500 Subject: [PATCH 09/21] Adding missing endif --- driver/others/dynamic_power.c | 1 + 1 file changed, 1 insertion(+) diff --git a/driver/others/dynamic_power.c b/driver/others/dynamic_power.c index 7b0b4ea01e..6ed26ad1ef 100644 --- a/driver/others/dynamic_power.c +++ b/driver/others/dynamic_power.c @@ -234,6 +234,7 @@ static int __builtin_cpu_supports(char* arg) return 0; } #endif +#endif static gotoblas_t *get_coretype(void) { From a8c90eb3ed5cae583bdc289846fe7d37fdc42d28 Mon Sep 17 00:00:00 2001 From: Chip-Kerchner Date: Tue, 3 Oct 2023 10:24:04 -0500 Subject: [PATCH 10/21] Added cpu_is --- driver/others/dynamic_power.c | 21 ++++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) diff --git a/driver/others/dynamic_power.c b/driver/others/dynamic_power.c index 6ed26ad1ef..252e409b37 100644 --- a/driver/others/dynamic_power.c +++ b/driver/others/dynamic_power.c @@ -233,8 +233,27 @@ static int __builtin_cpu_supports(char* arg) } return 0; } + +static int __builtin_cpu_is(char *arg) +{ + static int ipinfo = -1; + if (ipinfo < 0) { + ipinfo = cpuid(); + } + if (ipinfo == CPU_POWER10) { + if (!strcmp(arg, "power10") return 1; + } else if (ipinfo == CPU_POWER9) { + if (!strcmp(arg, "power9") return 1; + } else if (ipinfo == CPU_POWER8) { + if (!strcmp(arg, "power8") return 1; + } else if (ipinfo == CPU_POWER6) { + if (!strcmp(arg, "power6") return 1; + } else { + return 0; + } +} #endif -#endif +#endif /* _AIX */ static gotoblas_t *get_coretype(void) { From 2d0b2334259d41c2003b51a07580dbd25cfe267c Mon Sep 17 00:00:00 2001 From: Chip-Kerchner Date: Tue, 3 Oct 2023 10:26:14 -0500 Subject: [PATCH 11/21] Fix missing parens. --- driver/others/dynamic_power.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/driver/others/dynamic_power.c b/driver/others/dynamic_power.c index 252e409b37..7341221780 100644 --- a/driver/others/dynamic_power.c +++ b/driver/others/dynamic_power.c @@ -241,13 +241,13 @@ static int __builtin_cpu_is(char *arg) ipinfo = cpuid(); } if (ipinfo == CPU_POWER10) { - if (!strcmp(arg, "power10") return 1; + if (!strcmp(arg, "power10")) return 1; } else if (ipinfo == CPU_POWER9) { - if (!strcmp(arg, "power9") return 1; + if (!strcmp(arg, "power9")) return 1; } else if (ipinfo == CPU_POWER8) { - if (!strcmp(arg, "power8") return 1; + if (!strcmp(arg, "power8")) return 1; } else if (ipinfo == CPU_POWER6) { - if (!strcmp(arg, "power6") return 1; + if (!strcmp(arg, "power6")) return 1; } else { return 0; } From 09212f84bff0ca8173f928c59ec81da3ab00933b Mon Sep 17 00:00:00 2001 From: Chip-Kerchner Date: Tue, 3 Oct 2023 12:23:21 -0500 Subject: [PATCH 12/21] Fix default case for cpu_is. --- driver/others/dynamic_power.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/driver/others/dynamic_power.c b/driver/others/dynamic_power.c index 7341221780..252baaeeba 100644 --- a/driver/others/dynamic_power.c +++ b/driver/others/dynamic_power.c @@ -248,9 +248,8 @@ static int __builtin_cpu_is(char *arg) if (!strcmp(arg, "power8")) return 1; } else if (ipinfo == CPU_POWER6) { if (!strcmp(arg, "power6")) return 1; - } else { - return 0; } + return 0; } #endif #endif /* _AIX */ From 3cc72a3797ac050841975ff38d317f34ecfeb503 Mon Sep 17 00:00:00 2001 From: Chip Kerchner Date: Wed, 4 Oct 2023 09:54:37 -0500 Subject: [PATCH 13/21] Only include cpu_id and cpu_supports in AIX and fix parameter types. --- driver/others/dynamic_power.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/driver/others/dynamic_power.c b/driver/others/dynamic_power.c index 252baaeeba..c01d112bc1 100644 --- a/driver/others/dynamic_power.c +++ b/driver/others/dynamic_power.c @@ -36,7 +36,7 @@ char *gotoblas_corename(void) { return corename[0]; } -#if defined(__clang__) +#if defined(__clang__) && !defined(_AIX) static int __builtin_cpu_supports(char* arg) { return 0; @@ -50,7 +50,7 @@ static int __builtin_cpu_supports(char* arg) #define CPU_POWER9 9 #define CPU_POWER10 10 -#if defined(C_PGI) || defined(__clang__) +#if defined(C_PGI) || (defined(__clang__) && !defined(_AIX)) /* * NV HPC compilers do not yet implement __builtin_cpu_is(). * Fake a version here for use in the CPU detection code below. @@ -213,7 +213,7 @@ static int cpuid(void) } #ifndef __BUILTIN_CPU_SUPPORTS__ -static int __builtin_cpu_supports(char* arg) +static int __builtin_cpu_supports(const char* arg) { static int ipinfo = -1; if (ipinfo < 0) { @@ -234,7 +234,7 @@ static int __builtin_cpu_supports(char* arg) return 0; } -static int __builtin_cpu_is(char *arg) +static int __builtin_cpu_is(const char *arg) { static int ipinfo = -1; if (ipinfo < 0) { From c60f9d9c084a97d1c416d63d921a8fcb30b090ac Mon Sep 17 00:00:00 2001 From: Chip-Kerchner Date: Fri, 6 Oct 2023 09:49:17 -0500 Subject: [PATCH 14/21] Add missing CPU_POWER5. --- driver/others/dynamic_power.c | 1 + 1 file changed, 1 insertion(+) diff --git a/driver/others/dynamic_power.c b/driver/others/dynamic_power.c index c01d112bc1..8c5caada08 100644 --- a/driver/others/dynamic_power.c +++ b/driver/others/dynamic_power.c @@ -44,6 +44,7 @@ static int __builtin_cpu_supports(char* arg) #endif #define CPU_UNKNOWN 0 +#define CPU_POWER5 5 #define CPU_POWER6 6 #define CPU_POWER7 7 #define CPU_POWER8 8 From 71c6689af4e61cc4891eba3d996fb39920798e37 Mon Sep 17 00:00:00 2001 From: Chip-Kerchner Date: Fri, 6 Oct 2023 12:20:40 -0500 Subject: [PATCH 15/21] Fix dynamic dispatch to work for clang. --- driver/others/dynamic_power.c | 141 ++++++++++++++-------------------- 1 file changed, 59 insertions(+), 82 deletions(-) diff --git a/driver/others/dynamic_power.c b/driver/others/dynamic_power.c index 8c5caada08..c43738ef45 100644 --- a/driver/others/dynamic_power.c +++ b/driver/others/dynamic_power.c @@ -36,13 +36,6 @@ char *gotoblas_corename(void) { return corename[0]; } -#if defined(__clang__) && !defined(_AIX) -static int __builtin_cpu_supports(char* arg) -{ - return 0; -} -#endif - #define CPU_UNKNOWN 0 #define CPU_POWER5 5 #define CPU_POWER6 6 @@ -51,7 +44,31 @@ static int __builtin_cpu_supports(char* arg) #define CPU_POWER9 9 #define CPU_POWER10 10 -#if defined(C_PGI) || (defined(__clang__) && !defined(_AIX)) +#ifdef _AIX +#include + +static int cpuid(void) +{ + int arch = _system_configuration.implementation; +#ifdef POWER_6 + if (arch == POWER_6) return CPU_POWER6; +#endif +#ifdef POWER_7 + else if (arch == POWER_7) return CPU_POWER7; +#endif +#ifdef POWER_8 + else if (arch == POWER_8) return CPU_POWER8; +#endif +#ifdef POWER_9 + else if (arch == POWER_9) return CPU_POWER9; +#endif +#ifdef POWER_10 + else if (arch == POWER_10) return CPU_POWER10; +#endif + return CPU_UNKNOWN; +} +#else +#if defined(C_PGI) || defined(__clang__) /* * NV HPC compilers do not yet implement __builtin_cpu_is(). * Fake a version here for use in the CPU detection code below. @@ -61,8 +78,6 @@ static int __builtin_cpu_supports(char* arg) * what was requested. */ -#include - /* * Define POWER processor version table. * @@ -161,79 +176,32 @@ static struct { }, }; -static int __builtin_cpu_is(const char *cpu) { - int i; - uint32_t pvr; - uint32_t cpu_type; +static int cpuid(void) +{ + int i; + uint32_t pvr; + uint32_t cpu_type; - asm("mfpvr %0" : "=r"(pvr)); + asm("mfpvr %0" : "=r"(pvr)); - for (i = 0 ; i < sizeof pvrPOWER / sizeof *pvrPOWER ; ++i) { - if ((pvr & pvrPOWER[i].pvr_mask) == pvrPOWER[i].pvr_value) { - break; - } - } + for (i = 0 ; i < sizeof pvrPOWER / sizeof *pvrPOWER ; ++i) { + if ((pvr & pvrPOWER[i].pvr_mask) == pvrPOWER[i].pvr_value) { + break; + } + } #if defined(DEBUG) - printf("%s: returning CPU=%s, cpu_type=%p\n", __func__, - pvrPOWER[i].cpu_name, pvrPOWER[i].cpu_type); + printf("%s: returning CPU=%s, cpu_type=%p\n", __func__, + pvrPOWER[i].cpu_name, pvrPOWER[i].cpu_type); #endif - cpu_type = pvrPOWER[i].cpu_type; - - if (!strcmp(cpu, "power8")) - return cpu_type == CPU_POWER8; - if (!strcmp(cpu, "power9")) - return cpu_type == CPU_POWER9; - return 0; + cpu_type = pvrPOWER[i].cpu_type; + return (int)(cpu_type); } - #endif /* C_PGI */ - -#ifdef _AIX -#include - -static int cpuid(void) -{ - int arch = _system_configuration.implementation; -#ifdef POWER_6 - if (arch == POWER_6) return CPU_POWER6; -#endif -#ifdef POWER_7 - else if (arch == POWER_7) return CPU_POWER7; -#endif -#ifdef POWER_8 - else if (arch == POWER_8) return CPU_POWER8; -#endif -#ifdef POWER_9 - else if (arch == POWER_9) return CPU_POWER9; -#endif -#ifdef POWER_10 - else if (arch == POWER_10) return CPU_POWER10; -#endif - return CPU_UNKNOWN; -} +#endif /* _AIX */ #ifndef __BUILTIN_CPU_SUPPORTS__ -static int __builtin_cpu_supports(const char* arg) -{ - static int ipinfo = -1; - if (ipinfo < 0) { - ipinfo = cpuid(); - } - if (ipinfo >= CPU_POWER10) { - if (!strcmp(arg, "power10")) return 1; - } - if (ipinfo >= CPU_POWER9) { - if (!strcmp(arg, "power9")) return 1; - } - if (ipinfo >= CPU_POWER8) { - if (!strcmp(arg, "power8")) return 1; - } - if (ipinfo >= CPU_POWER6) { - if (!strcmp(arg, "power6")) return 1; - } - return 0; -} +#include static int __builtin_cpu_is(const char *arg) { @@ -241,19 +209,28 @@ static int __builtin_cpu_is(const char *arg) if (ipinfo < 0) { ipinfo = cpuid(); } +#ifdef HAVE_P10_SUPPORT if (ipinfo == CPU_POWER10) { if (!strcmp(arg, "power10")) return 1; - } else if (ipinfo == CPU_POWER9) { + } +#endif + if (ipinfo == CPU_POWER9) { if (!strcmp(arg, "power9")) return 1; } else if (ipinfo == CPU_POWER8) { if (!strcmp(arg, "power8")) return 1; +#ifndef C_PGI } else if (ipinfo == CPU_POWER6) { if (!strcmp(arg, "power6")) return 1; +#endif } return 0; } + +static int __builtin_cpu_supports(const char *arg) +{ + return 0; +} #endif -#endif /* _AIX */ static gotoblas_t *get_coretype(void) { @@ -268,18 +245,18 @@ static gotoblas_t *get_coretype(void) { return &gotoblas_POWER9; #endif #ifdef HAVE_P10_SUPPORT -#ifdef _AIX - if (__builtin_cpu_supports("power10")) +#if defined(_AIX) || defined(__clang__) + if (__builtin_cpu_is("power10")) #else if (__builtin_cpu_supports ("arch_3_1") && __builtin_cpu_supports ("mma")) #endif return &gotoblas_POWER10; #endif - /* Fall back to the POWER9 implementation if the toolchain is too old or the MMA feature is not set */ + /* Fall back to the POWER9 implementation if the toolchain is too old or the MMA feature is not set */ #if (!defined __GNUC__) || ( __GNUC__ >= 11) || (__GNUC__ == 10 && __GNUC_MINOR__ >= 2) - if (__builtin_cpu_is("power10")) - return &gotoblas_POWER9; -#endif + if (__builtin_cpu_is("power10")) + return &gotoblas_POWER9; +#endif return NULL; } From 298bf1f240afcac73d306f4c2da35b314c39dba6 Mon Sep 17 00:00:00 2001 From: Chip-Kerchner Date: Fri, 6 Oct 2023 12:50:28 -0500 Subject: [PATCH 16/21] Reduce differences. --- driver/others/dynamic_power.c | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/driver/others/dynamic_power.c b/driver/others/dynamic_power.c index c43738ef45..4e8710bc71 100644 --- a/driver/others/dynamic_power.c +++ b/driver/others/dynamic_power.c @@ -178,23 +178,23 @@ static struct { static int cpuid(void) { - int i; - uint32_t pvr; - uint32_t cpu_type; + int i; + uint32_t pvr; + uint32_t cpu_type; - asm("mfpvr %0" : "=r"(pvr)); + asm("mfpvr %0" : "=r"(pvr)); - for (i = 0 ; i < sizeof pvrPOWER / sizeof *pvrPOWER ; ++i) { - if ((pvr & pvrPOWER[i].pvr_mask) == pvrPOWER[i].pvr_value) { - break; - } - } + for (i = 0 ; i < sizeof pvrPOWER / sizeof *pvrPOWER ; ++i) { + if ((pvr & pvrPOWER[i].pvr_mask) == pvrPOWER[i].pvr_value) { + break; + } + } #if defined(DEBUG) - printf("%s: returning CPU=%s, cpu_type=%p\n", __func__, - pvrPOWER[i].cpu_name, pvrPOWER[i].cpu_type); + printf("%s: returning CPU=%s, cpu_type=%p\n", __func__, + pvrPOWER[i].cpu_name, pvrPOWER[i].cpu_type); #endif - cpu_type = pvrPOWER[i].cpu_type; + cpu_type = pvrPOWER[i].cpu_type; return (int)(cpu_type); } #endif /* C_PGI */ @@ -252,10 +252,10 @@ static gotoblas_t *get_coretype(void) { #endif return &gotoblas_POWER10; #endif - /* Fall back to the POWER9 implementation if the toolchain is too old or the MMA feature is not set */ + /* Fall back to the POWER9 implementation if the toolchain is too old or the MMA feature is not set */ #if (!defined __GNUC__) || ( __GNUC__ >= 11) || (__GNUC__ == 10 && __GNUC_MINOR__ >= 2) - if (__builtin_cpu_is("power10")) - return &gotoblas_POWER9; + if (__builtin_cpu_is("power10")) + return &gotoblas_POWER9; #endif return NULL; } From 36e08f69946321a7ca3f9ef495d198802e1b5b17 Mon Sep 17 00:00:00 2001 From: Chip-Kerchner Date: Fri, 6 Oct 2023 13:08:41 -0500 Subject: [PATCH 17/21] One more small change. --- driver/others/dynamic_power.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/driver/others/dynamic_power.c b/driver/others/dynamic_power.c index 4e8710bc71..311987d31a 100644 --- a/driver/others/dynamic_power.c +++ b/driver/others/dynamic_power.c @@ -195,7 +195,7 @@ static int cpuid(void) pvrPOWER[i].cpu_name, pvrPOWER[i].cpu_type); #endif cpu_type = pvrPOWER[i].cpu_type; - return (int)(cpu_type); + return (int)(cpu_type); } #endif /* C_PGI */ #endif /* _AIX */ From 3655632611173f191c22a36d7c9e0950cdcc202e Mon Sep 17 00:00:00 2001 From: Chip-Kerchner Date: Fri, 6 Oct 2023 13:11:40 -0500 Subject: [PATCH 18/21] Another small change. --- driver/others/dynamic_power.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/driver/others/dynamic_power.c b/driver/others/dynamic_power.c index 311987d31a..f98fedd457 100644 --- a/driver/others/dynamic_power.c +++ b/driver/others/dynamic_power.c @@ -246,7 +246,7 @@ static gotoblas_t *get_coretype(void) { #endif #ifdef HAVE_P10_SUPPORT #if defined(_AIX) || defined(__clang__) - if (__builtin_cpu_is("power10")) + if (__builtin_cpu_is("power10")) #else if (__builtin_cpu_supports ("arch_3_1") && __builtin_cpu_supports ("mma")) #endif From 880af052dde230595328d8a19d10e42f39369a43 Mon Sep 17 00:00:00 2001 From: Chip-Kerchner Date: Fri, 6 Oct 2023 13:41:49 -0500 Subject: [PATCH 19/21] Fix dynamic dispatch P9 for clang. --- driver/others/dynamic_power.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/driver/others/dynamic_power.c b/driver/others/dynamic_power.c index f98fedd457..db04e635fa 100644 --- a/driver/others/dynamic_power.c +++ b/driver/others/dynamic_power.c @@ -3,7 +3,7 @@ extern gotoblas_t gotoblas_POWER6; extern gotoblas_t gotoblas_POWER8; -#if (!defined __GNUC__) || ( __GNUC__ >= 6) +#if ((!defined __GNUC__) || ( __GNUC__ >= 6)) || defined(__clang__) extern gotoblas_t gotoblas_POWER9; #endif #ifdef HAVE_P10_SUPPORT @@ -27,7 +27,7 @@ char *gotoblas_corename(void) { if (gotoblas == &gotoblas_POWER6) return corename[1]; #endif if (gotoblas == &gotoblas_POWER8) return corename[2]; -#if (!defined __GNUC__) || ( __GNUC__ >= 6) +#if ((!defined __GNUC__) || ( __GNUC__ >= 6)) || defined(__clang__) if (gotoblas == &gotoblas_POWER9) return corename[3]; #endif #ifdef HAVE_P10_SUPPORT @@ -240,7 +240,7 @@ static gotoblas_t *get_coretype(void) { #endif if (__builtin_cpu_is("power8")) return &gotoblas_POWER8; -#if (!defined __GNUC__) || ( __GNUC__ >= 6) +#if ((!defined __GNUC__) || ( __GNUC__ >= 6)) || defined(__clang__) if (__builtin_cpu_is("power9")) return &gotoblas_POWER9; #endif @@ -281,7 +281,7 @@ static gotoblas_t *force_coretype(char * coretype) { case 1: return (&gotoblas_POWER6); #endif case 2: return (&gotoblas_POWER8); -#if (!defined __GNUC__) || ( __GNUC__ >= 6) +#if ((!defined __GNUC__) || ( __GNUC__ >= 6)) || defined(__clang__) case 3: return (&gotoblas_POWER9); #endif #ifdef HAVE_P10_SUPPORT From c8882bd9d890c332adaf992a0b9da6be8384bb97 Mon Sep 17 00:00:00 2001 From: Chip-Kerchner Date: Wed, 1 Nov 2023 14:53:55 -0500 Subject: [PATCH 20/21] Remove POWER7 from cpu list. --- driver/others/dynamic_power.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/driver/others/dynamic_power.c b/driver/others/dynamic_power.c index db04e635fa..b4a1cc6bed 100644 --- a/driver/others/dynamic_power.c +++ b/driver/others/dynamic_power.c @@ -39,7 +39,6 @@ char *gotoblas_corename(void) { #define CPU_UNKNOWN 0 #define CPU_POWER5 5 #define CPU_POWER6 6 -#define CPU_POWER7 7 #define CPU_POWER8 8 #define CPU_POWER9 9 #define CPU_POWER10 10 @@ -53,9 +52,6 @@ static int cpuid(void) #ifdef POWER_6 if (arch == POWER_6) return CPU_POWER6; #endif -#ifdef POWER_7 - else if (arch == POWER_7) return CPU_POWER7; -#endif #ifdef POWER_8 else if (arch == POWER_8) return CPU_POWER8; #endif From 7dcb2d67f23caa8b70df4ea37c05a12ff8c15898 Mon Sep 17 00:00:00 2001 From: Chip-Kerchner Date: Wed, 1 Nov 2023 15:23:28 -0500 Subject: [PATCH 21/21] Have POWER7 return arch=POWER6. --- driver/others/dynamic_power.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/driver/others/dynamic_power.c b/driver/others/dynamic_power.c index b4a1cc6bed..10a5d64b38 100644 --- a/driver/others/dynamic_power.c +++ b/driver/others/dynamic_power.c @@ -52,6 +52,9 @@ static int cpuid(void) #ifdef POWER_6 if (arch == POWER_6) return CPU_POWER6; #endif +#ifdef POWER_7 + else if (arch == POWER_7) return CPU_POWER6; +#endif #ifdef POWER_8 else if (arch == POWER_8) return CPU_POWER8; #endif