diff --git a/CMSIS/Core/Include/a-profile/cmsis_gcc_a.h b/CMSIS/Core/Include/a-profile/cmsis_gcc_a.h index e43606c99..d25bdedca 100644 --- a/CMSIS/Core/Include/a-profile/cmsis_gcc_a.h +++ b/CMSIS/Core/Include/a-profile/cmsis_gcc_a.h @@ -622,227 +622,125 @@ __STATIC_FORCEINLINE void __set_FPSCR(uint32_t fpscr) /* ################### Compiler specific Intrinsics ########################### */ -/** \defgroup CMSIS_SIMD_intrinsics CMSIS SIMD Intrinsics - Access to dedicated SIMD instructions - @{ -*/ - -__STATIC_FORCEINLINE uint32_t __QADD8(uint32_t op1, uint32_t op2) -{ - uint32_t result; - - __ASM ("qadd8 %0, %1, %2" : "=r" (result) : "r" (op1), "r" (op2) ); - return(result); -} - - -__STATIC_FORCEINLINE uint32_t __QSUB8(uint32_t op1, uint32_t op2) -{ - uint32_t result; - - __ASM ("qsub8 %0, %1, %2" : "=r" (result) : "r" (op1), "r" (op2) ); - return(result); -} - - -__STATIC_FORCEINLINE uint32_t __QADD16(uint32_t op1, uint32_t op2) -{ - uint32_t result; - - __ASM ("qadd16 %0, %1, %2" : "=r" (result) : "r" (op1), "r" (op2) ); - return(result); -} - -__STATIC_FORCEINLINE uint32_t __SHADD16(uint32_t op1, uint32_t op2) -{ - uint32_t result; - - __ASM ("shadd16 %0, %1, %2" : "=r" (result) : "r" (op1), "r" (op2) ); - return(result); -} - -__STATIC_FORCEINLINE uint32_t __QSUB16(uint32_t op1, uint32_t op2) -{ - uint32_t result; - - __ASM ("qsub16 %0, %1, %2" : "=r" (result) : "r" (op1), "r" (op2) ); - return(result); -} - -__STATIC_FORCEINLINE uint32_t __SHSUB16(uint32_t op1, uint32_t op2) -{ - uint32_t result; - - __ASM ("shsub16 %0, %1, %2" : "=r" (result) : "r" (op1), "r" (op2) ); - return(result); -} - -__STATIC_FORCEINLINE uint32_t __QASX(uint32_t op1, uint32_t op2) -{ - uint32_t result; - - __ASM ("qasx %0, %1, %2" : "=r" (result) : "r" (op1), "r" (op2) ); - return(result); -} - -__STATIC_FORCEINLINE uint32_t __SHASX(uint32_t op1, uint32_t op2) -{ - uint32_t result; - - __ASM ("shasx %0, %1, %2" : "=r" (result) : "r" (op1), "r" (op2) ); - return(result); -} - -__STATIC_FORCEINLINE uint32_t __QSAX(uint32_t op1, uint32_t op2) -{ - uint32_t result; - - __ASM ("qsax %0, %1, %2" : "=r" (result) : "r" (op1), "r" (op2) ); - return(result); -} - -__STATIC_FORCEINLINE uint32_t __SHSAX(uint32_t op1, uint32_t op2) -{ - uint32_t result; - - __ASM ("shsax %0, %1, %2" : "=r" (result) : "r" (op1), "r" (op2) ); - return(result); -} - -__STATIC_FORCEINLINE uint32_t __SXTB16(uint32_t op1) -{ - uint32_t result; - - __ASM ("sxtb16 %0, %1" : "=r" (result) : "r" (op1)); - return(result); -} - -__STATIC_FORCEINLINE uint32_t __SMUADX (uint32_t op1, uint32_t op2) -{ - uint32_t result; - - __ASM volatile ("smuadx %0, %1, %2" : "=r" (result) : "r" (op1), "r" (op2) ); - return(result); -} - -__STATIC_FORCEINLINE uint32_t __SMLADX (uint32_t op1, uint32_t op2, uint32_t op3) -{ - uint32_t result; - - __ASM volatile ("smladx %0, %1, %2, %3" : "=r" (result) : "r" (op1), "r" (op2), "r" (op3) ); - return(result); -} -__STATIC_FORCEINLINE uint64_t __SMLALD (uint32_t op1, uint32_t op2, uint64_t acc) -{ - union llreg_u{ - uint32_t w32[2]; - uint64_t w64; - } llr; - llr.w64 = acc; - -#ifndef __ARMEB__ /* Little endian */ - __ASM volatile ("smlald %0, %1, %2, %3" : "=r" (llr.w32[0]), "=r" (llr.w32[1]): "r" (op1), "r" (op2) , "0" (llr.w32[0]), "1" (llr.w32[1]) ); -#else /* Big endian */ - __ASM volatile ("smlald %0, %1, %2, %3" : "=r" (llr.w32[1]), "=r" (llr.w32[0]): "r" (op1), "r" (op2) , "0" (llr.w32[1]), "1" (llr.w32[0]) ); -#endif - - return(llr.w64); -} - -__STATIC_FORCEINLINE uint64_t __SMLALDX (uint32_t op1, uint32_t op2, uint64_t acc) -{ - union llreg_u{ - uint32_t w32[2]; - uint64_t w64; - } llr; - llr.w64 = acc; - -#ifndef __ARMEB__ /* Little endian */ - __ASM volatile ("smlaldx %0, %1, %2, %3" : "=r" (llr.w32[0]), "=r" (llr.w32[1]): "r" (op1), "r" (op2) , "0" (llr.w32[0]), "1" (llr.w32[1]) ); -#else /* Big endian */ - __ASM volatile ("smlaldx %0, %1, %2, %3" : "=r" (llr.w32[1]), "=r" (llr.w32[0]): "r" (op1), "r" (op2) , "0" (llr.w32[1]), "1" (llr.w32[0]) ); -#endif - - return(llr.w64); -} - -__STATIC_FORCEINLINE uint32_t __SMUSD (uint32_t op1, uint32_t op2) -{ - uint32_t result; - - __ASM volatile ("smusd %0, %1, %2" : "=r" (result) : "r" (op1), "r" (op2) ); - return(result); -} - -__STATIC_FORCEINLINE uint32_t __SMUSDX (uint32_t op1, uint32_t op2) -{ - uint32_t result; - - __ASM volatile ("smusdx %0, %1, %2" : "=r" (result) : "r" (op1), "r" (op2) ); - return(result); -} - -__STATIC_FORCEINLINE uint32_t __SMLSDX (uint32_t op1, uint32_t op2, uint32_t op3) -{ - uint32_t result; - - __ASM volatile ("smlsdx %0, %1, %2, %3" : "=r" (result) : "r" (op1), "r" (op2), "r" (op3) ); - return(result); -} - -__STATIC_FORCEINLINE int32_t __QADD( int32_t op1, int32_t op2) -{ - int32_t result; +#if (__ARM_FEATURE_DSP == 1) +#define __SADD8 __builtin_arm_sadd8 +#define __QADD8 __builtin_arm_qadd8 +#define __SHADD8 __builtin_arm_shadd8 +#define __UADD8 __builtin_arm_uadd8 +#define __UQADD8 __builtin_arm_uqadd8 +#define __UHADD8 __builtin_arm_uhadd8 +#define __SSUB8 __builtin_arm_ssub8 +#define __QSUB8 __builtin_arm_qsub8 +#define __SHSUB8 __builtin_arm_shsub8 +#define __USUB8 __builtin_arm_usub8 +#define __UQSUB8 __builtin_arm_uqsub8 +#define __UHSUB8 __builtin_arm_uhsub8 +#define __SADD16 __builtin_arm_sadd16 +#define __QADD16 __builtin_arm_qadd16 +#define __SHADD16 __builtin_arm_shadd16 +#define __UADD16 __builtin_arm_uadd16 +#define __UQADD16 __builtin_arm_uqadd16 +#define __UHADD16 __builtin_arm_uhadd16 +#define __SSUB16 __builtin_arm_ssub16 +#define __QSUB16 __builtin_arm_qsub16 +#define __SHSUB16 __builtin_arm_shsub16 +#define __USUB16 __builtin_arm_usub16 +#define __UQSUB16 __builtin_arm_uqsub16 +#define __UHSUB16 __builtin_arm_uhsub16 +#define __SASX __builtin_arm_sasx +#define __QASX __builtin_arm_qasx +#define __SHASX __builtin_arm_shasx +#define __UASX __builtin_arm_uasx +#define __UQASX __builtin_arm_uqasx +#define __UHASX __builtin_arm_uhasx +#define __SSAX __builtin_arm_ssax +#define __QSAX __builtin_arm_qsax +#define __SHSAX __builtin_arm_shsax +#define __USAX __builtin_arm_usax +#define __UQSAX __builtin_arm_uqsax +#define __UHSAX __builtin_arm_uhsax +#define __USAD8 __builtin_arm_usad8 +#define __USADA8 __builtin_arm_usada8 +#define __SSAT16 __builtin_arm_ssat16 +#define __USAT16 __builtin_arm_usat16 +#define __UXTB16 __builtin_arm_uxtb16 +#define __UXTAB16 __builtin_arm_uxtab16 +#define __SXTB16 __builtin_arm_sxtb16 +#define __SXTAB16 __builtin_arm_sxtab16 +#define __SMUAD __builtin_arm_smuad +#define __SMUADX __builtin_arm_smuadx +#define __SMLAD __builtin_arm_smlad +#define __SMLADX __builtin_arm_smladx +#define __SMLALD __builtin_arm_smlald +#define __SMLALDX __builtin_arm_smlaldx +#define __SMUSD __builtin_arm_smusd +#define __SMUSDX __builtin_arm_smusdx +#define __SMLSD __builtin_arm_smlsd +#define __SMLSDX __builtin_arm_smlsdx +#define __SMLSLD __builtin_arm_smlsld +#define __SMLSLDX __builtin_arm_smlsldx +#define __SEL __builtin_arm_sel +#define __QADD __builtin_arm_qadd +#define __QSUB __builtin_arm_qsub + +#define __PKHBT(ARG1,ARG2,ARG3) \ +__extension__ \ +({ \ + uint32_t __RES, __ARG1 = (ARG1), __ARG2 = (ARG2); \ + __ASM ("pkhbt %0, %1, %2, lsl %3" : "=r" (__RES) : "r" (__ARG1), "r" (__ARG2), "I" (ARG3) ); \ + __RES; \ + }) - __ASM volatile ("qadd %0, %1, %2" : "=r" (result) : "r" (op1), "r" (op2) ); - return(result); -} +#define __PKHTB(ARG1,ARG2,ARG3) \ +__extension__ \ +({ \ + uint32_t __RES, __ARG1 = (ARG1), __ARG2 = (ARG2); \ + if (ARG3 == 0) \ + __ASM ("pkhtb %0, %1, %2" : "=r" (__RES) : "r" (__ARG1), "r" (__ARG2) ); \ + else \ + __ASM ("pkhtb %0, %1, %2, asr %3" : "=r" (__RES) : "r" (__ARG1), "r" (__ARG2), "I" (ARG3) ); \ + __RES; \ + }) -__STATIC_FORCEINLINE int32_t __QSUB( int32_t op1, int32_t op2) +__STATIC_FORCEINLINE uint32_t __SXTB16_RORn(uint32_t op1, uint32_t rotate) { - int32_t result; - - __ASM volatile ("qsub %0, %1, %2" : "=r" (result) : "r" (op1), "r" (op2) ); - return(result); + uint32_t result; + if (__builtin_constant_p(rotate) && ((rotate == 8U) || (rotate == 16U) || (rotate == 24U))) + { + __ASM volatile("sxtb16 %0, %1, ROR %2" : "=r"(result) : "r"(op1), "i"(rotate)); + } + else + { + result = __SXTB16(__ROR(op1, rotate)); + } + return result; } - -__STATIC_FORCEINLINE uint32_t __SMUAD (uint32_t op1, uint32_t op2) +__STATIC_FORCEINLINE uint32_t __SXTAB16_RORn(uint32_t op1, uint32_t op2, uint32_t rotate) { - uint32_t result; - - __ASM volatile ("smuad %0, %1, %2" : "=r" (result) : "r" (op1), "r" (op2) ); - return(result); + uint32_t result; + if (__builtin_constant_p(rotate) && ((rotate == 8U) || (rotate == 16U) || (rotate == 24U))) + { + __ASM volatile("sxtab16 %0, %1, %2, ROR %3" : "=r"(result) : "r"(op1), "r"(op2), "i"(rotate)); + } + else + { + result = __SXTAB16(op1, __ROR(op2, rotate)); + } + return result; } - - -#define __PKHBT(ARG1,ARG2,ARG3) ( ((((uint32_t)(ARG1)) ) & 0x0000FFFFUL) | \ - ((((uint32_t)(ARG2)) << (ARG3)) & 0xFFFF0000UL) ) - -#define __PKHTB(ARG1,ARG2,ARG3) ( ((((uint32_t)(ARG1)) ) & 0xFFFF0000UL) | \ - ((((uint32_t)(ARG2)) >> (ARG3)) & 0x0000FFFFUL) ) - __STATIC_FORCEINLINE int32_t __SMMLA (int32_t op1, int32_t op2, int32_t op3) { - int32_t result; - - __ASM ("smmla %0, %1, %2, %3" : "=r" (result): "r" (op1), "r" (op2), "r" (op3) ); - return(result); -} - -__STATIC_FORCEINLINE uint32_t __SMLAD (uint32_t op1, uint32_t op2, uint32_t op3) -{ - uint32_t result; + int32_t result; - __ASM volatile ("smlad %0, %1, %2, %3" : "=r" (result) : "r" (op1), "r" (op2), "r" (op3) ); - return(result); + __ASM volatile ("smmla %0, %1, %2, %3" : "=r" (result): "r" (op1), "r" (op2), "r" (op3) ); + return (result); } -/*@} end of group CMSIS_SIMD_intrinsics */ - - +#endif /* (__ARM_FEATURE_DSP == 1) */ +/** @} end of group CMSIS_SIMD_intrinsics */ /** \defgroup CMSIS_Core_intrinsics CMSIS Core Intrinsics Access to dedicated SIMD instructions