Core: GCC compiler add missing simd/neon instructions
JonatanAntoni committed Oct 12, 2023
1 parent 30e56da commit c2dc2d0
Showing 1 changed file with 105 additions and 207 deletions.
312 changes: 105 additions & 207 deletions CMSIS/Core/Include/a-profile/cmsis_gcc_a.h
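For orientation: the diff below removes the hand-written inline-assembly SIMD wrappers and instead maps the intrinsic names onto the corresponding GCC/ACLE builtins, guarded by __ARM_FEATURE_DSP. A minimal usage sketch under that assumption (the helper name and values are illustrative, and the CMSIS core header is assumed to be included):

#include <stdint.h>

/* __QADD8 adds four packed signed bytes with per-lane saturation; after this
   change it expands to __builtin_arm_qadd8 when __ARM_FEATURE_DSP == 1. */
static inline uint32_t saturating_add4(uint32_t a, uint32_t b)
{
  return __QADD8(a, b);  /* e.g. 0x7F010203 + 0x01010101 -> 0x7F020304 (first lane saturates) */
}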
@@ -622,227 +622,125 @@ __STATIC_FORCEINLINE void __set_FPSCR(uint32_t fpscr)


/* ################### Compiler specific Intrinsics ########################### */
/** \defgroup CMSIS_SIMD_intrinsics CMSIS SIMD Intrinsics
Access to dedicated SIMD instructions
@{
*/

__STATIC_FORCEINLINE uint32_t __QADD8(uint32_t op1, uint32_t op2)
{
uint32_t result;

__ASM ("qadd8 %0, %1, %2" : "=r" (result) : "r" (op1), "r" (op2) );
return(result);
}


__STATIC_FORCEINLINE uint32_t __QSUB8(uint32_t op1, uint32_t op2)
{
uint32_t result;

__ASM ("qsub8 %0, %1, %2" : "=r" (result) : "r" (op1), "r" (op2) );
return(result);
}


__STATIC_FORCEINLINE uint32_t __QADD16(uint32_t op1, uint32_t op2)
{
uint32_t result;

__ASM ("qadd16 %0, %1, %2" : "=r" (result) : "r" (op1), "r" (op2) );
return(result);
}

__STATIC_FORCEINLINE uint32_t __SHADD16(uint32_t op1, uint32_t op2)
{
uint32_t result;

__ASM ("shadd16 %0, %1, %2" : "=r" (result) : "r" (op1), "r" (op2) );
return(result);
}

__STATIC_FORCEINLINE uint32_t __QSUB16(uint32_t op1, uint32_t op2)
{
uint32_t result;

__ASM ("qsub16 %0, %1, %2" : "=r" (result) : "r" (op1), "r" (op2) );
return(result);
}

__STATIC_FORCEINLINE uint32_t __SHSUB16(uint32_t op1, uint32_t op2)
{
uint32_t result;

__ASM ("shsub16 %0, %1, %2" : "=r" (result) : "r" (op1), "r" (op2) );
return(result);
}

__STATIC_FORCEINLINE uint32_t __QASX(uint32_t op1, uint32_t op2)
{
uint32_t result;

__ASM ("qasx %0, %1, %2" : "=r" (result) : "r" (op1), "r" (op2) );
return(result);
}

__STATIC_FORCEINLINE uint32_t __SHASX(uint32_t op1, uint32_t op2)
{
uint32_t result;

__ASM ("shasx %0, %1, %2" : "=r" (result) : "r" (op1), "r" (op2) );
return(result);
}

__STATIC_FORCEINLINE uint32_t __QSAX(uint32_t op1, uint32_t op2)
{
uint32_t result;

__ASM ("qsax %0, %1, %2" : "=r" (result) : "r" (op1), "r" (op2) );
return(result);
}

__STATIC_FORCEINLINE uint32_t __SHSAX(uint32_t op1, uint32_t op2)
{
uint32_t result;

__ASM ("shsax %0, %1, %2" : "=r" (result) : "r" (op1), "r" (op2) );
return(result);
}

__STATIC_FORCEINLINE uint32_t __SXTB16(uint32_t op1)
{
uint32_t result;

__ASM ("sxtb16 %0, %1" : "=r" (result) : "r" (op1));
return(result);
}

__STATIC_FORCEINLINE uint32_t __SMUADX (uint32_t op1, uint32_t op2)
{
uint32_t result;

__ASM volatile ("smuadx %0, %1, %2" : "=r" (result) : "r" (op1), "r" (op2) );
return(result);
}

__STATIC_FORCEINLINE uint32_t __SMLADX (uint32_t op1, uint32_t op2, uint32_t op3)
{
uint32_t result;

__ASM volatile ("smladx %0, %1, %2, %3" : "=r" (result) : "r" (op1), "r" (op2), "r" (op3) );
return(result);
}

__STATIC_FORCEINLINE uint64_t __SMLALD (uint32_t op1, uint32_t op2, uint64_t acc)
{
union llreg_u{
uint32_t w32[2];
uint64_t w64;
} llr;
llr.w64 = acc;

#ifndef __ARMEB__ /* Little endian */
__ASM volatile ("smlald %0, %1, %2, %3" : "=r" (llr.w32[0]), "=r" (llr.w32[1]): "r" (op1), "r" (op2) , "0" (llr.w32[0]), "1" (llr.w32[1]) );
#else /* Big endian */
__ASM volatile ("smlald %0, %1, %2, %3" : "=r" (llr.w32[1]), "=r" (llr.w32[0]): "r" (op1), "r" (op2) , "0" (llr.w32[1]), "1" (llr.w32[0]) );
#endif

return(llr.w64);
}

__STATIC_FORCEINLINE uint64_t __SMLALDX (uint32_t op1, uint32_t op2, uint64_t acc)
{
union llreg_u{
uint32_t w32[2];
uint64_t w64;
} llr;
llr.w64 = acc;

#ifndef __ARMEB__ /* Little endian */
__ASM volatile ("smlaldx %0, %1, %2, %3" : "=r" (llr.w32[0]), "=r" (llr.w32[1]): "r" (op1), "r" (op2) , "0" (llr.w32[0]), "1" (llr.w32[1]) );
#else /* Big endian */
__ASM volatile ("smlaldx %0, %1, %2, %3" : "=r" (llr.w32[1]), "=r" (llr.w32[0]): "r" (op1), "r" (op2) , "0" (llr.w32[1]), "1" (llr.w32[0]) );
#endif

return(llr.w64);
}

__STATIC_FORCEINLINE uint32_t __SMUSD (uint32_t op1, uint32_t op2)
{
uint32_t result;

__ASM volatile ("smusd %0, %1, %2" : "=r" (result) : "r" (op1), "r" (op2) );
return(result);
}

__STATIC_FORCEINLINE uint32_t __SMUSDX (uint32_t op1, uint32_t op2)
{
uint32_t result;

__ASM volatile ("smusdx %0, %1, %2" : "=r" (result) : "r" (op1), "r" (op2) );
return(result);
}

__STATIC_FORCEINLINE uint32_t __SMLSDX (uint32_t op1, uint32_t op2, uint32_t op3)
{
uint32_t result;

__ASM volatile ("smlsdx %0, %1, %2, %3" : "=r" (result) : "r" (op1), "r" (op2), "r" (op3) );
return(result);
}

__STATIC_FORCEINLINE int32_t __QADD( int32_t op1, int32_t op2)
{
int32_t result;

__ASM volatile ("qadd %0, %1, %2" : "=r" (result) : "r" (op1), "r" (op2) );
return(result);
}

#if (__ARM_FEATURE_DSP == 1)
#define __SADD8 __builtin_arm_sadd8
#define __QADD8 __builtin_arm_qadd8
#define __SHADD8 __builtin_arm_shadd8
#define __UADD8 __builtin_arm_uadd8
#define __UQADD8 __builtin_arm_uqadd8
#define __UHADD8 __builtin_arm_uhadd8
#define __SSUB8 __builtin_arm_ssub8
#define __QSUB8 __builtin_arm_qsub8
#define __SHSUB8 __builtin_arm_shsub8
#define __USUB8 __builtin_arm_usub8
#define __UQSUB8 __builtin_arm_uqsub8
#define __UHSUB8 __builtin_arm_uhsub8
#define __SADD16 __builtin_arm_sadd16
#define __QADD16 __builtin_arm_qadd16
#define __SHADD16 __builtin_arm_shadd16
#define __UADD16 __builtin_arm_uadd16
#define __UQADD16 __builtin_arm_uqadd16
#define __UHADD16 __builtin_arm_uhadd16
#define __SSUB16 __builtin_arm_ssub16
#define __QSUB16 __builtin_arm_qsub16
#define __SHSUB16 __builtin_arm_shsub16
#define __USUB16 __builtin_arm_usub16
#define __UQSUB16 __builtin_arm_uqsub16
#define __UHSUB16 __builtin_arm_uhsub16
#define __SASX __builtin_arm_sasx
#define __QASX __builtin_arm_qasx
#define __SHASX __builtin_arm_shasx
#define __UASX __builtin_arm_uasx
#define __UQASX __builtin_arm_uqasx
#define __UHASX __builtin_arm_uhasx
#define __SSAX __builtin_arm_ssax
#define __QSAX __builtin_arm_qsax
#define __SHSAX __builtin_arm_shsax
#define __USAX __builtin_arm_usax
#define __UQSAX __builtin_arm_uqsax
#define __UHSAX __builtin_arm_uhsax
#define __USAD8 __builtin_arm_usad8
#define __USADA8 __builtin_arm_usada8
#define __SSAT16 __builtin_arm_ssat16
#define __USAT16 __builtin_arm_usat16
#define __UXTB16 __builtin_arm_uxtb16
#define __UXTAB16 __builtin_arm_uxtab16
#define __SXTB16 __builtin_arm_sxtb16
#define __SXTAB16 __builtin_arm_sxtab16
#define __SMUAD __builtin_arm_smuad
#define __SMUADX __builtin_arm_smuadx
#define __SMLAD __builtin_arm_smlad
#define __SMLADX __builtin_arm_smladx
#define __SMLALD __builtin_arm_smlald
#define __SMLALDX __builtin_arm_smlaldx
#define __SMUSD __builtin_arm_smusd
#define __SMUSDX __builtin_arm_smusdx
#define __SMLSD __builtin_arm_smlsd
#define __SMLSDX __builtin_arm_smlsdx
#define __SMLSLD __builtin_arm_smlsld
#define __SMLSLDX __builtin_arm_smlsldx
#define __SEL __builtin_arm_sel
#define __QADD __builtin_arm_qadd
#define __QSUB __builtin_arm_qsub

#define __PKHBT(ARG1,ARG2,ARG3) \
__extension__ \
({ \
uint32_t __RES, __ARG1 = (ARG1), __ARG2 = (ARG2); \
__ASM ("pkhbt %0, %1, %2, lsl %3" : "=r" (__RES) : "r" (__ARG1), "r" (__ARG2), "I" (ARG3) ); \
__RES; \
})

__ASM volatile ("qadd %0, %1, %2" : "=r" (result) : "r" (op1), "r" (op2) );
return(result);
}
#define __PKHTB(ARG1,ARG2,ARG3) \
__extension__ \
({ \
uint32_t __RES, __ARG1 = (ARG1), __ARG2 = (ARG2); \
if (ARG3 == 0) \
__ASM ("pkhtb %0, %1, %2" : "=r" (__RES) : "r" (__ARG1), "r" (__ARG2) ); \
else \
__ASM ("pkhtb %0, %1, %2, asr %3" : "=r" (__RES) : "r" (__ARG1), "r" (__ARG2), "I" (ARG3) ); \
__RES; \
})
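
For illustration, a hypothetical helper showing the two packing macros above; it assumes __ARM_FEATURE_DSP == 1, and the shift amount must be a compile-time constant because of the "I" inline-assembly constraint:

static inline uint32_t pack_halfwords_example(void)
{
  uint32_t lo = 0x0000AAAAU;
  uint32_t hi = 0x0000BBBBU;
  uint32_t bt = __PKHBT(lo, hi, 16);                    /* 0xBBBBAAAA: low half from lo, high half from hi << 16 */
  uint32_t tb = __PKHTB(0xCCCC0000U, 0xDDDD0000U, 16);  /* 0xCCCCDDDD: high half kept, low half from asr #16 */
  return bt ^ tb;
}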

__STATIC_FORCEINLINE int32_t __QSUB( int32_t op1, int32_t op2)
{
int32_t result;

__ASM volatile ("qsub %0, %1, %2" : "=r" (result) : "r" (op1), "r" (op2) );
return(result);
}

__STATIC_FORCEINLINE uint32_t __SXTB16_RORn(uint32_t op1, uint32_t rotate)
{
uint32_t result;
if (__builtin_constant_p(rotate) && ((rotate == 8U) || (rotate == 16U) || (rotate == 24U)))
{
__ASM volatile("sxtb16 %0, %1, ROR %2" : "=r"(result) : "r"(op1), "i"(rotate));
}
else
{
result = __SXTB16(__ROR(op1, rotate));
}
return result;
}


__STATIC_FORCEINLINE uint32_t __SMUAD (uint32_t op1, uint32_t op2)
{
uint32_t result;

__ASM volatile ("smuad %0, %1, %2" : "=r" (result) : "r" (op1), "r" (op2) );
return(result);
}

__STATIC_FORCEINLINE uint32_t __SXTAB16_RORn(uint32_t op1, uint32_t op2, uint32_t rotate)
{
uint32_t result;
if (__builtin_constant_p(rotate) && ((rotate == 8U) || (rotate == 16U) || (rotate == 24U)))
{
__ASM volatile("sxtab16 %0, %1, %2, ROR %3" : "=r"(result) : "r"(op1), "r"(op2), "i"(rotate));
}
else
{
result = __SXTAB16(op1, __ROR(op2, rotate));
}
return result;
}
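
The two *_RORn helpers above use __builtin_constant_p to pick the fused form: when the rotation is a literal 8, 16 or 24 they emit a single sxtb16/sxtab16 with ROR, otherwise they fall back to an explicit __ROR followed by the plain instruction. A small illustrative example of the constant path (the helper name is hypothetical; __ARM_FEATURE_DSP == 1 assumed):

/* Sign-extend bytes 1 and 3 of a packed int8x4 word into two int16 lanes. */
static inline uint32_t extend_odd_bytes(uint32_t packed_s8x4)
{
  /* rotate is the literal 8U, so this compiles to a single "sxtb16 ..., ROR #8". */
  return __SXTB16_RORn(packed_s8x4, 8U);
}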



#define __PKHBT(ARG1,ARG2,ARG3) ( ((((uint32_t)(ARG1)) ) & 0x0000FFFFUL) | \
((((uint32_t)(ARG2)) << (ARG3)) & 0xFFFF0000UL) )

#define __PKHTB(ARG1,ARG2,ARG3) ( ((((uint32_t)(ARG1)) ) & 0xFFFF0000UL) | \
((((uint32_t)(ARG2)) >> (ARG3)) & 0x0000FFFFUL) )

__STATIC_FORCEINLINE int32_t __SMMLA (int32_t op1, int32_t op2, int32_t op3)
{
int32_t result;

__ASM ("smmla %0, %1, %2, %3" : "=r" (result): "r" (op1), "r" (op2), "r" (op3) );
return(result);
}

__STATIC_FORCEINLINE uint32_t __SMLAD (uint32_t op1, uint32_t op2, uint32_t op3)
{
uint32_t result;

__ASM volatile ("smlad %0, %1, %2, %3" : "=r" (result) : "r" (op1), "r" (op2), "r" (op3) );
return(result);
}

__STATIC_FORCEINLINE int32_t __SMMLA (int32_t op1, int32_t op2, int32_t op3)
{
int32_t result;

__ASM volatile ("smmla %0, %1, %2, %3" : "=r" (result): "r" (op1), "r" (op2), "r" (op3) );
return (result);
}

/*@} end of group CMSIS_SIMD_intrinsics */


#endif /* (__ARM_FEATURE_DSP == 1) */
/** @} end of group CMSIS_SIMD_intrinsics */

/** \defgroup CMSIS_Core_intrinsics CMSIS Core Intrinsics
Access to dedicated SIMD instructions
