From 0a5dbf13d3b82c07ed98d5a79935274b9fb6b1fd Mon Sep 17 00:00:00 2001 From: "tingbo.liao" Date: Wed, 8 Jan 2025 11:00:35 +0800 Subject: [PATCH] Optimize the omatcopy_cn and zomatcopy_cn kernels with RVV 1.0 intrinsic. Signed-off-by: tingbo.liao --- kernel/riscv64/KERNEL.x280 | 6 ++ kernel/riscv64/omatcopy_cn_rvv.c | 109 ++++++++++++++++++++++++++++++ kernel/riscv64/zomatcopy_cn_rvv.c | 100 +++++++++++++++++++++++++++ 3 files changed, 215 insertions(+) create mode 100644 kernel/riscv64/omatcopy_cn_rvv.c create mode 100644 kernel/riscv64/zomatcopy_cn_rvv.c diff --git a/kernel/riscv64/KERNEL.x280 b/kernel/riscv64/KERNEL.x280 index 86708fe015..e909ca9599 100644 --- a/kernel/riscv64/KERNEL.x280 +++ b/kernel/riscv64/KERNEL.x280 @@ -279,3 +279,9 @@ endif ifndef ZGEMM_BETA ZGEMM_BETA = zgemm_beta_rvv.c endif + +ZOMATCOPY_CN = zomatcopy_cn_rvv.c +COMATCOPY_CN = zomatcopy_cn_rvv.c + +DOMATCOPY_CN = omatcopy_cn_rvv.c +SOMATCOPY_CN = omatcopy_cn_rvv.c \ No newline at end of file diff --git a/kernel/riscv64/omatcopy_cn_rvv.c b/kernel/riscv64/omatcopy_cn_rvv.c new file mode 100644 index 0000000000..8cd1fb545e --- /dev/null +++ b/kernel/riscv64/omatcopy_cn_rvv.c @@ -0,0 +1,109 @@ +/*************************************************************************** +Copyright (c) 2013, The OpenBLAS Project +All rights reserved. +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in +the documentation and/or other materials provided with the +distribution. +3. Neither the name of the OpenBLAS project nor the names of +its contributors may be used to endorse or promote products +derived from this software without specific prior written permission. +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE +USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*****************************************************************************/ + +#include "common.h" +#include + +#if !defined(DOUBLE) +#define VSETVL_MAX __riscv_vsetvlmax_e32m8() +#define VSETVL(n) __riscv_vsetvl_e32m8(n) +#define FLOAT_V_T vfloat32m8_t +#define VLEV_FLOAT __riscv_vle32_v_f32m8 +#define VSEV_FLOAT __riscv_vse32_v_f32m8 +#define VFMULVF_FLOAT __riscv_vfmul_vf_f32m8 +#define VFMVVF_FLOAT __riscv_vfmv_v_f_f32m8 +#else +#define VSETVL_MAX __riscv_vsetvlmax_e64m8() +#define VSETVL(n) __riscv_vsetvl_e64m8(n) +#define FLOAT_V_T vfloat64m8_t +#define VLEV_FLOAT __riscv_vle64_v_f64m8 +#define VSEV_FLOAT __riscv_vse64_v_f64m8 +#define VFMULVF_FLOAT __riscv_vfmul_vf_f64m8 +#define VFMVVF_FLOAT __riscv_vfmv_v_f_f64m8 +#endif + + +int CNAME(BLASLONG rows, BLASLONG cols, FLOAT alpha, FLOAT *a, BLASLONG lda, FLOAT *b, BLASLONG ldb) +{ + BLASLONG i,j; + FLOAT *aptr,*bptr; + size_t vl; + + FLOAT_V_T va, vb; + if ( rows <= 0 ) return(0); + if ( cols <= 0 ) return(0); + + aptr = a; + bptr = b; + + if ( alpha == 0.0 ) + { + vl = VSETVL_MAX; + va = VFMVVF_FLOAT(0, vl); + for ( i=0; i + +#if defined(DOUBLE) +#define VLSEG2_FLOAT __riscv_vlseg2e64_v_f64m4x2 +#define VSSEG2_FLOAT __riscv_vsseg2e64_v_f64m4x2 +#define VSETVL __riscv_vsetvl_e64m4 +#define FLOAT_VX2_T vfloat64m4x2_t +#define VGET_VX2 __riscv_vget_v_f64m4x2_f64m4 +#define VSET_VX2 __riscv_vset_v_f64m4_f64m4x2 +#define FLOAT_V vfloat64m4_t +#define VFMULVF_FLOAT __riscv_vfmul_vf_f64m4 +#define VFMACCVF_FLOAT __riscv_vfmacc_vf_f64m4 +#else +#define VLSEG2_FLOAT __riscv_vlseg2e32_v_f32m4x2 +#define VSSEG2_FLOAT __riscv_vsseg2e32_v_f32m4x2 +#define VSETVL __riscv_vsetvl_e32m4 +#define FLOAT_VX2_T vfloat32m4x2_t +#define VGET_VX2 __riscv_vget_v_f32m4x2_f32m4 +#define VSET_VX2 __riscv_vset_v_f32m4_f32m4x2 +#define FLOAT_V vfloat32m4_t +#define VFMULVF_FLOAT __riscv_vfmul_vf_f32m4 +#define VFMACCVF_FLOAT __riscv_vfmacc_vf_f32m4 +#endif + +int CNAME(BLASLONG rows, BLASLONG cols, FLOAT alpha_r, FLOAT alpha_i, FLOAT *a, BLASLONG lda, FLOAT *b, BLASLONG ldb) +{ + BLASLONG i,j,ia; + FLOAT *aptr,*bptr; + size_t vl; + FLOAT_VX2_T va, vb; + FLOAT_V va0, va1, vb0, vb1, vtemp; + + if ( rows <= 0 ) return(0); + if ( cols <= 0 ) return(0); + + aptr = a; + bptr = b; + + lda *= 2; + ldb *= 2; + + for ( i=0; i