From 83ce97a4ca44c1aedc9f825bcb11f3a999f09c60 Mon Sep 17 00:00:00 2001 From: gxw Date: Tue, 30 Jan 2024 16:54:14 +0800 Subject: [PATCH] LoongArch64: Handle NAN and INF --- kernel/loongarch64/cscal_lasx.S | 149 +------------------------------- kernel/loongarch64/cscal_lsx.S | 130 +--------------------------- 2 files changed, 4 insertions(+), 275 deletions(-) diff --git a/kernel/loongarch64/cscal_lasx.S b/kernel/loongarch64/cscal_lasx.S index 3605a6c0e8..f535266630 100644 --- a/kernel/loongarch64/cscal_lasx.S +++ b/kernel/loongarch64/cscal_lasx.S @@ -99,7 +99,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. b .L113 //alpha_r != 0.0 && alpha_i == 0.0 .L14: - bceqz $fcc1, .L112 //alpha_r == 0.0 && alpha_i != 0.0 + bceqz $fcc1, .L114 //alpha_r == 0.0 && alpha_i != 0.0 b .L111 //alpha_r == 0.0 && alpha_i == 0.0 .align 3 @@ -117,38 +117,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. b .L997 .align 3 -.L112: //alpha_r == 0.0 && alpha_i != 0.0 - xvld VX0, X, 0 * SIZE -#ifdef DOUBLE - xvld VX1, X, 4 * SIZE - xvpickev.d x1, VX1, VX0 - xvpickod.d x2, VX1, VX0 - xvfmul.d x3, VXAI, x2 - xvfsub.d x3, VXZ, x3 - xvfmul.d x4, VXAI, x1 - xvilvl.d VX2, x4 ,x3 - xvilvh.d VX3, x4, x3 - xvst VX2, X, 0 * SIZE - xvst VX3, X, 4 * SIZE - addi.d X, X, 8 * SIZE -#else - xvld VX1, X, 8 * SIZE - xvpickev.w x1, VX1, VX0 - xvpickod.w x2, VX1, VX0 - xvfmul.s x3, VXAI, x2 - xvfsub.s x3, VXZ, x3 - xvfmul.s x4, VXAI, x1 - xvilvl.w VX2, x4 ,x3 - xvilvh.w VX3, x4, x3 - xvst VX2, X, 0 * SIZE - xvst VX3, X, 8 * SIZE - addi.d X, X, 16 * SIZE -#endif - addi.d I, I, -1 - blt $r0, I, .L112 - b .L997 - .align 3 - .L113: //alpha_r != 0.0 && alpha_i == 0.0 xvld VX0, X, 0 * SIZE #ifdef DOUBLE @@ -227,7 +195,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. b .L223 //alpha_r != 0.0 && alpha_i == 0.0 .L24: - bceqz $fcc1, .L222 //alpha_r == 0.0 && alpha_i != 0.0 + bceqz $fcc1, .L224 //alpha_r == 0.0 && alpha_i != 0.0 b .L221 //alpha_r == 0.0 && alpha_i == 0.0 .align 3 @@ -275,119 +243,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
b .L997 .align 3 -.L222: //alpha_r == 0.0 && alpha_i != 0.0 -#ifdef DOUBLE - ld.d t1, X, 0 * SIZE - ld.d t2, X, 1 * SIZE - add.d X, X, INCX - ld.d t3, X, 0 * SIZE - ld.d t4, X, 1 * SIZE - add.d X, X, INCX - xvinsgr2vr.d x1, t1, 0 - xvinsgr2vr.d x2, t2, 0 - xvinsgr2vr.d x1, t3, 1 - xvinsgr2vr.d x2, t4, 1 - ld.d t1, X, 0 * SIZE - ld.d t2, X, 1 * SIZE - add.d X, X, INCX - ld.d t3, X, 0 * SIZE - ld.d t4, X, 1 * SIZE - xvinsgr2vr.d x1, t1, 2 - xvinsgr2vr.d x2, t2, 2 - xvinsgr2vr.d x1, t3, 3 - xvinsgr2vr.d x2, t4, 3 - add.d X, X, INCX - - xvfmul.d x3, VXAI, x2 - xvfsub.d x3, VXZ, x3 - xvfmul.d x4, VXAI, x1 - addi.d I, I, -1 - xvstelm.d x3, XX, 0 * SIZE, 0 - xvstelm.d x4, XX, 1 * SIZE, 0 - add.d XX, XX, INCX - xvstelm.d x3, XX, 0 * SIZE, 1 - xvstelm.d x4, XX, 1 * SIZE, 1 - add.d XX, XX, INCX - xvstelm.d x3, XX, 0 * SIZE, 2 - xvstelm.d x4, XX, 1 * SIZE, 2 - add.d XX, XX, INCX - xvstelm.d x3, XX, 0 * SIZE, 3 - xvstelm.d x4, XX, 1 * SIZE, 3 -#else - ld.w t1, X, 0 * SIZE - ld.w t2, X, 1 * SIZE - add.d X, X, INCX - ld.w t3, X, 0 * SIZE - ld.w t4, X, 1 * SIZE - add.d X, X, INCX - xvinsgr2vr.w x1, t1, 0 - xvinsgr2vr.w x2, t2, 0 - xvinsgr2vr.w x1, t3, 1 - xvinsgr2vr.w x2, t4, 1 - ld.w t1, X, 0 * SIZE - ld.w t2, X, 1 * SIZE - add.d X, X, INCX - ld.w t3, X, 0 * SIZE - ld.w t4, X, 1 * SIZE - xvinsgr2vr.w x1, t1, 2 - xvinsgr2vr.w x2, t2, 2 - xvinsgr2vr.w x1, t3, 3 - xvinsgr2vr.w x2, t4, 3 - add.d X, X, INCX - ld.w t1, X, 0 * SIZE - ld.w t2, X, 1 * SIZE - add.d X, X, INCX - ld.w t3, X, 0 * SIZE - ld.w t4, X, 1 * SIZE - add.d X, X, INCX - xvinsgr2vr.w x1, t1, 4 - xvinsgr2vr.w x2, t2, 4 - xvinsgr2vr.w x1, t3, 5 - xvinsgr2vr.w x2, t4, 5 - ld.w t1, X, 0 * SIZE - ld.w t2, X, 1 * SIZE - add.d X, X, INCX - ld.w t3, X, 0 * SIZE - ld.w t4, X, 1 * SIZE - xvinsgr2vr.w x1, t1, 6 - xvinsgr2vr.w x2, t2, 6 - xvinsgr2vr.w x1, t3, 7 - xvinsgr2vr.w x2, t4, 7 - add.d X, X, INCX - - xvfmul.s x3, VXAI, x2 - xvfsub.s x3, VXZ, x3 - xvfmul.s x4, VXAI, x1 - addi.d I, I, -1 - xvstelm.w x3, XX, 0 * SIZE, 0 - xvstelm.w x4, XX, 1 * SIZE, 0 - add.d XX, XX, INCX - xvstelm.w x3, XX, 0 * SIZE, 1 - xvstelm.w x4, XX, 1 * SIZE, 1 - add.d XX, XX, INCX - xvstelm.w x3, XX, 0 * SIZE, 2 - xvstelm.w x4, XX, 1 * SIZE, 2 - add.d XX, XX, INCX - xvstelm.w x3, XX, 0 * SIZE, 3 - xvstelm.w x4, XX, 1 * SIZE, 3 - add.d XX, XX, INCX - xvstelm.w x3, XX, 0 * SIZE, 4 - xvstelm.w x4, XX, 1 * SIZE, 4 - add.d XX, XX, INCX - xvstelm.w x3, XX, 0 * SIZE, 5 - xvstelm.w x4, XX, 1 * SIZE, 5 - add.d XX, XX, INCX - xvstelm.w x3, XX, 0 * SIZE, 6 - xvstelm.w x4, XX, 1 * SIZE, 6 - add.d XX, XX, INCX - xvstelm.w x3, XX, 0 * SIZE, 7 - xvstelm.w x4, XX, 1 * SIZE, 7 -#endif - add.d XX, XX, INCX - blt $r0, I, .L222 - b .L997 - .align 3 - .L223: //alpha_r != 0.0 && alpha_i == 0.0 #ifdef DOUBLE ld.d t1, X, 0 * SIZE diff --git a/kernel/loongarch64/cscal_lsx.S b/kernel/loongarch64/cscal_lsx.S index f442a754fb..241d3d16e4 100644 --- a/kernel/loongarch64/cscal_lsx.S +++ b/kernel/loongarch64/cscal_lsx.S @@ -97,7 +97,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. b .L113 //alpha_r != 0.0 && alpha_i == 0.0 .L14: - bceqz $fcc1, .L112 //alpha_r == 0.0 && alpha_i != 0.0 + bceqz $fcc1, .L114 //alpha_r == 0.0 && alpha_i != 0.0 b .L111 //alpha_r == 0.0 && alpha_i == 0.0 .align 3 @@ -116,48 +116,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
b .L997 .align 3 -.L112: //alpha_r == 0.0 && alpha_i != 0.0 - vld VX0, X, 0 * SIZE -#ifdef DOUBLE - vld VX1, X, 2 * SIZE - vpickev.d x1, VX1, VX0 - vpickod.d x2, VX1, VX0 - vfmul.d x3, VXAI, x2 - vfsub.d x3, VXZ, x3 - vfmul.d x4, VXAI, x1 - vilvl.d VX2, x4 ,x3 - vilvh.d VX3, x4, x3 - vst VX2, X, 0 * SIZE - vst VX3, X, 2 * SIZE - vld VX0, X, 4 * SIZE - vld VX1, X, 6 * SIZE - vpickev.d x1, VX1, VX0 - vpickod.d x2, VX1, VX0 - vfmul.d x3, VXAI, x2 - vfsub.d x3, VXZ, x3 - vfmul.d x4, VXAI, x1 - vilvl.d VX2, x4 ,x3 - vilvh.d VX3, x4, x3 - vst VX2, X, 4 * SIZE - vst VX3, X, 6 * SIZE -#else - vld VX1, X, 4 * SIZE - vpickev.w x1, VX1, VX0 - vpickod.w x2, VX1, VX0 - vfmul.s x3, VXAI, x2 - vfsub.s x3, VXZ, x3 - vfmul.s x4, VXAI, x1 - vilvl.w VX2, x4 ,x3 - vilvh.w VX3, x4, x3 - vst VX2, X, 0 * SIZE - vst VX3, X, 4 * SIZE -#endif - addi.d X, X, 8 * SIZE - addi.d I, I, -1 - blt $r0, I, .L112 - b .L997 - .align 3 - .L113: //alpha_r != 0.0 && alpha_i == 0.0 vld VX0, X, 0 * SIZE #ifdef DOUBLE @@ -256,7 +214,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. b .L223 //alpha_r != 0.0 && alpha_i == 0.0 .L24: - bceqz $fcc1, .L222 //alpha_r == 0.0 && alpha_i != 0.0 + bceqz $fcc1, .L224 //alpha_r == 0.0 && alpha_i != 0.0 b .L221 //alpha_r == 0.0 && alpha_i == 0.0 .align 3 @@ -292,90 +250,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. b .L997 .align 3 -.L222: //alpha_r == 0.0 && alpha_i != 0.0 -#ifdef DOUBLE - ld.d t1, X, 0 * SIZE - ld.d t2, X, 1 * SIZE - add.d X, X, INCX - ld.d t3, X, 0 * SIZE - ld.d t4, X, 1 * SIZE - add.d X, X, INCX - vinsgr2vr.d x1, t1, 0 - vinsgr2vr.d x2, t2, 0 - vinsgr2vr.d x1, t3, 1 - vinsgr2vr.d x2, t4, 1 - vfmul.d x3, VXAI, x2 - vfsub.d x3, VXZ, x3 - vfmul.d x4, VXAI, x1 - vstelm.d x3, XX, 0 * SIZE, 0 - vstelm.d x4, XX, 1 * SIZE, 0 - add.d XX, XX, INCX - vstelm.d x3, XX, 0 * SIZE, 1 - vstelm.d x4, XX, 1 * SIZE, 1 - add.d XX, XX, INCX - - ld.d t1, X, 0 * SIZE - ld.d t2, X, 1 * SIZE - add.d X, X, INCX - ld.d t3, X, 0 * SIZE - ld.d t4, X, 1 * SIZE - vinsgr2vr.d x1, t1, 0 - vinsgr2vr.d x2, t2, 0 - vinsgr2vr.d x1, t3, 1 - vinsgr2vr.d x2, t4, 1 - add.d X, X, INCX - vfmul.d x3, VXAI, x2 - vfsub.d x3, VXZ, x3 - vfmul.d x4, VXAI, x1 - addi.d I, I, -1 - vstelm.d x3, XX, 0 * SIZE, 0 - vstelm.d x4, XX, 1 * SIZE, 0 - add.d XX, XX, INCX - vstelm.d x3, XX, 0 * SIZE, 1 - vstelm.d x4, XX, 1 * SIZE, 1 -#else - ld.w t1, X, 0 * SIZE - ld.w t2, X, 1 * SIZE - add.d X, X, INCX - ld.w t3, X, 0 * SIZE - ld.w t4, X, 1 * SIZE - add.d X, X, INCX - vinsgr2vr.w x1, t1, 0 - vinsgr2vr.w x2, t2, 0 - vinsgr2vr.w x1, t3, 1 - vinsgr2vr.w x2, t4, 1 - ld.w t1, X, 0 * SIZE - ld.w t2, X, 1 * SIZE - add.d X, X, INCX - ld.w t3, X, 0 * SIZE - ld.w t4, X, 1 * SIZE - vinsgr2vr.w x1, t1, 2 - vinsgr2vr.w x2, t2, 2 - vinsgr2vr.w x1, t3, 3 - vinsgr2vr.w x2, t4, 3 - add.d X, X, INCX - - vfmul.s x3, VXAI, x2 - vfsub.s x3, VXZ, x3 - vfmul.s x4, VXAI, x1 - addi.d I, I, -1 - vstelm.w x3, XX, 0 * SIZE, 0 - vstelm.w x4, XX, 1 * SIZE, 0 - add.d XX, XX, INCX - vstelm.w x3, XX, 0 * SIZE, 1 - vstelm.w x4, XX, 1 * SIZE, 1 - add.d XX, XX, INCX - vstelm.w x3, XX, 0 * SIZE, 2 - vstelm.w x4, XX, 1 * SIZE, 2 - add.d XX, XX, INCX - vstelm.w x3, XX, 0 * SIZE, 3 - vstelm.w x4, XX, 1 * SIZE, 3 -#endif - add.d XX, XX, INCX - blt $r0, I, .L222 - b .L997 - .align 3 - .L223: //alpha_r != 0.0 && alpha_i == 0.0 #ifdef DOUBLE ld.d t1, X, 0 * SIZE
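
Note on why this fixes NaN/Inf handling (illustrative commentary, not part of the patch): the deleted .L112/.L222 blocks special-cased alpha_r == 0.0 && alpha_i != 0.0 and skipped the alpha_r multiplications entirely, so a NaN or Inf in the real component of x never reached the real component of the result (and likewise for the imaginary side). The retargeted branches now fall through to .L114/.L224, which by the label layout appear to be the general path that performs the full complex multiply, where 0 * Inf and 0 * NaN correctly yield NaN under IEEE 754. The C sketch below shows the semantic difference; it is a minimal illustration with invented variable names, assuming standard cscal semantics x := alpha * x.

    /* Sketch: how the removed shortcut loses a NaN/Inf that the full
     * complex multiply would propagate. Not part of the patch. */
    #include <math.h>
    #include <stdio.h>

    int main(void) {
        float alpha_r = 0.0f, alpha_i = 2.0f;   /* alpha_r == 0, alpha_i != 0 */
        float xr = INFINITY, xi = 3.0f;         /* Inf in the real component  */

        /* Shortcut taken by the removed .L112/.L222 paths: the alpha_r
         * terms are dropped, so xr never enters the real result. */
        float short_r = -alpha_i * xi;              /* -6: the Inf is lost    */
        float short_i =  alpha_i * xr;              /*  Inf                   */

        /* Full complex multiply on the retained general path. */
        float full_r = alpha_r * xr - alpha_i * xi; /* 0*Inf - 6 = NaN        */
        float full_i = alpha_r * xi + alpha_i * xr; /* 0*3 + Inf = Inf        */

        printf("shortcut: (%g, %g)\n", short_r, short_i);
        printf("full:     (%g, %g)\n", full_r,  full_i);
        return 0;
    }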