
Commit

Merge pull request #4464 from XiWeiGu/loongarch64-zscal
LoongArch64: Handle NAN and INF
martin-frbg authored Jan 30, 2024
2 parents 9f06301 + 83ce97a commit 98c9ff3
Showing 2 changed files with 4 additions and 275 deletions.
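
Background for the change (not part of the commit message; the C names below are illustrative only): the deleted .L112/.L222 blocks are the vectorized fast paths for alpha_r == 0.0 && alpha_i != 0.0. They compute only -alpha_i*x_i for the new real part and alpha_i*x_r for the new imaginary part, dropping the 0*x_r and 0*x_i terms, so a NaN or Inf in x is not propagated the way a full complex multiply propagates it. With this patch the bceqz branches fall through to what appears to be the general multiply path (.L114/.L224) instead. A minimal C sketch of the difference, assuming alpha = (0, 2) and x = (Inf, 1):

    #include <math.h>
    #include <stdio.h>

    int main(void) {
        double ai = 2.0;                      /* alpha_i, with alpha_r == 0 */
        double xr = INFINITY, xi = 1.0;       /* one input element */

        /* Shortcut taken by the removed branches: drop the 0*x terms. */
        double fast_re = -ai * xi;            /* -2  */
        double fast_im =  ai * xr;            /* inf */

        /* Full complex multiply (0 + ai*i) * (xr + xi*i). */
        double full_re = 0.0 * xr - ai * xi;  /* 0*inf - 2 = nan */
        double full_im = 0.0 * xi + ai * xr;  /* 0*1 + inf = inf */

        printf("shortcut: (%g, %g)\n", fast_re, fast_im);
        printf("full:     (%g, %g)\n", full_re, full_im);
        return 0;
    }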
149 changes: 2 additions & 147 deletions kernel/loongarch64/cscal_lasx.S
@@ -99,7 +99,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
b .L113 //alpha_r != 0.0 && alpha_i == 0.0

.L14:
bceqz $fcc1, .L112 //alpha_r == 0.0 && alpha_i != 0.0
bceqz $fcc1, .L114 //alpha_r == 0.0 && alpha_i != 0.0
b .L111 //alpha_r == 0.0 && alpha_i == 0.0
.align 3

@@ -117,38 +117,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
b .L997
.align 3

.L112: //alpha_r == 0.0 && alpha_i != 0.0
xvld VX0, X, 0 * SIZE
#ifdef DOUBLE
xvld VX1, X, 4 * SIZE
xvpickev.d x1, VX1, VX0
xvpickod.d x2, VX1, VX0
xvfmul.d x3, VXAI, x2
xvfsub.d x3, VXZ, x3
xvfmul.d x4, VXAI, x1
xvilvl.d VX2, x4 ,x3
xvilvh.d VX3, x4, x3
xvst VX2, X, 0 * SIZE
xvst VX3, X, 4 * SIZE
addi.d X, X, 8 * SIZE
#else
xvld VX1, X, 8 * SIZE
xvpickev.w x1, VX1, VX0
xvpickod.w x2, VX1, VX0
xvfmul.s x3, VXAI, x2
xvfsub.s x3, VXZ, x3
xvfmul.s x4, VXAI, x1
xvilvl.w VX2, x4 ,x3
xvilvh.w VX3, x4, x3
xvst VX2, X, 0 * SIZE
xvst VX3, X, 8 * SIZE
addi.d X, X, 16 * SIZE
#endif
addi.d I, I, -1
blt $r0, I, .L112
b .L997
.align 3

.L113: //alpha_r != 0.0 && alpha_i == 0.0
xvld VX0, X, 0 * SIZE
#ifdef DOUBLE
@@ -227,7 +195,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
b .L223 //alpha_r != 0.0 && alpha_i == 0.0

.L24:
bceqz $fcc1, .L222 //alpha_r == 0.0 && alpha_i != 0.0
bceqz $fcc1, .L224 //alpha_r == 0.0 && alpha_i != 0.0
b .L221 //alpha_r == 0.0 && alpha_i == 0.0
.align 3

@@ -275,119 +243,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
b .L997
.align 3

.L222: //alpha_r == 0.0 && alpha_i != 0.0
#ifdef DOUBLE
ld.d t1, X, 0 * SIZE
ld.d t2, X, 1 * SIZE
add.d X, X, INCX
ld.d t3, X, 0 * SIZE
ld.d t4, X, 1 * SIZE
add.d X, X, INCX
xvinsgr2vr.d x1, t1, 0
xvinsgr2vr.d x2, t2, 0
xvinsgr2vr.d x1, t3, 1
xvinsgr2vr.d x2, t4, 1
ld.d t1, X, 0 * SIZE
ld.d t2, X, 1 * SIZE
add.d X, X, INCX
ld.d t3, X, 0 * SIZE
ld.d t4, X, 1 * SIZE
xvinsgr2vr.d x1, t1, 2
xvinsgr2vr.d x2, t2, 2
xvinsgr2vr.d x1, t3, 3
xvinsgr2vr.d x2, t4, 3
add.d X, X, INCX

xvfmul.d x3, VXAI, x2
xvfsub.d x3, VXZ, x3
xvfmul.d x4, VXAI, x1
addi.d I, I, -1
xvstelm.d x3, XX, 0 * SIZE, 0
xvstelm.d x4, XX, 1 * SIZE, 0
add.d XX, XX, INCX
xvstelm.d x3, XX, 0 * SIZE, 1
xvstelm.d x4, XX, 1 * SIZE, 1
add.d XX, XX, INCX
xvstelm.d x3, XX, 0 * SIZE, 2
xvstelm.d x4, XX, 1 * SIZE, 2
add.d XX, XX, INCX
xvstelm.d x3, XX, 0 * SIZE, 3
xvstelm.d x4, XX, 1 * SIZE, 3
#else
ld.w t1, X, 0 * SIZE
ld.w t2, X, 1 * SIZE
add.d X, X, INCX
ld.w t3, X, 0 * SIZE
ld.w t4, X, 1 * SIZE
add.d X, X, INCX
xvinsgr2vr.w x1, t1, 0
xvinsgr2vr.w x2, t2, 0
xvinsgr2vr.w x1, t3, 1
xvinsgr2vr.w x2, t4, 1
ld.w t1, X, 0 * SIZE
ld.w t2, X, 1 * SIZE
add.d X, X, INCX
ld.w t3, X, 0 * SIZE
ld.w t4, X, 1 * SIZE
xvinsgr2vr.w x1, t1, 2
xvinsgr2vr.w x2, t2, 2
xvinsgr2vr.w x1, t3, 3
xvinsgr2vr.w x2, t4, 3
add.d X, X, INCX
ld.w t1, X, 0 * SIZE
ld.w t2, X, 1 * SIZE
add.d X, X, INCX
ld.w t3, X, 0 * SIZE
ld.w t4, X, 1 * SIZE
add.d X, X, INCX
xvinsgr2vr.w x1, t1, 4
xvinsgr2vr.w x2, t2, 4
xvinsgr2vr.w x1, t3, 5
xvinsgr2vr.w x2, t4, 5
ld.w t1, X, 0 * SIZE
ld.w t2, X, 1 * SIZE
add.d X, X, INCX
ld.w t3, X, 0 * SIZE
ld.w t4, X, 1 * SIZE
xvinsgr2vr.w x1, t1, 6
xvinsgr2vr.w x2, t2, 6
xvinsgr2vr.w x1, t3, 7
xvinsgr2vr.w x2, t4, 7
add.d X, X, INCX

xvfmul.s x3, VXAI, x2
xvfsub.s x3, VXZ, x3
xvfmul.s x4, VXAI, x1
addi.d I, I, -1
xvstelm.w x3, XX, 0 * SIZE, 0
xvstelm.w x4, XX, 1 * SIZE, 0
add.d XX, XX, INCX
xvstelm.w x3, XX, 0 * SIZE, 1
xvstelm.w x4, XX, 1 * SIZE, 1
add.d XX, XX, INCX
xvstelm.w x3, XX, 0 * SIZE, 2
xvstelm.w x4, XX, 1 * SIZE, 2
add.d XX, XX, INCX
xvstelm.w x3, XX, 0 * SIZE, 3
xvstelm.w x4, XX, 1 * SIZE, 3
add.d XX, XX, INCX
xvstelm.w x3, XX, 0 * SIZE, 4
xvstelm.w x4, XX, 1 * SIZE, 4
add.d XX, XX, INCX
xvstelm.w x3, XX, 0 * SIZE, 5
xvstelm.w x4, XX, 1 * SIZE, 5
add.d XX, XX, INCX
xvstelm.w x3, XX, 0 * SIZE, 6
xvstelm.w x4, XX, 1 * SIZE, 6
add.d XX, XX, INCX
xvstelm.w x3, XX, 0 * SIZE, 7
xvstelm.w x4, XX, 1 * SIZE, 7
#endif
add.d XX, XX, INCX
blt $r0, I, .L222
b .L997
.align 3

.L223: //alpha_r != 0.0 && alpha_i == 0.0
#ifdef DOUBLE
ld.d t1, X, 0 * SIZE
130 changes: 2 additions & 128 deletions kernel/loongarch64/cscal_lsx.S
@@ -97,7 +97,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
b .L113 //alpha_r != 0.0 && alpha_i == 0.0

.L14:
bceqz $fcc1, .L112 //alpha_r == 0.0 && alpha_i != 0.0
bceqz $fcc1, .L114 //alpha_r == 0.0 && alpha_i != 0.0
b .L111 //alpha_r == 0.0 && alpha_i == 0.0
.align 3

@@ -116,48 +116,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
b .L997
.align 3

.L112: //alpha_r == 0.0 && alpha_i != 0.0
vld VX0, X, 0 * SIZE
#ifdef DOUBLE
vld VX1, X, 2 * SIZE
vpickev.d x1, VX1, VX0
vpickod.d x2, VX1, VX0
vfmul.d x3, VXAI, x2
vfsub.d x3, VXZ, x3
vfmul.d x4, VXAI, x1
vilvl.d VX2, x4 ,x3
vilvh.d VX3, x4, x3
vst VX2, X, 0 * SIZE
vst VX3, X, 2 * SIZE
vld VX0, X, 4 * SIZE
vld VX1, X, 6 * SIZE
vpickev.d x1, VX1, VX0
vpickod.d x2, VX1, VX0
vfmul.d x3, VXAI, x2
vfsub.d x3, VXZ, x3
vfmul.d x4, VXAI, x1
vilvl.d VX2, x4 ,x3
vilvh.d VX3, x4, x3
vst VX2, X, 4 * SIZE
vst VX3, X, 6 * SIZE
#else
vld VX1, X, 4 * SIZE
vpickev.w x1, VX1, VX0
vpickod.w x2, VX1, VX0
vfmul.s x3, VXAI, x2
vfsub.s x3, VXZ, x3
vfmul.s x4, VXAI, x1
vilvl.w VX2, x4 ,x3
vilvh.w VX3, x4, x3
vst VX2, X, 0 * SIZE
vst VX3, X, 4 * SIZE
#endif
addi.d X, X, 8 * SIZE
addi.d I, I, -1
blt $r0, I, .L112
b .L997
.align 3

.L113: //alpha_r != 0.0 && alpha_i == 0.0
vld VX0, X, 0 * SIZE
#ifdef DOUBLE
@@ -256,7 +214,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
b .L223 //alpha_r != 0.0 && alpha_i == 0.0

.L24:
bceqz $fcc1, .L222 //alpha_r == 0.0 && alpha_i != 0.0
bceqz $fcc1, .L224 //alpha_r == 0.0 && alpha_i != 0.0
b .L221 //alpha_r == 0.0 && alpha_i == 0.0
.align 3

@@ -292,90 +250,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
b .L997
.align 3

.L222: //alpha_r == 0.0 && alpha_i != 0.0
#ifdef DOUBLE
ld.d t1, X, 0 * SIZE
ld.d t2, X, 1 * SIZE
add.d X, X, INCX
ld.d t3, X, 0 * SIZE
ld.d t4, X, 1 * SIZE
add.d X, X, INCX
vinsgr2vr.d x1, t1, 0
vinsgr2vr.d x2, t2, 0
vinsgr2vr.d x1, t3, 1
vinsgr2vr.d x2, t4, 1
vfmul.d x3, VXAI, x2
vfsub.d x3, VXZ, x3
vfmul.d x4, VXAI, x1
vstelm.d x3, XX, 0 * SIZE, 0
vstelm.d x4, XX, 1 * SIZE, 0
add.d XX, XX, INCX
vstelm.d x3, XX, 0 * SIZE, 1
vstelm.d x4, XX, 1 * SIZE, 1
add.d XX, XX, INCX

ld.d t1, X, 0 * SIZE
ld.d t2, X, 1 * SIZE
add.d X, X, INCX
ld.d t3, X, 0 * SIZE
ld.d t4, X, 1 * SIZE
vinsgr2vr.d x1, t1, 0
vinsgr2vr.d x2, t2, 0
vinsgr2vr.d x1, t3, 1
vinsgr2vr.d x2, t4, 1
add.d X, X, INCX
vfmul.d x3, VXAI, x2
vfsub.d x3, VXZ, x3
vfmul.d x4, VXAI, x1
addi.d I, I, -1
vstelm.d x3, XX, 0 * SIZE, 0
vstelm.d x4, XX, 1 * SIZE, 0
add.d XX, XX, INCX
vstelm.d x3, XX, 0 * SIZE, 1
vstelm.d x4, XX, 1 * SIZE, 1
#else
ld.w t1, X, 0 * SIZE
ld.w t2, X, 1 * SIZE
add.d X, X, INCX
ld.w t3, X, 0 * SIZE
ld.w t4, X, 1 * SIZE
add.d X, X, INCX
vinsgr2vr.w x1, t1, 0
vinsgr2vr.w x2, t2, 0
vinsgr2vr.w x1, t3, 1
vinsgr2vr.w x2, t4, 1
ld.w t1, X, 0 * SIZE
ld.w t2, X, 1 * SIZE
add.d X, X, INCX
ld.w t3, X, 0 * SIZE
ld.w t4, X, 1 * SIZE
vinsgr2vr.w x1, t1, 2
vinsgr2vr.w x2, t2, 2
vinsgr2vr.w x1, t3, 3
vinsgr2vr.w x2, t4, 3
add.d X, X, INCX

vfmul.s x3, VXAI, x2
vfsub.s x3, VXZ, x3
vfmul.s x4, VXAI, x1
addi.d I, I, -1
vstelm.w x3, XX, 0 * SIZE, 0
vstelm.w x4, XX, 1 * SIZE, 0
add.d XX, XX, INCX
vstelm.w x3, XX, 0 * SIZE, 1
vstelm.w x4, XX, 1 * SIZE, 1
add.d XX, XX, INCX
vstelm.w x3, XX, 0 * SIZE, 2
vstelm.w x4, XX, 1 * SIZE, 2
add.d XX, XX, INCX
vstelm.w x3, XX, 0 * SIZE, 3
vstelm.w x4, XX, 1 * SIZE, 3
#endif
add.d XX, XX, INCX
blt $r0, I, .L222
b .L997
.align 3

.L223: //alpha_r != 0.0 && alpha_i == 0.0
#ifdef DOUBLE
ld.d t1, X, 0 * SIZE
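
A hypothetical way to exercise the changed path from C (the vector length, values, and expected output below are assumptions for illustration; they are not taken from the commit). Building against the patched library, e.g. cc check_cscal.c -lopenblas, and running on a LoongArch64 machine with LSX/LASX should show the NaN propagating through the vector kernel, matching the scalar reference behavior:

    #include <cblas.h>
    #include <math.h>
    #include <stdio.h>

    int main(void) {
        /* 16 complex floats, interleaved re/im, enough to reach the vector loop. */
        float x[2 * 16];
        for (int i = 0; i < 16; i++) { x[2 * i] = 1.0f; x[2 * i + 1] = 1.0f; }
        x[0] = INFINITY;                      /* real part of x[0] */

        float alpha[2] = { 0.0f, 2.0f };      /* alpha_r == 0, alpha_i != 0 */
        cblas_cscal(16, alpha, x, 1);

        /* Expected after this patch: x[0] = (nan, inf) rather than (-2, inf). */
        printf("x[0] = (%g, %g)\n", x[0], x[1]);
        return 0;
    }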
