Skip to content

Commit

Permalink
arm64: satd: 8 bpc NEON implementation of 8x8
Browse files Browse the repository at this point in the history
  • Loading branch information
barrbrain committed Nov 7, 2023
1 parent bc01c7f commit 13439d0
Show file tree
Hide file tree
Showing 2 changed files with 138 additions and 1 deletion.
137 changes: 137 additions & 0 deletions src/arm/64/satd.S
Original file line number Diff line number Diff line change
Expand Up @@ -26,11 +26,21 @@
zip2 \r1\().4s, \r2\().4s, \r3\().4s
.endm

// Interleave the 64-bit halves of two vectors.
//   r0 = { low 64 bits of r2,  low 64 bits of r3 }   (zip1)
//   r1 = { high 64 bits of r2, high 64 bits of r3 }  (zip2)
// r0 is written before zip2 reads r2/r3, so r0 must not be the same
// register as r2 or r3.
.macro interleave_quads r0, r1, r2, r3
zip1 \r0\().2d, \r2\().2d, \r3\().2d
zip2 \r1\().2d, \r2\().2d, \r3\().2d
.endm

// Rounding divide of the running total in w0 by 4:
//   w0 = (w0 + 2) >> 2      (logical shift, result stays in w0)
.macro normalize_4
        add         w0, w0, #2              // bias by half the divisor
        lsr         w0, w0, #2              // divide by 4
.endm

// Rounding divide of the running total in w0 by 8:
//   w0 = (w0 + 4) >> 3      (logical shift, result stays in w0)
.macro normalize_8
        add         w0, w0, #4              // bias by half the divisor
        lsr         w0, w0, #3              // divide by 8
.endm

// x0: src: *const u8,
// x1: src_stride: isize,
// x2: dst: *const u8,
Expand Down Expand Up @@ -578,3 +588,130 @@ function satd4x16_neon, export=1
#undef dst
#undef dst_stride
endfunc

// Fetch two consecutive 8-byte rows from each of two buffers and form their
// element-wise differences, widened to 16 bits.
//
//   n0, n1      SIMD register numbers that receive (src - dst) for the first
//               and second row respectively, as .8h vectors
//   n2          SIMD register number used as scratch (ends up holding the
//               second dst row in its low 64 bits)
//   src, dst    general registers holding the row pointers
//   src_stride,
//   dst_stride  general registers holding the distance between rows, in the
//               same units as the pointers
//   should_add  when non-zero (the default) both pointers are advanced by
//               two rows after loading; pass 0 to leave them untouched
//
// n1 doubles as scratch for the first dst row, so the instruction order
// below must be kept.
.macro load_rows n0, n1, n2, src, dst, src_stride, dst_stride, should_add=1
        // Row 0: n0 = src[0..8] - dst[0..8]
        ldr         d\n0, [\src]
        ldr         d\n1, [\dst]
        usubl       v\n0\().8h, v\n0\().8b, v\n1\().8b

        // Row 1: n1 = src[stride..] - dst[stride..]
        ldr         d\n1, [\src, \src_stride]
        ldr         d\n2, [\dst, \dst_stride]
        usubl       v\n1\().8h, v\n1\().8b, v\n2\().8b

.ifne \should_add
        // Step both pointers past the two rows just consumed.
        add         \src, \src, \src_stride, lsl #1
        add         \dst, \dst, \dst_stride, lsl #1
.endif
.endm

// 8x8 Hadamard transform over eight vector registers, done in place.
//
// Reads its eight input vectors from v0, v1, v4, v5, v16, v17, v18, v19
// (the registers satd8x8_neon fills via load_rows) and uses v0-v7 and
// v16-v23 as working storage.
//
// NOTE(review): butterfly, interleave and interleave_pairs are defined
// outside this excerpt. Assuming butterfly writes its results to its first
// two operands, the transformed coefficients end up in
// v2, v3, v6, v7, v20, v21, v22, v23, which is the set SUM_HADAMARD_8X8
// consumes -- confirm against the helper definitions.
.macro HADAMARD_8X8
// Horizontal transform
// Three rounds, each a set of butterflies followed by an interleave at a
// coarser granularity (interleave, interleave_pairs, interleave_quads).

butterfly v2, v3, v0, v1
butterfly v6, v7, v4, v5
butterfly v20, v21, v16, v17
butterfly v22, v23, v18, v19

interleave v0, v1, v2, v3
interleave v4, v5, v6, v7
interleave v16, v17, v20, v21
interleave v18, v19, v22, v23

butterfly v2, v6, v0, v4
butterfly v3, v7, v1, v5
butterfly v20, v22, v16, v18
butterfly v21, v23, v17, v19

interleave_pairs v0, v4, v2, v6
interleave_pairs v1, v5, v3, v7
interleave_pairs v16, v18, v20, v22
interleave_pairs v17, v19, v21, v23

butterfly v2, v20, v0, v16
butterfly v3, v21, v1, v17
butterfly v6, v22, v4, v18
butterfly v7, v23, v5, v19

// Regroup by 64-bit halves; results land back in the same eight registers
// the macro started from (v0, v1, v4, v5, v16, v17, v18, v19).
interleave_quads v0, v16, v2, v20
interleave_quads v1, v17, v3, v21
interleave_quads v4, v18, v6, v22
interleave_quads v5, v19, v7, v23

// Vertical transform
// Three butterfly rounds between whole registers, with no interleaving:
// partners are adjacent registers, then two apart, then four apart in the
// v0, v1, v4, v5, v16, v17, v18, v19 ordering.

butterfly v2, v3, v0, v1
butterfly v6, v7, v4, v5
butterfly v20, v21, v16, v17
butterfly v22, v23, v18, v19

butterfly v0, v4, v2, v6
butterfly v1, v5, v3, v7
butterfly v16, v18, v20, v22
butterfly v17, v19, v21, v23

butterfly v2, v20, v0, v16
butterfly v3, v21, v1, v17
butterfly v6, v22, v4, v18
butterfly v7, v23, v5, v19
.endm

// Sum the absolute values of the 64 16-bit lanes held in
// v2, v3, v6, v7, v20, v21, v22, v23 and leave the rounded total / 8 in w0.
//
// Overwrites the eight input registers with their absolute values and
// clobbers v0 and v1 (used as two 4 x 32-bit accumulators).
//
// NOTE(review): the widening steps are signed (sxtl/saddw), so an absolute
// value of 0x8000 would be summed as negative. With differences of 8-bit
// samples the largest coefficient should be 64 * 255 = 16320, assuming the
// preceding transform is an unnormalized 8x8 Hadamard -- confirm before
// reusing this macro for wider input.
.macro SUM_HADAMARD_8X8
// absolute value of transform coefficients
abs v2.8h, v2.8h
abs v3.8h, v3.8h
abs v6.8h, v6.8h
abs v7.8h, v7.8h
abs v20.8h, v20.8h
abs v21.8h, v21.8h
abs v22.8h, v22.8h
abs v23.8h, v23.8h

// stage 1 sum
// v0 and v1 are independent accumulator chains, interleaved instruction by
// instruction. v0 starts from both halves of v2, v1 from both halves of v3;
// after that v0 takes the low half and v1 the high half of each remaining
// register, so every lane is added exactly once.
sxtl v0.4s, v2.4h
sxtl v1.4s, v3.4h
saddw2 v0.4s, v0.4s, v2.8h
saddw2 v1.4s, v1.4s, v3.8h
saddw v0.4s, v0.4s, v6.4h
saddw2 v1.4s, v1.4s, v6.8h
saddw v0.4s, v0.4s, v7.4h
saddw2 v1.4s, v1.4s, v7.8h
saddw v0.4s, v0.4s, v20.4h
saddw2 v1.4s, v1.4s, v20.8h
saddw v0.4s, v0.4s, v21.4h
saddw2 v1.4s, v1.4s, v21.8h
saddw v0.4s, v0.4s, v22.4h
saddw2 v1.4s, v1.4s, v22.8h
saddw v0.4s, v0.4s, v23.4h
saddw2 v1.4s, v1.4s, v23.8h

// stage 2 sum
// Merge the two accumulators, then reduce the four 32-bit lanes to one.
add v0.4s, v0.4s, v1.4s
addv s0, v0.4s

// Move the scalar total to the integer return register and apply the
// rounding divide by 8.
fmov w0, s0
normalize_8
.endm

// satd8x8_neon: transform-domain absolute-difference sum over an 8x8 block
// of 8-bit samples.
//
// In:   x0 = src pointer, x1 = src stride,
//       x2 = dst pointer, x3 = dst stride
//       (strides are added directly to the byte pointers)
// Out:  w0 = (sum of |Hadamard(src - dst)| + 4) >> 3
// Uses: v0-v7 and v16-v23; x0 and x2 are advanced by six rows.
//
// NOTE(review): `function` / `endfunc` are macros defined outside this
// excerpt; `export=1` presumably makes the symbol globally visible -- confirm.
function satd8x8_neon, export=1
#define src x0
#define src_stride x1
#define dst x2
#define dst_stride x3

// Each load_rows call consumes two rows and leaves the 16-bit differences
// in the first two listed registers (third is scratch). Row differences end
// up in v0, v1, v4, v5, v16, v17, v18, v19 -- the inputs HADAMARD_8X8 reads.
// The last call passes should_add=0 because the pointers are not needed again.
load_rows 0, 1, 2, src, dst, src_stride, dst_stride
load_rows 4, 5, 6, src, dst, src_stride, dst_stride
load_rows 16, 17, 20, src, dst, src_stride, dst_stride
load_rows 18, 19, 22, src, dst, src_stride, dst_stride, 0

HADAMARD_8X8

// Leaves the normalized result in w0.
SUM_HADAMARD_8X8
ret

#undef src
#undef src_stride
#undef dst
#undef dst_stride
endfunc
2 changes: 1 addition & 1 deletion src/asm/aarch64/dist.rs
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,7 @@ declare_asm_dist_fn![
(rav1e_satd4x8_neon, u8),
(rav1e_satd4x16_neon, u8),
(rav1e_satd8x4_neon, u8),
(rav1e_satd8x8_neon, u8),
(rav1e_satd16x4_neon, u8)
];

Expand Down Expand Up @@ -122,7 +123,6 @@ macro_rules! impl_satd_fn {
}

impl_satd_fn![
(rav1e_satd8x8_neon, u8, 0, 0),
(rav1e_satd8x16_neon, u8, 0, 1),
(rav1e_satd8x32_neon, u8, 0, 2),
(rav1e_satd16x8_neon, u8, 1, 0),
Expand Down

0 comments on commit 13439d0

Please sign in to comment.