From 28e700718a7155b2db7b4a4a5bcb757c32344dba Mon Sep 17 00:00:00 2001
From: bluss
Date: Wed, 5 Dec 2018 10:21:33 +0100
Subject: [PATCH] FEAT: In dgemm sse2 and fallback, use a 4x4 kernel

---
 src/dgemm_kernel.rs | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/src/dgemm_kernel.rs b/src/dgemm_kernel.rs
index 7458469..9abfbe6 100644
--- a/src/dgemm_kernel.rs
+++ b/src/dgemm_kernel.rs
@@ -92,8 +92,8 @@ impl GemmKernel for KernelAvx {
 impl GemmKernel for KernelSse2 {
     type Elem = T;
 
-    const MR: usize = MR;
-    const NR: usize = NR;
+    const MR: usize = 4;
+    const NR: usize = 4;
 
     #[inline(always)]
     fn align_to() -> usize { 16 }
@@ -130,8 +130,8 @@ impl GemmKernel for KernelSse2 {
 impl GemmKernel for KernelFallback {
     type Elem = T;
 
-    const MR: usize = MR;
-    const NR: usize = NR;
+    const MR: usize = 4;
+    const NR: usize = 4;
 
     #[inline(always)]
     fn align_to() -> usize { 0 }
@@ -752,7 +752,7 @@ unsafe fn kernel_fallback_impl(k: usize, alpha: T, a: *const T, b: *const T,
 
     // Compute matrix multiplication into ab[i][j]
     unroll_by!(4 => k, {
-        loop_m!(i, loop_n!(j, ab[i][j] += at(a, i) * at(b, j)));
+        loop4!(i, loop4!(j, ab[i][j] += at(a, i) * at(b, j)));
 
         a = a.offset(MR as isize);
         b = b.offset(NR as isize);
@@ -763,7 +763,7 @@ unsafe fn kernel_fallback_impl(k: usize, alpha: T, a: *const T, b: *const T,
     }
 
     // set C = α A B + β C
-    loop_n!(j, loop_m!(i, *c![i, j] = alpha * ab[i][j]));
+    loop4!(j, loop4!(i, *c![i, j] = alpha * ab[i][j]));
 }
 
 #[inline(always)]