Deactivating AVX for now. For more information see discussion on huonw/simd#18
liebharc · Jan 3, 2016 · bdaf5a9 · bdaf5a9
1 parent 51cf9f5
commit bdaf5a9
Show file tree

Hide file tree

Showing 3 changed files with 8 additions and 151 deletions.
diff --git a/rustc.bat b/rustc.bat
@@ -1,4 +1,6 @@
 @echo off
 rem Using a hard coded rust path right now
 rem In future check the path variable for a rust path
-"C:\Program Files\Rust nightly 1.7\bin\rustc.exe" -C target-cpu=native -C target-feature=+sse2,+sse3,+avx2,+avx %*
+rem f32x8/avx causes a crash right now. See comment on https://github.com/huonw/simd/pull/18
+rem "C:\Program Files\Rust nightly 1.7\bin\rustc.exe" -C target-cpu=native -C target-feature=+sse2,+sse3,+avx2,+avx %*
+"C:\Program Files\Rust nightly 1.7\bin\rustc.exe" -C target-cpu=native -C target-feature=+sse2,+sse3 %*
diff --git a/rustc.sh b/rustc.sh
@@ -1,3 +1,5 @@
 #!/usr/bin/env bash
 # Runs rustc and enables SIMD CPU features
-rustc -C target-cpu=native -C target-feature=+sse2,+sse3,+avx2,+avx $@
+# f32x8/avx causes a crash right now. See comment on https://github.com/huonw/simd/pull/18
+#rustc -C target-cpu=native -C target-feature=+sse2,+sse3,+avx2,+avx $@
+rustc -C target-cpu=native -C target-feature=+sse2,+sse3 $@
diff --git a/src/simd_extensions/avx.rs b/src/simd_extensions/avx.rs
@@ -1,160 +1,13 @@
 use num::complex::Complex;
 use super::Simd;
-use simd::f32x4;
 use simd::x86::sse3::Sse3F32x4;
 use simd::x86::avx::{f32x8,f64x4,AvxF32x8,AvxF64x4};
 use std::mem;
 
-//pub type Reg32 = f32x8;
-// f32x8/avx causes a crash right now. See comment on https://github.com/huonw/simd/pull/18
-pub type Reg32 = f32x4;
+pub type Reg32 = f32x8;
 
 pub type Reg64 = f64x4;
 
-impl Simd<f32> for f32x4
-{
-    fn array_to_regs(array: &[f32]) -> &[Self] {
-        unsafe { 
-			let len = array.len();
-            let reg_len = Self::len();
-            if len % reg_len != 0 {
-                panic!("Argument must be dividable by {}", reg_len);
-            }
-			let trans: &[Self] = mem::transmute(array);
-			&trans[0 .. len / reg_len]
-		}
-    }
-
-    fn array_to_regs_mut(array: &mut [f32]) -> &mut [Self] {
-        unsafe { 
-			let len = array.len();
-            let reg_len = Self::len();
-            if len % reg_len != 0 {
-                panic!("Argument must be dividable by {}", reg_len);
-            }
-			let trans: &mut [Self] = mem::transmute(array);
-			&mut trans[0 .. len / reg_len]
-		}
-    }
-
-    fn len() -> usize {
-        4
-    }
-
-    fn load(array: &[f32], idx: usize) -> f32x4 {
-        f32x4::load(array, idx)
-    }
-
-    fn load_wrap(array: &[f32], idx: usize) -> f32x4 {
-        let mut temp = [0.0; 4];
-        for i in 0..temp.len() {
-            temp[i] = array[(idx + i) % array.len()];
-        }
-        f32x4::load(&temp, 0)
-    }
-
-    fn from_complex(value: Complex<f32>) -> f32x4 {
-        f32x4::new(value.re, value.im, value.re, value.im)
-    }
-
-	fn add_real(self, value: f32) -> f32x4
-	{
-		let increment = f32x4::splat(value);
-		self + increment
-	}
-
-	fn add_complex(self, value: Complex<f32>) -> f32x4
-	{
-		let increment = f32x4::new(value.re, value.im, value.re, value.im);
-		self + increment
-	}
-
-	fn scale_real(self, value: f32) -> f32x4
-	{
-		let scale_vector = f32x4::splat(value); 
-		self * scale_vector
-	}
-
-	fn scale_complex(self, value: Complex<f32>) -> f32x4
-	{
-		let scaling_real = f32x4::splat(value.re);
-		let scaling_imag = f32x4::splat(value.im);
-		let parallel = scaling_real * self;
-		// There should be a shufps operation which shuffles the vector self
-		let shuffled = f32x4::new(self.extract(1), self.extract(0), self.extract(3), self.extract(2)); 
-		let cross = scaling_imag * shuffled;
-		parallel.addsub(cross)
-	}
-
-	fn mul_complex(self, value: f32x4) -> f32x4
-	{
-		let scaling_real = f32x4::new(value.extract(0), value.extract(0), value.extract(2), value.extract(2));
-		let scaling_imag = f32x4::new(value.extract(1), value.extract(1), value.extract(3), value.extract(3));
-		let parallel = scaling_real * self;
-		// There should be a shufps operation which shuffles the vector self
-		let shuffled = f32x4::new(self.extract(1), self.extract(0), self.extract(3), self.extract(2)); 
-		let cross = scaling_imag * shuffled;
-		parallel.addsub(cross)
-	}
-
-	fn div_complex(self, value: f32x4) -> f32x4
-	{
-		let scaling_imag = f32x4::new(self.extract(0), self.extract(0), self.extract(2), self.extract(2));
-		let scaling_real = f32x4::new(self.extract(1), self.extract(1), self.extract(3), self.extract(3));
-		let parallel = scaling_real * value;
-		// There should be a shufps operation which shuffles the vector self
-		let shuffled = f32x4::new(value.extract(1), value.extract(0), value.extract(3), value.extract(2)); 
-		let cross = scaling_imag * shuffled;
-		let mul = parallel.addsub(cross);
-		let square = shuffled * shuffled;
-		let square_shuffled = f32x4::new(square.extract(1), square.extract(0), square.extract(3), square.extract(2));
-		let sum = square + square_shuffled;
-		let div = mul / sum;
-		f32x4::new(div.extract(1), div.extract(0), div.extract(3), div.extract(2))
-	}
-
-	fn complex_abs_squared(self) -> f32x4
-	{
-		let squared = self * self;
-		squared.hadd(squared)
-	}
-
-	fn complex_abs(self) -> f32x4
-	{
-		let squared = self * self;
-		let squared_sum = squared.hadd(squared);
-		squared_sum.sqrt()
-	}
-
-    fn sqrt(self) -> f32x4 {
-        self.sqrt()
-    }
-
-    fn store(self, target: &mut [f32], index: usize)
-	{
-		self.store(target, index);
-	} 
-
-	fn store_half(self, target: &mut [f32], index: usize)
-	{
-		let mut temp = [0.0; 4];
-		self.store(&mut temp, 0);
-		target[index] = temp[0];
-		target[index + 1] = temp[1];
-	}
-
-    fn sum_real(&self) -> f32 {
-        self.extract(0) +
-        self.extract(1) +
-        self.extract(2) +
-        self.extract(3)
-    }
-
-    fn sum_complex(&self) -> Complex<f32> {
-        Complex::<f32>::new(self.extract(0) + self.extract(2), self.extract(1) + self.extract(3))
-    }
-}
-/*
 impl Simd<f32> for f32x8
 {
     fn len() -> usize {
@@ -290,7 +143,7 @@ impl Simd<f32> for f32x8
         Complex::<f32>::new(self.extract(0) + self.extract(2) + self.extract(4) + self.extract(6),
                             self.extract(1) + self.extract(3) + self.extract(6) + self.extract(7))
     }
-}*/
+}
 
 impl Simd<f64> for f64x4
 {