Skip to content

Commit

Permalink
Deactivating AVX for now. For more information see discussion on huon…
Browse files Browse the repository at this point in the history
  • Loading branch information
liebharc committed Jan 3, 2016
1 parent 51cf9f5 commit bdaf5a9
Show file tree
Hide file tree
Showing 3 changed files with 8 additions and 151 deletions.
4 changes: 3 additions & 1 deletion rustc.bat
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
@echo off
rem Wrapper around rustc that enables SIMD target features.
rem The Rust installation path is hard-coded for now.
rem TODO: look up rustc via the PATH environment variable instead.
rem %* forwards every command-line argument of this script to rustc.
"C:\Program Files\Rust nightly 1.7\bin\rustc.exe" -C target-cpu=native -C target-feature=+sse2,+sse3,+avx2,+avx %*
rem f32x8/avx causes a crash right now. See comment on https://github.com/huonw/simd/pull/18
rem "C:\Program Files\Rust nightly 1.7\bin\rustc.exe" -C target-cpu=native -C target-feature=+sse2,+sse3,+avx2,+avx %*
"C:\Program Files\Rust nightly 1.7\bin\rustc.exe" -C target-cpu=native -C target-feature=+sse2,+sse3 %*
4 changes: 3 additions & 1 deletion rustc.sh
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
#!/usr/bin/env bash
# Runs rustc and enables SIMD CPU features.
# "$@" is quoted so that every argument is forwarded to rustc unchanged, even
# when it contains spaces or glob characters (e.g. paths with blanks).
rustc -C target-cpu=native -C target-feature=+sse2,+sse3,+avx2,+avx "$@"
# f32x8/avx causes a crash right now. See comment on https://github.com/huonw/simd/pull/18
#rustc -C target-cpu=native -C target-feature=+sse2,+sse3,+avx2,+avx "$@"
rustc -C target-cpu=native -C target-feature=+sse2,+sse3 "$@"
151 changes: 2 additions & 149 deletions src/simd_extensions/avx.rs
Original file line number Diff line number Diff line change
@@ -1,160 +1,13 @@
use num::complex::Complex;
use super::Simd;
use simd::f32x4;
use simd::x86::sse3::Sse3F32x4;
use simd::x86::avx::{f32x8,f64x4,AvxF32x8,AvxF64x4};
use std::mem;

//pub type Reg32 = f32x8;
// f32x8/avx causes a crash right now. See comment on https://github.com/huonw/simd/pull/18
pub type Reg32 = f32x4;
pub type Reg32 = f32x8;

pub type Reg64 = f64x4;

/// `Simd<f32>` implementation backed by a four-lane `f32x4` register.
///
/// The four lanes are used either as four independent real values or as two
/// interleaved complex values laid out as `(re0, im0, re1, im1)`.
impl Simd<f32> for f32x4 {
    /// Reinterprets a slice of `f32` values as a slice of registers without
    /// copying any data.
    ///
    /// # Panics
    ///
    /// Panics if `array.len()` is not a multiple of `Self::len()`. In debug
    /// builds it also panics if `array` is not aligned for `Self`.
    fn array_to_regs(array: &[f32]) -> &[Self] {
        let len = array.len();
        let reg_len = Self::len();
        if len % reg_len != 0 {
            panic!("Argument must be dividable by {}", reg_len);
        }
        if len == 0 {
            // An empty slice may carry a dangling pointer which is only
            // aligned for `f32`; never reinterpret it.
            return &[];
        }
        debug_assert!(
            array.as_ptr() as usize % mem::align_of::<Self>() == 0,
            "Argument must be aligned to {} bytes",
            mem::align_of::<Self>()
        );
        // SAFETY: `len` is a non-zero multiple of `reg_len`, so the resulting
        // slice covers exactly the memory of `array` and inherits its
        // lifetime. The caller has to pass memory which is aligned for `Self`.
        unsafe { ::std::slice::from_raw_parts(array.as_ptr() as *const Self, len / reg_len) }
    }

    /// Mutable counterpart of `array_to_regs`: reinterprets a mutable slice of
    /// `f32` values as a mutable slice of registers without copying any data.
    ///
    /// # Panics
    ///
    /// Panics if `array.len()` is not a multiple of `Self::len()`. In debug
    /// builds it also panics if `array` is not aligned for `Self`.
    fn array_to_regs_mut(array: &mut [f32]) -> &mut [Self] {
        let len = array.len();
        let reg_len = Self::len();
        if len % reg_len != 0 {
            panic!("Argument must be dividable by {}", reg_len);
        }
        if len == 0 {
            // See `array_to_regs`: do not reinterpret a possibly dangling pointer.
            return &mut [];
        }
        debug_assert!(
            array.as_ptr() as usize % mem::align_of::<Self>() == 0,
            "Argument must be aligned to {} bytes",
            mem::align_of::<Self>()
        );
        // SAFETY: `len` is a non-zero multiple of `reg_len`, so the resulting
        // slice covers exactly the memory of `array`, and the exclusive borrow
        // of `array` is handed over to the returned slice. The caller has to
        // pass memory which is aligned for `Self`.
        unsafe { ::std::slice::from_raw_parts_mut(array.as_mut_ptr() as *mut Self, len / reg_len) }
    }

    /// Number of `f32` lanes in one register.
    fn len() -> usize {
        4
    }

    /// Loads one register from `array`, starting at element `idx`.
    fn load(array: &[f32], idx: usize) -> f32x4 {
        f32x4::load(array, idx)
    }

    /// Loads one register starting at element `idx`, wrapping around to the
    /// beginning of `array` when the end is reached.
    ///
    /// # Panics
    ///
    /// Panics if `array` is empty (remainder by zero).
    fn load_wrap(array: &[f32], idx: usize) -> f32x4 {
        let mut temp = [0.0; 4];
        for (offset, lane) in temp.iter_mut().enumerate() {
            *lane = array[(idx + offset) % array.len()];
        }
        f32x4::load(&temp, 0)
    }

    /// Creates a register holding `value` in both complex slots:
    /// `(re, im, re, im)`.
    fn from_complex(value: Complex<f32>) -> f32x4 {
        f32x4::new(value.re, value.im, value.re, value.im)
    }

    /// Adds the real number `value` to every lane.
    fn add_real(self, value: f32) -> f32x4 {
        self + f32x4::splat(value)
    }

    /// Adds the complex number `value` to both complex values in the register.
    fn add_complex(self, value: Complex<f32>) -> f32x4 {
        let increment = f32x4::new(value.re, value.im, value.re, value.im);
        self + increment
    }

    /// Multiplies every lane with the real number `value`.
    fn scale_real(self, value: f32) -> f32x4 {
        self * f32x4::splat(value)
    }

    /// Multiplies both complex values in the register with the complex number
    /// `value`.
    fn scale_complex(self, value: Complex<f32>) -> f32x4 {
        let scaling_real = f32x4::splat(value.re);
        let scaling_imag = f32x4::splat(value.im);
        // (a * c, b * c) for every complex (a, b) in self and value = c + di
        let parallel = scaling_real * self;
        // There should be a shufps operation which shuffles the vector self
        let shuffled = f32x4::new(self.extract(1), self.extract(0), self.extract(3), self.extract(2));
        // (b * d, a * d)
        let cross = scaling_imag * shuffled;
        // addsub (SSE3 addsubps) subtracts in even lanes and adds in odd lanes:
        // (a * c - b * d, b * c + a * d)
        parallel.addsub(cross)
    }

    /// Multiplies each complex value in `self` with the complex value in the
    /// same slot of `value`.
    fn mul_complex(self, value: f32x4) -> f32x4 {
        // Real parts of value, duplicated per complex slot: (c, c)
        let scaling_real = f32x4::new(value.extract(0), value.extract(0), value.extract(2), value.extract(2));
        // Imaginary parts of value, duplicated per complex slot: (d, d)
        let scaling_imag = f32x4::new(value.extract(1), value.extract(1), value.extract(3), value.extract(3));
        let parallel = scaling_real * self;
        // There should be a shufps operation which shuffles the vector self
        let shuffled = f32x4::new(self.extract(1), self.extract(0), self.extract(3), self.extract(2));
        let cross = scaling_imag * shuffled;
        // (a * c - b * d, b * c + a * d)
        parallel.addsub(cross)
    }

    /// Divides each complex value in `self` by the complex value in the same
    /// slot of `value`.
    ///
    /// With `self = a + bi` and `value = c + di` this calculates
    /// `((a * c + b * d) + (b * c - a * d)i) / (c * c + d * d)`.
    fn div_complex(self, value: f32x4) -> f32x4 {
        // Real parts of self, duplicated per complex slot: (a, a)
        let self_re = f32x4::new(self.extract(0), self.extract(0), self.extract(2), self.extract(2));
        // Imaginary parts of self, duplicated per complex slot: (b, b)
        let self_im = f32x4::new(self.extract(1), self.extract(1), self.extract(3), self.extract(3));
        // (b * c, b * d)
        let parallel = self_im * value;
        // There should be a shufps operation which shuffles the vector value
        let shuffled = f32x4::new(value.extract(1), value.extract(0), value.extract(3), value.extract(2));
        // (a * d, a * c)
        let cross = self_re * shuffled;
        // (b * c - a * d, b * d + a * c): the numerator with the imaginary part
        // in the even lane and the real part in the odd lane.
        let mul = parallel.addsub(cross);
        // (d * d, c * c)
        let square = shuffled * shuffled;
        let square_shuffled = f32x4::new(square.extract(1), square.extract(0), square.extract(3), square.extract(2));
        // c * c + d * d in every lane of a complex slot
        let sum = square + square_shuffled;
        let div = mul / sum;
        // Swap the lanes of every pair to get back to the (re, im) order.
        f32x4::new(div.extract(1), div.extract(0), div.extract(3), div.extract(2))
    }

    /// Calculates the squared magnitude of both complex values.
    ///
    /// The horizontal add (SSE3 haddps) of the squared register with itself
    /// gives the result layout `(|z0|^2, |z1|^2, |z0|^2, |z1|^2)`.
    fn complex_abs_squared(self) -> f32x4 {
        let squared = self * self;
        squared.hadd(squared)
    }

    /// Calculates the magnitude of both complex values; the result layout is
    /// `(|z0|, |z1|, |z0|, |z1|)`.
    fn complex_abs(self) -> f32x4 {
        let squared = self * self;
        let squared_sum = squared.hadd(squared);
        squared_sum.sqrt()
    }

    /// Calculates the square root of every lane.
    fn sqrt(self) -> f32x4 {
        self.sqrt()
    }

    /// Stores all four lanes into `target`, starting at element `index`.
    fn store(self, target: &mut [f32], index: usize) {
        self.store(target, index);
    }

    /// Stores only the first two lanes into `target[index]` and
    /// `target[index + 1]`.
    ///
    /// # Panics
    ///
    /// Panics if `index + 1` is out of bounds for `target`.
    fn store_half(self, target: &mut [f32], index: usize) {
        let mut temp = [0.0; 4];
        self.store(&mut temp, 0);
        target[index] = temp[0];
        target[index + 1] = temp[1];
    }

    /// Sums up all four lanes.
    fn sum_real(&self) -> f32 {
        self.extract(0) +
        self.extract(1) +
        self.extract(2) +
        self.extract(3)
    }

    /// Sums up both complex values: even lanes form the real part and odd
    /// lanes the imaginary part of the result.
    fn sum_complex(&self) -> Complex<f32> {
        Complex::<f32>::new(self.extract(0) + self.extract(2), self.extract(1) + self.extract(3))
    }
}
/*
impl Simd<f32> for f32x8
{
fn len() -> usize {
Expand Down Expand Up @@ -290,7 +143,7 @@ impl Simd<f32> for f32x8
Complex::<f32>::new(self.extract(0) + self.extract(2) + self.extract(4) + self.extract(6),
self.extract(1) + self.extract(3) + self.extract(6) + self.extract(7))
}
}*/
}

impl Simd<f64> for f64x4
{
Expand Down

0 comments on commit bdaf5a9

Please sign in to comment.