From 432e33c1f281b47af5c7d342bdf59081fc188b0f Mon Sep 17 00:00:00 2001 From: Alexander Morozov Date: Sun, 20 Mar 2016 15:31:59 +0300 Subject: [PATCH 1/3] refactor/layer: remove redundant .enumerate() calls [SKIP_CHANGELOG] --- src/layer.rs | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/src/layer.rs b/src/layer.rs index d45e304a..feee31b3 100644 --- a/src/layer.rs +++ b/src/layer.rs @@ -756,15 +756,15 @@ pub trait ILayer : ComputeOutput + ComputeInputGradient>]) { // aquire all the locks let inp: Vec<_> = input_data.iter().map(|b| b.read().unwrap()).collect(); - let input_data_: Vec<&SharedTensor> = inp.iter().enumerate().map(|(_, val)| &**val).collect(); + let input_data_: Vec<&SharedTensor> = inp.iter().map(|val| &**val).collect(); let wgts: Vec<_> = weights_data.iter().map(|w| w.read().unwrap()).collect(); - let weights_data_: Vec<&SharedTensor> = wgts.iter().enumerate().map(|(_, val)| &**val).collect(); + let weights_data_: Vec<&SharedTensor> = wgts.iter().map(|val| &**val).collect(); let out_ref = output_data.iter().cloned().collect::>(); let mut out = &mut out_ref.iter().map(|b| b.write().unwrap()).collect::>(); let mut output_w = &mut out.iter_mut().map(|a| a).collect::>(); - let mut output_data_: Vec<&mut SharedTensor> = output_w.iter_mut().enumerate().map(|(_, val)| &mut ***val).collect(); + let mut output_data_: Vec<&mut SharedTensor> = output_w.iter_mut().map(|val| &mut ***val).collect(); self.compute_output(backend, &weights_data_, &input_data_, &mut output_data_); } @@ -785,17 +785,17 @@ pub trait ILayer : ComputeOutput + ComputeInputGradient>], input_gradients: &mut [ArcLock>]) { let wgts_data: Vec<_> = weights_data.iter().map(|b| b.read().unwrap()).collect(); - let weights_data_: Vec<&SharedTensor> = wgts_data.iter().enumerate().map(|(_, val)| &**val).collect(); + let weights_data_: Vec<&SharedTensor> = wgts_data.iter().map(|val| &**val).collect(); let out_data: Vec<_> = output_data.iter().map(|b| b.read().unwrap()).collect(); - let output_data_: Vec<&SharedTensor> = out_data.iter().enumerate().map(|(_, val)| &**val).collect(); + let output_data_: Vec<&SharedTensor> = out_data.iter().map(|val| &**val).collect(); let out_gradient: Vec<_> = output_gradients.iter().map(|b| b.read().unwrap()).collect(); - let output_gradients_: Vec<&SharedTensor> = out_gradient.iter().enumerate().map(|(_, val)| &**val).collect(); + let output_gradients_: Vec<&SharedTensor> = out_gradient.iter().map(|val| &**val).collect(); let inp_data: Vec<_> = input_data.iter().map(|b| b.read().unwrap()).collect(); - let input_data_: Vec<&SharedTensor> = inp_data.iter().enumerate().map(|(_, val)| &**val).collect(); + let input_data_: Vec<&SharedTensor> = inp_data.iter().map(|val| &**val).collect(); let btm_gradient_ref = input_gradients.iter().cloned().collect::>(); let mut btm_gradient = &mut btm_gradient_ref.iter().map(|b| b.write().unwrap()).collect::>(); let mut input_gradient = &mut btm_gradient.iter_mut().map(|a| a).collect::>(); - let mut input_gradients_: Vec<&mut SharedTensor> = input_gradient.iter_mut().enumerate().map(|(_, val)| &mut ***val).collect(); + let mut input_gradients_: Vec<&mut SharedTensor> = input_gradient.iter_mut().map(|val| &mut ***val).collect(); self.compute_input_gradient(backend, &weights_data_, &output_data_, &output_gradients_, &input_data_, &mut input_gradients_); } @@ -815,15 +815,15 @@ pub trait ILayer : ComputeOutput + ComputeInputGradient>], weights_gradients: &mut [ArcLock>]) { let out_data: Vec<_> = output_data.iter().map(|b| b.read().unwrap()).collect(); - let output_data_: Vec<&SharedTensor> = out_data.iter().enumerate().map(|(_, val)| &**val).collect(); + let output_data_: Vec<&SharedTensor> = out_data.iter().map(|val| &**val).collect(); let out_gradients: Vec<_> = output_gradients.iter().map(|b| b.read().unwrap()).collect(); - let output_gradients_: Vec<&SharedTensor> = out_gradients.iter().enumerate().map(|(_, val)| &**val).collect(); + let output_gradients_: Vec<&SharedTensor> = out_gradients.iter().map(|val| &**val).collect(); let inp_data: Vec<_> = input_data.iter().map(|b| b.read().unwrap()).collect(); - let input_data_: Vec<&SharedTensor> = inp_data.iter().enumerate().map(|(_, val)| &**val).collect(); + let input_data_: Vec<&SharedTensor> = inp_data.iter().map(|val| &**val).collect(); let wgt_gradient_ref = weights_gradients.iter().cloned().collect::>(); let mut wgt_gradient = &mut wgt_gradient_ref.iter().map(|b| b.write().unwrap()).collect::>(); let mut weights_gradient = &mut wgt_gradient.iter_mut().map(|a| a).collect::>(); - let mut weights_gradients_: Vec<&mut SharedTensor> = weights_gradient.iter_mut().enumerate().map(|(_, val)| &mut ***val).collect(); + let mut weights_gradients_: Vec<&mut SharedTensor> = weights_gradient.iter_mut().map(|val| &mut ***val).collect(); self.compute_parameters_gradient(backend, &output_data_, &output_gradients_, &input_data_, &mut weights_gradients_); } From 4a21001466da7e5766b124db1a9565dcaf70f383 Mon Sep 17 00:00:00 2001 From: Alexander Morozov Date: Sun, 20 Mar 2016 17:29:00 +0300 Subject: [PATCH 2/3] fix/benches: fix `cargo bench` compilation [SKIP_CHANGELOG] Looks like benchmarks are superseded by examples/benchmarks.rs and should be removed altogether, but while they are here they should at least compile cleanly. Well, now benches compile but panic on tensor dimension mismatch. --- benches/network_benches.rs | 34 ++++++++++++---------------------- 1 file changed, 12 insertions(+), 22 deletions(-) diff --git a/benches/network_benches.rs b/benches/network_benches.rs index 0ce8236b..e27253d7 100644 --- a/benches/network_benches.rs +++ b/benches/network_benches.rs @@ -14,14 +14,8 @@ mod cuda { use std::sync::{Arc, RwLock}; use leaf::layers::*; use leaf::layer::*; - use leaf::network::*; use std::rc::Rc; - #[cfg(feature = "native")] - fn native_backend() -> Rc> { - Rc::new(Backend::::default().unwrap()) - } - #[cfg(feature = "cuda")] fn cuda_backend() -> Rc> { Rc::new(Backend::::default().unwrap()) @@ -76,7 +70,7 @@ mod cuda { #[ignore] #[cfg(feature = "cuda")] fn bench_mnsit_forward_1(b: &mut Bencher) { - let mut cfg = NetworkConfig::default(); + let mut cfg = SequentialConfig::default(); // set up input cfg.add_input("in", &vec![1, 30, 30]); cfg.add_input("label", &vec![1, 1, 10]); @@ -98,18 +92,14 @@ mod cuda { // cfg.add_layer(loss_cfg); let backend = cuda_backend(); - let native_backend = native_backend(); - let mut network = Network::from_config(backend.clone(), &cfg); - let loss = &mut 0f32; + let mut network = Layer::from_config( + backend.clone(), &LayerConfig::new("network", LayerType::Sequential(cfg))); let _ = timeit_loops!(10, { let inp = SharedTensor::::new(backend.device(), &vec![1, 30, 30]).unwrap(); - let label = SharedTensor::::new(native_backend.device(), &vec![1, 1, 10]).unwrap(); - let inp_lock = Arc::new(RwLock::new(inp)); - let label_lock = Arc::new(RwLock::new(label)); - network.forward(&[inp_lock, label_lock], loss); + network.forward(&[inp_lock]); }); // b.iter(|| { // for _ in 0..1 { @@ -128,7 +118,7 @@ mod cuda { // #[ignore] #[cfg(feature = "cuda")] fn alexnet_forward(b: &mut Bencher) { - let mut cfg = NetworkConfig::default(); + let mut cfg = SequentialConfig::default(); // Layer: data cfg.add_input("data", &vec![128, 3, 224, 224]); // Layer: conv1 @@ -265,15 +255,15 @@ mod cuda { let backend = cuda_backend(); // let native_backend = native_backend(); - let mut network = Network::from_config(backend.clone(), &cfg); + let mut network = Layer::from_config( + backend.clone(), &LayerConfig::new("network", LayerType::Sequential(cfg))); let func = || { let forward_time = timeit_loops!(1, { - let loss = &mut 0f32; let inp = SharedTensor::::new(backend.device(), &vec![128, 3, 112, 112]).unwrap(); let inp_lock = Arc::new(RwLock::new(inp)); - network.forward(&[inp_lock], loss); + network.forward(&[inp_lock]); }); println!("Forward step: {}", forward_time); }; @@ -285,7 +275,7 @@ mod cuda { #[cfg(feature = "cuda")] fn small_alexnet_forward(b: &mut Bencher) { // let _ = env_logger::init(); - let mut cfg = NetworkConfig::default(); + let mut cfg = SequentialConfig::default(); // Layer: data cfg.add_input("data", &vec![128, 3, 112, 112]); // Layer: conv1 @@ -422,14 +412,14 @@ mod cuda { let backend = cuda_backend(); // let native_backend = native_backend(); - let mut network = Network::from_config(backend.clone(), &cfg); + let mut network = Layer::from_config( + backend.clone(), &LayerConfig::new("network", LayerType::Sequential(cfg))); let mut func = || { - let loss = &mut 0f32; let inp = SharedTensor::::new(backend.device(), &vec![128, 3, 112, 112]).unwrap(); let inp_lock = Arc::new(RwLock::new(inp)); - network.forward(&[inp_lock], loss); + network.forward(&[inp_lock]); }; { func(); bench_profile(b, func, 10); } } From b1e3879409baaa0e5c00b291357836b7e720eace Mon Sep 17 00:00:00 2001 From: Alexander Morozov Date: Sun, 20 Mar 2016 20:27:07 +0300 Subject: [PATCH 3/3] refactor/cleanup: replace some &vec![_] with &[_], fix redundant clone() [SKIP_CHANGELOG] --- Cargo.toml | 2 +- benches/network_benches.rs | 14 +++++++------- examples/benchmarks.rs | 12 ++++++------ src/layer.rs | 14 +++++++------- src/layers/common/convolution.rs | 6 +++--- src/layers/common/sequential.rs | 2 +- src/util.rs | 2 +- tests/layer_specs.rs | 8 ++++---- 8 files changed, 30 insertions(+), 30 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index fc0530c0..a8a12f3a 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -14,7 +14,7 @@ keywords = ["deep-learning", "neural-networks", "machine-learning", "framework"] license = "MIT OR Apache-2.0" [dependencies] -collenchyma = { version = "0.0.8", default-features = false, features = ["native"] } # native feature to read/write data into tensors +collenchyma = { version = "0.0.9", default-features = false, features = ["native"] } # native feature to read/write data into tensors collenchyma-blas = { version = "0.2.0", default-features = false, features = ["native"] } # only compiles with native feature collenchyma-nn = { version = "0.3.2", default-features = false } diff --git a/benches/network_benches.rs b/benches/network_benches.rs index e27253d7..50fc39e3 100644 --- a/benches/network_benches.rs +++ b/benches/network_benches.rs @@ -72,8 +72,8 @@ mod cuda { fn bench_mnsit_forward_1(b: &mut Bencher) { let mut cfg = SequentialConfig::default(); // set up input - cfg.add_input("in", &vec![1, 30, 30]); - cfg.add_input("label", &vec![1, 1, 10]); + cfg.add_input("in", &[1, 30, 30]); + cfg.add_input("label", &[1, 1, 10]); // set up sigmoid let mut sig_cfg = LayerConfig::new("sig", LayerType::Sigmoid); sig_cfg.add_input("in"); @@ -96,7 +96,7 @@ mod cuda { backend.clone(), &LayerConfig::new("network", LayerType::Sequential(cfg))); let _ = timeit_loops!(10, { - let inp = SharedTensor::::new(backend.device(), &vec![1, 30, 30]).unwrap(); + let inp = SharedTensor::::new(backend.device(), &[1, 30, 30]).unwrap(); let inp_lock = Arc::new(RwLock::new(inp)); network.forward(&[inp_lock]); @@ -120,7 +120,7 @@ mod cuda { fn alexnet_forward(b: &mut Bencher) { let mut cfg = SequentialConfig::default(); // Layer: data - cfg.add_input("data", &vec![128, 3, 224, 224]); + cfg.add_input("data", &[128, 3, 224, 224]); // Layer: conv1 let conv1_layer_cfg = ConvolutionConfig { num_output: 64, @@ -260,7 +260,7 @@ mod cuda { let func = || { let forward_time = timeit_loops!(1, { - let inp = SharedTensor::::new(backend.device(), &vec![128, 3, 112, 112]).unwrap(); + let inp = SharedTensor::::new(backend.device(), &[128, 3, 112, 112]).unwrap(); let inp_lock = Arc::new(RwLock::new(inp)); network.forward(&[inp_lock]); @@ -277,7 +277,7 @@ mod cuda { // let _ = env_logger::init(); let mut cfg = SequentialConfig::default(); // Layer: data - cfg.add_input("data", &vec![128, 3, 112, 112]); + cfg.add_input("data", &[128, 3, 112, 112]); // Layer: conv1 let conv1_layer_cfg = ConvolutionConfig { num_output: 32, @@ -416,7 +416,7 @@ mod cuda { backend.clone(), &LayerConfig::new("network", LayerType::Sequential(cfg))); let mut func = || { - let inp = SharedTensor::::new(backend.device(), &vec![128, 3, 112, 112]).unwrap(); + let inp = SharedTensor::::new(backend.device(), &[128, 3, 112, 112]).unwrap(); let inp_lock = Arc::new(RwLock::new(inp)); network.forward(&[inp_lock]); diff --git a/examples/benchmarks.rs b/examples/benchmarks.rs index 5075a63f..d8eb076b 100644 --- a/examples/benchmarks.rs +++ b/examples/benchmarks.rs @@ -120,7 +120,7 @@ fn bench_alexnet() { #[cfg(all(feature="cuda", not(feature="native")))] fn bench_alexnet() { let mut cfg = SequentialConfig::default(); - cfg.add_input("data", &vec![128, 3, 224, 224]); + cfg.add_input("data", &[128, 3, 224, 224]); let conv1_layer_cfg = ConvolutionConfig { num_output: 64, filter_shape: vec![11], padding: vec![2], stride: vec![4] }; cfg.add_layer(LayerConfig::new("conv1", conv1_layer_cfg)); @@ -160,7 +160,7 @@ fn bench_alexnet() { let func = || { let forward_time = timeit_loops!(1, { { - let inp = SharedTensor::::new(backend.device(), &vec![128, 3, 224, 224]).unwrap(); + let inp = SharedTensor::::new(backend.device(), &[128, 3, 224, 224]).unwrap(); let inp_lock = Arc::new(RwLock::new(inp)); network.forward(&[inp_lock.clone()]); @@ -202,7 +202,7 @@ fn bench_overfeat() { #[cfg(all(feature="cuda", not(feature="native")))] fn bench_overfeat() { let mut cfg = SequentialConfig::default(); - cfg.add_input("data", &vec![128, 3, 231, 231]); + cfg.add_input("data", &[128, 3, 231, 231]); let conv1_layer_cfg = ConvolutionConfig { num_output: 96, filter_shape: vec![11], padding: vec![0], stride: vec![4] }; cfg.add_layer(LayerConfig::new("conv1", conv1_layer_cfg)); @@ -242,7 +242,7 @@ fn bench_overfeat() { let func = || { let forward_time = timeit_loops!(1, { { - let inp = SharedTensor::::new(backend.device(), &vec![128, 3, 231, 231]).unwrap(); + let inp = SharedTensor::::new(backend.device(), &[128, 3, 231, 231]).unwrap(); let inp_lock = Arc::new(RwLock::new(inp)); network.forward(&[inp_lock.clone()]); @@ -284,7 +284,7 @@ fn bench_vgg_a() { #[cfg(all(feature="cuda", not(feature="native")))] fn bench_vgg_a() { let mut cfg = SequentialConfig::default(); - cfg.add_input("data", &vec![64, 3, 224, 224]); + cfg.add_input("data", &[64, 3, 224, 224]); let conv1_layer_cfg = ConvolutionConfig { num_output: 64, filter_shape: vec![3], padding: vec![1], stride: vec![1] }; cfg.add_layer(LayerConfig::new("conv1", conv1_layer_cfg)); @@ -339,7 +339,7 @@ fn bench_vgg_a() { let func = || { let forward_time = timeit_loops!(1, { { - let inp = SharedTensor::::new(backend.device(), &vec![64, 3, 224, 224]).unwrap(); + let inp = SharedTensor::::new(backend.device(), &[64, 3, 224, 224]).unwrap(); let inp_lock = Arc::new(RwLock::new(inp)); network.forward(&[inp_lock.clone()]); diff --git a/src/layer.rs b/src/layer.rs index feee31b3..38a2bacd 100644 --- a/src/layer.rs +++ b/src/layer.rs @@ -203,8 +203,8 @@ impl Layer { } let backend: Rc> = self.backend.clone(); - blob_data = Arc::new(RwLock::new(SharedTensor::new(backend.device(), &vec![1,1,1]).unwrap())); // [1,1,1] for CUDA - blob_gradient = Arc::new(RwLock::new(SharedTensor::new(backend.device(), &vec![1,1,1]).unwrap())); // [1,1,1] for CUDA + blob_data = Arc::new(RwLock::new(SharedTensor::new(backend.device(), &[1,1,1]).unwrap())); // [1,1,1] for CUDA + blob_gradient = Arc::new(RwLock::new(SharedTensor::new(backend.device(), &[1,1,1]).unwrap())); // [1,1,1] for CUDA } self.output_blob_names.push(blob_name.clone()); self.output_blobs_data.push(blob_data.clone()); @@ -227,8 +227,8 @@ impl Layer { info!("{} -> {}", self.name, blob_name); let backend: Rc> = self.backend.clone(); - let output_data = Arc::new(RwLock::new(SharedTensor::new(backend.device(), &vec![1,1,1]).unwrap())); // [1,1,1] for CUDA - let output_gradient = Arc::new(RwLock::new(SharedTensor::new(backend.device(), &vec![1,1,1]).unwrap())); // [1,1,1] for CUDA + let output_data = Arc::new(RwLock::new(SharedTensor::new(backend.device(), &[1,1,1]).unwrap())); // [1,1,1] for CUDA + let output_gradient = Arc::new(RwLock::new(SharedTensor::new(backend.device(), &[1,1,1]).unwrap())); // [1,1,1] for CUDA self.output_blobs_data.push(output_data); self.output_blobs_gradient.push(output_gradient); } @@ -460,7 +460,7 @@ impl Layer { let forward_time = timeit_loops!(1, { if self.is_using_in_place() { - self.worker.forward(&self.backend, &vec![], &self.weights_data, &mut self.output_blobs_data); + self.worker.forward(&self.backend, &[], &self.weights_data, &mut self.output_blobs_data); } else { self.worker.forward(&self.backend, &self.input_blobs_data, &self.weights_data, &mut self.output_blobs_data); } @@ -498,8 +498,8 @@ impl Layer { if self.is_using_in_place() { self.worker.backward_input(&self.backend, &self.weights_data, - &vec![], - &vec![], + &[], + &[], &self.input_blobs_data, &mut self.input_blobs_gradient) } else { diff --git a/src/layers/common/convolution.rs b/src/layers/common/convolution.rs index d7caac0e..e6c18583 100644 --- a/src/layers/common/convolution.rs +++ b/src/layers/common/convolution.rs @@ -252,12 +252,12 @@ mod tests { stride: vec![4], }; let layer = Convolution::>::from_config(&cfg); - let num_spatial_dims = layer.num_spatial_dims(&vec![1, 3, 224, 224]); + let num_spatial_dims = layer.num_spatial_dims(&[1, 3, 224, 224]); assert_eq!(2, num_spatial_dims); assert_eq!(vec![11, 11], layer.spatial_filter_dims(2)); assert_eq!(vec![2, 2], layer.padding_dims(2)); assert_eq!(vec![4, 4], layer.stride_dims(2)); - assert_eq!(vec![64, 3, 11, 11], layer.calculate_filter_shape(&vec![1, 3, 224, 224])); - assert_eq!(vec![1, 64, 55, 55], layer.calculate_output_shape(&vec![1, 3, 224, 224])); + assert_eq!(vec![64, 3, 11, 11], layer.calculate_filter_shape(&[1, 3, 224, 224])); + assert_eq!(vec![1, 64, 55, 55], layer.calculate_output_shape(&[1, 3, 224, 224])); } } diff --git a/src/layers/common/sequential.rs b/src/layers/common/sequential.rs index b10d4120..782c5f8a 100644 --- a/src/layers/common/sequential.rs +++ b/src/layers/common/sequential.rs @@ -43,7 +43,7 @@ impl + 'static> Sequential { pub fn from_config(backend: Rc, config: &SequentialConfig) -> Sequential { let mut layer = Self::empty(); - layer.init_layers(backend, &config.clone()); + layer.init_layers(backend, config); layer } diff --git a/src/util.rs b/src/util.rs index b87f8225..1dca48e0 100644 --- a/src/util.rs +++ b/src/util.rs @@ -59,7 +59,7 @@ pub fn write_batch_sample(tensor: &mut SharedT /// Create a Collenchyma SharedTensor for a scalar value. pub fn native_scalar(scalar: T) -> SharedTensor { let native = native_backend(); - let mut shared_scalar = SharedTensor::::new(native.device(), &vec![1]).unwrap(); + let mut shared_scalar = SharedTensor::::new(native.device(), &1).unwrap(); write_to_memory(shared_scalar.get_mut(native.device()).unwrap(), &[scalar]); shared_scalar diff --git a/tests/layer_specs.rs b/tests/layer_specs.rs index dba2be6f..0aa28f5e 100644 --- a/tests/layer_specs.rs +++ b/tests/layer_specs.rs @@ -60,7 +60,7 @@ mod layer_spec { #[test] fn can_create_single_layer_sequential_layer() { let mut model = SequentialConfig::default(); - model.add_input("data", &vec![28, 28]); + model.add_input("data", &[28, 28]); model.add_layer(LayerConfig::new("sigmoid", LayerType::Sigmoid)); Layer::from_config(cuda_backend(), &LayerConfig::new("model", LayerType::Sequential(model))); @@ -69,7 +69,7 @@ mod layer_spec { #[test] fn can_create_simple_network_sequential_layer() { let mut model = SequentialConfig::default(); - model.add_input("data", &vec![1, 784]); + model.add_input("data", &[1, 784]); model.add_layer(LayerConfig::new("linear1", LinearConfig { output_size: 1568 })); model.add_layer(LayerConfig::new("sigmoid", LayerType::Sigmoid)); model.add_layer(LayerConfig::new("linear2", LinearConfig { output_size: 10 })); @@ -83,12 +83,12 @@ mod layer_spec { let cuda_backend = cuda_backend(); let mut normal_model = SequentialConfig::default(); - normal_model.add_input("data", &vec![3]); + normal_model.add_input("data", &[3]); normal_model.add_layer(LayerConfig::new("sigmoid", LayerType::Sigmoid)); let mut normal_network = Layer::from_config(cuda_backend.clone(), &LayerConfig::new("normal_model", LayerType::Sequential(normal_model))); let mut reshape_model = SequentialConfig::default(); - reshape_model.add_input("data", &vec![3]); + reshape_model.add_input("data", &[3]); reshape_model.add_layer(LayerConfig::new("reshape", ReshapeConfig { shape: vec![1, 1, 3] })); reshape_model.add_layer(LayerConfig::new("sigmoid", LayerType::Sigmoid)); let mut reshape_network = Layer::from_config(cuda_backend.clone(), &LayerConfig::new("reshape_model", LayerType::Sequential(reshape_model)));