From b1727b569a05cb5f4454801a0bfa58663d88b2f4 Mon Sep 17 00:00:00 2001 From: Guillaume Endignoux Date: Fri, 20 Sep 2024 10:47:41 +0200 Subject: [PATCH] Add criterion-based benchmarks. --- Cargo.toml | 5 ++ benches/criterion.rs | 113 +++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 118 insertions(+) create mode 100644 benches/criterion.rs diff --git a/Cargo.toml b/Cargo.toml index b48282f..2926088 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -25,9 +25,14 @@ log = { optional = true, version = "0.4" } nix = { version = "0.29", features = ["sched"] } [dev-dependencies] +criterion = "0.5.1" divan = "0.1.14" rayon = "1.10.0" +[[bench]] +name = "criterion" +harness = false + [[bench]] name = "divan" harness = false diff --git a/benches/criterion.rs b/benches/criterion.rs new file mode 100644 index 0000000..c18b7ec --- /dev/null +++ b/benches/criterion.rs @@ -0,0 +1,113 @@ +// Copyright 2024 Google LLC +// +// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or +// https://www.apache.org/licenses/LICENSE-2.0> or the MIT license +// <LICENSE-MIT or https://opensource.org/licenses/MIT>, at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. 
+ +use ::paralight::RangeStrategy; +use criterion::{criterion_group, criterion_main, BenchmarkId, Criterion, Throughput}; +use std::mem::size_of; + +const NUM_THREADS: &[usize] = &[1, 2, 4, 8]; +const LENGTHS: &[usize] = &[10_000, 100_000, 1_000_000, 10_000_000]; + +fn sum(c: &mut Criterion) { + let mut group = c.benchmark_group("sum"); + for len in LENGTHS { + group.throughput(Throughput::Bytes((len * size_of::()) as u64)); + group.bench_with_input(BenchmarkId::new("serial", len), len, serial::sum); + for &num_threads in NUM_THREADS { + group.bench_with_input( + BenchmarkId::new(&format!("rayon@{num_threads}"), len), + len, + |bencher, len| rayon::sum(bencher, num_threads, len), + ); + for (range_strategy, range_name) in [ + (RangeStrategy::Fixed, "fixed"), + (RangeStrategy::WorkStealing, "work-stealing"), + ] { + group.bench_with_input( + BenchmarkId::new(&format!("paralight_{range_name}@{num_threads}"), len), + len, + |bencher, len| paralight::sum(bencher, range_strategy, num_threads, len), + ); + } + } + } + group.finish(); +} + +/// Baseline benchmarks using serial iterators (without any multi-threading +/// involved). +mod serial { + use criterion::{black_box, Bencher}; + + pub fn sum(bencher: &mut Bencher, len: &usize) { + let input = (0..=*len as u64).collect::>(); + let input_slice = input.as_slice(); + bencher.iter(|| black_box(input_slice).iter().sum::()); + } +} + +/// Benchmarks using Rayon. +mod rayon { + use criterion::{black_box, Bencher}; + use rayon::iter::{IntoParallelRefIterator, ParallelIterator}; + + pub fn sum(bencher: &mut Bencher, num_threads: usize, len: &usize) { + let input = (0..=*len as u64).collect::>(); + let input_slice = input.as_slice(); + let thread_pool = rayon::ThreadPoolBuilder::new() + .num_threads(num_threads) + .build() + .unwrap(); + thread_pool.install(|| bencher.iter(|| black_box(input_slice).par_iter().sum::())); + } +} + +/// Benchmarks using Paralight. 
+mod paralight { + use criterion::Bencher; + use paralight::{RangeStrategy, ThreadAccumulator, ThreadPoolBuilder}; + use std::num::NonZeroUsize; + + pub fn sum( + bencher: &mut Bencher, + range_strategy: RangeStrategy, + num_threads: usize, + len: &usize, + ) { + let input = (0..=*len as u64).collect::>(); + let pool_builder = ThreadPoolBuilder { + num_threads: NonZeroUsize::try_from(num_threads).unwrap(), + range_strategy, + }; + pool_builder.scope( + &input, + || SumAccumulator, + |thread_pool| { + bencher.iter(|| thread_pool.process_inputs().reduce(|a, b| a + b).unwrap()); + }, + ); + } + + struct SumAccumulator; + + impl ThreadAccumulator for SumAccumulator { + type Accumulator<'a> = u64; + fn init(&self) -> u64 { + 0 + } + fn process_item(&self, accumulator: &mut u64, _index: usize, x: &u64) { + *accumulator += *x; + } + fn finalize(&self, accumulator: u64) -> u64 { + accumulator + } + } +} + +criterion_group!(benches, sum); +criterion_main!(benches);