Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Proof of Concept] Initial support for parallelization using Rayon #74

Merged
Merged
Show file tree
Hide file tree
Changes from 14 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
33 changes: 28 additions & 5 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -10,14 +10,22 @@ categories = ["science"]
readme = "README.md"
documentation = "https://axect.github.io/Peroxide_Doc"
keywords = ["Numeric", "Science", "Dataframe", "Plot", "LinearAlgebra"]
exclude = ["example_data/", "src/bin/", "benches/", "example/", "test_data/", "peroxide-ad2"]
exclude = [
"example_data/",
"src/bin/",
# "benches/",
"example/",
"test_data/",
"peroxide-ad2",
]

[badges]
travis-ci = { repository = "axect/peroxide" }
maintenance = { status = "actively-developed" }

[dev-dependencies]
float-cmp = "0.9"
criterion = { version = "0.5.1", features = ["html_reports"] }

[dependencies]
csv = { version = "1.3", optional = true, default-features = false }
Expand All @@ -32,17 +40,27 @@ anyhow = "1.0"
paste = "1.0"
#num-complex = "0.3"
netcdf = { version = "0.7", optional = true, default-features = false }
pyo3 = { version = "0.22", optional = true, features = ["auto-initialize", "gil-refs"] }
pyo3 = { version = "0.22", optional = true, features = [
"auto-initialize",
"gil-refs",
] }
blas = { version = "0.22", optional = true }
lapack = { version = "0.19", optional = true }
serde = { version = "1.0", features = ["derive"], optional = true }
json = { version = "0.12", optional = true }
arrow2 = { version = "0.18", features = ["io_parquet", "io_parquet_compression"], optional = true }
arrow2 = { version = "0.18", features = [
"io_parquet",
"io_parquet_compression",
], optional = true }
num-complex = { version = "0.4", optional = true }
lambert_w = { version = "0.3.0", default-features = false, features = ["24bits", "50bits"] }
lambert_w = { version = "0.3.0", default-features = false, features = [
"24bits",
"50bits",
] }
rayon = "1.10"

[package.metadata.docs.rs]
rustdoc-args = [ "--html-in-header", "katex-header.html", "--cfg", "docsrs"]
rustdoc-args = ["--html-in-header", "katex-header.html", "--cfg", "docsrs"]

[features]
default = []
Expand All @@ -51,3 +69,8 @@ plot = ["pyo3"]
nc = ["netcdf"]
parquet = ["arrow2"]
complex = ["num-complex", "matrixmultiply/cgemm"]

[[bench]]
path = "benches/parallel_rayon/matrix_benchmark.rs"
name = "matrix_benchmark"
harness = false
194 changes: 194 additions & 0 deletions benches/data/rayon_matrix_benchmark_results.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,194 @@
Lib used for benchmarking: Criterion
Matrix size: 1000x1000

Running benches/parallel_rayon/matrix_benchmark.rs

ser_matrix_bench time: [535.12 µs 544.51 µs 556.68 µs]
Found 11 outliers among 100 measurements (11.00%)
4 (4.00%) high mild
7 (7.00%) high severe

par_matrix_bench time: [5.0912 ms 5.1431 ms 5.1995 ms]
Found 7 outliers among 100 measurements (7.00%)
1 (1.00%) low mild
5 (5.00%) high mild
1 (1.00%) high severe

ser_py_matrix_bench time: [4.3100 ms 4.3309 ms 4.3544 ms]
Found 7 outliers among 100 measurements (7.00%)
2 (2.00%) high mild
5 (5.00%) high severe

par_py_matrix_bench time: [11.667 ms 11.789 ms 11.920 ms]
Found 10 outliers among 100 measurements (10.00%)
6 (6.00%) high mild
4 (4.00%) high severe

ser_matrix_change_shape_bench
time: [7.3630 ms 7.4075 ms 7.4608 ms]
Found 5 outliers among 100 measurements (5.00%)
1 (1.00%) high mild
4 (4.00%) high severe

par_matrix_change_shape_bench
time: [10.276 ms 10.385 ms 10.499 ms]
Found 3 outliers among 100 measurements (3.00%)
2 (2.00%) high mild
1 (1.00%) high severe

ser_matrix_extract_row_bench
time: [613.39 µs 622.44 µs 633.72 µs]
Found 7 outliers among 100 measurements (7.00%)
7 (7.00%) high severe

par_matrix_extract_row_bench
time: [5.4321 ms 5.4851 ms 5.5399 ms]
Found 4 outliers among 100 measurements (4.00%)
4 (4.00%) high mild

ser_matrix_from_index_bench
time: [2.4174 ms 2.4490 ms 2.4851 ms]
Found 14 outliers among 100 measurements (14.00%)
1 (1.00%) high mild
13 (13.00%) high severe

par_matrix_from_index_bench
time: [2.3912 ms 2.4090 ms 2.4304 ms]
Found 9 outliers among 100 measurements (9.00%)
2 (2.00%) high mild
7 (7.00%) high severe

ser_matrix_to_vec_bench time: [2.4800 ms 2.5082 ms 2.5423 ms]
Found 10 outliers among 100 measurements (10.00%)
4 (4.00%) high mild
6 (6.00%) high severe

par_matrix_to_vec_bench time: [6.4041 ms 6.4618 ms 6.5250 ms]
Found 6 outliers among 100 measurements (6.00%)
5 (5.00%) high mild
1 (1.00%) high severe

ser_matrix_to_diag_bench
time: [2.4335 ms 2.4526 ms 2.4750 ms]
Found 14 outliers among 100 measurements (14.00%)
6 (6.00%) high mild
8 (8.00%) high severe

par_matrix_to_diag_bench
time: [13.514 ms 13.684 ms 13.868 ms]
Found 10 outliers among 100 measurements (10.00%)
7 (7.00%) high mild
3 (3.00%) high severe

Benchmarking ser_matrix_submat_bench: Warming up for 3.0000 s
Warning: Unable to complete 100 samples in 5.0s. You may wish to increase target time to 8.3s, enable flat sampling, or reduce sample count to 50.
ser_matrix_submat_bench time: [1.6077 ms 1.6243 ms 1.6451 ms]
Found 16 outliers among 100 measurements (16.00%)
3 (3.00%) high mild
13 (13.00%) high severe

par_matrix_submat_bench time: [10.611 ms 10.761 ms 10.942 ms]
Found 5 outliers among 100 measurements (5.00%)
3 (3.00%) high mild
2 (2.00%) high severe

ser_matrix_add_vec_bench
time: [7.3077 ms 7.3485 ms 7.3946 ms]
Found 12 outliers among 100 measurements (12.00%)
2 (2.00%) high mild
10 (10.00%) high severe

par_matrix_add_vec_bench
time: [11.331 ms 11.480 ms 11.636 ms]
Found 2 outliers among 100 measurements (2.00%)
2 (2.00%) high mild

ser_matrix_norm_bench time: [5.1600 ms 5.1864 ms 5.2165 ms]
Found 7 outliers among 100 measurements (7.00%)
1 (1.00%) high mild
6 (6.00%) high severe

par_matrix_norm_bench time: [2.6565 ms 2.6810 ms 2.7091 ms]
Found 5 outliers among 100 measurements (5.00%)
2 (2.00%) high mild
3 (3.00%) high severe

Benchmarking ser_matrix_norm_bench #2: Warming up for 3.0000 s
Warning: Unable to complete 100 samples in 5.0s. You may wish to increase target time to 8.9s, enable flat sampling, or reduce sample count to 50.
ser_matrix_norm_bench #2
time: [1.7262 ms 1.7391 ms 1.7541 ms]
Found 15 outliers among 100 measurements (15.00%)
10 (10.00%) high mild
5 (5.00%) high severe

par_matrix_norm_bench #2
time: [6.7071 ms 6.7883 ms 6.8703 ms]
Found 1 outliers among 100 measurements (1.00%)
1 (1.00%) high mild

ser_matrix_norm_bench #3
time: [9.7582 ms 9.9006 ms 10.057 ms]
Found 12 outliers among 100 measurements (12.00%)
5 (5.00%) high mild
7 (7.00%) high severe

par_matrix_norm_bench #3
time: [9.3004 ms 9.4088 ms 9.5239 ms]
Found 1 outliers among 100 measurements (1.00%)
1 (1.00%) high mild

ser_matrix_inner_prod_bench
time: [5.2730 ms 5.3590 ms 5.4583 ms]
Found 14 outliers among 100 measurements (14.00%)
3 (3.00%) high mild
11 (11.00%) high severe

par_matrix_inner_prod_bench
time: [5.0987 ms 5.1644 ms 5.2402 ms]
Found 7 outliers among 100 measurements (7.00%)
3 (3.00%) high mild
4 (4.00%) high severe

ser_matrix_hadamard_bench
time: [5.6521 ms 5.6870 ms 5.7262 ms]
Found 12 outliers among 100 measurements (12.00%)
3 (3.00%) high mild
9 (9.00%) high severe

par_matrix_hadamard_bench
time: [14.155 ms 14.335 ms 14.527 ms]
Found 4 outliers among 100 measurements (4.00%)
3 (3.00%) high mild
1 (1.00%) high severe

ser_matrix_take_row_bench
time: [3.7894 ms 3.8234 ms 3.8613 ms]
Found 15 outliers among 100 measurements (15.00%)
7 (7.00%) high mild
8 (8.00%) high severe

par_matrix_take_row_bench
time: [8.4008 ms 8.5171 ms 8.6523 ms]
Found 9 outliers among 100 measurements (9.00%)
6 (6.00%) high mild
3 (3.00%) high severe

ser_matrix_fpmap_bench time: [3.2526 ms 3.2739 ms 3.2977 ms]
Found 12 outliers among 100 measurements (12.00%)
2 (2.00%) high mild
10 (10.00%) high severe

par_matrix_fpmap_bench time: [10.604 ms 10.765 ms 10.937 ms]
Found 11 outliers among 100 measurements (11.00%)
8 (8.00%) high mild
3 (3.00%) high severe

ser_matrix_reduce_bench time: [2.6748 ms 2.6964 ms 2.7201 ms]
Found 9 outliers among 100 measurements (9.00%)
6 (6.00%) high mild
3 (3.00%) high severe

par_matrix_reduce_bench time: [6.2453 ms 6.3198 ms 6.4034 ms]
Found 6 outliers among 100 measurements (6.00%)
4 (4.00%) high mild
2 (2.00%) high severe
121 changes: 121 additions & 0 deletions benches/parallel_rayon/matrix_benchmark.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,121 @@
use criterion::{black_box, criterion_group, criterion_main, Criterion};
use peroxide::{
fuga::*,
traits::math::{ParallelInnerProduct, ParallelNormed},
};

/// Benchmarks construction of a 1000x1000 `Shape::Row` matrix from a flat
/// vector of one million `f64` values, first with `matrix` and then with
/// `par_matrix`.
///
/// The source vector is cloned inside every timed iteration, so each
/// measurement includes the cost of that copy in addition to the constructor.
pub fn par_matrix_benchmark(cr: &mut Criterion) {
    let data: Vec<f64> = (0..1_000_000).map(|i: i32| 2.0 * (i as f64)).collect();

    // Result: 1000x1000 matrix: 630.92 µs
    cr.bench_function("ser_matrix_bench", |bencher| {
        bencher.iter(|| {
            let serial = matrix(data.clone(), 1000, 1000, Shape::Row);
            black_box(serial)
        })
    });

    // Result: 1000x1000 matrix: 9.6995 ms
    cr.bench_function("par_matrix_bench", |bencher| {
        bencher.iter(|| {
            let parallel = par_matrix(data.clone(), 1000, 1000, Shape::Row);
            black_box(parallel)
        })
    });
}

/// Benchmarks `Matrix::from_index` for a `(1000, 1000)` size, filling each
/// entry with `2.0 * x * y` computed from the two index arguments.
///
/// NOTE(review): the "ser_" and the "par_" benchmark below both call the same
/// `Matrix::from_index`, so no parallel code path is exercised and the two
/// timings are expected to be indistinguishable. If a parallel index-based
/// constructor exists, the "par_" case should call it instead — confirm
/// against the `Matrix` API.
pub fn par_matrix_from_index_benchmark(cr: &mut Criterion) {
    let dims: (usize, usize) = (1000, 1000);
    let entry = |x: usize, y: usize| 2.0 * (x as f64) * (y as f64);

    // Result: 1000x1000 matrix: 2.3662 ms
    cr.bench_function("ser_matrix_from_index_bench", |bencher| {
        bencher.iter(|| {
            let built = Matrix::from_index(entry, dims);
            black_box(built)
        })
    });

    // Result: 1000x1000 matrix: 2.3355 ms
    cr.bench_function("par_matrix_from_index_bench", |bencher| {
        bencher.iter(|| {
            let built = Matrix::from_index(entry, dims);
            black_box(built)
        })
    });
}

/// Benchmarks the `Norm::Lpq(4.0, 2.0)` norm of a 1000x1000 `Shape::Row`
/// matrix, comparing `norm` against `par_norm`.
///
/// The matrix is rebuilt from a cloned vector inside every timed iteration,
/// so each measurement covers the copy, the construction and the norm.
///
/// Check: the parallel version gave better results (test was run 6 times;
/// the recorded timings are listed above each benchmark).
pub fn par_matrix_norm_lpq_benchmark(cr: &mut Criterion) {
    let data: Vec<f64> = (0..1_000_000).map(|i: i32| 2.0 * (i as f64)).collect();

    // Result: 1000x1000 matrix: [5.5969 ms 5.7555 ms 5.9515 ms 6.0843 ms 6.3072 ms 6.5636 ms]
    cr.bench_function("ser_matrix_norm_bench", |bencher| {
        bencher.iter(|| {
            let m = matrix(data.clone(), 1000, 1000, Shape::Row);
            black_box(m.norm(Norm::Lpq(4.0, 2.0)))
        })
    });

    // Result: 1000x1000 matrix: [3.1796 ms 3.2714 ms 3.3714 ms 3.6123 ms 3.7398 ms 3.8761 ms]
    cr.bench_function("par_matrix_norm_bench", |bencher| {
        bencher.iter(|| {
            let m = matrix(data.clone(), 1000, 1000, Shape::Row);
            black_box(m.par_norm(Norm::Lpq(4.0, 2.0)))
        })
    });
}

/// Benchmarks the `Norm::L1` norm of a 1000x1000 `Shape::Row` matrix,
/// comparing `norm` against `par_norm`.
///
/// The matrix is rebuilt from a cloned vector inside every timed iteration,
/// so each measurement covers the copy, the construction and the norm.
///
/// The benchmark IDs carry an `l1` marker. `par_matrix_norm_lpq_benchmark`
/// already registers `ser_matrix_norm_bench` / `par_matrix_norm_bench`, and
/// reusing those IDs makes Criterion report this pair as
/// `ser_matrix_norm_bench #2` / `par_matrix_norm_bench #2`, which hides which
/// norm was measured.
pub fn par_matrix_norm_l1_benchmark(cr: &mut Criterion) {
    let data: Vec<f64> = (0..1_000_000).map(|i: i32| 2.0 * (i as f64)).collect();

    // Result: 1000x1000 matrix: 9.0287 ms
    cr.bench_function("ser_matrix_norm_l1_bench", |bencher| {
        bencher.iter(|| {
            let m = matrix(data.clone(), 1000, 1000, Shape::Row);
            black_box(m.norm(Norm::L1))
        })
    });

    // Result: 1000x1000 matrix: 10.393 ms
    cr.bench_function("par_matrix_norm_l1_bench", |bencher| {
        bencher.iter(|| {
            let m = matrix(data.clone(), 1000, 1000, Shape::Row);
            black_box(m.par_norm(Norm::L1))
        })
    });
}

/// Benchmarks the inner product of two 1000x1000 `Shape::Row` matrices,
/// comparing `dot` against `par_dot`.
///
/// Both operands are rebuilt from cloned vectors inside every timed
/// iteration, so each measurement covers two copies, two constructions and
/// the product itself.
///
/// Check: the parallel version gave better results (test was run 6 times;
/// the recorded timings are listed above each benchmark).
pub fn par_matrix_inner_prod_benchmark(cr: &mut Criterion) {
    let left_data: Vec<f64> = (0..1_000_000).map(|i: i32| 2.0 * (i as f64)).collect();
    let right_data: Vec<f64> = (0..1_000_000).map(|i: i32| 3.0 * (i as f64)).collect();

    // Result: 1000x1000 matrix: [5.1075 ms 5.1505 ms 5.2013 ms 5.7617 ms 6.0196 ms 6.3009 ms]
    cr.bench_function("ser_matrix_inner_prod_bench", |bencher| {
        bencher.iter(|| {
            let lhs = matrix(left_data.clone(), 1000, 1000, Shape::Row);
            let rhs = matrix(right_data.clone(), 1000, 1000, Shape::Row);
            black_box(lhs.dot(&rhs))
        })
    });

    // Result: 1000x1000 matrix: [4.9931 ms 5.0244 ms 5.0642 ms 5.0322 ms 5.0819 ms 5.1404 ms]
    cr.bench_function("par_matrix_inner_prod_bench", |bencher| {
        bencher.iter(|| {
            let lhs = matrix(left_data.clone(), 1000, 1000, Shape::Row);
            let rhs = matrix(right_data.clone(), 1000, 1000, Shape::Row);
            black_box(lhs.par_dot(&rhs))
        })
    });
}

// Collects every benchmark function defined in this file into a single
// Criterion group named `benches`. A benchmark function that is not listed
// here is never run.
criterion_group!(
benches,
par_matrix_benchmark,
par_matrix_from_index_benchmark,
par_matrix_norm_lpq_benchmark,
par_matrix_norm_l1_benchmark,
par_matrix_inner_prod_benchmark,
);
// Expands to the `main` function that runs the `benches` group; this is why
// the `[[bench]]` entry for this target sets `harness = false` in Cargo.toml.
criterion_main!(benches);
2 changes: 2 additions & 0 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -208,3 +208,5 @@ pub mod util;

#[cfg(feature = "complex")]
pub mod complex;

extern crate rayon;
Loading