Skip to content

Commit

Permalink
Some basic benchmarks
Browse files Browse the repository at this point in the history
  • Loading branch information
jltsiren committed Sep 15, 2021
1 parent 4951925 commit 7b73d8e
Show file tree
Hide file tree
Showing 5 changed files with 261 additions and 3 deletions.
15 changes: 14 additions & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,21 @@ license = "MIT"
readme = "README.md"
repository = "https://github.com/jltsiren/gbwt-rs"

[features]
bench = ["getopts", "libc", "rand"]

[dependencies]
simple-sds = { git = "https://github.com/jltsiren/simple-sds", branch = "main" }
getopts = { version = "0.2", optional = true }
libc = { version = "0.2", optional = true }
rand = { version = "0.8", optional = true }

[dev-dependencies]
rand = "0.7"
rand = "0.8"

[[bin]]
name = "benchmark"
required-features = ["bench"]
test = false
bench = false
doc = false
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ It is based on the [Simple-SDS](https://github.com/jltsiren/simple-sds) library.
- [x] Iteration over paths
- [x] Unidirectional search
- [x] Bidirectional search
- [ ] Metadata
- [x] Metadata
- [ ] Locate queries

### GBWTGraph / GBZ
Expand Down
165 changes: 165 additions & 0 deletions src/bin/benchmark/main.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,165 @@
// use
use gbwt::GBWT;
use simple_sds::serialize::Serialize;

use simple_sds::serialize;

use std::time::Instant;
use std::{env, process};

use getopts::Options;
use rand::Rng;

mod utils;

//-----------------------------------------------------------------------------

/// Entry point of the benchmark binary.
///
/// Parses the command line into a `Config`. If a GBWT file name was given,
/// loads the index, prints its size, generates random queries, and times
/// unidirectional search over them. Peak memory usage is reported at the end
/// in every case.
///
/// Exits with status 1 if the index cannot be loaded or if it is empty.
fn main() {
    let config = Config::new();

    if let Some(filename) = config.filename.as_ref() {
        println!("Loading GBWT index {}", filename);
        // The file name comes from the user, so a load failure is an expected
        // condition: report it instead of panicking.
        let index: GBWT = match serialize::load_from(filename) {
            Ok(index) => index,
            Err(err) => {
                eprintln!("Cannot load GBWT index {}: {}", filename, err);
                process::exit(1);
            }
        };
        let (size, units) = utils::readable_size(index.size_in_bytes());
        println!("Index size: {:.3} {}", size, units);
        if index.is_empty() {
            eprintln!("Cannot perform benchmarks with an empty index");
            process::exit(1);
        }
        println!();

        let queries = generate_queries(&index, &config);
        unidirectional_search(&index, &queries);
    }

    utils::report_memory_usage();
}

//-----------------------------------------------------------------------------

/// Command-line configuration of the benchmark binary.
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct Config {
    /// Name of the GBWT file to benchmark, or `None` if no file was given.
    pub filename: Option<String>,
    /// Number of queries to generate.
    pub queries: usize,
    /// Number of nodes in each generated query.
    pub query_len: usize,
}

impl Config {
    /// Default number of queries.
    const QUERIES: usize = 1000000;
    /// Default query length.
    const QUERY_LEN: usize = 10;

    /// Builds the configuration from the command-line arguments.
    ///
    /// Recognized options are `-h/--help`, `-n/--queries INT`, and
    /// `-l/--query-len INT`. The first free argument, if any, becomes
    /// `filename`; further free arguments are ignored.
    ///
    /// This function does not return on bad input: it prints a message to
    /// stderr and exits with status 1 if the options cannot be parsed, or if
    /// a numeric option is not a valid non-zero `usize`. With `--help` it
    /// prints the usage text and exits with status 0.
    pub fn new() -> Config {
        let args: Vec<String> = env::args().collect();
        // argv can legitimately be empty; fall back to the binary name
        // instead of panicking on `args[0]`.
        let program = args
            .first()
            .cloned()
            .unwrap_or_else(|| String::from("benchmark"));

        let mut opts = Options::new();
        opts.optflag("h", "help", "print this help");
        // The defaults shown in the help text are derived from the constants
        // so that the two cannot drift apart.
        opts.optopt(
            "n",
            "queries",
            &format!("number of queries (default {})", Self::QUERIES),
            "INT",
        );
        opts.optopt(
            "l",
            "query-len",
            &format!("query length (default {})", Self::QUERY_LEN),
            "INT",
        );
        let matches = match opts.parse(args.iter().skip(1)) {
            Ok(m) => m,
            Err(f) => {
                eprintln!("{}", f);
                process::exit(1);
            }
        };

        // Help takes precedence over validation of the other options.
        if matches.opt_present("h") {
            let header = format!("Usage: {} [options] gbwt_file", program);
            print!("{}", opts.usage(&header));
            process::exit(0);
        }

        let queries = Self::positive_or(
            matches.opt_str("n"),
            "--queries",
            "number of queries",
            Self::QUERIES,
        );
        let query_len = Self::positive_or(
            matches.opt_str("l"),
            "--query-len",
            "query length",
            Self::QUERY_LEN,
        );

        Config {
            filename: matches.free.first().cloned(),
            queries,
            query_len,
        }
    }

    /// Returns `default` if `value` is `None`; otherwise parses `value` as a
    /// non-zero `usize`.
    ///
    /// On a parse error or a zero value, prints a message prefixed with
    /// `option` to stderr and exits with status 1. `what` names the quantity
    /// in the "must be non-zero" message.
    fn positive_or(value: Option<String>, option: &str, what: &str, default: usize) -> usize {
        let value = match value {
            Some(value) => value,
            None => return default,
        };
        match value.parse::<usize>() {
            Ok(0) => {
                eprintln!("{}: {} must be non-zero", option, what);
                process::exit(1);
            }
            Ok(n) => n,
            Err(err) => {
                eprintln!("{}: {}", option, err);
                process::exit(1);
            }
        }
    }
}

//-----------------------------------------------------------------------------

/// Generates `config.queries` random queries, each a sequence of exactly
/// `config.query_len` node identifiers obtained by walking the index.
///
/// Each attempt draws a random node from
/// `index.first_node()..index.alphabet_size()` and a random offset within the
/// search state of that node, then follows `index.forward` until the query is
/// long enough or the walk ends. Walks that end early are discarded and a new
/// attempt is made.
///
/// NOTE: because short walks are retried without limit, this loop does not
/// terminate if the index contains no walk with `config.query_len` nodes.
fn generate_queries(index: &GBWT, config: &Config) -> Vec<Vec<usize>> {
    println!("Generating {} queries of length {}", config.queries, config.query_len);
    let mut rng = rand::thread_rng();
    let mut result: Vec<Vec<usize>> = Vec::new();

    while result.len() < config.queries {
        // Random starting position: a node, then an offset in its record.
        let start = rng.gen_range(index.first_node()..index.alphabet_size());
        let start_offset = match index.find(start) {
            Some(state) => rng.gen_range(0..state.len()),
            // The node does not occur in the index; draw again.
            None => continue,
        };

        let mut path: Vec<usize> = vec![start];
        let mut current = (start, start_offset);
        while path.len() < config.query_len {
            match index.forward(current) {
                Some(next) => {
                    path.push(next.0);
                    current = (next.0, next.1);
                }
                None => break,
            }
        }

        // Keep only walks that reached the requested length.
        if path.len() == config.query_len {
            result.push(path);
        }
    }

    println!();
    result
}

/// Runs every query as a unidirectional search and reports the timing.
///
/// For each query, the search starts with `index.find` on the first node and
/// is extended with `index.extend` over the remaining nodes. The total number
/// of nodes and the total size of the final search states are accumulated and
/// passed to `utils::report_results` together with the elapsed time.
///
/// Panics if a query is empty or if any search step returns `None`.
fn unidirectional_search(index: &GBWT, queries: &[Vec<usize>]) {
    println!("Running {} unidirectional queries", queries.len());
    let start = Instant::now();
    let (mut total_len, mut total_occs) = (0, 0);
    for query in queries {
        let initial = index.find(query[0]).unwrap();
        let last = query[1..]
            .iter()
            .fold(initial, |state, &node| index.extend(&state, node).unwrap());
        total_len += query.len();
        total_occs += last.len();
    }
    utils::report_results(queries.len(), total_len, total_occs, start.elapsed());
}

//-----------------------------------------------------------------------------
80 changes: 80 additions & 0 deletions src/bin/benchmark/utils.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
use std::time::Duration;

//-----------------------------------------------------------------------------

/// Converts a size in bytes into a value scaled to the largest binary unit
/// that does not exceed it.
///
/// Returns `(value, unit)`, where `unit` is one of `"B"`, `"KiB"`, `"MiB"`,
/// `"GiB"`, or `"TiB"`. Sizes below 1024 bytes (including 0) are returned in
/// bytes; sizes of 1024 TiB or more are still expressed in TiB.
pub fn readable_size(bytes: usize) -> (f64, &'static str) {
    // Unit table in increasing order; a constant, so nothing is allocated per call.
    const UNITS: [(f64, &str); 5] = [
        (1.0, "B"),
        (1024.0, "KiB"),
        (1024.0 * 1024.0, "MiB"),
        (1024.0 * 1024.0 * 1024.0, "GiB"),
        (1024.0 * 1024.0 * 1024.0 * 1024.0, "TiB"),
    ];

    let value = bytes as f64;
    // Largest unit not exceeding the value; bytes when nothing matches (value 0).
    let (factor, name) = UNITS
        .iter()
        .rev()
        .find(|(factor, _)| value >= *factor)
        .copied()
        .unwrap_or(UNITS[0]);

    (value / factor, name)
}

/// Returns the peak resident set size of this process in bytes (Linux).
///
/// The value is read with `getrusage(RUSAGE_SELF)`. `ru_maxrss` is multiplied
/// by 1024 because Linux reports it in kilobytes.
///
/// Returns an error message if the `getrusage` call fails.
#[cfg(target_os = "linux")]
pub fn peak_memory_usage() -> Result<usize, &'static str> {
    // SAFETY: `libc::rusage` is a C struct made of integer fields, for which
    // the all-zero bit pattern is a valid value.
    let mut rusage: libc::rusage = unsafe { std::mem::zeroed() };
    // SAFETY: the pointer is derived from a live, exclusively borrowed local
    // and is only used for the duration of the call.
    let retval = unsafe { libc::getrusage(libc::RUSAGE_SELF, &mut rusage) };
    if retval == 0 {
        Ok(rusage.ru_maxrss as usize * 1024)
    } else {
        Err("libc::getrusage call failed")
    }
}

/// Returns the peak resident set size of this process in bytes (macOS).
///
/// The value is read with `getrusage(RUSAGE_SELF)`. `ru_maxrss` is used as is
/// because macOS reports it in bytes.
///
/// Returns an error message if the `getrusage` call fails.
#[cfg(target_os = "macos")]
pub fn peak_memory_usage() -> Result<usize, &'static str> {
    // SAFETY: `libc::rusage` is a C struct made of integer fields, for which
    // the all-zero bit pattern is a valid value.
    let mut rusage: libc::rusage = unsafe { std::mem::zeroed() };
    // SAFETY: the pointer is derived from a live, exclusively borrowed local
    // and is only used for the duration of the call.
    let retval = unsafe { libc::getrusage(libc::RUSAGE_SELF, &mut rusage) };
    if retval == 0 {
        Ok(rusage.ru_maxrss as usize)
    } else {
        Err("libc::getrusage call failed")
    }
}

/// Fallback for operating systems other than Linux and macOS.
///
/// No measurement is attempted; this always returns an error message stating
/// that peak memory usage is not implemented for this OS.
#[cfg(not(any(target_os = "linux", target_os = "macos")))]
pub fn peak_memory_usage() -> Result<usize, &'static str> {
Err("No peak_memory_usage implementation for this OS")
}

//-----------------------------------------------------------------------------

/// Prints a timing summary for a batch of queries to stdout.
///
/// * `queries`: number of queries that were run.
/// * `total_len`: total number of nodes over all queries.
/// * `total_occs`: total number of occurrences found.
/// * `duration`: wall-clock time spent on the whole batch.
///
/// Reports the total time in seconds, the average time per query in
/// microseconds, the average time per node in nanoseconds, and the average
/// number of occurrences per query, followed by an empty line. The averages
/// are floating-point divisions, so a zero `queries` or `total_len` prints a
/// non-finite value rather than panicking.
pub fn report_results(queries: usize, total_len: usize, total_occs: usize, duration: Duration) {
    let query_count = queries as f64;
    let seconds = duration.as_secs_f64();
    let us_per_query = (duration.as_micros() as f64) / query_count;
    let ns_per_node = (duration.as_nanos() as f64) / (total_len as f64);
    let occs_per_query = (total_occs as f64) / query_count;

    println!(
        "Time: {:.3} seconds ({:.3} us/query, {:.1} ns/node)",
        seconds, us_per_query, ns_per_node
    );
    println!("Occurrences: {} total ({:.3} per query)", total_occs, occs_per_query);
    println!();
}

/// Prints the peak memory usage of the process to stdout, followed by an
/// empty line.
///
/// If `peak_memory_usage` fails, its error message is printed instead (also
/// to stdout).
pub fn report_memory_usage() {
    let message = match peak_memory_usage() {
        Ok(bytes) => {
            let (size, unit) = readable_size(bytes);
            format!("Peak memory usage: {:.3} {}", size, unit)
        }
        Err(reason) => reason.to_string(),
    };
    println!("{}", message);
    println!();
}

//-----------------------------------------------------------------------------
2 changes: 1 addition & 1 deletion src/support/tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -283,7 +283,7 @@ fn generate_runs(n: usize, sigma: usize, w: usize) -> Vec<(usize, usize)> {
let mut result = Vec::with_capacity(n);
let mut rng = rand::thread_rng();
for _ in 0..n {
let c: usize = rng.gen_range(0, sigma);
let c: usize = rng.gen_range(0..sigma);
let len = generate_value(&mut rng, w) + 1;
result.push((c, len));
}
Expand Down

0 comments on commit 7b73d8e

Please sign in to comment.