Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

impg query/partition: Add min_distance_between_ranges parameter for transitive queries #42

Merged
merged 1 commit into from
Jan 24, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
47 changes: 41 additions & 6 deletions src/impg.rs
Original file line number Diff line number Diff line change
Expand Up @@ -372,6 +372,7 @@ impl Impg {
masked_regions: Option<&FxHashMap<u32, SortedRanges>>,
max_depth: u16,
min_interval_size: u32,
min_distance_between_ranges: u32,
) -> Vec<AdjustedInterval> {
let mut results = Vec::new();
// Add the input range to the results
Expand Down Expand Up @@ -440,14 +441,48 @@ impl Impg {
// Only add non-overlapping portions to the stack for further exploration
if metadata.query_id != current_target_id {
let ranges = visited_ranges.entry(metadata.query_id)
.or_default(); // Note the closure here
.or_default();

let mut should_add = true;

if min_distance_between_ranges > 0 {
let (new_min, new_max) = if adjusted_query_start <= adjusted_query_end {
(adjusted_query_start, adjusted_query_end)
} else {
(adjusted_query_end, adjusted_query_start)
};

// Find insertion point in sorted ranges
let idx = match ranges.ranges.binary_search_by_key(&new_min, |&(start, _)| start) {
Ok(i) => i,
Err(i) => i,
};

// Only need to check adjacent ranges due to sorting
if idx > 0 {
// Check previous range
let (_, prev_end) = ranges.ranges[idx - 1];
if ((new_min - prev_end).abs() as u32) < min_distance_between_ranges {
should_add = false;
}
}
if should_add && idx < ranges.ranges.len() {
// Check next range
let (next_start, _) = ranges.ranges[idx];
if ((next_start - new_max).abs() as u32) < min_distance_between_ranges {
should_add = false;
}
}
}

let new_ranges = ranges.insert((adjusted_query_start, adjusted_query_end));
if should_add {
let new_ranges = ranges.insert((adjusted_query_start, adjusted_query_end));

// Add non-overlapping portions to stack
for (new_start, new_end) in new_ranges {
if (new_end - new_start).abs() as u32 >= min_interval_size {
stack.push((metadata.query_id, new_start, new_end, current_depth + 1));
// Add non-overlapping portions to stack
for (new_start, new_end) in new_ranges {
if ((new_end - new_start).abs() as u32) >= min_interval_size {
stack.push((metadata.query_id, new_start, new_end, current_depth + 1));
}
}
}
}
Expand Down
23 changes: 17 additions & 6 deletions src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,10 @@ enum Args {
/// Minimum size of intervals to consider for transitive queries
#[clap(long, value_parser, default_value_t = 0)]
min_interval_size: u32,

/// Minimum distance between transitive ranges to consider on the same sequence
#[clap(long, value_parser, default_value_t = 0)]
min_distance_between_ranges: u32,
},
/// Query overlaps in the alignment.
Query {
Expand All @@ -88,6 +92,10 @@ enum Args {
/// Minimum size of intervals to consider for transitive queries
#[clap(long, value_parser, default_value_t = 0)]
min_interval_size: u32,

/// Minimum distance between transitive ranges to consider on the same sequence
#[clap(long, value_parser, default_value_t = 0)]
min_distance_between_ranges: u32,

/// Output results in PAF format.
#[clap(short = 'P', long, action)]
Expand Down Expand Up @@ -116,9 +124,10 @@ fn main() -> io::Result<()> {
merge_distance,
max_depth,
min_interval_size,
min_distance_between_ranges,
} => {
let impg = initialize_impg(&common)?;
partition_alignments(&impg, window_size, &sequence_prefix, min_length, merge_distance, max_depth, min_interval_size, common.verbose > 1)?;
partition_alignments(&impg, window_size, &sequence_prefix, min_length, merge_distance, max_depth, min_interval_size, min_distance_between_ranges, common.verbose > 1)?;
},
Args::Query {
common,
Expand All @@ -128,13 +137,14 @@ fn main() -> io::Result<()> {
output_paf,
check_intervals,
max_depth,
min_interval_size
min_interval_size,
min_distance_between_ranges
} => {
let impg = initialize_impg(&common)?;

if let Some(target_range) = target_range {
let (target_name, target_range) = parse_target_range(&target_range)?;
let results = perform_query(&impg, &target_name, target_range, transitive, max_depth, min_interval_size);
let results = perform_query(&impg, &target_name, target_range, transitive, max_depth, min_interval_size, min_distance_between_ranges);
if check_intervals {
let invalid_cigars = impg::impg::check_intervals(&impg, &results);
if !invalid_cigars.is_empty() {
Expand All @@ -155,7 +165,7 @@ fn main() -> io::Result<()> {
let targets = parse_bed_file(&target_bed)?;
info!("Parsed {} target ranges from BED file", targets.len());
for (target_name, target_range, name) in targets {
let results = perform_query(&impg, &target_name, target_range, transitive, max_depth, min_interval_size);
let results = perform_query(&impg, &target_name, target_range, transitive, max_depth, min_interval_size, min_distance_between_ranges);
if check_intervals {
let invalid_cigars = impg::impg::check_intervals(&impg, &results);
if !invalid_cigars.is_empty() {
Expand Down Expand Up @@ -316,7 +326,8 @@ fn perform_query(
target_range: (i32, i32),
transitive: bool,
max_depth: u16,
min_interval_size: u32
min_interval_size: u32,
min_distance_between_ranges: u32
) -> Vec<AdjustedInterval> {
let (target_start, target_end) = target_range;
let target_id = impg.seq_index.get_id(target_name).expect("Target name not found in index");
Expand All @@ -325,7 +336,7 @@ fn perform_query(
panic!("Target range end ({}) exceeds the target sequence length ({})", target_end, target_length);
}
if transitive {
impg.query_transitive(target_id, target_start, target_end, None, max_depth, min_interval_size)
impg.query_transitive(target_id, target_start, target_end, None, max_depth, min_interval_size, min_distance_between_ranges)
} else {
impg.query(target_id, target_start, target_end)
}
Expand Down
3 changes: 2 additions & 1 deletion src/partition.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ pub fn partition_alignments(
merge_distance: usize,
max_depth: u16,
min_interval_size: u32,
min_distance_between_ranges: u32,
debug: bool,
) -> io::Result<()> {
// Get all sequences with the given prefix
Expand Down Expand Up @@ -115,7 +116,7 @@ pub fn partition_alignments(

// Query overlaps for current window
//let query_start = Instant::now();
let mut overlaps = impg.query_transitive(*seq_id, *start, *end, Some(&masked_regions), max_depth, min_interval_size);
let mut overlaps = impg.query_transitive(*seq_id, *start, *end, Some(&masked_regions), max_depth, min_interval_size, min_distance_between_ranges);
//let query_time = query_start.elapsed();
debug!(" Collected {} query overlaps", overlaps.len());

Expand Down
Loading