Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add cr_overlap_any to test if there are any overlaps #10

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
49 changes: 48 additions & 1 deletion cgranges.c
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
#include <stdio.h>
#include <stdbool.h>
#include <assert.h>
#include "cgranges.h"
#include "khash.h"
Expand Down Expand Up @@ -265,7 +266,7 @@ int64_t cr_overlap_int(const cgranges_t *cr, int32_t ctg_id, int32_t st, int32_t
r = &cr->r[c->off];
p = &stack[t++];
p->k = c->root_k, p->x = (1LL<<p->k) - 1, p->w = 0; // push the root into the stack
while (t) { // stack is not empyt
while (t) { // stack is not empty
istack_t z = stack[--t];
if (z.k <= 3) { // the subtree is no larger than (1<<(z.k+1))-1; do a linear scan
int64_t i, i0 = z.x >> z.k << z.k, i1 = i0 + (1LL<<(z.k+1)) - 1;
Expand Down Expand Up @@ -296,6 +297,47 @@ int64_t cr_overlap_int(const cgranges_t *cr, int32_t ctg_id, int32_t st, int32_t
return n;
}

bool cr_overlap_any_bool(const cgranges_t *cr, int32_t ctg_id, int32_t st, int32_t en)
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is duplicated from cr_overlap_int. I could add an option to cr_overlap_int to just return 1 when the first overlapping interval is found, so that we have less code duplication.

{
int32_t t = 0;
const cr_ctg_t *c;
const cr_intv_t *r;
int64_t n = 0;
istack_t stack[64], *p;

if (ctg_id < 0 || ctg_id >= cr->n_ctg) return false;
c = &cr->ctg[ctg_id];
r = &cr->r[c->off];
p = &stack[t++];
p->k = c->root_k, p->x = (1LL<<p->k) - 1, p->w = 0; // push the root into the stack
while (t) { // stack is not empty
istack_t z = stack[--t];
if (z.k <= 3) { // the subtree is no larger than (1<<(z.k+1))-1; do a linear scan
int64_t i, i0 = z.x >> z.k << z.k, i1 = i0 + (1LL<<(z.k+1)) - 1;
if (i1 >= c->n) i1 = c->n;
for (i = i0; i < i1 && cr_st(&r[i]) < en; ++i)
if (st < cr_en(&r[i])) {
return true;
}
} else if (z.w == 0) { // if left child not processed
int64_t y = z.x - (1LL<<(z.k-1));
p = &stack[t++];
p->k = z.k, p->x = z.x, p->w = 1;
if (y >= c->n || r[y].y > st) {
p = &stack[t++];
p->k = z.k - 1, p->x = y, p->w = 0; // push the left child to the stack
}
} else if (z.x < c->n && cr_st(&r[z.x]) < en) {
if (st < cr_en(&r[z.x])) { // then z.x overlaps the query; write to the output array
return true;
}
p = &stack[t++];
p->k = z.k - 1, p->x = z.x + (1LL<<(z.k-1)), p->w = 0; // push the right child
}
}
return false;
}

int64_t cr_contain_int(const cgranges_t *cr, int32_t ctg_id, int32_t st, int32_t en, int64_t **b_, int64_t *m_b_)
{
int64_t n = 0, i, s, e, *b = *b_, m_b = *m_b_;
Expand Down Expand Up @@ -324,6 +366,11 @@ int64_t cr_overlap(const cgranges_t *cr, const char *ctg, int32_t st, int32_t en
return cr_overlap_int(cr, cr_get_ctg(cr, ctg), st, en, b_, m_b_);
}

bool cr_overlap_any(const cgranges_t *cr, const char *ctg, int32_t st, int32_t en)
{
return cr_overlap_any_bool(cr, cr_get_ctg(cr, ctg), st, en);
}

int64_t cr_contain(const cgranges_t *cr, const char *ctg, int32_t st, int32_t en, int64_t **b_, int64_t *m_b_)
{
return cr_contain_int(cr, cr_get_ctg(cr, ctg), st, en, b_, m_b_);
Expand Down
2 changes: 2 additions & 0 deletions cgranges.h
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
#define CRANGES_H

#include <stdint.h>
#include <stdbool.h>

typedef struct { // a contig
char *name; // name of the contig
Expand Down Expand Up @@ -72,6 +73,7 @@ cr_intv_t *cr_add(cgranges_t *cr, const char *ctg, int32_t st, int32_t en, int32
void cr_index(cgranges_t *cr);

int64_t cr_overlap(const cgranges_t *cr, const char *ctg, int32_t st, int32_t en, int64_t **b_, int64_t *m_b_);
bool cr_overlap_any(const cgranges_t *cr, const char *ctg, int32_t st, int32_t en);
int64_t cr_contain(const cgranges_t *cr, const char *ctg, int32_t st, int32_t en, int64_t **b_, int64_t *m_b_);

// Add a contig and length. Call this for desired contig ordering. _len_ can be 0.
Expand Down
6 changes: 6 additions & 0 deletions python/cgranges.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ cdef extern from "cgranges.h":
cr_intv_t *cr_add(cgranges_t *cr, const char *ctg, int32_t st, int32_t en, int32_t label_int)
void cr_index(cgranges_t *cr)
int64_t cr_overlap(const cgranges_t *cr, const char *ctg, int32_t st, int32_t en, int64_t **b_, int64_t *m_b_)
bint cr_overlap_any(const cgranges_t *cr, const char *ctg, int32_t st, int32_t en)
int32_t cr_start(const cgranges_t *cr, int64_t i)
int32_t cr_end(const cgranges_t *cr, int64_t i)
int32_t cr_label(const cgranges_t *cr, int64_t i)
Expand Down Expand Up @@ -47,6 +48,11 @@ cdef class cgranges:
for i in range(n):
yield cr_start(self.cr, b[i]), cr_end(self.cr, b[i]), cr_label(self.cr, b[i])
free(b)

def overlap_any(self, ctg, st, en):
cdef int64_t n
if not self.indexed: return None
return cr_overlap_any(self.cr, str.encode(ctg), st, en)

def coverage(self, ctg, st, en):
cdef int64_t *b = NULL
Expand Down