Skip to content

Commit

Permalink
feat: primer removal process now works with reference-specific primer…
Browse files Browse the repository at this point in the history
… coordinates
  • Loading branch information
florianzwagemaker committed Aug 7, 2023
1 parent 49d0506 commit 7a1a899
Showing 1 changed file with 19 additions and 9 deletions.
28 changes: 19 additions & 9 deletions AmpliGone/cut_reads.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,16 +70,21 @@ def CutReads(
workers,
):
Frame, _threadnumber = data
RVSet = set()
FWSet = set()
for _, start, end, strand in primer_df[["start", "end", "strand"]].itertuples():
RVDict = {}
FWDict = {}

reference_ids = set(primer_df["ref"].unique())
for refid in reference_ids:
RVDict[refid] = set()
FWDict[refid] = set()

for _, refid, start, end, strand in primer_df[["ref", "start", "end", "strand"]].itertuples():

for coord in range(start + 1, end): # +1 because reference is 1-based
if strand == "+":
FWSet.add(coord)
FWDict[refid].add(coord)
elif strand == "-":
RVSet.add(coord)
FWList = tuple(FWSet) # Since tuples are hashable
RVList = tuple(RVSet)
RVDict[refid].add(coord)

Aln = mp.Aligner(
reference,
Expand Down Expand Up @@ -127,6 +132,11 @@ def CutReads(

previous_seq = seq

# Fetch the primer coordinates that correspond to the reference that the read maps to
# we're using tuples here because they are hashable
FWTuple = tuple(FWDict[hit.ctg])
RVTuple = tuple(RVDict[hit.ctg])

qstart = hit.q_st
qend = hit.q_en

Expand All @@ -139,7 +149,7 @@ def CutReads(
seq,
qual,
PositionNeedsCutting=PositionInOrBeforePrimer,
primer_list=FWList,
primer_list=FWTuple,
position_on_reference=hit.r_st,
cut_direction=1,
read_direction=hit.strand,
Expand All @@ -159,7 +169,7 @@ def CutReads(
seq,
qual,
PositionNeedsCutting=PositionInOrAfterPrimer,
primer_list=RVList,
primer_list=RVTuple,
position_on_reference=hit.r_en,
cut_direction=-1,
read_direction=hit.strand,
Expand Down

0 comments on commit 7a1a899

Please sign in to comment.