From 7a1a8991d455d8eb8767f09b3ea80356ddf42ca3 Mon Sep 17 00:00:00 2001
From: Florian Zwagemaker <f.zwagemaker@gmail.com>
Date: Mon, 7 Aug 2023 16:38:40 +0200
Subject: [PATCH] feat: primer removal process now works with
 reference-specific primer coordinates

---
 AmpliGone/cut_reads.py | 28 +++++++++++++++++++---------
 1 file changed, 19 insertions(+), 9 deletions(-)

diff --git a/AmpliGone/cut_reads.py b/AmpliGone/cut_reads.py
index 518a064..664f722 100644
--- a/AmpliGone/cut_reads.py
+++ b/AmpliGone/cut_reads.py
@@ -70,16 +70,21 @@ def CutReads(
     workers,
 ):
     Frame, _threadnumber = data
-    RVSet = set()
-    FWSet = set()
-    for _, start, end, strand in primer_df[["start", "end", "strand"]].itertuples():
+    RVDict = {}
+    FWDict = {}
+    
+    reference_ids = set(primer_df["ref"].unique())
+    for refid in reference_ids:
+        RVDict[refid] = set()
+        FWDict[refid] = set()
+
+    for _, refid, start, end, strand in primer_df[["ref", "start", "end", "strand"]].itertuples():
+
         for coord in range(start + 1, end):  # +1 because reference is 1-based
             if strand == "+":
-                FWSet.add(coord)
+                FWDict[refid].add(coord) 
             elif strand == "-":
-                RVSet.add(coord)
-    FWList = tuple(FWSet)  # Since tuples are hashable
-    RVList = tuple(RVSet)
+                RVDict[refid].add(coord)
 
     Aln = mp.Aligner(
         reference,
@@ -127,6 +132,11 @@ def CutReads(
 
                 previous_seq = seq
 
+                # Fetch the primer coordinates that correspond to the reference that the read maps to
+                # we're using tuples here because they are hashable
+                FWTuple = tuple(FWDict[hit.ctg])
+                RVTuple = tuple(RVDict[hit.ctg])
+
                 qstart = hit.q_st
                 qend = hit.q_en
 
@@ -139,7 +149,7 @@ def CutReads(
                         seq,
                         qual,
                         PositionNeedsCutting=PositionInOrBeforePrimer,
-                        primer_list=FWList,
+                        primer_list=FWTuple,
                         position_on_reference=hit.r_st,
                         cut_direction=1,
                         read_direction=hit.strand,
@@ -159,7 +169,7 @@ def CutReads(
                         seq,
                         qual,
                         PositionNeedsCutting=PositionInOrAfterPrimer,
-                        primer_list=RVList,
+                        primer_list=RVTuple,
                         position_on_reference=hit.r_en,
                         cut_direction=-1,
                         read_direction=hit.strand,