Fix dedup output order when unmapped pairs are written to the same file

open2c · Apr 9, 2024 · 6000a2d · 6000a2d
1 parent 0c1a0c7
commit 6000a2d
Showing 1 changed file with 12 additions and 12 deletions.
diff --git a/pairtools/lib/dedup.py b/pairtools/lib/dedup.py
@@ -89,9 +89,19 @@ def streaming_dedup(
         # Clean up dataframe:
         df_chunk = df_chunk.drop(columns=["duplicate"])
 
-        # Stream the pairs:
-        # If outstream_dups is the same as outstream, we save all mapped pairs to the same file
+        # Save the pairs:
 
+        # Stream unmapped:
+        if outstream_unmapped:
+            df_chunk.loc[~mask_mapped, :].to_csv(
+                outstream_unmapped,
+                index=False,
+                header=False,
+                sep="\t",
+                quoting=QUOTE_NONE,
+            )
+
+        # If outstream_dups is the same as outstream, we save the mapped pairs to the same file
         if outstream_dups == outstream:
             df_chunk.loc[mask_mapped, :].to_csv(
                 outstream, index=False, header=False, sep="\t", quoting=QUOTE_NONE
@@ -116,16 +126,6 @@ def streaming_dedup(
                     outstream, index=False, header=False, sep="\t", quoting=QUOTE_NONE
                 )
 
-        # Stream unmapped:
-        if outstream_unmapped:
-            df_chunk.loc[~mask_mapped, :].to_csv(
-                outstream_unmapped,
-                index=False,
-                header=False,
-                sep="\t",
-                quoting=QUOTE_NONE,
-            )
-
     t1 = time.time()
     t = t1 - t0
     logger.debug(f"total time: {t}")