diff --git a/pairtools/lib/dedup.py b/pairtools/lib/dedup.py index 47f57ff..4ad0b2c 100644 --- a/pairtools/lib/dedup.py +++ b/pairtools/lib/dedup.py @@ -89,9 +89,19 @@ def streaming_dedup( # Clean up dataframe: df_chunk = df_chunk.drop(columns=["duplicate"]) - # Stream the pairs: - # If outstream_dups is the same as outstream, we save all mapped pairs to the same file + # Save the pairs: + # Stream unmapped: + if outstream_unmapped: + df_chunk.loc[~mask_mapped, :].to_csv( + outstream_unmapped, + index=False, + header=False, + sep="\t", + quoting=QUOTE_NONE, + ) + + # If outstream_dups is the same as outstream, we save the mapped pairs to the same file if outstream_dups == outstream: df_chunk.loc[mask_mapped, :].to_csv( outstream, index=False, header=False, sep="\t", quoting=QUOTE_NONE @@ -116,16 +126,6 @@ def streaming_dedup( outstream, index=False, header=False, sep="\t", quoting=QUOTE_NONE ) - # Stream unmapped: - if outstream_unmapped: - df_chunk.loc[~mask_mapped, :].to_csv( - outstream_unmapped, - index=False, - header=False, - sep="\t", - quoting=QUOTE_NONE, - ) - t1 = time.time() t = t1 - t0 logger.debug(f"total time: {t}")