From 94a77013440a22be6b3a14b91f9516425f4f86eb Mon Sep 17 00:00:00 2001
From: Anton Goloborodko <goloborodko.anton@gmail.com>
Date: Sat, 9 Mar 2024 15:30:24 +0100
Subject: [PATCH 1/8] refactor parse with read_alignment_block()

---
 pairtools/lib/parse.py | 195 +++++++++++++++++++++++------------------
 1 file changed, 108 insertions(+), 87 deletions(-)

diff --git a/pairtools/lib/parse.py b/pairtools/lib/parse.py
index a144fa46..7c907df5 100644
--- a/pairtools/lib/parse.py
+++ b/pairtools/lib/parse.py
@@ -38,6 +38,35 @@
 from .parse_pysam import get_mismatches_c
 
 
+def group_alignments_by_side(sams):
+    return [sam for sam in sams if sam.is_read1], [sam for sam in sams if sam.is_read2]
+
+
+def read_alignment_block(instream, sort=True, group_by_side=True, return_readID=True):
+    sams = []
+
+    prev_readID = None
+    while True:
+        sam_entry = next(instream, None)
+        readID = sam_entry.query_name if sam_entry else None
+
+        # Read is fully populated, then parse and write:
+        if not (sam_entry) or ((readID != prev_readID) and prev_readID):
+            if sort:
+                sams = sorted(sams, key=lambda a: (a.is_read2, a.query_alignment_start))
+            out = sams if not group_by_side else group_alignments_by_side(sams)
+            out = out if not return_readID else (prev_readID, out)
+            yield out
+            
+            sams.clear()
+            
+        if sam_entry is None:
+            break
+        else:
+            sams.append(sam_entry)
+            prev_readID = readID
+
+
 def streaming_classify(
     instream, outstream, chromosomes, out_alignments_stream, out_stat, **kwargs
 ):
@@ -105,102 +134,94 @@ def streaming_classify(
 
     ### Iterate over input pysam:
     instream = iter(instream)
-    while sam_entry is not None:
-        sam_entry = next(instream, None)
-
-        readID = sam_entry.query_name if sam_entry else None
-        if readID_transform is not None and readID is not None:
+    for (readID, (sams1, sams2)) in read_alignment_block(instream, sort=True, group_by_side=True, return_readID=True):
+        if readID_transform is not None:
             readID = eval(readID_transform)
 
-        # Read is fully populated, then parse and write:
-        if not (sam_entry) or ((readID != prev_readID) and prev_readID):
+        ### Parse
+        if not parse2:  # regular parser:
+            pairstream, all_algns1, all_algns2 = parse_read(
+                sams1,
+                sams2,
+                min_mapq=kwargs["min_mapq"],
+                max_molecule_size=kwargs["max_molecule_size"],
+                max_inter_align_gap=kwargs["max_inter_align_gap"],
+                walks_policy=kwargs["walks_policy"],
+                sam_tags=sam_tags,
+                store_seq=store_seq,
+                report_mismatches=True if "mismatches" in add_columns else False,
+            )
+        else:  # parse2 parser:
+            pairstream, all_algns1, all_algns2 = parse2_read(
+                sams1,
+                sams2,
+                min_mapq=kwargs["min_mapq"],
+                max_inter_align_gap=kwargs["max_inter_align_gap"],
+                max_insert_size=kwargs.get("max_insert_size", 500),
+                single_end=kwargs["single_end"],
+                report_position=kwargs["report_position"],
+                report_orientation=kwargs["report_orientation"],
+                sam_tags=sam_tags,
+                dedup_max_mismatch=kwargs["dedup_max_mismatch"],
+                store_seq=store_seq,
+                expand=kwargs["expand"],
+                max_expansion_depth=kwargs["max_expansion_depth"],
+                report_mismatches=True if "mismatches" in add_columns else False,
+            )
 
-            ### Parse
-            if not parse2:  # regular parser:
-                pairstream, all_algns1, all_algns2 = parse_read(
-                    sams1,
-                    sams2,
-                    min_mapq=kwargs["min_mapq"],
-                    max_molecule_size=kwargs["max_molecule_size"],
-                    max_inter_align_gap=kwargs["max_inter_align_gap"],
-                    walks_policy=kwargs["walks_policy"],
-                    sam_tags=sam_tags,
-                    store_seq=store_seq,
-                    report_mismatches=True if "mismatches" in add_columns else False,
-                )
-            else:  # parse2 parser:
-                pairstream, all_algns1, all_algns2 = parse2_read(
-                    sams1,
-                    sams2,
-                    min_mapq=kwargs["min_mapq"],
-                    max_inter_align_gap=kwargs["max_inter_align_gap"],
-                    max_insert_size=kwargs.get("max_insert_size", 500),
-                    single_end=kwargs["single_end"],
-                    report_position=kwargs["report_position"],
-                    report_orientation=kwargs["report_orientation"],
-                    sam_tags=sam_tags,
-                    dedup_max_mismatch=kwargs["dedup_max_mismatch"],
-                    store_seq=store_seq,
-                    expand=kwargs["expand"],
-                    max_expansion_depth=kwargs["max_expansion_depth"],
-                    report_mismatches=True if "mismatches" in add_columns else False,
-                )
+        ### Write:
+        read_has_alignments = False
+        for (algn1, algn2, pair_index) in pairstream:
+            read_has_alignments = True
 
-            ### Write:
-            read_has_alignments = False
-            for (algn1, algn2, pair_index) in pairstream:
-                read_has_alignments = True
+            # Alignment end defaults to 5' if report_alignment_end is unspecified:
+            if kwargs.get("report_alignment_end", "5") == "5":
+                algn1["pos"] = algn1["pos5"]
+                algn2["pos"] = algn2["pos5"]
+            else:
+                algn1["pos"] = algn1["pos3"]
+                algn2["pos"] = algn2["pos3"]
+
+            if kwargs["flip"]:
+                flip_pair = not check_pair_order(algn1, algn2, chrom_enum)
+                if flip_pair:
+                    algn1, algn2 = algn2, algn1
+                    sams1, sams2 = sams2, sams1
+
+            write_pairsam(
+                algn1,
+                algn2,
+                readID=prev_readID,
+                pair_index=pair_index,
+                sams1=sams1,
+                sams2=sams2,
+                out_file=outstream,
+                drop_readid=kwargs["drop_readid"],
+                drop_seq=kwargs["drop_seq"],
+                drop_sam=kwargs["drop_sam"],
+                add_pair_index=kwargs["add_pair_index"],
+                add_columns=add_columns,
+            )
 
-                # Alignment end defaults to 5' if report_alignment_end is unspecified:
-                if kwargs.get("report_alignment_end", "5") == "5":
-                    algn1["pos"] = algn1["pos5"]
-                    algn2["pos"] = algn2["pos5"]
-                else:
-                    algn1["pos"] = algn1["pos3"]
-                    algn2["pos"] = algn2["pos3"]
-
-                if kwargs["flip"]:
-                    flip_pair = not check_pair_order(algn1, algn2, chrom_enum)
-                    if flip_pair:
-                        algn1, algn2 = algn2, algn1
-                        sams1, sams2 = sams2, sams1
-
-                write_pairsam(
-                    algn1,
-                    algn2,
-                    readID=prev_readID,
-                    pair_index=pair_index,
-                    sams1=sams1,
-                    sams2=sams2,
-                    out_file=outstream,
-                    drop_readid=kwargs["drop_readid"],
-                    drop_seq=kwargs["drop_seq"],
-                    drop_sam=kwargs["drop_sam"],
-                    add_pair_index=kwargs["add_pair_index"],
-                    add_columns=add_columns,
+            # add a pair to PairCounter for stats output:
+            if out_stat:
+                out_stat.add_pair(
+                    algn1["chrom"],
+                    int(algn1["pos"]),
+                    algn1["strand"],
+                    algn2["chrom"],
+                    int(algn2["pos"]),
+                    algn2["strand"],
+                    algn1["type"] + algn2["type"],
                 )
 
-                # add a pair to PairCounter for stats output:
-                if out_stat:
-                    out_stat.add_pair(
-                        algn1["chrom"],
-                        int(algn1["pos"]),
-                        algn1["strand"],
-                        algn2["chrom"],
-                        int(algn2["pos"]),
-                        algn2["strand"],
-                        algn1["type"] + algn2["type"],
-                    )
+        # write all alignments:
+        if out_alignments_stream and read_has_alignments:
+            write_all_algnments(
+                prev_readID, all_algns1, all_algns2, out_alignments_stream
+            )
 
-            # write all alignments:
-            if out_alignments_stream and read_has_alignments:
-                write_all_algnments(
-                    prev_readID, all_algns1, all_algns2, out_alignments_stream
-                )
 
-            # Empty read after writing:
-            sams1.clear()
-            sams2.clear()
 
         if sam_entry is not None:
             push_pysam(sam_entry, sams1, sams2)

From 2e444638f5de2aaf1feef17c48221fd660102ac8 Mon Sep 17 00:00:00 2001
From: Anton Goloborodko <goloborodko.anton@gmail.com>
Date: Mon, 25 Mar 2024 10:29:59 +0100
Subject: [PATCH 2/8] parse: deprecate lowering pair_type in walks

---
 pairtools/lib/parse.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/pairtools/lib/parse.py b/pairtools/lib/parse.py
index 7c907df5..13640094 100644
--- a/pairtools/lib/parse.py
+++ b/pairtools/lib/parse.py
@@ -580,14 +580,14 @@ def parse_read(
                             hic_algn2 = algn
                             break
 
-                # lower-case reported walks on the chimeric side
+                # DEPRECATED: lower-case reported walks on the chimeric side
                 if walks_policy != "mask":
                     if is_chimeric_1:
                         hic_algn1 = dict(hic_algn1)
-                        hic_algn1["type"] = hic_algn1["type"].lower()
+                        # hic_algn1["type"] = hic_algn1["type"].lower()
                     if is_chimeric_2:
                         hic_algn2 = dict(hic_algn2)
-                        hic_algn2["type"] = hic_algn2["type"].lower()
+                        # hic_algn2["type"] = hic_algn2["type"].lower()
 
         else:
             raise ValueError(f"Walks policy {walks_policy} is not supported.")

From 414db7267d1cc12cbec05cd0540ea0f876a393d6 Mon Sep 17 00:00:00 2001
From: Anton Goloborodko <goloborodko.anton@gmail.com>
Date: Mon, 25 Mar 2024 10:34:38 +0100
Subject: [PATCH 3/8] parse: finish switching to read_alignment_block()

---
 pairtools/lib/parse.py | 20 ++------------------
 1 file changed, 2 insertions(+), 18 deletions(-)

diff --git a/pairtools/lib/parse.py b/pairtools/lib/parse.py
index 13640094..99a90067 100644
--- a/pairtools/lib/parse.py
+++ b/pairtools/lib/parse.py
@@ -122,16 +122,6 @@ def streaming_classify(
     if readID_transform is not None:
         readID_transform = compile(readID_transform, "<string>", "eval")
 
-    ### Prepare for iterative parsing of the input stream
-    # Each read is represented by readID, sams1 (left alignments) and sams2 (right alignments)
-    readID = ""  # Read id of the current read
-    sams1 = []  # Placeholder for the left alignments
-    sams2 = []  # Placeholder for the right alignments
-    # Each read is comprised of multiple alignments, or sam entries:
-    sam_entry = ""  # Placeholder for each aligned segment
-    # Keep the id of the previous sam entry to detect when the read is completely populated:
-    prev_readID = ""  # Placeholder for the read id
-
     ### Iterate over input pysam:
     instream = iter(instream)
     for (readID, (sams1, sams2)) in read_alignment_block(instream, sort=True, group_by_side=True, return_readID=True):
@@ -191,7 +181,7 @@ def streaming_classify(
             write_pairsam(
                 algn1,
                 algn2,
-                readID=prev_readID,
+                readID=readID,
                 pair_index=pair_index,
                 sams1=sams1,
                 sams2=sams2,
@@ -218,16 +208,10 @@ def streaming_classify(
         # write all alignments:
         if out_alignments_stream and read_has_alignments:
             write_all_algnments(
-                prev_readID, all_algns1, all_algns2, out_alignments_stream
+                readID, all_algns1, all_algns2, out_alignments_stream
             )
 
 
-
-        if sam_entry is not None:
-            push_pysam(sam_entry, sams1, sams2)
-            prev_readID = readID
-
-
 ############################
 ### Alignment utilities: ###
 ############################

From 5d08a415487a66617e5cb3232190547f029543d2 Mon Sep 17 00:00:00 2001
From: Anton Goloborodko <goloborodko.anton@gmail.com>
Date: Mon, 25 Mar 2024 11:01:45 +0100
Subject: [PATCH 4/8] parse: refactor alignment list sorting/tagging /
 insertion of empty algns

---
 pairtools/lib/parse.py | 121 ++++++++++++++++++++++-------------------
 1 file changed, 66 insertions(+), 55 deletions(-)

diff --git a/pairtools/lib/parse.py b/pairtools/lib/parse.py
index 99a90067..f6382c20 100644
--- a/pairtools/lib/parse.py
+++ b/pairtools/lib/parse.py
@@ -392,6 +392,66 @@ def flip_alignment(hic_algn):
     return hic_algn
 
 
+
+def _convert_gaps_into_alignments(sorted_algns, max_inter_align_gap):
+    """
+    Inplace conversion of gaps longer than max_inter_align_gap into alignments
+    """
+    if (len(sorted_algns) == 1) and (not sorted_algns[0]["is_mapped"]):
+        return
+
+    last_5_pos = 0
+    for i in range(len(sorted_algns)):
+        algn = sorted_algns[i]
+        if algn["dist_to_5"] - last_5_pos > max_inter_align_gap:
+            new_algn = empty_alignment()
+            new_algn["dist_to_5"] = last_5_pos
+            new_algn["algn_read_span"] = algn["dist_to_5"] - last_5_pos
+            new_algn["read_len"] = algn["read_len"]
+            new_algn["dist_to_3"] = new_algn["read_len"] - algn["dist_to_5"]
+
+            last_5_pos = algn["dist_to_5"] + algn["algn_read_span"]
+
+            sorted_algns.insert(i, new_algn)
+            i += 2
+        else:
+            last_5_pos = max(last_5_pos, algn["dist_to_5"] + algn["algn_read_span"])
+            i += 1
+
+
+def normalize_alignment_list(algns, side, sort_by="dist_to_5", max_inter_align_gap=None):
+    """
+    Normalize the alignment list: insert empty alignments in gaps between alignments, 
+    sort by distance to the 5' end, add read side, alignment index.
+
+    Args:
+        algns (list): The list of alignments.
+        side (str): The side of the alignment.
+        sort_by (str, optional): The key to sort the alignments by. Defaults to "dist_to_5".
+        max_inter_align_gap (int, optional): The maximum allowed gap between alignments. Defaults to None.
+
+    Returns:
+        list: The normalized alignment list.
+
+    """
+    if len(algns) == 0:
+        algns = [empty_alignment()]
+
+    if max_inter_align_gap is not None:
+        _convert_gaps_into_alignments(algns, max_inter_align_gap)
+
+    if sort_by:
+        algns = sorted(algns, key=lambda algn: algn[sort_by])
+    
+
+    for i, algn in enumerate(algns):
+        algn["read_side"] = side
+        algn["align_idx"] = i
+        algn["tot_align_count"] = len(algns)
+
+    return algns
+            
+
 def flip_orientation(hic_algn):
     """
     Flip orientation of a single alignment
@@ -475,18 +535,9 @@ def parse_read(
         for sam in sams2
     ]
 
-    if len(algns1) > 0:
-        algns1 = sorted(algns1, key=lambda algn: algn["dist_to_5"])
-    else:
-        algns1 = [empty_alignment()]  # Empty alignment dummy
-    if len(algns2) > 0:
-        algns2 = sorted(algns2, key=lambda algn: algn["dist_to_5"])
-    else:
-        algns2 = [empty_alignment()]  # Empty alignment dummy
+    algns1 = normalize_alignment_list(algns1, 1, sort_by="dist_to_5", max_inter_align_gap=max_inter_align_gap)
+    algns2 = normalize_alignment_list(algns2, 2, sort_by="dist_to_5", max_inter_align_gap=max_inter_align_gap)
 
-    if max_inter_align_gap is not None:
-        _convert_gaps_into_alignments(algns1, max_inter_align_gap)
-        _convert_gaps_into_alignments(algns2, max_inter_align_gap)
 
     # By default, assume each molecule is a single pair with single unconfirmed pair:
     hic_algn1 = algns1[0]
@@ -619,10 +670,8 @@ def parse2_read(
             )
             for sam in sams2  # note sams2, that's how these reads are typically parsed
         ]
-        algns1 = sorted(algns1, key=lambda algn: algn["dist_to_5"])
-        if max_inter_align_gap is not None:
-            _convert_gaps_into_alignments(algns1, max_inter_align_gap)
-
+        
+        algns1 = normalize_alignment_list(algns1, 1, sort_by="dist_to_5", max_inter_align_gap=max_inter_align_gap)
         algns2 = [empty_alignment()]  # Empty alignment dummy
 
         if len(algns1) > 1:
@@ -678,20 +727,8 @@ def parse2_read(
             for sam in sams2
         ]
 
-        # Sort alignments by the distance to the 5'-end:
-        if len(algns1) > 0:
-            algns1 = sorted(algns1, key=lambda algn: algn["dist_to_5"])
-        else:
-            algns1 = [empty_alignment()]  # Empty alignment dummy
-        if len(algns2) > 0:
-            algns2 = sorted(algns2, key=lambda algn: algn["dist_to_5"])
-        else:
-            algns2 = [empty_alignment()]  # Empty alignment dummy
-
-        # Convert alignment gaps to alignments:
-        if max_inter_align_gap is not None:
-            _convert_gaps_into_alignments(algns1, max_inter_align_gap)
-            _convert_gaps_into_alignments(algns2, max_inter_align_gap)
+        algns1 = normalize_alignment_list(algns1, 1, sort_by="dist_to_5", max_inter_align_gap=max_inter_align_gap)
+        algns2 = normalize_alignment_list(algns2, 2, sort_by="dist_to_5", max_inter_align_gap=max_inter_align_gap)
 
         is_chimeric_1 = len(algns1) > 1
         is_chimeric_2 = len(algns2) > 1
@@ -829,32 +866,6 @@ def rescue_walk(algns1, algns2, max_molecule_size):
         return None
 
 
-def _convert_gaps_into_alignments(sorted_algns, max_inter_align_gap):
-    """
-    Inplace conversion of gaps longer than max_inter_align_gap into alignments
-    """
-    if (len(sorted_algns) == 1) and (not sorted_algns[0]["is_mapped"]):
-        return
-
-    last_5_pos = 0
-    for i in range(len(sorted_algns)):
-        algn = sorted_algns[i]
-        if algn["dist_to_5"] - last_5_pos > max_inter_align_gap:
-            new_algn = empty_alignment()
-            new_algn["dist_to_5"] = last_5_pos
-            new_algn["algn_read_span"] = algn["dist_to_5"] - last_5_pos
-            new_algn["read_len"] = algn["read_len"]
-            new_algn["dist_to_3"] = new_algn["read_len"] - algn["dist_to_5"]
-
-            last_5_pos = algn["dist_to_5"] + algn["algn_read_span"]
-
-            sorted_algns.insert(i, new_algn)
-            i += 2
-        else:
-            last_5_pos = max(last_5_pos, algn["dist_to_5"] + algn["algn_read_span"])
-            i += 1
-
-
 def parse_complex_walk(
     algns1,
     algns2,

From f61451ba679abc13fa173a583cf9a5de6a55ab68 Mon Sep 17 00:00:00 2001
From: Anton Goloborodko <goloborodko.anton@gmail.com>
Date: Mon, 25 Mar 2024 12:06:32 +0100
Subject: [PATCH 5/8] parse: update align index col names, update
 pairsam_format

---
 pairtools/lib/pairsam_format.py | 7 +++++++
 pairtools/lib/parse.py          | 5 ++---
 2 files changed, 9 insertions(+), 3 deletions(-)

diff --git a/pairtools/lib/pairsam_format.py b/pairtools/lib/pairsam_format.py
index a472e9f5..7383bab1 100644
--- a/pairtools/lib/pairsam_format.py
+++ b/pairtools/lib/pairsam_format.py
@@ -100,6 +100,10 @@
     "dist_to_3",
     "seq",
     "mismatches",  # Format: "{ref_letter}:{mut_letter}:{phred}:{ref_position}:{read_position}"
+    "read_side",
+    "algn_idx",
+    "same_side_algn_count"
+    
 ]
 
 DTYPES_EXTRA_COLUMNS = {
@@ -115,4 +119,7 @@
     "dist_to_3": int,
     "seq": str,
     "mismatches": str,
+    "read_side": int,
+    "algn_idx": int,
+    "same_side_algn_count": int,
 }
diff --git a/pairtools/lib/parse.py b/pairtools/lib/parse.py
index f6382c20..12da73bf 100644
--- a/pairtools/lib/parse.py
+++ b/pairtools/lib/parse.py
@@ -443,11 +443,10 @@ def normalize_alignment_list(algns, side, sort_by="dist_to_5", max_inter_align_g
     if sort_by:
         algns = sorted(algns, key=lambda algn: algn[sort_by])
     
-
     for i, algn in enumerate(algns):
         algn["read_side"] = side
-        algn["align_idx"] = i
-        algn["tot_align_count"] = len(algns)
+        algn["algn_idx"] = i
+        algn["same_side_algn_count"] = len(algns)
 
     return algns
             

From e472c97724b4e7bc8f0a8625349b10ef523d60f3 Mon Sep 17 00:00:00 2001
From: Anton Goloborodko <goloborodko.anton@gmail.com>
Date: Mon, 25 Mar 2024 12:28:09 +0100
Subject: [PATCH 6/8] api.parse: rearrange code

---
 pairtools/lib/parse.py | 43 +++++++++++++++++++++---------------------
 1 file changed, 22 insertions(+), 21 deletions(-)

diff --git a/pairtools/lib/parse.py b/pairtools/lib/parse.py
index 12da73bf..f09caadd 100644
--- a/pairtools/lib/parse.py
+++ b/pairtools/lib/parse.py
@@ -392,6 +392,28 @@ def flip_alignment(hic_algn):
     return hic_algn
 
 
+def flip_orientation(hic_algn):
+    """
+    Flip orientation of a single alignment
+    :param hic_algn: Alignment to be modified
+    :return:
+    """
+    hic_algn = dict(hic_algn)  # overwrite the variable with the copy of dictionary
+    hic_algn["strand"] = "+" if (hic_algn["strand"] == "-") else "-"
+    return hic_algn
+
+
+def flip_position(hic_algn):
+    """
+    Flip ends of a single alignment
+    :param hic_algn: Alignment to be modified
+    :return:
+    """
+    hic_algn = dict(hic_algn)  # overwrite the variable with the copy of dictionary
+    hic_algn["pos5"], hic_algn["pos3"] = hic_algn["pos3"], hic_algn["pos5"]
+    return hic_algn
+
+
 
 def _convert_gaps_into_alignments(sorted_algns, max_inter_align_gap):
     """
@@ -451,27 +473,6 @@ def normalize_alignment_list(algns, side, sort_by="dist_to_5", max_inter_align_g
     return algns
             
 
-def flip_orientation(hic_algn):
-    """
-    Flip orientation of a single alignment
-    :param hic_algn: Alignment to be modified
-    :return:
-    """
-    hic_algn = dict(hic_algn)  # overwrite the variable with the copy of dictionary
-    hic_algn["strand"] = "+" if (hic_algn["strand"] == "-") else "-"
-    return hic_algn
-
-
-def flip_position(hic_algn):
-    """
-    Flip ends of a single alignment
-    :param hic_algn: Alignment to be modified
-    :return:
-    """
-    hic_algn = dict(hic_algn)  # overwrite the variable with the copy of dictionary
-    hic_algn["pos5"], hic_algn["pos3"] = hic_algn["pos3"], hic_algn["pos5"]
-    return hic_algn
-
 
 ####################
 ### Parsing utilities:

From 1e5459cd2ebf5406294797da79760c42bb02a8d2 Mon Sep 17 00:00:00 2001
From: Anton Goloborodko <goloborodko.anton@gmail.com>
Date: Mon, 25 Mar 2024 12:46:58 +0100
Subject: [PATCH 7/8] api.parse: fix recently introduced bugs

---
 pairtools/lib/parse.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/pairtools/lib/parse.py b/pairtools/lib/parse.py
index f09caadd..33fb8a13 100644
--- a/pairtools/lib/parse.py
+++ b/pairtools/lib/parse.py
@@ -459,12 +459,12 @@ def normalize_alignment_list(algns, side, sort_by="dist_to_5", max_inter_align_g
     if len(algns) == 0:
         algns = [empty_alignment()]
 
+    if sort_by:
+        algns = sorted(algns, key=lambda algn: algn[sort_by])
+
     if max_inter_align_gap is not None:
         _convert_gaps_into_alignments(algns, max_inter_align_gap)
 
-    if sort_by:
-        algns = sorted(algns, key=lambda algn: algn[sort_by])
-    
     for i, algn in enumerate(algns):
         algn["read_side"] = side
         algn["algn_idx"] = i

From e9250f9ee9adeaede0f25d166a41bcce4e1bd84f Mon Sep 17 00:00:00 2001
From: Anton Goloborodko <goloborodko.anton@gmail.com>
Date: Sun, 7 Apr 2024 16:07:47 +0200
Subject: [PATCH 8/8] parse: change the default walk policy to 5unique

---
 pairtools/cli/parse.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pairtools/cli/parse.py b/pairtools/cli/parse.py
index e1eb6741..e9541956 100644
--- a/pairtools/cli/parse.py
+++ b/pairtools/cli/parse.py
@@ -123,7 +123,7 @@
 @click.option(
     "--walks-policy",
     type=click.Choice(["mask", "5any", "5unique", "3any", "3unique", "all"]),
-    default="mask",
+    default="5unique",
     help="the policy for reporting unrescuable walks (reads containing more"
     " than one alignment on one or both sides, that can not be explained by a"
     " single ligation between two mappable DNA fragments)."