Skip to content

Commit

Permalink
Test structures with missing atoms/residues
Browse files Browse the repository at this point in the history
  • Loading branch information
padix-key committed Oct 16, 2024
1 parent edda690 commit 5f34d28
Showing 1 changed file with 35 additions and 8 deletions.
43 changes: 35 additions & 8 deletions tests/structure/test_i3d.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,15 +69,42 @@ def test_to_3di(path):
assert str(test) == str(ref), f"3Di sequence of chain {chain_id} does not match"


def test_missing_residues():
    """
    Like `test_to_3di()`, but with backbone atoms missing in some residues.

    Expect that these residues and their directly adjacent residues get the
    unknown symbol (`I3DSequence.unknown_symbol`) in the 3Di sequence.
    As removing atoms may also change symbols at other positions, the
    comparison with the reference sequence tolerates a small fraction of
    mismatches instead of requiring identity.
    """
    PDB_ID = "1aki"
    N_DELETIONS = 5
    # Maximum tolerated fraction (not percent) of mismatching positions
    MAX_MISMATCH_FRACTION = 0.1
    UNKNOWN_SYMBOL = strucalph.I3DSequence.unknown_symbol

    pdbx_file = pdbx.BinaryCIFFile.read(Path(data_dir("structure")) / f"{PDB_ID}.bcif")
    atoms = pdbx.get_structure(pdbx_file, model=1)
    atoms = atoms[struc.filter_amino_acids(atoms)]
    # Remember these before atoms are removed, so the mapping from residue ID
    # to sequence index does not depend on which atoms survive the deletion
    chain_id = atoms.chain_id[0]
    first_res_id = atoms.res_id[0]

    # Fixed seed -> the same residues are chosen in every test run
    rng = np.random.default_rng(1)
    del_backbone_residue_ids = rng.choice(
        np.unique(atoms.res_id), N_DELETIONS, replace=False
    )
    # Keep an atom if it is either not part of a selected residue
    # or not one of the atoms to be removed
    atoms = atoms[
        ~np.isin(atoms.res_id, del_backbone_residue_ids)
        | ~np.isin(atoms.atom_name, ("N", "CA", "CB", "C"))
    ]
    test_sequences, _ = strucalph.to_3di(atoms)

    ref_sequence = _get_ref_3di_sequence(PDB_ID, chain_id)
    for res_id in del_backbone_residue_ids:
        seq_index = res_id - first_res_id
        # Convert the 3Di symbol for the residue and the adjacent ones
        # to the unknown symbol
        start_index = max(0, seq_index - 1)
        # Exclusive stop index, clamped to the sequence length
        stop_index = min(len(ref_sequence), seq_index + 2)
        ref_sequence[start_index:stop_index] = UNKNOWN_SYMBOL

    assert len(test_sequences) == 1
    test_sequence = test_sequences[0]
    # The element-wise comparison below is only meaningful for equal lengths
    assert len(test_sequence) == len(ref_sequence), (
        "3Di sequence length does not match the reference"
    )
    # 3Di sequences are quite complex, i.e. removing backbone atoms at some position
    # might alter the symbols in remote positions
    # -> Allow for mismatches
    n_mismatches = np.count_nonzero(test_sequence.code != ref_sequence.code)
    assert n_mismatches / len(ref_sequence) <= MAX_MISMATCH_FRACTION

0 comments on commit 5f34d28

Please sign in to comment.