Skip to content

Commit

Permalink
check pdb topology differences
Browse files Browse the repository at this point in the history
  • Loading branch information
Brandon Duane Walker authored and Brandon Duane Walker committed May 7, 2024
1 parent a02e17d commit 6539626
Show file tree
Hide file tree
Showing 3 changed files with 243 additions and 0 deletions.
71 changes: 71 additions & 0 deletions cwl_adapters/topology_check.cwl
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
#!/usr/bin/env cwl-runner
cwlVersion: v1.0

class: CommandLineTool

label: Check for topology changes in PDB files

doc: |-
Check for topology changes in PDB files

baseCommand: ["python", "/topology_check.py"]

hints:
DockerRequirement:
dockerPull: mrbrandonwalker/topology_check

requirements:
InlineJavascriptRequirement: {}

inputs:

file1:
type: File
inputBinding:
prefix: --file1
format:
- edam:format_1476

file2:
type: File
inputBinding:
prefix: --file2
format:
- edam:format_1476

intended_changes:
type: string?
inputBinding:
prefix: --intended_changes

topology_changed:
type: string?

outputs:

topology_changed:
type: boolean
outputBinding:
glob: valid.txt
loadContents: true
outputEval: |
${
// Read the contents of the file
const lines = self[0].contents.split("\n");
// Read boolean value from the first line
const valid = lines[0].trim() === "True";
return valid;
}

stdout:
type: File
outputBinding:
glob: stdout

stdout: stdout

$namespaces:
edam: https://edamontology.org/

$schemas:
- https://raw.githubusercontent.com/edamontology/edamontology/master/EDAM_dev.owl
14 changes: 14 additions & 0 deletions examples/scripts/Dockerfile_topology_check
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
# docker build -f Dockerfile_topology_check -t mrbrandonwalker/topology_check .

FROM condaforge/mambaforge
# NOT mambaforge-pypy3 (rdkit is incompatible with pypy)

RUN mamba install -c conda-forge rdkit --yes

RUN mamba init bash

COPY topology_check.py /.

RUN mamba clean --all --yes

ADD Dockerfile_topology_check .
158 changes: 158 additions & 0 deletions examples/scripts/topology_check.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,158 @@
import argparse
from rdkit import Chem
from rdkit.Chem.rdchem import Atom, Mol


def map_atoms(mol1: Mol, mol2: Mol) -> dict:
"""
Map atoms from the first molecule to corresponding atoms in the second molecule.
Args:
mol1 (Mol): RDKit molecule from the first PDB file.
mol2 (Mol): RDKit molecule from the second PDB file.
Returns:
dict: Mapping of atoms from mol1 to mol2.
"""
match_atoms = mol1.GetSubstructMatch(mol2)
if len(match_atoms) == 0:
raise ValueError("No atoms matched between the two molecules.")
mol2_index_to_mol1_index = {mol2_index: mol1_index for mol2_index, mol1_index in enumerate(match_atoms)}
mol1_index_to_mol2_index = {k: v for v, k in mol2_index_to_mol1_index.items()}
return mol1_index_to_mol2_index


def compare_atom_properties(atom1: Atom, atom2: Atom) -> dict:
"""
Compare properties of two atoms and return a dictionary of differences.
Args:
atom1 (Atom): RDKit atom from the first molecule.
atom2 (Atom): RDKit atom from the second molecule.
Returns:
dict: Dictionary of property differences between the two atoms.
"""
properties_diff = {}
atom1_properties = get_properties(atom1)
atom2_properties = get_properties(atom2)
for key, atom1_value in atom1_properties.items():
if atom1_value != atom2_properties[key]:
properties_diff[key] = (atom1_value, atom2_properties[key])
return properties_diff


def compare_atoms(mol1: Mol, mol2: Mol, atom_map: dict, intended_changes: list = []) -> dict:
"""
Compare properties of mapped atoms in two molecules and return a dictionary of differences.
Args:
mol1 (Mol): RDKit molecule from the first PDB file.
mol2 (Mol): RDKit molecule from the second PDB file.
atom_map (dict): Mapping of atoms from mol1 to mol2.
intended_changes (list): List of intended changes (e.g., "added_hydrogen").
Returns:
dict: Dictionary of property differences between mapped atoms in the two molecules.
"""
properties_diff = {}
deleted_atoms = set(atom.GetIdx() for atom in mol1.GetAtoms()) - set(atom_map.keys())
for atom1_idx, atom2_idx in atom_map.items():
atom1 = mol1.GetAtomWithIdx(atom1_idx)
atom2 = mol2.GetAtomWithIdx(atom2_idx)

# Check if the difference is related to intended changes
if "added_hydrogens" in intended_changes:
diff_implicit_valence = atom2.GetImplicitValence() - atom1.GetImplicitValence()
diff_num_implicit_hs = atom2.GetNumImplicitHs() - atom1.GetNumImplicitHs()

# If the difference in implicit valence and num implicit Hs is the same, ignore the difference
if diff_implicit_valence == diff_num_implicit_hs:
continue

atom_diff = compare_atom_properties(atom1, atom2)
# check if the difference is related to changed residue label, then remove from difference
if "GetResidueName" in atom_diff and "changed_residue_label" in intended_changes:
atom_diff.pop("GetResidueName")

if atom_diff:
properties_diff[atom1_idx] = atom_diff

if "ignore_deleted" not in intended_changes:
# Add deleted atoms to the differences
for atom_idx in deleted_atoms:
properties_diff[atom_idx] = {"DeletedAtom": True}

return properties_diff


def get_properties(atom: Atom) -> dict:
"""Get properties of an RDKit atom."""
res_dict = {}
res = atom.GetPDBResidueInfo()
res_dict['GetResidueName'] = res.GetResidueName()
res_dict['GetResidueNumber'] = res.GetResidueNumber()
res_dict['GetChainId'] = res.GetChainId()
res_dict['GetName'] = res.GetName()
prop_dict = {
"GetSymbol": atom.GetSymbol(),
"GetIdx": atom.GetIdx(),
"GetAtomicNum": atom.GetAtomicNum(),
"GetBonds": sorted([(sorted([bond.GetBeginAtomIdx(), bond.GetEndAtomIdx()])) for bond in atom.GetBonds()]),
"GetChiralTag": atom.GetChiralTag(),
"GetDegree": atom.GetDegree(),
"GetExplicitValence": atom.GetExplicitValence(),
"GetFormalCharge": atom.GetFormalCharge(),
"GetHybridization": atom.GetHybridization(),
"GetImplicitValence": atom.GetImplicitValence(),
"GetIsAromatic": atom.GetIsAromatic(),
"GetIsotope": atom.GetIsotope(),
"GetMass": atom.GetMass(),
"GetNumExplicitHs": atom.GetNumExplicitHs(),
"GetNumImplicitHs": atom.GetNumImplicitHs(),
"IsInRing": atom.IsInRing()
}
prop_dict.update(res_dict)
return prop_dict


def main() -> None:
"""Main function."""
parser = argparse.ArgumentParser(description='Check if the topology has changed between two PDB files.')
parser.add_argument('--file1', type=str, help='Path to the first PDB file')
parser.add_argument('--file2', type=str, help='Path to the second PDB file')
parser.add_argument('--intended_changes', nargs='+', type=str, default=[],
help='List of intended changes', choices=['added_hydrogens', 'changed_residue_label', 'ignore_deleted', 'atom_order_change'])
args = parser.parse_args()
mol1 = Chem.MolFromPDBFile(args.file1, removeHs=False, sanitize=False)
mol2 = Chem.MolFromPDBFile(args.file2, removeHs=False, sanitize=False)

mol1_index_to_mol2_index = map_atoms(mol1, mol2)
atom_differences = compare_atoms(mol1, mol2, mol1_index_to_mol2_index, intended_changes=args.intended_changes)
if atom_differences:
for mol1_idx, diff in atom_differences.items():
if mol1_idx in mol1_index_to_mol2_index: # otherwise was deleted
mol2_idx = mol1_index_to_mol2_index[mol1_idx]
print(f"File 1 Atom Idx {mol1_idx}, File 2 Atom Idx {mol2_idx}")
for key, value in diff.items():
file1_value, file2_value = value
print(f"Property: {key} File 1: {file1_value}, File 2: {file2_value}")
else:
print(f"File 1 Atom Idx {mol1_idx} differences: {diff}")
topology_change = True
else:
topology_change = False

# NOTE: MDAnalysis changes original atom label and also atom order!!
if 'atom_order_change' not in args.intended_changes:
# Check if the atom order has changed
if list(mol1_index_to_mol2_index.keys()) != list(mol1_index_to_mol2_index.values()):
topology_change = True

# Write the boolean value to the output file
with open('valid.txt', 'w', encoding='utf-8') as file:
file.write(str(topology_change))


if __name__ == '__main__':
main()

0 comments on commit 6539626

Please sign in to comment.