Skip to content

Commit

Permalink
Remove explicit hydrogens from smarts
Browse files Browse the repository at this point in the history
  • Loading branch information
Brian Kelley committed Feb 23, 2023
1 parent 5f70235 commit 853e1e3
Show file tree
Hide file tree
Showing 2 changed files with 78 additions and 14 deletions.
28 changes: 14 additions & 14 deletions rd_filters/data/alert_collection.csv
Original file line number Diff line number Diff line change
Expand Up @@ -219,7 +219,7 @@ rule_id,rule_set,description,smarts,rule_set_name,priority,max
218,3,branched_polycyclic_aromatic,a1(a2aa(a3aaaaa3)aa(a4aaaaa4)a2)aaaaa1,BMS,7,0
219,3,carbodiimide_iso#thio#cyanate,"N=C=[N,O,S]",BMS,7,0
220,3,carbonyl_halide,"O=C[F,Cl,Br,I]",BMS,7,0
221,3,contains_metal,"[$([Ru]),$([Rh]),$([#34]),$([Pd]),$([Sc]),$([Bi]),$([Sb]),$([Ag]),$([Ti]),$([Al]),$([Cd]),$([V]),$([In]),$([Cr]),$([Sn]),$([Mn]),$([La]),$([Fe]),$([Er]),$([Tm]),$([Yb]),$([Lu]),$([Hf]),$([Ta]),$([W]),$([Re]),$([Co]),$([Os]),$([Ni]),$([Ir]),$([Cu]),$([Zn]),$([Ga]),$([Ge]),$([#33]),$([Y]),$([Zr]),$([Nb]),$([Ce]),$([Pr]),$([Nd]),$([Sm]),$([Eu]),$([Gd]),$([Tb]),$([Dy]),$([Ho]),$([Pt]),$([Au]),$([Hg]),$([Tl]),$([Pb]),$([Ac]),$([Th]),$([Pa]),$([Mo]),$([U]),$([Tc]),$([Te]),$([Po]),$([At])]",BMS,7,0
221,3,contains_metal,"[$([Ru]),$([Rh]),$([Se]),$([se]),$([Pd]),$([Sc]),$([Bi]),$([Sb]),$([Ag]),$([Ti]),$([Al]),$([Cd]),$([V]),$([In]),$([Cr]),$([Sn]),$([Mn]),$([La]),$([Fe]),$([Er]),$([Tm]),$([Yb]),$([Lu]),$([Hf]),$([Ta]),$([W]),$([Re]),$([Co]),$([Os]),$([Ni]),$([Ir]),$([Cu]),$([Zn]),$([Ga]),$([Ge]),$([As]),$([as]),$([Y]),$([Zr]),$([Nb]),$([Ce]),$([Pr]),$([Nd]),$([Sm]),$([Eu]),$([Gd]),$([Tb]),$([Dy]),$([Ho]),$([Pt]),$([Au]),$([Hg]),$([Tl]),$([Pb]),$([Ac]),$([Th]),$([Pa]),$([Mo]),$([U]),$([Tc]),$([Te]),$([Po]),$([At])]",BMS,7,0
222,3,crown_ether,"[$([O,S,#7;R1;r9,r10,r11,r12,r13,r14,r15,r16,r17,r18][CH,CH2;r9,r10,r11,r12,r13,r14,r15,r16,r17,r18][CH,CH2;r9,r10,r11,r12,r13,r14,r15,r16,r17,r18][O,S,#7;R1;r9,r10,r11,r12,r13,r14,r15,r16,r17,r18][CH,CH2;r9,r10,r11,r12,r13,r14,r15,r16,r17,r18][CH,CH2;r9,r10,r11,r12,r13,r14,r15,r16,r17,r18][O,S,#7;R1;r9,r10,r11,r12,r13,r14,r15,r16,r17,r18]),$([O,S,#7;R1;r9,r10,r11,r12,r13,r14,r15,r16,r17,r18][CH,CH2;r9,r10,r11,r12,r13,r14,r15,r16,r17,r18][CH,CH2;r9,r10,r11,r12,r13,r14,r15,r16,r17,r18][CH,CH2;r9,r10,r11,r12,r13,r14,r15,r16,r17,r18][O,S,#7;R1;r9,r10,r11,r12,r13,r14,r15,r16,r17,r18][CH,CH2;r9,r10,r11,r12,r13,r14,r15,r16,r17,r18][CH,CH2;r9,r10,r11,r12,r13,r14,r15,r16,r17,r18][CH,CH2;r9,r10,r11,r12,r13,r14,r15,r16,r17,r18][O,S,#7;R1;r9,r10,r11,r12,r13,r14,r15,r16,r17,r18]),$([O,S,#7;R1;r9,r10,r11,r12,r13,r14,r15,r16,r17,r18][CH,CH2;r9,r10,r11,r12,r13,r14,r15,r16,r17,r18][CH,CH2;r9,r10,r11,r12,r13,r14,r15,r16,r17,r18][O,S,#7;R1;r9,r10,r11,r12,r13,r14,r15,r16,r17,r18][CH,CH2;r9,r10,r11,r12,r13,r14,r15,r16,r17,r18][CH,CH2;r9,r10,r11,r12,r13,r14,r15,r16,r17,r18][CH,CH2;r9,r10,r11,r12,r13,r14,r15,r16,r17,r18][O,S,#7;R1;r9,r10,r11,r12,r13,r14,r15,r16,r17,r18])]",BMS,7,0
223,3,cyano_phosphonate,"P(O[A,a])(O[A,a])(=O)C#N",BMS,7,0
224,3,cyanohydrin,"[C;X4](-[OH,NH1,NH2,SH])(-C#N)",BMS,7,0
Expand Down Expand Up @@ -1106,20 +1106,20 @@ rule_id,rule_set,description,smarts,rule_set_name,priority,max
1105,7,Filter2_acyl_phosphyl_sulfonyl_halide,"[C,S,P](=O)[F,Cl,Br,I]",Inpharmatica,1,0
1106,7,Filter3_allyl_halide,"[F,Cl,Br,I][CX4]C=C",Inpharmatica,1,0
1107,7,Filter4_alpha_halo_carbonyl,"[Br,Cl,I][C!H0]C=[O,S]",Inpharmatica,1,0
1108,7,Filter5_azo,"[!#7,#1]~[NX2R0]=[NX2R0]~[!#7,#1]",Inpharmatica,1,0
1108,7,Filter5_azo,"[$([NX2R0]-[!#7]),$([NX2R0H1])]=[$([NX2R0]-[!#7]),$([NX2R0H1])]",Inpharmatica,1,0
1109,7,Filter6_benzyl_halide,"[Br,Cl,I][CX4]c",Inpharmatica,1,0
1110,7,Filter7_diazo,[!#7]~[NX2]~[NX1],Inpharmatica,1,0
1111,7,Filter8_thio_isocyanat_diimin,"N=C=[N,O,P,S]",Inpharmatica,1,0
1112,7,Filter9_metal,"[!#1!#6!#7!#8!#9!#15!#16!#17]~[*,#1]",Inpharmatica,1,0
1112,7,Filter9_metal,"[$([!#1!#6!#7!#8!#9!#15!#16!#17!#35!#53]~[*]),$([!#1!#6!#7!#8!#9!#15!#16!#17!#35!#53;h])]",Inpharmatica,1,0
1113,7,Filter10_Terminal_vinyl,"[CH2]=[CH][N,O,P,S;R0]",Inpharmatica,1,0
1114,7,Filter11_nitrosamin,[NR0]~[NX2]~[OX1],Inpharmatica,1,0
1115,7,Filter12_nitroso,"[*,#1]~[NX2]~[OX1]",Inpharmatica,1,0
1116,7,Filter13_PS_double_bond,"S=P~[*,#1]",Inpharmatica,1,0
1115,7,Filter12_nitroso,"[$([NX2]~*),$([NX2H])]~[OX1]",Inpharmatica,1,0
1116,7,Filter13_PS_double_bond,"S=[P;!X1]",Inpharmatica,1,0
1117,7,Filter14_thio_oxopyrylium_salt,"c1ccc[s,o;X2]c1",Inpharmatica,1,0
1118,7,Filter15_thiosulfate,[SX4](~S)(~O)(~O)~*,Inpharmatica,1,0
1119,7,Filter16_trialkyl_phosphin,[PX3]([#6])([#6])[#6],Inpharmatica,1,0
1120,7,Filter17_trialkyl_phosphin2,[PX4](~[!O!S])([#6])([#6])[#6],Inpharmatica,1,0
1121,7,Filter18_oxime_ester,"[!#7,#1]C=NO[C,S,P](=O)*",Inpharmatica,1,0
1121,7,Filter18_oxime_ester,"[$([CX3H1]-[!#7]),$([CX3H2])]=NO[C,S,P](=O)*",Inpharmatica,1,0
1122,7,Filter19_hydroxyimide_ester,O=C[NX3](C=O)OC(=O)*,Inpharmatica,1,0
1123,7,Filter20_hydrazine,[Nv3X3][Nv3X3!H0],Inpharmatica,1,0
1124,7,Filter21_cyanhydrin,[NX1]#C[CX4][OH],Inpharmatica,1,0
Expand All @@ -1130,10 +1130,10 @@ rule_id,rule_set,description,smarts,rule_set_name,priority,max
1129,7,Filter26_alkyl_halide,"AA[CH2][F,Cl,Br,I,$(OS(=O)(=O)*)]",Inpharmatica,1,0
1130,7,Filter27_anhydride,"*[C,S](=O)O[C,S](=O)*",Inpharmatica,1,0
1131,7,Filter28_halo_pyrimidine,"c1nc(ncc1)[Br,I]",Inpharmatica,1,0
1132,7,Filter29_thioester,"CC(=[OX1,SX1,NH])[SX2][*,#1]",Inpharmatica,1,0
1133,7,Filter30_beta_halo_carbonyl,"[#6,#1]C(=O)CC[Br,I]",Inpharmatica,1,0
1134,7,Filter31_so_bond,"[*,#1][SX2R0]O[*,#1]",Inpharmatica,1,0
1135,7,Filter32_oo_bond,"[*,#1][OR0]O[*,#1]",Inpharmatica,1,0
1132,7,Filter29_thioester,"CC(=[OX1,SX1,NH])[$([SX2]-*),$([SX2H])]",Inpharmatica,1,0
1133,7,Filter30_beta_halo_carbonyl,"[$(C(-C)(-C)(=O)),$([CH](=O)-C)](=O)CC[Br,I]",Inpharmatica,1,0
1134,7,Filter31_so_bond,"[SX2R0][OX2]",Inpharmatica,1,0
1135,7,Filter32_oo_bond,"[OX2R0][OX2]",Inpharmatica,1,0
1136,7,Filter33_c10_alkyl,[CH3][CH2][CH2][CH2][CH2][CH2][CH2][CH2][CH2][CH2],Inpharmatica,1,0
1137,7,Filter34_isotope,"[2#1,3#1,13C,14C,15N,125I,23F,22Na,32P,33P,35S,45Ca,57Co,103Ru,141Ce]",Inpharmatica,1,0
1138,7,Filter35_pp_bond,P-P,Inpharmatica,1,0
Expand All @@ -1145,12 +1145,12 @@ rule_id,rule_set,description,smarts,rule_set_name,priority,max
1144,7,Filter41_12_dicarbonyl,*C(=O)C(=O)*,Inpharmatica,1,0
1145,7,Filter42_12_dicarbonyl_tautomer,[*!$(C[OH])]=C([OH])C(=O)*,Inpharmatica,1,0
1146,7,Filter43_michael_acceptor_sp1,C#CC=O,Inpharmatica,1,0
1147,7,Filter44_michael_acceptor2,"[#6,#1,OH0][C!$(C1(=O)C=CC(=O)C=C1)](=O)C=[C!$(C[Nv3X3,OH])]",Inpharmatica,1,0
1147,7,Filter44_michael_acceptor2,"[$([C!$(C1(=O)C=CC(=O)C=C1)]C),$([Ch]),$(C[OH0])](=O)C=[C!$(C[Nv3X3,OH])]",Inpharmatica,1,0
1148,7,Filter45_allyl_halide2,"[Br,Cl,I][CX4]C=[C,N,P]",Inpharmatica,1,0
1149,7,Filter46_nhalide,"N[F,Cl,Br,I]",Inpharmatica,1,0
1150,7,Filter47_so2f,O=S(=O)(*)F,Inpharmatica,1,0
1151,7,Filter48_foso,O=S(*)OF,Inpharmatica,1,0
1152,7,Filter49_halogen,"[*,#1]~[F,Cl,Br,I]~[*,#1]",Inpharmatica,1,0
1152,7,Filter49_halogen,"[FX2,ClX2,BrX2,IX2,IX3,IX4,IX5]",Inpharmatica,1,0
1153,7,Filter50_grignard,"C[Mg][F,Cl,Br,I]",Inpharmatica,1,0
1154,7,Filter51_pn3,N[PX3](N)N,Inpharmatica,1,0
1155,7,Filter52_NC_haloamine,"NC[F,Cl,Br,I]",Inpharmatica,1,0
Expand Down Expand Up @@ -1188,7 +1188,7 @@ rule_id,rule_set,description,smarts,rule_set_name,priority,max
1187,7,Filter86_cyanamide,N#C[#7!$(N(C#N)=C(N)NC)],Inpharmatica,1,0
1188,7,Filter87_crowns,"[N,O,S]-@[#6!$(*(~@*)(~@*)~@*)]@[#6!$(*(~@*)(~@*)~@*)]-@[N,O,S]-@[#6!$(*(~@*)(~@*)~@*)]@[#6!$(*(~@*)(~@*)~@*)]-@[O,N]-@[#6!$(*(~@*)(~@*)~@*)]@[#6!$(*(~@*)(~@*)~@*)]-@[N,O,S]-@C",Inpharmatica,1,0
1189,7,Filter88_ene_sulfone,[C!H0]=CS(=O)(=O)*,Inpharmatica,1,0
1190,7,Filter89_hydroxylamine,"[*!$(C=O)]!@N!@O[*,#1]",Inpharmatica,1,0
1190,7,Filter89_hydroxylamine,"[*!$(C=O)]!@N!@[$([OX2])]",Inpharmatica,1,0
1191,7,Filter90_N_double_bond_S,N=!@S,Inpharmatica,1,0
1192,7,Filter92_trityl,*(c1ccccc1)(c1ccccc1)(c1ccccc1),Inpharmatica,1,0
1193,7,Filter93_acetyl_urea,C(=O)!@N!@C(=O)[#7],Inpharmatica,1,0
Expand All @@ -1201,7 +1201,7 @@ rule_id,rule_set,description,smarts,rule_set_name,priority,max
1200,8,"Si,B,Se atoms","[Si,B,Se]",LINT,2,0
1201,8,hetero imides,[!#6]-[CH2]-N1C(=O)CCC(=O)1,LINT,2,0
1202,8,poly ethers,O[CH2][CH2]O-!@[CH2][CH2]O,LINT,2,0
1203,8,acyclic imines,"C-!@[NX2]=[C!R,#1]-C",LINT,2,0
1203,8,acyclic imines,"[$([CX3R0]([#6])[#6]),$([CX3HR0][#6])]=[$([NX2R0][#6]),$([NX2HR0])]",LINT,2,0
1204,8,alkyl esters of S or P,"[S,P](=O)OC",LINT,2,0
1205,8,ugly P compounds,"P(=[O,S])[C,N]([C,N])[C,N]",LINT,2,0
1206,8,"acyclic N-,=N and not N bound to carbonyl or sulfone","[N;!$(N-[C,S]=*)]-,=;!@[N;!$(N-[C,S]=*)]",LINT,2,0
Expand Down
64 changes: 64 additions & 0 deletions tests/test_rdfilter.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
from rd_filters import rd_filters
import pkg_resources

# (SMILES, expected status) pairs for the LINT rule set.  Only one status
# string is checked per molecule, so each expected value names the first
# alert that fires for that SMILES.
test_lint = [
    (
        'C1N=C1',
        'aziridine-like N in 3-membered ring > 0',
    ),
    (
        'NN=N',
        'acyclic N-,=N and not N bound to carbonyl or sulfone > 0',
    ),
]

# (SMILES, expected status) pairs for the Inpharmatica rule set.
# The expected status is either 'OK' (no alert fired) or
# '<alert description> > 0' for the single alert that is reported.
# NOTE(review): where the expected alert differs from the group heading
# (e.g. Filter26 for the beta-halo carbonyls), this presumably reflects an
# earlier rule firing first -- confirm against the rule ordering.
test_inpharmatica = [
    # imines
    ('CN=C', 'OK'),
    ('CN=CC', 'Filter39_imine > 0'),
    ('CN=C(C)', 'Filter39_imine > 0'),
    # azo, including implicit-hydrogen termini
    ('CN=NC', 'Filter5_azo > 0'),
    ('CN=N', 'Filter5_azo > 0'),
    ('N=N', 'Filter5_azo > 0'),
    # metals: bonded or hydrogen-bearing metal atoms are flagged, a bare atom is not
    ('[Fe]C', 'Filter9_metal > 0'),
    ('[FeH]', 'Filter9_metal > 0'),
    ('[Fe]', 'OK'),
    # nitroso
    ('CN=O', 'Filter12_nitroso > 0'),
    ('N=O', 'Filter12_nitroso > 0'),
    # P=S double bond
    ('S=P', 'Filter13_PS_double_bond > 0'),
    ('S=PC', 'Filter13_PS_double_bond > 0'),
    # thioesters, substituted and hydrogen-terminated sulfur
    ('CC(=O)SC', 'Filter29_thioester > 0'),
    ('CC(=S)SC', 'Filter29_thioester > 0'),
    ('CC(=N)SC', 'Filter29_thioester > 0'),
    ('CC(=O)S', 'Filter29_thioester > 0'),
    ('CC(=S)S', 'Filter29_thioester > 0'),
    ('CC(=N)S', 'Filter29_thioester > 0'),
    # beta-halo carbonyls (reported as the alkyl halide alert)
    ('CC(=O)CCBr', 'Filter26_alkyl_halide > 0'),
    ('CC(=O)CCI', 'Filter26_alkyl_halide > 0'),
    ('C(=O)CCI', 'Filter26_alkyl_halide > 0'),
    ('NC(=O)CCI', 'Filter26_alkyl_halide > 0'),
    # oxime esters / hydroxylamines
    ('CC=NOS(=O)N', 'Filter18_oxime_ester > 0'),
    ('NC=NOP(=O)N', 'Filter89_hydroxylamine > 0'),
    ('C=NOP(=O)N', 'Filter18_oxime_ester > 0'),
    ('NC=NO', 'Filter89_hydroxylamine > 0'),
    # S-O bonds; the aromatic ring case must not be flagged
    ('SO', 'Filter31_so_bond > 0'),
    ('SOC', 'Filter31_so_bond > 0'),
    ('c1csocc1', 'OK'),
    ('C(=O)NOS', 'Filter31_so_bond > 0'),
    # O-O bonds; the aromatic ring case must not be flagged
    ('OO', 'Filter32_oo_bond > 0'),
    ('COO', 'Filter32_oo_bond > 0'),
    ('c1coocc1', 'OK'),
    # Michael acceptors and neighbouring alerts
    ('CC(=O)C=C', 'Filter44_michael_acceptor2 > 0'),
    ('C(=O)C=C', 'Filter38_aldehyde > 0'),
    ('OC(=O)C=C', 'Filter44_michael_acceptor2 > 0'),
    ('C1(=O)C=CC(=O)C=C1', 'Filter53_para_quinones > 0'),
    # hypervalent halogens
    ('CIC', 'Filter49_halogen > 0'),
    ('CI(C)C', 'Filter49_halogen > 0'),
]

def test_hydrogen_suppression():
    """Check the hydrogen-free SMARTS against the expected alert per SMILES.

    For each rule set a fresh ``RDFilters`` is built from the packaged
    ``data/alert_collection.csv`` and restricted to that rule set.  Every
    case is evaluated and element 2 of the returned result is compared with
    the expected status string.
    """
    alerts_csv = pkg_resources.resource_filename(
        'rd_filters', "data/alert_collection.csv")
    suites = (
        (["Inpharmatica"], test_inpharmatica),
        (["LINT"], test_lint),
    )
    for rule_sets, cases in suites:
        filters = rd_filters.RDFilters(alerts_csv)
        filters.build_rule_list(rule_sets)
        for smiles, expected in cases:
            # evaluate() receives a 2-tuple; the SMILES is passed in both
            # slots (presumably (smiles, name) -- confirm against RDFilters).
            outcome = filters.evaluate((smiles, smiles))
            assert outcome[2] == expected, repr((outcome[:3], expected))

0 comments on commit 853e1e3

Please sign in to comment.