Skip to content

Commit

Permalink
fix dbxref db prefixes for UniParc and UniRef
Browse files Browse the repository at this point in the history
  • Loading branch information
oschwengers committed Sep 30, 2021
1 parent 2ba985a commit 02b5dc8
Show file tree
Hide file tree
Showing 6 changed files with 16 additions and 14 deletions.
2 changes: 2 additions & 0 deletions bakta/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,8 @@
# DB identifiers
############################################################################
DB_XREF_UNIPROTKB = 'UniProtKB/TrEMBL'
DB_XREF_UNIPARC = 'UniParc'
DB_XREF_UNIREF = 'UniRef'
DB_XREF_REFSEQ_NRP = 'RefSeq'
DB_XREF_EC = 'EC'
DB_XREF_COG = 'COG'
Expand Down
14 changes: 7 additions & 7 deletions bakta/io/insdc.py
Original file line number Diff line number Diff line change
Expand Up @@ -121,18 +121,18 @@ def write_insdc(genome, features, genbank_output_path, embl_output_path):
if('ncbi_nrp_id' in feature.get('ups', {})):
nrp_id = feature['ups']['ncbi_nrp_id']
inference.append(f'similar to AA sequence:{bc.DB_XREF_REFSEQ_NRP}:{nrp_id}')
elif('uniparc_id' in feature.get('ups', {})):
uniparc_id = feature['ups']['uniparc_id']
inference.append(f'similar to AA sequence:{bc.DB_XREF_UNIPROTKB}:{uniparc_id}')
elif('uniref100_id' in feature.get('ips', {})):
ips_subject_id = feature['ips']['uniref100_id']
inference.append(f'similar to AA sequence:{bc.DB_XREF_UNIPROTKB}:{ips_subject_id}')
inference.append(f'similar to AA sequence:{bc.DB_XREF_UNIREF}:{ips_subject_id}')
elif('uniparc_id' in feature.get('ups', {})):
uniparc_id = feature['ups']['uniparc_id']
inference.append(f'similar to AA sequence:{bc.DB_XREF_UNIPARC}:{uniparc_id}')
elif('uniref90_id' in feature.get('psc', {}) and feature.get('psc', {}).get('valid', False)):
psc_subject_id = feature['psc']['uniref90_id']
inference.append(f'similar to AA sequence:{bc.DB_XREF_UNIPROTKB}:{psc_subject_id}')
inference.append(f'similar to AA sequence:{bc.DB_XREF_UNIREF}:{psc_subject_id}')
elif('uniref50_id' in feature.get('pscc', {})):
pscc_subject_id = feature['psc']['uniref50_id']
inference.append(f'similar to AA sequence:{bc.DB_XREF_UNIPROTKB}:{pscc_subject_id}')
inference.append(f'similar to AA sequence:{bc.DB_XREF_UNIREF}:{pscc_subject_id}')
qualifiers['inference'] = inference
if(cfg.compliant):
for note in qualifiers['note']: # move EC numbers from note to EC_number
Expand Down Expand Up @@ -296,7 +296,7 @@ def revise_product_insdc(feature):

def revise_dbxref_insdc(dbxrefs):
"""Remove INSDC non-compliant DbXrefs."""
insdc_valid_dbxrefs = [bc.DB_XREF_UNIPROTKB, bc.DB_XREF_GO, bc.DB_XREF_IS, bc.DB_XREF_PFAM, bc.DB_XREF_RFAM]
insdc_valid_dbxrefs = [bc.DB_XREF_UNIPROTKB, bc.DB_XREF_GO, bc.DB_XREF_PFAM, bc.DB_XREF_RFAM]
valid_dbxrefs = []
invalid_dbxrefs = []
for dbxref in dbxrefs:
Expand Down
4 changes: 2 additions & 2 deletions bakta/ips.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,7 @@ def parse_annotation(rec):
}
db_xrefs = [
'SO:0001217',
f'{bc.DB_XREF_UNIPROTKB}:{ips[DB_IPS_COL_UNIREF100]}'
f'{bc.DB_XREF_UNIREF}:{ips[DB_IPS_COL_UNIREF100]}'
]

# add non-empty IPS annotations and attach database prefixes to identifiers
Expand All @@ -86,7 +86,7 @@ def parse_annotation(rec):
ips[DB_IPS_COL_PRODUCT] = rec[DB_IPS_COL_PRODUCT]
if(rec[DB_IPS_COL_UNIREF90]):
ips[DB_IPS_COL_UNIREF90] = bc.DB_PREFIX_UNIREF_90 + rec[DB_IPS_COL_UNIREF90]
db_xrefs.append(f'{bc.DB_XREF_UNIPROTKB}:{ips[DB_IPS_COL_UNIREF90]}')
db_xrefs.append(f'{bc.DB_XREF_UNIREF}:{ips[DB_IPS_COL_UNIREF90]}')
if(rec[DB_IPS_COL_EC]):
ips[DB_IPS_COL_EC] = rec[DB_IPS_COL_EC]
ecs = []
Expand Down
4 changes: 2 additions & 2 deletions bakta/psc.py
Original file line number Diff line number Diff line change
Expand Up @@ -151,7 +151,7 @@ def parse_annotation(rec):
}
db_xrefs = [
'SO:0001217',
f'{bc.DB_XREF_UNIPROTKB}:{psc[DB_PSC_COL_UNIREF90]}'
f'{bc.DB_XREF_UNIREF}:{psc[DB_PSC_COL_UNIREF90]}'
]

# add non-empty PSC annotations and attach database prefixes to identifiers
Expand All @@ -169,7 +169,7 @@ def parse_annotation(rec):
psc[DB_PSC_COL_EC] = ecs
if(rec[DB_PSC_COL_UNIREF50]):
psc[DB_PSC_COL_UNIREF50] = bc.DB_PREFIX_UNIREF_50 + rec[DB_PSC_COL_UNIREF50]
db_xrefs.append(f'{bc.DB_XREF_UNIPROTKB}:{psc[DB_PSC_COL_UNIREF50]}')
db_xrefs.append(f'{bc.DB_XREF_UNIREF}:{psc[DB_PSC_COL_UNIREF50]}')
if(rec[DB_PSC_COL_COG_ID]):
psc[DB_PSC_COL_COG_ID] = bc.DB_PREFIX_COG + rec[DB_PSC_COL_COG_ID]
db_xrefs.append(f'{bc.DB_XREF_COG}:{psc[DB_PSC_COL_COG_ID]}')
Expand Down
2 changes: 1 addition & 1 deletion bakta/pscc.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ def parse_annotation(rec):
DB_PSCC_COL_UNIREF50: uniref_full_id, # must not be NULL/None
'db_xrefs': [
'SO:0001217',
f'{bc.DB_XREF_UNIPROTKB}:{uniref_full_id}'
f'{bc.DB_XREF_UNIREF}:{uniref_full_id}'
]
}
# add non-empty PSCC annotations and attach database prefixes to identifiers
Expand Down
4 changes: 2 additions & 2 deletions bakta/ups.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,13 +72,13 @@ def parse_annotation(rec):
# add non-empty UPS annotations and attach database prefixes to identifiers
if(rec[DB_UPS_COL_UNIPARC]):
ups[DB_UPS_COL_UNIPARC] = bc.DB_PREFIX_UNIPARC + rec[DB_UPS_COL_UNIPARC]
db_xrefs.append(f'{bc.DB_XREF_UNIPROTKB}:{ups[DB_UPS_COL_UNIPARC]}')
db_xrefs.append(f'{bc.DB_XREF_UNIPARC}:{ups[DB_UPS_COL_UNIPARC]}')
if(rec[DB_UPS_COL_REFSEQ_NRP]):
ups[DB_UPS_COL_REFSEQ_NRP] = bc.DB_PREFIX_REFSEQ_NRP + rec[DB_UPS_COL_REFSEQ_NRP]
db_xrefs.append(f'{bc.DB_XREF_REFSEQ_NRP}:{ups[DB_UPS_COL_REFSEQ_NRP]}')
if(rec[DB_UPS_COL_UNIREF100]):
ups[DB_UPS_COL_UNIREF100] = bc.DB_PREFIX_UNIREF_100 + rec[DB_UPS_COL_UNIREF100]
db_xrefs.append(f'{bc.DB_XREF_UNIPROTKB}:{ups[DB_UPS_COL_UNIREF100]}')
db_xrefs.append(f'{bc.DB_XREF_UNIREF}:{ups[DB_UPS_COL_UNIREF100]}')

ups['db_xrefs'] = db_xrefs
return ups

0 comments on commit 02b5dc8

Please sign in to comment.