Split UniProt-family cross-references into dedicated DbXref prefixes.

UniParc and UniRef identifiers were previously emitted under the
'UniProtKB/TrEMBL' prefix. This patch adds DB_XREF_UNIPARC ('UniParc') and
DB_XREF_UNIREF ('UniRef') and uses them in the INSDC inference qualifiers and
in the db_xrefs lists built by ips/psc/pscc/ups. In write_insdc the UniRef100
(ips) branch is now tested before the UniParc (ups) branch, and DB_XREF_IS is
dropped from the list of INSDC-valid DbXrefs.

Review fix: the 'uniref50_id' branch of write_insdc tested feature['pscc'] but
read the identifier from feature['psc']; it now reads feature['pscc'].

NOTE(review): this patch was recovered from a copy whose newlines had been
collapsed. Leading indentation inside the hunks is reconstructed from the
Python nesting and should be confirmed against the target tree. The index line
for bakta/io/insdc.py still carries the blob ids from before the review fix.

diff --git a/bakta/constants.py b/bakta/constants.py
index 685bb765..dfe18611 100644
--- a/bakta/constants.py
+++ b/bakta/constants.py
@@ -29,6 +29,8 @@
 # DB identifiers
 ############################################################################
 DB_XREF_UNIPROTKB = 'UniProtKB/TrEMBL'
+DB_XREF_UNIPARC = 'UniParc'
+DB_XREF_UNIREF = 'UniRef'
 DB_XREF_REFSEQ_NRP = 'RefSeq'
 DB_XREF_EC = 'EC'
 DB_XREF_COG = 'COG'
diff --git a/bakta/io/insdc.py b/bakta/io/insdc.py
index 7ed7b79f..6ce989de 100644
--- a/bakta/io/insdc.py
+++ b/bakta/io/insdc.py
@@ -121,18 +121,18 @@ def write_insdc(genome, features, genbank_output_path, embl_output_path):
                 if('ncbi_nrp_id' in feature.get('ups', {})):
                     nrp_id = feature['ups']['ncbi_nrp_id']
                     inference.append(f'similar to AA sequence:{bc.DB_XREF_REFSEQ_NRP}:{nrp_id}')
-                elif('uniparc_id' in feature.get('ups', {})):
-                    uniparc_id = feature['ups']['uniparc_id']
-                    inference.append(f'similar to AA sequence:{bc.DB_XREF_UNIPROTKB}:{uniparc_id}')
                 elif('uniref100_id' in feature.get('ips', {})):
                     ips_subject_id = feature['ips']['uniref100_id']
-                    inference.append(f'similar to AA sequence:{bc.DB_XREF_UNIPROTKB}:{ips_subject_id}')
+                    inference.append(f'similar to AA sequence:{bc.DB_XREF_UNIREF}:{ips_subject_id}')
+                elif('uniparc_id' in feature.get('ups', {})):
+                    uniparc_id = feature['ups']['uniparc_id']
+                    inference.append(f'similar to AA sequence:{bc.DB_XREF_UNIPARC}:{uniparc_id}')
                 elif('uniref90_id' in feature.get('psc', {}) and feature.get('psc', {}).get('valid', False)):
                     psc_subject_id = feature['psc']['uniref90_id']
-                    inference.append(f'similar to AA sequence:{bc.DB_XREF_UNIPROTKB}:{psc_subject_id}')
+                    inference.append(f'similar to AA sequence:{bc.DB_XREF_UNIREF}:{psc_subject_id}')
                 elif('uniref50_id' in feature.get('pscc', {})):
-                    pscc_subject_id = feature['psc']['uniref50_id']
-                    inference.append(f'similar to AA sequence:{bc.DB_XREF_UNIPROTKB}:{pscc_subject_id}')
+                    pscc_subject_id = feature['pscc']['uniref50_id']
+                    inference.append(f'similar to AA sequence:{bc.DB_XREF_UNIREF}:{pscc_subject_id}')
                 qualifiers['inference'] = inference
                 if(cfg.compliant):
                     for note in qualifiers['note']:  # move EC numbers from note to EC_number
@@ -296,7 +296,7 @@ def revise_product_insdc(feature):
 
 def revise_dbxref_insdc(dbxrefs):
     """Remove INSDC non-compliant DbXrefs."""
-    insdc_valid_dbxrefs = [bc.DB_XREF_UNIPROTKB, bc.DB_XREF_GO, bc.DB_XREF_IS, bc.DB_XREF_PFAM, bc.DB_XREF_RFAM]
+    insdc_valid_dbxrefs = [bc.DB_XREF_UNIPROTKB, bc.DB_XREF_GO, bc.DB_XREF_PFAM, bc.DB_XREF_RFAM]
     valid_dbxrefs = []
     invalid_dbxrefs = []
     for dbxref in dbxrefs:
diff --git a/bakta/ips.py b/bakta/ips.py
index b7fad347..4961b19c 100644
--- a/bakta/ips.py
+++ b/bakta/ips.py
@@ -76,7 +76,7 @@ def parse_annotation(rec):
     }
     db_xrefs = [
         'SO:0001217',
-        f'{bc.DB_XREF_UNIPROTKB}:{ips[DB_IPS_COL_UNIREF100]}'
+        f'{bc.DB_XREF_UNIREF}:{ips[DB_IPS_COL_UNIREF100]}'
     ]
 
     # add non-empty PSC annotations and attach database prefixes to identifiers
@@ -86,7 +86,7 @@ def parse_annotation(rec):
         ips[DB_IPS_COL_PRODUCT] = rec[DB_IPS_COL_PRODUCT]
     if(rec[DB_IPS_COL_UNIREF90]):
         ips[DB_IPS_COL_UNIREF90] = bc.DB_PREFIX_UNIREF_90 + rec[DB_IPS_COL_UNIREF90]
-        db_xrefs.append(f'{bc.DB_XREF_UNIPROTKB}:{ips[DB_IPS_COL_UNIREF90]}')
+        db_xrefs.append(f'{bc.DB_XREF_UNIREF}:{ips[DB_IPS_COL_UNIREF90]}')
     if(rec[DB_IPS_COL_EC]):
         ips[DB_IPS_COL_EC] = rec[DB_IPS_COL_EC]
         ecs = []
diff --git a/bakta/psc.py b/bakta/psc.py
index 14e542c1..1ad7c127 100644
--- a/bakta/psc.py
+++ b/bakta/psc.py
@@ -151,7 +151,7 @@ def parse_annotation(rec):
     }
     db_xrefs = [
         'SO:0001217',
-        f'{bc.DB_XREF_UNIPROTKB}:{psc[DB_PSC_COL_UNIREF90]}'
+        f'{bc.DB_XREF_UNIREF}:{psc[DB_PSC_COL_UNIREF90]}'
     ]
 
     # add non-empty PSC annotations and attach database prefixes to identifiers
@@ -169,7 +169,7 @@ def parse_annotation(rec):
         psc[DB_PSC_COL_EC] = ecs
     if(rec[DB_PSC_COL_UNIREF50]):
         psc[DB_PSC_COL_UNIREF50] = bc.DB_PREFIX_UNIREF_50 + rec[DB_PSC_COL_UNIREF50]
-        db_xrefs.append(f'{bc.DB_XREF_UNIPROTKB}:{psc[DB_PSC_COL_UNIREF50]}')
+        db_xrefs.append(f'{bc.DB_XREF_UNIREF}:{psc[DB_PSC_COL_UNIREF50]}')
     if(rec[DB_PSC_COL_COG_ID]):
         psc[DB_PSC_COL_COG_ID] = bc.DB_PREFIX_COG + rec[DB_PSC_COL_COG_ID]
         db_xrefs.append(f'{bc.DB_XREF_COG}:{psc[DB_PSC_COL_COG_ID]}')
diff --git a/bakta/pscc.py b/bakta/pscc.py
index 08285225..211d6d0b 100644
--- a/bakta/pscc.py
+++ b/bakta/pscc.py
@@ -67,7 +67,7 @@ def parse_annotation(rec):
         DB_PSCC_COL_UNIREF50: uniref_full_id,  # must not be NULL/None
         'db_xrefs': [
             'SO:0001217',
-            f'{bc.DB_XREF_UNIPROTKB}:{uniref_full_id}'
+            f'{bc.DB_XREF_UNIREF}:{uniref_full_id}'
         ]
     }
     # add non-empty PSCC annotations and attach database prefixes to identifiers
diff --git a/bakta/ups.py b/bakta/ups.py
index f488dbaf..974ed6db 100644
--- a/bakta/ups.py
+++ b/bakta/ups.py
@@ -72,13 +72,13 @@ def parse_annotation(rec):
     # add non-empty PSC annotations and attach database prefixes to identifiers
     if(rec[DB_UPS_COL_UNIPARC]):
         ups[DB_UPS_COL_UNIPARC] = bc.DB_PREFIX_UNIPARC + rec[DB_UPS_COL_UNIPARC]
-        db_xrefs.append(f'{bc.DB_XREF_UNIPROTKB}:{ups[DB_UPS_COL_UNIPARC]}')
+        db_xrefs.append(f'{bc.DB_XREF_UNIPARC}:{ups[DB_UPS_COL_UNIPARC]}')
     if(rec[DB_UPS_COL_REFSEQ_NRP]):
         ups[DB_UPS_COL_REFSEQ_NRP] = bc.DB_PREFIX_REFSEQ_NRP + rec[DB_UPS_COL_REFSEQ_NRP]
         db_xrefs.append(f'{bc.DB_XREF_REFSEQ_NRP}:{ups[DB_UPS_COL_REFSEQ_NRP]}')
     if(rec[DB_UPS_COL_UNIREF100]):
         ups[DB_UPS_COL_UNIREF100] = bc.DB_PREFIX_UNIREF_100 + rec[DB_UPS_COL_UNIREF100]
-        db_xrefs.append(f'{bc.DB_XREF_UNIPROTKB}:{ups[DB_UPS_COL_UNIREF100]}')
+        db_xrefs.append(f'{bc.DB_XREF_UNIREF}:{ups[DB_UPS_COL_UNIREF100]}')
 
     ups['db_xrefs'] = db_xrefs
     return ups