Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Dev-GCGI-1446_fusion-total #498

Merged
merged 4 commits into from
Dec 4, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 5 additions & 1 deletion src/lib/djerba/plugins/fusion/plugin.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,14 +43,18 @@ def sort_by_actionable_level(row):
total_fusion_genes = fus_reader.get_total_fusion_genes()
gene_pair_fusions = fus_reader.get_fusions()
if gene_pair_fusions is not None:

outputs = fus_reader.fusions_to_json(gene_pair_fusions, wrapper.get_my_string(fc.ONCOTREE_CODE))
[rows, gene_info, treatment_opts] = outputs

#sort by OncoKB level
rows = sorted(rows, key=sort_by_actionable_level)
rows = oncokb_levels.filter_reportable(rows)
unique_rows = set(map(lambda x: x['fusion'], rows))

results = {
fc.TOTAL_VARIANTS: total_fusion_genes,
fc.CLINICALLY_RELEVANT_VARIANTS: fus_reader.get_total_oncokb_fusions(),
fc.CLINICALLY_RELEVANT_VARIANTS: len(unique_rows),
fc.NCCN_RELEVANT_VARIANTS: fus_reader.get_total_nccn_fusions(),
fc.BODY: rows
}
Expand Down
2 changes: 1 addition & 1 deletion src/lib/djerba/plugins/fusion/test/plugin_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ def test(self):
params = {
self.INI: self.INI_NAME,
self.JSON: self.JSON_NAME,
self.MD5: '3e5bb853abd4a76dbd45414ea1a9af52'
self.MD5: '6b80957262c258a0a0641b9ab9652725'
}
self.run_basic_test(input_dir, params, 'fusion', logging.ERROR, work_dir)

Expand Down
58 changes: 39 additions & 19 deletions src/lib/djerba/plugins/fusion/tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,51 +42,68 @@ def __init__(self, input_dir, log_level=logging.WARNING, log_path=None):
[fusions, self.total_fusion_genes, self.total_oncokb_fusions, self.total_nccn_fusions] = self._collate_row_data(fusion_data, annotations)
# sort the fusions by fusion ID
self.fusions = sorted(fusions, key=lambda f: f.get_fusion_id_new())

def _collate_row_data(self, fusion_data, annotations):
fusions = []
fusion_genes = set()
fusions = [] # List to store valid fusion entries
fusion_genes = set() # Set to track distinct genes involved in fusions
self.logger.debug("Starting to collate fusion table data.")
intragenic = 0
nccn_fusion_total = 0
NCCN_fusions = set()
intragenic = 0 # Counter for intragenic fusions
nccn_fusion_total = 0 # Counter for fusions rescued by NCCN annotation
NCCN_fusions = set() # Set to store NCCN-annotated fusions

# Read NCCN-annotated fusions from a file
with open(os.path.join(self.input_dir, fc.DATA_FUSIONS_NCCN_ANNOTATED)) as data_file:
for row in csv.DictReader(data_file, delimiter="\t"):
NCCN_fusions.add(row['Fusion'])
NCCN_fusions.add(row['Fusion']) # Add each fusion ID to the set

# Iterate over all fusion IDs in fusion_data
for fusion_id in fusion_data.keys():
gene2_exists = True
if len(fusion_data[fusion_id])==1:
# skip intragenic fusions, but add to the gene count
gene2_exists = True # Assume a second gene exists initially
# Case: Intragenic fusions (only one gene involved)
if len(fusion_data[fusion_id]) == 1:
# Skip intragenic fusions, but add to the gene count
fusion_genes.add(fusion_data[fusion_id][0][fc.HUGO_SYMBOL])
if fusion_id in NCCN_fusions:
self.logger.debug("Fusion {0} rescued by NCCN annotation".format(fusion))
gene2_exists = False
# If the fusion is in the NCCN-annotated list, it's "rescued"
self.logger.debug("Fusion {0} rescued by NCCN annotation".format(fusion_id))
gene2_exists = False # No second gene; marked as "Intergenic"
gene2 = "Intergenic"
nccn_fusion_total += 1
nccn_fusion_total += 1 # Increment NCCN-rescued fusion count
else:
intragenic += 1
intragenic += 1 # Increment intragenic count and skip processing
continue
elif len(fusion_data[fusion_id]) >= 3:
# Error case: More than two genes for a single fusion ID
msg = "More than 2 fusions with the same name: {0}".format(fusion_id)
self.logger.error(msg)
raise RuntimeError(msg)

# Normal case: Valid fusion data with one or two genes
gene1 = fusion_data[fusion_id][0][fc.HUGO_SYMBOL]
if gene2_exists:
# If a second gene exists, retrieve it
gene2 = fusion_data[fusion_id][1][fc.HUGO_SYMBOL]
# Add both genes to the set
fusion_genes.add(gene1)
fusion_genes.add(gene2)

# Case: Two genes exist for the fusion
if gene2_exists:
for row_input in annotations[fusion_id]:
effect = row_input['MUTATION_EFFECT']
level = oncokb_levels.parse_oncokb_level(row_input)
effect = row_input['MUTATION_EFFECT'] # Get mutation effect
level = oncokb_levels.parse_oncokb_level(row_input) # Parse oncokb level
else:
# Case: No second gene (rescued by NCCN)
effect = "Undetermined"
level = "P"

# If the level is valid, add therapies information
if level not in ['Unknown', 'NA']:
if gene2_exists:
therapies = oncokb_levels.parse_actionable_therapies(row_input)
else:
therapies = {"P": "Prognostic"}
# Append a new fusion object to the list
fusions.append(
fusion(
fusion_id,
Expand All @@ -101,13 +118,16 @@ def _collate_row_data(self, fusion_data, annotations):
)
)
total = len(fusions) - nccn_fusion_total
total_fusion_genes = len(fusion_genes)
msg = "Finished collating fusion table data. "+\
"Found {0} fusion rows for {1} distinct genes; ".format(total, total_fusion_genes)+\
total_fusion_genes = len(fusion_genes) # Count distinct genes

msg = "Finished collating fusion table data. " + \
"Found {0} fusion rows for {1} distinct genes; ".format(total, total_fusion_genes) + \
"excluded {0} intragenic rows.".format(intragenic)
self.logger.info(msg)

for fusion_row in fusions:
self.logger.debug("Fusions: {0}".format(fusion_row.get_genes()))

return [fusions, total_fusion_genes, total, nccn_fusion_total]

def build_treatment_entries(self, fusion, therapies, oncotree_code):
Expand Down
Loading