Skip to content

Commit

Permalink
TCR chain type tokens: alpha, gamma, delta and their CDR3 regions
Browse files Browse the repository at this point in the history
  • Loading branch information
VADIM RATNER [email protected] committed Jun 2, 2024
1 parent 14e1b7b commit b86dc56
Show file tree
Hide file tree
Showing 11 changed files with 16 additions and 16 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -9008,4 +9008,4 @@
"c2n c3n("
]
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -3865,4 +3865,4 @@
},
"unk_token": "<UNK>"
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -97980,4 +97980,4 @@
},
"unk_token": "<UNK>"
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -3102,4 +3102,4 @@
},
"unk_token": "<UNK>"
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -9008,4 +9008,4 @@
"c2n c3n("
]
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -3865,4 +3865,4 @@
},
"unk_token": "<UNK>"
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -3102,4 +3102,4 @@
},
"unk_token": "<UNK>"
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -9008,4 +9008,4 @@
"c2n c3n("
]
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -3865,4 +3865,4 @@
},
"unk_token": "<UNK>"
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -3102,4 +3102,4 @@
},
"unk_token": "<UNK>"
}
}
}
12 changes: 6 additions & 6 deletions fusedrug/data/tokenizer/modulartokenizer/special_tokens.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,12 +39,12 @@
"MOLECULAR_ENTITY_ANTIBODY_HEAVY_CHAIN_CDR3",
"MOLECULAR_ENTITY_TCR_ALPHA_CHAIN", # TCR "light" chain - only V, J and C segments (variable region)
"MOLECULAR_ENTITY_TCR_BETA_VDJ", # TCR "heavy" chain - V(ariable), D(iversity), and J(oining) segments, as well as the C(onstant) segment
"MOLECULAR_ENTITY_TCR_BETA_CDR3", # TCR beta chain CDR3 region
"MOLECULAR_ENTITY_TCR_GAMMA_VAR", # TCR gamma chain variable region
"MOLECULAR_ENTITY_TCR_DELTA_VAR", # TCR delta chain variable region
"MOLECULAR_ENTITY_TCR_ALPHA_CDR3", # TCR alpha chain CDR3 region
"MOLECULAR_ENTITY_TCR_GAMMA_CDR3", # TCR gamma chain CDR3 region
"MOLECULAR_ENTITY_TCR_DELTA_CDR3", # TCR delta chain CDR3 region
"MOLECULAR_ENTITY_TCR_BETA_CDR3", # TCR beta chain CDR3 region
"MOLECULAR_ENTITY_TCR_GAMMA_VAR", # TCR gamma chain variable region
"MOLECULAR_ENTITY_TCR_DELTA_VAR", # TCR delta chain variable region
"MOLECULAR_ENTITY_TCR_ALPHA_CDR3", # TCR alpha chain CDR3 region
"MOLECULAR_ENTITY_TCR_GAMMA_CDR3", # TCR gamma chain CDR3 region
"MOLECULAR_ENTITY_TCR_DELTA_CDR3", # TCR delta chain CDR3 region
"TARGETED_ANTIBODY_DESIGN_ENCODER_ONLY_MODE", # A prefix that tells our T5 model to run in "encoder only" mode, i.e. only
# the encoder stack is used, plus the classification head over the encoder output tokens
"DECODER_START",
Expand Down

0 comments on commit b86dc56

Please sign in to comment.