diff --git a/fusedrug/data/tokenizer/modulartokenizer/pretrained_tokenizers/bmfm_extended_modular_tokenizer/bpe_tokenizer_trained_on_chembl_zinc_with_aug_4272372_samples_balanced_1_1.json b/fusedrug/data/tokenizer/modulartokenizer/pretrained_tokenizers/bmfm_extended_modular_tokenizer/bpe_tokenizer_trained_on_chembl_zinc_with_aug_4272372_samples_balanced_1_1.json
index 60790c73..18ba1702 100644
--- a/fusedrug/data/tokenizer/modulartokenizer/pretrained_tokenizers/bmfm_extended_modular_tokenizer/bpe_tokenizer_trained_on_chembl_zinc_with_aug_4272372_samples_balanced_1_1.json
+++ b/fusedrug/data/tokenizer/modulartokenizer/pretrained_tokenizers/bmfm_extended_modular_tokenizer/bpe_tokenizer_trained_on_chembl_zinc_with_aug_4272372_samples_balanced_1_1.json
@@ -2702,6 +2702,51 @@
       "rstrip": false,
       "normalized": false,
       "special": true
+    },
+    {
+      "id": 300,
+      "content": "<MOLECULAR_ENTITY_TCR_ALPHA_CDR3>",
+      "single_word": false,
+      "lstrip": false,
+      "rstrip": false,
+      "normalized": false,
+      "special": true
+    },
+    {
+      "id": 301,
+      "content": "<MOLECULAR_ENTITY_TCR_DELTA_CDR3>",
+      "single_word": false,
+      "lstrip": false,
+      "rstrip": false,
+      "normalized": false,
+      "special": true
+    },
+    {
+      "id": 302,
+      "content": "<MOLECULAR_ENTITY_TCR_DELTA_VAR>",
+      "single_word": false,
+      "lstrip": false,
+      "rstrip": false,
+      "normalized": false,
+      "special": true
+    },
+    {
+      "id": 303,
+      "content": "<MOLECULAR_ENTITY_TCR_GAMMA_CDR3>",
+      "single_word": false,
+      "lstrip": false,
+      "rstrip": false,
+      "normalized": false,
+      "special": true
+    },
+    {
+      "id": 304,
+      "content": "<MOLECULAR_ENTITY_TCR_GAMMA_VAR>",
+      "single_word": false,
+      "lstrip": false,
+      "rstrip": false,
+      "normalized": false,
+      "special": true
     }
   ],
   "normalizer": null,
@@ -3017,6 +3062,11 @@
       "<GENERAL_CHAIN>": 297,
       "<SUBMOLECULAR_ENTITY>": 298,
       "<MUTATED>": 299,
+      "<MOLECULAR_ENTITY_TCR_ALPHA_CDR3>": 300,
+      "<MOLECULAR_ENTITY_TCR_DELTA_CDR3>": 301,
+      "<MOLECULAR_ENTITY_TCR_DELTA_VAR>": 302,
+      "<MOLECULAR_ENTITY_TCR_GAMMA_CDR3>": 303,
+      "<MOLECULAR_ENTITY_TCR_GAMMA_VAR>": 304,
       "#": 527,
       "%": 528,
       "(": 529,
@@ -8958,4 +9008,4 @@
       "c2n c3n("
     ]
   }
-}
+}
\ No newline at end of file
diff --git a/fusedrug/data/tokenizer/modulartokenizer/pretrained_tokenizers/bmfm_extended_modular_tokenizer/cell_attributes_tokenizer.json b/fusedrug/data/tokenizer/modulartokenizer/pretrained_tokenizers/bmfm_extended_modular_tokenizer/cell_attributes_tokenizer.json
index a7d4e535..75b888e2 100644
--- a/fusedrug/data/tokenizer/modulartokenizer/pretrained_tokenizers/bmfm_extended_modular_tokenizer/cell_attributes_tokenizer.json
+++ b/fusedrug/data/tokenizer/modulartokenizer/pretrained_tokenizers/bmfm_extended_modular_tokenizer/cell_attributes_tokenizer.json
@@ -2702,6 +2702,51 @@
       "rstrip": false,
       "normalized": false,
       "special": true
+    },
+    {
+      "id": 300,
+      "content": "<MOLECULAR_ENTITY_TCR_ALPHA_CDR3>",
+      "single_word": false,
+      "lstrip": false,
+      "rstrip": false,
+      "normalized": false,
+      "special": true
+    },
+    {
+      "id": 301,
+      "content": "<MOLECULAR_ENTITY_TCR_DELTA_CDR3>",
+      "single_word": false,
+      "lstrip": false,
+      "rstrip": false,
+      "normalized": false,
+      "special": true
+    },
+    {
+      "id": 302,
+      "content": "<MOLECULAR_ENTITY_TCR_DELTA_VAR>",
+      "single_word": false,
+      "lstrip": false,
+      "rstrip": false,
+      "normalized": false,
+      "special": true
+    },
+    {
+      "id": 303,
+      "content": "<MOLECULAR_ENTITY_TCR_GAMMA_CDR3>",
+      "single_word": false,
+      "lstrip": false,
+      "rstrip": false,
+      "normalized": false,
+      "special": true
+    },
+    {
+      "id": 304,
+      "content": "<MOLECULAR_ENTITY_TCR_GAMMA_VAR>",
+      "single_word": false,
+      "lstrip": false,
+      "rstrip": false,
+      "normalized": false,
+      "special": true
     }
   ],
   "normalizer": null,
@@ -3023,6 +3068,11 @@
       "<GENERAL_CHAIN>": 297,
       "<SUBMOLECULAR_ENTITY>": 298,
       "<MUTATED>": 299,
+      "<MOLECULAR_ENTITY_TCR_ALPHA_CDR3>": 300,
+      "<MOLECULAR_ENTITY_TCR_DELTA_CDR3>": 301,
+      "<MOLECULAR_ENTITY_TCR_DELTA_VAR>": 302,
+      "<MOLECULAR_ENTITY_TCR_GAMMA_CDR3>": 303,
+      "<MOLECULAR_ENTITY_TCR_GAMMA_VAR>": 304,
       "[CL:0000499]": 3522,
       "[CL:2000060]": 3523,
       "[CL:0000235]": 3524,
@@ -3815,4 +3865,4 @@
     },
     "unk_token": "<UNK>"
   }
-}
+}
\ No newline at end of file
diff --git a/fusedrug/data/tokenizer/modulartokenizer/pretrained_tokenizers/bmfm_extended_modular_tokenizer/gene_tokenizer.json b/fusedrug/data/tokenizer/modulartokenizer/pretrained_tokenizers/bmfm_extended_modular_tokenizer/gene_tokenizer.json
index 64375885..6f1c56d7 100644
--- a/fusedrug/data/tokenizer/modulartokenizer/pretrained_tokenizers/bmfm_extended_modular_tokenizer/gene_tokenizer.json
+++ b/fusedrug/data/tokenizer/modulartokenizer/pretrained_tokenizers/bmfm_extended_modular_tokenizer/gene_tokenizer.json
@@ -2702,6 +2702,51 @@
       "rstrip": false,
       "normalized": false,
       "special": true
+    },
+    {
+      "id": 300,
+      "content": "<MOLECULAR_ENTITY_TCR_ALPHA_CDR3>",
+      "single_word": false,
+      "lstrip": false,
+      "rstrip": false,
+      "normalized": false,
+      "special": true
+    },
+    {
+      "id": 301,
+      "content": "<MOLECULAR_ENTITY_TCR_DELTA_CDR3>",
+      "single_word": false,
+      "lstrip": false,
+      "rstrip": false,
+      "normalized": false,
+      "special": true
+    },
+    {
+      "id": 302,
+      "content": "<MOLECULAR_ENTITY_TCR_DELTA_VAR>",
+      "single_word": false,
+      "lstrip": false,
+      "rstrip": false,
+      "normalized": false,
+      "special": true
+    },
+    {
+      "id": 303,
+      "content": "<MOLECULAR_ENTITY_TCR_GAMMA_CDR3>",
+      "single_word": false,
+      "lstrip": false,
+      "rstrip": false,
+      "normalized": false,
+      "special": true
+    },
+    {
+      "id": 304,
+      "content": "<MOLECULAR_ENTITY_TCR_GAMMA_VAR>",
+      "single_word": false,
+      "lstrip": false,
+      "rstrip": false,
+      "normalized": false,
+      "special": true
     }
   ],
   "normalizer": null,
@@ -3023,6 +3068,11 @@
       "<GENERAL_CHAIN>": 297,
       "<SUBMOLECULAR_ENTITY>": 298,
       "<MUTATED>": 299,
+      "<MOLECULAR_ENTITY_TCR_ALPHA_CDR3>": 300,
+      "<MOLECULAR_ENTITY_TCR_DELTA_CDR3>": 301,
+      "<MOLECULAR_ENTITY_TCR_DELTA_VAR>": 302,
+      "<MOLECULAR_ENTITY_TCR_GAMMA_CDR3>": 303,
+      "<MOLECULAR_ENTITY_TCR_GAMMA_VAR>": 304,
       "[100130093]": 5000,
       "[100133445]": 5001,
       "[100286793]": 5002,
@@ -97930,4 +97980,4 @@
     },
     "unk_token": "<UNK>"
   }
-}
+}
\ No newline at end of file
diff --git a/fusedrug/data/tokenizer/modulartokenizer/pretrained_tokenizers/bmfm_extended_modular_tokenizer/t5_tokenizer_AA_special.json b/fusedrug/data/tokenizer/modulartokenizer/pretrained_tokenizers/bmfm_extended_modular_tokenizer/t5_tokenizer_AA_special.json
index dbb3d5ee..4322238a 100644
--- a/fusedrug/data/tokenizer/modulartokenizer/pretrained_tokenizers/bmfm_extended_modular_tokenizer/t5_tokenizer_AA_special.json
+++ b/fusedrug/data/tokenizer/modulartokenizer/pretrained_tokenizers/bmfm_extended_modular_tokenizer/t5_tokenizer_AA_special.json
@@ -2702,6 +2702,51 @@
       "rstrip": false,
       "normalized": false,
       "special": true
+    },
+    {
+      "id": 300,
+      "content": "<MOLECULAR_ENTITY_TCR_ALPHA_CDR3>",
+      "single_word": false,
+      "lstrip": false,
+      "rstrip": false,
+      "normalized": false,
+      "special": true
+    },
+    {
+      "id": 301,
+      "content": "<MOLECULAR_ENTITY_TCR_DELTA_CDR3>",
+      "single_word": false,
+      "lstrip": false,
+      "rstrip": false,
+      "normalized": false,
+      "special": true
+    },
+    {
+      "id": 302,
+      "content": "<MOLECULAR_ENTITY_TCR_DELTA_VAR>",
+      "single_word": false,
+      "lstrip": false,
+      "rstrip": false,
+      "normalized": false,
+      "special": true
+    },
+    {
+      "id": 303,
+      "content": "<MOLECULAR_ENTITY_TCR_GAMMA_CDR3>",
+      "single_word": false,
+      "lstrip": false,
+      "rstrip": false,
+      "normalized": false,
+      "special": true
+    },
+    {
+      "id": 304,
+      "content": "<MOLECULAR_ENTITY_TCR_GAMMA_VAR>",
+      "single_word": false,
+      "lstrip": false,
+      "rstrip": false,
+      "normalized": false,
+      "special": true
     }
   ],
   "normalizer": null,
@@ -3023,6 +3068,11 @@
       "<GENERAL_CHAIN>": 297,
       "<SUBMOLECULAR_ENTITY>": 298,
       "<MUTATED>": 299,
+      "<MOLECULAR_ENTITY_TCR_ALPHA_CDR3>": 300,
+      "<MOLECULAR_ENTITY_TCR_DELTA_CDR3>": 301,
+      "<MOLECULAR_ENTITY_TCR_DELTA_VAR>": 302,
+      "<MOLECULAR_ENTITY_TCR_GAMMA_CDR3>": 303,
+      "<MOLECULAR_ENTITY_TCR_GAMMA_VAR>": 304,
       "A": 501,
       "B": 502,
       "C": 503,
@@ -3052,4 +3102,4 @@
     },
     "unk_token": "<UNK>"
   }
-}
+}
\ No newline at end of file
diff --git a/fusedrug/data/tokenizer/modulartokenizer/pretrained_tokenizers/bmfm_modular_tokenizer/bpe_tokenizer_trained_on_chembl_zinc_with_aug_4272372_samples_balanced_1_1.json b/fusedrug/data/tokenizer/modulartokenizer/pretrained_tokenizers/bmfm_modular_tokenizer/bpe_tokenizer_trained_on_chembl_zinc_with_aug_4272372_samples_balanced_1_1.json
index 60790c73..18ba1702 100644
--- a/fusedrug/data/tokenizer/modulartokenizer/pretrained_tokenizers/bmfm_modular_tokenizer/bpe_tokenizer_trained_on_chembl_zinc_with_aug_4272372_samples_balanced_1_1.json
+++ b/fusedrug/data/tokenizer/modulartokenizer/pretrained_tokenizers/bmfm_modular_tokenizer/bpe_tokenizer_trained_on_chembl_zinc_with_aug_4272372_samples_balanced_1_1.json
@@ -2702,6 +2702,51 @@
       "rstrip": false,
       "normalized": false,
       "special": true
+    },
+    {
+      "id": 300,
+      "content": "<MOLECULAR_ENTITY_TCR_ALPHA_CDR3>",
+      "single_word": false,
+      "lstrip": false,
+      "rstrip": false,
+      "normalized": false,
+      "special": true
+    },
+    {
+      "id": 301,
+      "content": "<MOLECULAR_ENTITY_TCR_DELTA_CDR3>",
+      "single_word": false,
+      "lstrip": false,
+      "rstrip": false,
+      "normalized": false,
+      "special": true
+    },
+    {
+      "id": 302,
+      "content": "<MOLECULAR_ENTITY_TCR_DELTA_VAR>",
+      "single_word": false,
+      "lstrip": false,
+      "rstrip": false,
+      "normalized": false,
+      "special": true
+    },
+    {
+      "id": 303,
+      "content": "<MOLECULAR_ENTITY_TCR_GAMMA_CDR3>",
+      "single_word": false,
+      "lstrip": false,
+      "rstrip": false,
+      "normalized": false,
+      "special": true
+    },
+    {
+      "id": 304,
+      "content": "<MOLECULAR_ENTITY_TCR_GAMMA_VAR>",
+      "single_word": false,
+      "lstrip": false,
+      "rstrip": false,
+      "normalized": false,
+      "special": true
     }
   ],
   "normalizer": null,
@@ -3017,6 +3062,11 @@
       "<GENERAL_CHAIN>": 297,
       "<SUBMOLECULAR_ENTITY>": 298,
       "<MUTATED>": 299,
+      "<MOLECULAR_ENTITY_TCR_ALPHA_CDR3>": 300,
+      "<MOLECULAR_ENTITY_TCR_DELTA_CDR3>": 301,
+      "<MOLECULAR_ENTITY_TCR_DELTA_VAR>": 302,
+      "<MOLECULAR_ENTITY_TCR_GAMMA_CDR3>": 303,
+      "<MOLECULAR_ENTITY_TCR_GAMMA_VAR>": 304,
       "#": 527,
       "%": 528,
       "(": 529,
@@ -8958,4 +9008,4 @@
       "c2n c3n("
     ]
   }
-}
+}
\ No newline at end of file
diff --git a/fusedrug/data/tokenizer/modulartokenizer/pretrained_tokenizers/bmfm_modular_tokenizer/cell_attributes_tokenizer.json b/fusedrug/data/tokenizer/modulartokenizer/pretrained_tokenizers/bmfm_modular_tokenizer/cell_attributes_tokenizer.json
index a7d4e535..75b888e2 100644
--- a/fusedrug/data/tokenizer/modulartokenizer/pretrained_tokenizers/bmfm_modular_tokenizer/cell_attributes_tokenizer.json
+++ b/fusedrug/data/tokenizer/modulartokenizer/pretrained_tokenizers/bmfm_modular_tokenizer/cell_attributes_tokenizer.json
@@ -2702,6 +2702,51 @@
       "rstrip": false,
       "normalized": false,
       "special": true
+    },
+    {
+      "id": 300,
+      "content": "<MOLECULAR_ENTITY_TCR_ALPHA_CDR3>",
+      "single_word": false,
+      "lstrip": false,
+      "rstrip": false,
+      "normalized": false,
+      "special": true
+    },
+    {
+      "id": 301,
+      "content": "<MOLECULAR_ENTITY_TCR_DELTA_CDR3>",
+      "single_word": false,
+      "lstrip": false,
+      "rstrip": false,
+      "normalized": false,
+      "special": true
+    },
+    {
+      "id": 302,
+      "content": "<MOLECULAR_ENTITY_TCR_DELTA_VAR>",
+      "single_word": false,
+      "lstrip": false,
+      "rstrip": false,
+      "normalized": false,
+      "special": true
+    },
+    {
+      "id": 303,
+      "content": "<MOLECULAR_ENTITY_TCR_GAMMA_CDR3>",
+      "single_word": false,
+      "lstrip": false,
+      "rstrip": false,
+      "normalized": false,
+      "special": true
+    },
+    {
+      "id": 304,
+      "content": "<MOLECULAR_ENTITY_TCR_GAMMA_VAR>",
+      "single_word": false,
+      "lstrip": false,
+      "rstrip": false,
+      "normalized": false,
+      "special": true
     }
   ],
   "normalizer": null,
@@ -3023,6 +3068,11 @@
       "<GENERAL_CHAIN>": 297,
       "<SUBMOLECULAR_ENTITY>": 298,
       "<MUTATED>": 299,
+      "<MOLECULAR_ENTITY_TCR_ALPHA_CDR3>": 300,
+      "<MOLECULAR_ENTITY_TCR_DELTA_CDR3>": 301,
+      "<MOLECULAR_ENTITY_TCR_DELTA_VAR>": 302,
+      "<MOLECULAR_ENTITY_TCR_GAMMA_CDR3>": 303,
+      "<MOLECULAR_ENTITY_TCR_GAMMA_VAR>": 304,
       "[CL:0000499]": 3522,
       "[CL:2000060]": 3523,
       "[CL:0000235]": 3524,
@@ -3815,4 +3865,4 @@
     },
     "unk_token": "<UNK>"
   }
-}
+}
\ No newline at end of file
diff --git a/fusedrug/data/tokenizer/modulartokenizer/pretrained_tokenizers/bmfm_modular_tokenizer/t5_tokenizer_AA_special.json b/fusedrug/data/tokenizer/modulartokenizer/pretrained_tokenizers/bmfm_modular_tokenizer/t5_tokenizer_AA_special.json
index dbb3d5ee..4322238a 100644
--- a/fusedrug/data/tokenizer/modulartokenizer/pretrained_tokenizers/bmfm_modular_tokenizer/t5_tokenizer_AA_special.json
+++ b/fusedrug/data/tokenizer/modulartokenizer/pretrained_tokenizers/bmfm_modular_tokenizer/t5_tokenizer_AA_special.json
@@ -2702,6 +2702,51 @@
       "rstrip": false,
       "normalized": false,
       "special": true
+    },
+    {
+      "id": 300,
+      "content": "<MOLECULAR_ENTITY_TCR_ALPHA_CDR3>",
+      "single_word": false,
+      "lstrip": false,
+      "rstrip": false,
+      "normalized": false,
+      "special": true
+    },
+    {
+      "id": 301,
+      "content": "<MOLECULAR_ENTITY_TCR_DELTA_CDR3>",
+      "single_word": false,
+      "lstrip": false,
+      "rstrip": false,
+      "normalized": false,
+      "special": true
+    },
+    {
+      "id": 302,
+      "content": "<MOLECULAR_ENTITY_TCR_DELTA_VAR>",
+      "single_word": false,
+      "lstrip": false,
+      "rstrip": false,
+      "normalized": false,
+      "special": true
+    },
+    {
+      "id": 303,
+      "content": "<MOLECULAR_ENTITY_TCR_GAMMA_CDR3>",
+      "single_word": false,
+      "lstrip": false,
+      "rstrip": false,
+      "normalized": false,
+      "special": true
+    },
+    {
+      "id": 304,
+      "content": "<MOLECULAR_ENTITY_TCR_GAMMA_VAR>",
+      "single_word": false,
+      "lstrip": false,
+      "rstrip": false,
+      "normalized": false,
+      "special": true
     }
   ],
   "normalizer": null,
@@ -3023,6 +3068,11 @@
       "<GENERAL_CHAIN>": 297,
       "<SUBMOLECULAR_ENTITY>": 298,
       "<MUTATED>": 299,
+      "<MOLECULAR_ENTITY_TCR_ALPHA_CDR3>": 300,
+      "<MOLECULAR_ENTITY_TCR_DELTA_CDR3>": 301,
+      "<MOLECULAR_ENTITY_TCR_DELTA_VAR>": 302,
+      "<MOLECULAR_ENTITY_TCR_GAMMA_CDR3>": 303,
+      "<MOLECULAR_ENTITY_TCR_GAMMA_VAR>": 304,
       "A": 501,
       "B": 502,
       "C": 503,
@@ -3052,4 +3102,4 @@
     },
     "unk_token": "<UNK>"
   }
-}
+}
\ No newline at end of file
diff --git a/fusedrug/data/tokenizer/modulartokenizer/pretrained_tokenizers/modular_AA_SMILES_single_path/bpe_tokenizer_trained_on_chembl_zinc_with_aug_4272372_samples_balanced_1_1.json b/fusedrug/data/tokenizer/modulartokenizer/pretrained_tokenizers/modular_AA_SMILES_single_path/bpe_tokenizer_trained_on_chembl_zinc_with_aug_4272372_samples_balanced_1_1.json
index 60790c73..18ba1702 100644
--- a/fusedrug/data/tokenizer/modulartokenizer/pretrained_tokenizers/modular_AA_SMILES_single_path/bpe_tokenizer_trained_on_chembl_zinc_with_aug_4272372_samples_balanced_1_1.json
+++ b/fusedrug/data/tokenizer/modulartokenizer/pretrained_tokenizers/modular_AA_SMILES_single_path/bpe_tokenizer_trained_on_chembl_zinc_with_aug_4272372_samples_balanced_1_1.json
@@ -2702,6 +2702,51 @@
       "rstrip": false,
       "normalized": false,
       "special": true
+    },
+    {
+      "id": 300,
+      "content": "<MOLECULAR_ENTITY_TCR_ALPHA_CDR3>",
+      "single_word": false,
+      "lstrip": false,
+      "rstrip": false,
+      "normalized": false,
+      "special": true
+    },
+    {
+      "id": 301,
+      "content": "<MOLECULAR_ENTITY_TCR_DELTA_CDR3>",
+      "single_word": false,
+      "lstrip": false,
+      "rstrip": false,
+      "normalized": false,
+      "special": true
+    },
+    {
+      "id": 302,
+      "content": "<MOLECULAR_ENTITY_TCR_DELTA_VAR>",
+      "single_word": false,
+      "lstrip": false,
+      "rstrip": false,
+      "normalized": false,
+      "special": true
+    },
+    {
+      "id": 303,
+      "content": "<MOLECULAR_ENTITY_TCR_GAMMA_CDR3>",
+      "single_word": false,
+      "lstrip": false,
+      "rstrip": false,
+      "normalized": false,
+      "special": true
+    },
+    {
+      "id": 304,
+      "content": "<MOLECULAR_ENTITY_TCR_GAMMA_VAR>",
+      "single_word": false,
+      "lstrip": false,
+      "rstrip": false,
+      "normalized": false,
+      "special": true
     }
   ],
   "normalizer": null,
@@ -3017,6 +3062,11 @@
       "<GENERAL_CHAIN>": 297,
       "<SUBMOLECULAR_ENTITY>": 298,
       "<MUTATED>": 299,
+      "<MOLECULAR_ENTITY_TCR_ALPHA_CDR3>": 300,
+      "<MOLECULAR_ENTITY_TCR_DELTA_CDR3>": 301,
+      "<MOLECULAR_ENTITY_TCR_DELTA_VAR>": 302,
+      "<MOLECULAR_ENTITY_TCR_GAMMA_CDR3>": 303,
+      "<MOLECULAR_ENTITY_TCR_GAMMA_VAR>": 304,
       "#": 527,
       "%": 528,
       "(": 529,
@@ -8958,4 +9008,4 @@
       "c2n c3n("
     ]
   }
-}
+}
\ No newline at end of file
diff --git a/fusedrug/data/tokenizer/modulartokenizer/pretrained_tokenizers/modular_AA_SMILES_single_path/cell_attributes_tokenizer.json b/fusedrug/data/tokenizer/modulartokenizer/pretrained_tokenizers/modular_AA_SMILES_single_path/cell_attributes_tokenizer.json
index a7d4e535..75b888e2 100644
--- a/fusedrug/data/tokenizer/modulartokenizer/pretrained_tokenizers/modular_AA_SMILES_single_path/cell_attributes_tokenizer.json
+++ b/fusedrug/data/tokenizer/modulartokenizer/pretrained_tokenizers/modular_AA_SMILES_single_path/cell_attributes_tokenizer.json
@@ -2702,6 +2702,51 @@
       "rstrip": false,
       "normalized": false,
       "special": true
+    },
+    {
+      "id": 300,
+      "content": "<MOLECULAR_ENTITY_TCR_ALPHA_CDR3>",
+      "single_word": false,
+      "lstrip": false,
+      "rstrip": false,
+      "normalized": false,
+      "special": true
+    },
+    {
+      "id": 301,
+      "content": "<MOLECULAR_ENTITY_TCR_DELTA_CDR3>",
+      "single_word": false,
+      "lstrip": false,
+      "rstrip": false,
+      "normalized": false,
+      "special": true
+    },
+    {
+      "id": 302,
+      "content": "<MOLECULAR_ENTITY_TCR_DELTA_VAR>",
+      "single_word": false,
+      "lstrip": false,
+      "rstrip": false,
+      "normalized": false,
+      "special": true
+    },
+    {
+      "id": 303,
+      "content": "<MOLECULAR_ENTITY_TCR_GAMMA_CDR3>",
+      "single_word": false,
+      "lstrip": false,
+      "rstrip": false,
+      "normalized": false,
+      "special": true
+    },
+    {
+      "id": 304,
+      "content": "<MOLECULAR_ENTITY_TCR_GAMMA_VAR>",
+      "single_word": false,
+      "lstrip": false,
+      "rstrip": false,
+      "normalized": false,
+      "special": true
     }
   ],
   "normalizer": null,
@@ -3023,6 +3068,11 @@
       "<GENERAL_CHAIN>": 297,
       "<SUBMOLECULAR_ENTITY>": 298,
       "<MUTATED>": 299,
+      "<MOLECULAR_ENTITY_TCR_ALPHA_CDR3>": 300,
+      "<MOLECULAR_ENTITY_TCR_DELTA_CDR3>": 301,
+      "<MOLECULAR_ENTITY_TCR_DELTA_VAR>": 302,
+      "<MOLECULAR_ENTITY_TCR_GAMMA_CDR3>": 303,
+      "<MOLECULAR_ENTITY_TCR_GAMMA_VAR>": 304,
       "[CL:0000499]": 3522,
       "[CL:2000060]": 3523,
       "[CL:0000235]": 3524,
@@ -3815,4 +3865,4 @@
     },
     "unk_token": "<UNK>"
   }
-}
+}
\ No newline at end of file
diff --git a/fusedrug/data/tokenizer/modulartokenizer/pretrained_tokenizers/modular_AA_SMILES_single_path/t5_tokenizer_AA_special.json b/fusedrug/data/tokenizer/modulartokenizer/pretrained_tokenizers/modular_AA_SMILES_single_path/t5_tokenizer_AA_special.json
index dbb3d5ee..4322238a 100644
--- a/fusedrug/data/tokenizer/modulartokenizer/pretrained_tokenizers/modular_AA_SMILES_single_path/t5_tokenizer_AA_special.json
+++ b/fusedrug/data/tokenizer/modulartokenizer/pretrained_tokenizers/modular_AA_SMILES_single_path/t5_tokenizer_AA_special.json
@@ -2702,6 +2702,51 @@
       "rstrip": false,
       "normalized": false,
       "special": true
+    },
+    {
+      "id": 300,
+      "content": "<MOLECULAR_ENTITY_TCR_ALPHA_CDR3>",
+      "single_word": false,
+      "lstrip": false,
+      "rstrip": false,
+      "normalized": false,
+      "special": true
+    },
+    {
+      "id": 301,
+      "content": "<MOLECULAR_ENTITY_TCR_DELTA_CDR3>",
+      "single_word": false,
+      "lstrip": false,
+      "rstrip": false,
+      "normalized": false,
+      "special": true
+    },
+    {
+      "id": 302,
+      "content": "<MOLECULAR_ENTITY_TCR_DELTA_VAR>",
+      "single_word": false,
+      "lstrip": false,
+      "rstrip": false,
+      "normalized": false,
+      "special": true
+    },
+    {
+      "id": 303,
+      "content": "<MOLECULAR_ENTITY_TCR_GAMMA_CDR3>",
+      "single_word": false,
+      "lstrip": false,
+      "rstrip": false,
+      "normalized": false,
+      "special": true
+    },
+    {
+      "id": 304,
+      "content": "<MOLECULAR_ENTITY_TCR_GAMMA_VAR>",
+      "single_word": false,
+      "lstrip": false,
+      "rstrip": false,
+      "normalized": false,
+      "special": true
     }
   ],
   "normalizer": null,
@@ -3023,6 +3068,11 @@
       "<GENERAL_CHAIN>": 297,
       "<SUBMOLECULAR_ENTITY>": 298,
       "<MUTATED>": 299,
+      "<MOLECULAR_ENTITY_TCR_ALPHA_CDR3>": 300,
+      "<MOLECULAR_ENTITY_TCR_DELTA_CDR3>": 301,
+      "<MOLECULAR_ENTITY_TCR_DELTA_VAR>": 302,
+      "<MOLECULAR_ENTITY_TCR_GAMMA_CDR3>": 303,
+      "<MOLECULAR_ENTITY_TCR_GAMMA_VAR>": 304,
       "A": 501,
       "B": 502,
       "C": 503,
@@ -3052,4 +3102,4 @@
     },
     "unk_token": "<UNK>"
   }
-}
+}
\ No newline at end of file