Skip to content

Commit

Permalink
TCR chain type tokens: alpha, gamma, delta and their CDR3 regions
Browse files Browse the repository at this point in the history
  • Loading branch information
VADIM RATNER [email protected] committed Jun 2, 2024
1 parent 2a14ae4 commit 14e1b7b
Show file tree
Hide file tree
Showing 10 changed files with 510 additions and 10 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -2702,6 +2702,51 @@
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 300,
"content": "<MOLECULAR_ENTITY_TCR_ALPHA_CDR3>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 301,
"content": "<MOLECULAR_ENTITY_TCR_DELTA_CDR3>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 302,
"content": "<MOLECULAR_ENTITY_TCR_DELTA_VAR>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 303,
"content": "<MOLECULAR_ENTITY_TCR_GAMMA_CDR3>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 304,
"content": "<MOLECULAR_ENTITY_TCR_GAMMA_VAR>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
}
],
"normalizer": null,
Expand Down Expand Up @@ -3017,6 +3062,11 @@
"<GENERAL_CHAIN>": 297,
"<SUBMOLECULAR_ENTITY>": 298,
"<MUTATED>": 299,
"<MOLECULAR_ENTITY_TCR_ALPHA_CDR3>": 300,
"<MOLECULAR_ENTITY_TCR_DELTA_CDR3>": 301,
"<MOLECULAR_ENTITY_TCR_DELTA_VAR>": 302,
"<MOLECULAR_ENTITY_TCR_GAMMA_CDR3>": 303,
"<MOLECULAR_ENTITY_TCR_GAMMA_VAR>": 304,
"#": 527,
"%": 528,
"(": 529,
Expand Down Expand Up @@ -8958,4 +9008,4 @@
"c2n c3n("
]
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -2702,6 +2702,51 @@
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 300,
"content": "<MOLECULAR_ENTITY_TCR_ALPHA_CDR3>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 301,
"content": "<MOLECULAR_ENTITY_TCR_DELTA_CDR3>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 302,
"content": "<MOLECULAR_ENTITY_TCR_DELTA_VAR>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 303,
"content": "<MOLECULAR_ENTITY_TCR_GAMMA_CDR3>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 304,
"content": "<MOLECULAR_ENTITY_TCR_GAMMA_VAR>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
}
],
"normalizer": null,
Expand Down Expand Up @@ -3023,6 +3068,11 @@
"<GENERAL_CHAIN>": 297,
"<SUBMOLECULAR_ENTITY>": 298,
"<MUTATED>": 299,
"<MOLECULAR_ENTITY_TCR_ALPHA_CDR3>": 300,
"<MOLECULAR_ENTITY_TCR_DELTA_CDR3>": 301,
"<MOLECULAR_ENTITY_TCR_DELTA_VAR>": 302,
"<MOLECULAR_ENTITY_TCR_GAMMA_CDR3>": 303,
"<MOLECULAR_ENTITY_TCR_GAMMA_VAR>": 304,
"[CL:0000499]": 3522,
"[CL:2000060]": 3523,
"[CL:0000235]": 3524,
Expand Down Expand Up @@ -3815,4 +3865,4 @@
},
"unk_token": "<UNK>"
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -2702,6 +2702,51 @@
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 300,
"content": "<MOLECULAR_ENTITY_TCR_ALPHA_CDR3>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 301,
"content": "<MOLECULAR_ENTITY_TCR_DELTA_CDR3>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 302,
"content": "<MOLECULAR_ENTITY_TCR_DELTA_VAR>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 303,
"content": "<MOLECULAR_ENTITY_TCR_GAMMA_CDR3>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 304,
"content": "<MOLECULAR_ENTITY_TCR_GAMMA_VAR>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
}
],
"normalizer": null,
Expand Down Expand Up @@ -3023,6 +3068,11 @@
"<GENERAL_CHAIN>": 297,
"<SUBMOLECULAR_ENTITY>": 298,
"<MUTATED>": 299,
"<MOLECULAR_ENTITY_TCR_ALPHA_CDR3>": 300,
"<MOLECULAR_ENTITY_TCR_DELTA_CDR3>": 301,
"<MOLECULAR_ENTITY_TCR_DELTA_VAR>": 302,
"<MOLECULAR_ENTITY_TCR_GAMMA_CDR3>": 303,
"<MOLECULAR_ENTITY_TCR_GAMMA_VAR>": 304,
"[100130093]": 5000,
"[100133445]": 5001,
"[100286793]": 5002,
Expand Down Expand Up @@ -97930,4 +97980,4 @@
},
"unk_token": "<UNK>"
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -2702,6 +2702,51 @@
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 300,
"content": "<MOLECULAR_ENTITY_TCR_ALPHA_CDR3>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 301,
"content": "<MOLECULAR_ENTITY_TCR_DELTA_CDR3>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 302,
"content": "<MOLECULAR_ENTITY_TCR_DELTA_VAR>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 303,
"content": "<MOLECULAR_ENTITY_TCR_GAMMA_CDR3>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 304,
"content": "<MOLECULAR_ENTITY_TCR_GAMMA_VAR>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
}
],
"normalizer": null,
Expand Down Expand Up @@ -3023,6 +3068,11 @@
"<GENERAL_CHAIN>": 297,
"<SUBMOLECULAR_ENTITY>": 298,
"<MUTATED>": 299,
"<MOLECULAR_ENTITY_TCR_ALPHA_CDR3>": 300,
"<MOLECULAR_ENTITY_TCR_DELTA_CDR3>": 301,
"<MOLECULAR_ENTITY_TCR_DELTA_VAR>": 302,
"<MOLECULAR_ENTITY_TCR_GAMMA_CDR3>": 303,
"<MOLECULAR_ENTITY_TCR_GAMMA_VAR>": 304,
"A": 501,
"B": 502,
"C": 503,
Expand Down Expand Up @@ -3052,4 +3102,4 @@
},
"unk_token": "<UNK>"
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -2702,6 +2702,51 @@
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 300,
"content": "<MOLECULAR_ENTITY_TCR_ALPHA_CDR3>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 301,
"content": "<MOLECULAR_ENTITY_TCR_DELTA_CDR3>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 302,
"content": "<MOLECULAR_ENTITY_TCR_DELTA_VAR>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 303,
"content": "<MOLECULAR_ENTITY_TCR_GAMMA_CDR3>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 304,
"content": "<MOLECULAR_ENTITY_TCR_GAMMA_VAR>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
}
],
"normalizer": null,
Expand Down Expand Up @@ -3017,6 +3062,11 @@
"<GENERAL_CHAIN>": 297,
"<SUBMOLECULAR_ENTITY>": 298,
"<MUTATED>": 299,
"<MOLECULAR_ENTITY_TCR_ALPHA_CDR3>": 300,
"<MOLECULAR_ENTITY_TCR_DELTA_CDR3>": 301,
"<MOLECULAR_ENTITY_TCR_DELTA_VAR>": 302,
"<MOLECULAR_ENTITY_TCR_GAMMA_CDR3>": 303,
"<MOLECULAR_ENTITY_TCR_GAMMA_VAR>": 304,
"#": 527,
"%": 528,
"(": 529,
Expand Down Expand Up @@ -8958,4 +9008,4 @@
"c2n c3n("
]
}
}
}
Loading

0 comments on commit 14e1b7b

Please sign in to comment.