Skip to content

Commit

Permalink
TCR chain type special tokens (#127)
Browse files Browse the repository at this point in the history
Co-authored-by: VADIM RATNER [email protected] <[email protected]>
Co-authored-by: VADIM RATNER [email protected] <[email protected]>
Co-authored-by: VADIM RATNER [email protected] <[email protected]>
Co-authored-by: VADIM RATNER [email protected] <[email protected]>
Co-authored-by: VADIM RATNER [email protected] <[email protected]>
Co-authored-by: VADIM RATNER [email protected] <[email protected]>
Co-authored-by: VADIM RATNER [email protected] <[email protected]>
Co-authored-by: VADIM RATNER [email protected] <[email protected]>
  • Loading branch information
9 people authored Jun 2, 2024
1 parent 4a1208a commit 7fc56d9
Show file tree
Hide file tree
Showing 11 changed files with 507 additions and 2 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -2702,6 +2702,51 @@
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 300,
"content": "<MOLECULAR_ENTITY_TCR_ALPHA_CDR3>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 301,
"content": "<MOLECULAR_ENTITY_TCR_DELTA_CDR3>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 302,
"content": "<MOLECULAR_ENTITY_TCR_DELTA_VAR>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 303,
"content": "<MOLECULAR_ENTITY_TCR_GAMMA_CDR3>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 304,
"content": "<MOLECULAR_ENTITY_TCR_GAMMA_VAR>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
}
],
"normalizer": null,
Expand Down Expand Up @@ -3017,6 +3062,11 @@
"<GENERAL_CHAIN>": 297,
"<SUBMOLECULAR_ENTITY>": 298,
"<MUTATED>": 299,
"<MOLECULAR_ENTITY_TCR_ALPHA_CDR3>": 300,
"<MOLECULAR_ENTITY_TCR_DELTA_CDR3>": 301,
"<MOLECULAR_ENTITY_TCR_DELTA_VAR>": 302,
"<MOLECULAR_ENTITY_TCR_GAMMA_CDR3>": 303,
"<MOLECULAR_ENTITY_TCR_GAMMA_VAR>": 304,
"#": 527,
"%": 528,
"(": 529,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2702,6 +2702,51 @@
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 300,
"content": "<MOLECULAR_ENTITY_TCR_ALPHA_CDR3>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 301,
"content": "<MOLECULAR_ENTITY_TCR_DELTA_CDR3>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 302,
"content": "<MOLECULAR_ENTITY_TCR_DELTA_VAR>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 303,
"content": "<MOLECULAR_ENTITY_TCR_GAMMA_CDR3>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 304,
"content": "<MOLECULAR_ENTITY_TCR_GAMMA_VAR>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
}
],
"normalizer": null,
Expand Down Expand Up @@ -3023,6 +3068,11 @@
"<GENERAL_CHAIN>": 297,
"<SUBMOLECULAR_ENTITY>": 298,
"<MUTATED>": 299,
"<MOLECULAR_ENTITY_TCR_ALPHA_CDR3>": 300,
"<MOLECULAR_ENTITY_TCR_DELTA_CDR3>": 301,
"<MOLECULAR_ENTITY_TCR_DELTA_VAR>": 302,
"<MOLECULAR_ENTITY_TCR_GAMMA_CDR3>": 303,
"<MOLECULAR_ENTITY_TCR_GAMMA_VAR>": 304,
"[CL:0000499]": 3522,
"[CL:2000060]": 3523,
"[CL:0000235]": 3524,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2702,6 +2702,51 @@
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 300,
"content": "<MOLECULAR_ENTITY_TCR_ALPHA_CDR3>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 301,
"content": "<MOLECULAR_ENTITY_TCR_DELTA_CDR3>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 302,
"content": "<MOLECULAR_ENTITY_TCR_DELTA_VAR>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 303,
"content": "<MOLECULAR_ENTITY_TCR_GAMMA_CDR3>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 304,
"content": "<MOLECULAR_ENTITY_TCR_GAMMA_VAR>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
}
],
"normalizer": null,
Expand Down Expand Up @@ -3023,6 +3068,11 @@
"<GENERAL_CHAIN>": 297,
"<SUBMOLECULAR_ENTITY>": 298,
"<MUTATED>": 299,
"<MOLECULAR_ENTITY_TCR_ALPHA_CDR3>": 300,
"<MOLECULAR_ENTITY_TCR_DELTA_CDR3>": 301,
"<MOLECULAR_ENTITY_TCR_DELTA_VAR>": 302,
"<MOLECULAR_ENTITY_TCR_GAMMA_CDR3>": 303,
"<MOLECULAR_ENTITY_TCR_GAMMA_VAR>": 304,
"[100130093]": 5000,
"[100133445]": 5001,
"[100286793]": 5002,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2702,6 +2702,51 @@
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 300,
"content": "<MOLECULAR_ENTITY_TCR_ALPHA_CDR3>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 301,
"content": "<MOLECULAR_ENTITY_TCR_DELTA_CDR3>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 302,
"content": "<MOLECULAR_ENTITY_TCR_DELTA_VAR>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 303,
"content": "<MOLECULAR_ENTITY_TCR_GAMMA_CDR3>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 304,
"content": "<MOLECULAR_ENTITY_TCR_GAMMA_VAR>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
}
],
"normalizer": null,
Expand Down Expand Up @@ -3023,6 +3068,11 @@
"<GENERAL_CHAIN>": 297,
"<SUBMOLECULAR_ENTITY>": 298,
"<MUTATED>": 299,
"<MOLECULAR_ENTITY_TCR_ALPHA_CDR3>": 300,
"<MOLECULAR_ENTITY_TCR_DELTA_CDR3>": 301,
"<MOLECULAR_ENTITY_TCR_DELTA_VAR>": 302,
"<MOLECULAR_ENTITY_TCR_GAMMA_CDR3>": 303,
"<MOLECULAR_ENTITY_TCR_GAMMA_VAR>": 304,
"A": 501,
"B": 502,
"C": 503,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2702,6 +2702,51 @@
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 300,
"content": "<MOLECULAR_ENTITY_TCR_ALPHA_CDR3>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 301,
"content": "<MOLECULAR_ENTITY_TCR_DELTA_CDR3>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 302,
"content": "<MOLECULAR_ENTITY_TCR_DELTA_VAR>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 303,
"content": "<MOLECULAR_ENTITY_TCR_GAMMA_CDR3>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 304,
"content": "<MOLECULAR_ENTITY_TCR_GAMMA_VAR>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
}
],
"normalizer": null,
Expand Down Expand Up @@ -3017,6 +3062,11 @@
"<GENERAL_CHAIN>": 297,
"<SUBMOLECULAR_ENTITY>": 298,
"<MUTATED>": 299,
"<MOLECULAR_ENTITY_TCR_ALPHA_CDR3>": 300,
"<MOLECULAR_ENTITY_TCR_DELTA_CDR3>": 301,
"<MOLECULAR_ENTITY_TCR_DELTA_VAR>": 302,
"<MOLECULAR_ENTITY_TCR_GAMMA_CDR3>": 303,
"<MOLECULAR_ENTITY_TCR_GAMMA_VAR>": 304,
"#": 527,
"%": 528,
"(": 529,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2702,6 +2702,51 @@
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 300,
"content": "<MOLECULAR_ENTITY_TCR_ALPHA_CDR3>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 301,
"content": "<MOLECULAR_ENTITY_TCR_DELTA_CDR3>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 302,
"content": "<MOLECULAR_ENTITY_TCR_DELTA_VAR>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 303,
"content": "<MOLECULAR_ENTITY_TCR_GAMMA_CDR3>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 304,
"content": "<MOLECULAR_ENTITY_TCR_GAMMA_VAR>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
}
],
"normalizer": null,
Expand Down Expand Up @@ -3023,6 +3068,11 @@
"<GENERAL_CHAIN>": 297,
"<SUBMOLECULAR_ENTITY>": 298,
"<MUTATED>": 299,
"<MOLECULAR_ENTITY_TCR_ALPHA_CDR3>": 300,
"<MOLECULAR_ENTITY_TCR_DELTA_CDR3>": 301,
"<MOLECULAR_ENTITY_TCR_DELTA_VAR>": 302,
"<MOLECULAR_ENTITY_TCR_GAMMA_CDR3>": 303,
"<MOLECULAR_ENTITY_TCR_GAMMA_VAR>": 304,
"[CL:0000499]": 3522,
"[CL:2000060]": 3523,
"[CL:0000235]": 3524,
Expand Down
Loading

0 comments on commit 7fc56d9

Please sign in to comment.