Skip to content

Commit

Permalink
Fix unknown tokens (huggingface#10)
Browse files: browse the repository at this point in the history
  • Loading branch information
xenova committed Mar 9, 2023
1 parent 55b27fc commit e98915d
Showing 1 changed file with 2 additions and 2 deletions.
4 changes: 2 additions & 2 deletions src/tokenizers.js
Original file line number Diff line number Diff line change
Expand Up @@ -27,11 +27,11 @@ class TokenizerModel extends Callable {
throw Error("encode should be implemented in subclass.")
}
/**
 * Maps every entry of `tokens` to the value stored under that key in
 * `this.tokens_to_ids`, substituting a fallback id when the lookup yields
 * `null` or `undefined`.
 *
 * NOTE(review): this excerpt looks like a rendered diff (2 additions,
 * 2 deletions) whose +/- markers were lost, so the two `return` lines below
 * are presumably the removed and the added version of a single statement.
 * Read literally as JavaScript, only the first `return` would ever run.
 *
 * @param {Array} tokens - Values used as keys into `this.tokens_to_ids`
 *     (presumably token strings; confirm against callers).
 * @returns {Array} One entry per input token, in the same order.
 */
convert_tokens_to_ids(tokens) {
// Presumably the pre-change line: fallback is read from `this.config.unk_token_id`.
return tokens.map(t => this.tokens_to_ids[t] ?? this.config.unk_token_id);
// Presumably the post-change line: fallback is read directly from `this.unk_token_id`.
return tokens.map(t => this.tokens_to_ids[t] ?? this.unk_token_id);
}

/**
 * Maps every entry of `ids` to the value stored at that index/key in
 * `this.vocab`, substituting a fallback token when the lookup yields
 * `null` or `undefined`.
 *
 * NOTE(review): this excerpt looks like a rendered diff (2 additions,
 * 2 deletions) whose +/- markers were lost, so the two `return` lines below
 * are presumably the removed and the added version of a single statement.
 * Read literally as JavaScript, only the first `return` would ever run.
 *
 * @param {Array} ids - Values used to index `this.vocab`
 *     (presumably integer token ids; confirm against callers).
 * @returns {Array} One entry per input id, in the same order.
 */
convert_ids_to_tokens(ids) {
// Presumably the pre-change line: fallback is read from `this.config.unk_token`.
return ids.map(i => this.vocab[i] ?? this.config.unk_token);
// Presumably the post-change line: fallback is read directly from `this.unk_token`.
return ids.map(i => this.vocab[i] ?? this.unk_token);
}
}

Expand Down

0 comments on commit e98915d

Please sign in to comment.