Skip to content

Commit

Permalink
convert_hf : reduce usages of UNKNOWN for InternLM2
Browse files Browse the repository at this point in the history
This makes the changes from #8321 more consistent
with the other changes made here.
  • Loading branch information
compilade committed Jul 10, 2024
1 parent afa6119 commit 1caa20f
Showing 1 changed file with 3 additions and 3 deletions.
6 changes: 3 additions & 3 deletions convert_hf_to_gguf.py
Original file line number Diff line number Diff line change
Expand Up @@ -2189,7 +2189,7 @@ def set_vocab(self):
toktype = SentencePieceTokenTypes.BYTE
# take care of unused raw token
if piece.startswith('[UNUSED'):
- toktype = SentencePieceTokenTypes.UNKNOWN
+ toktype = SentencePieceTokenTypes.UNUSED

tokens.append(text)
scores.append(score)
Expand Down Expand Up @@ -2219,7 +2219,7 @@ def set_vocab(self):
if token == chat_eos_token:
chat_eos_token_id = token_id
token = token.encode("utf-8")
- if toktypes[token_id] != SentencePieceTokenTypes.UNKNOWN:
+ if toktypes[token_id] != SentencePieceTokenTypes.UNUSED:
assert(tokens[token_id] == token)
tokens[token_id] = token
scores[token_id] = -1000.0
Expand All @@ -2238,7 +2238,7 @@ def set_vocab(self):
if token == chat_eos_token:
chat_eos_token_id = token_id
token = token.encode("utf-8")
- if toktypes[token_id] != SentencePieceTokenTypes.UNKNOWN:
+ if toktypes[token_id] != SentencePieceTokenTypes.UNUSED:
assert(tokens[token_id] == token)
tokens[token_id] = token
scores[token_id] = -1000.0
Expand Down

0 comments on commit 1caa20f

Please sign in to comment.