1
mirror of https://github.com/comfyanonymous/ComfyUI.git synced 2025-08-02 23:14:49 +08:00

Rename LLAMATokenizer to SPieceTokenizer.

This commit is contained in:
comfyanonymous
2024-07-22 12:21:45 -04:00
parent b2c995f623
commit 14764aa2e2
2 changed files with 4 additions and 4 deletions

View File

@@ -0,0 +1,22 @@
import os
class SPieceTokenizer:
    """Thin wrapper around a SentencePiece model.

    Exposes ``from_pretrained``, ``get_vocab`` and ``__call__``; calling the
    instance returns a dict with an ``"input_ids"`` entry.
    """

    @classmethod
    def from_pretrained(cls, path):
        """Build a tokenizer from the SentencePiece model file at *path*.

        Implemented as a classmethod so that subclasses get an instance of
        their own type rather than the base class.
        """
        return cls(path)

    def __init__(self, tokenizer_path):
        """Load the SentencePiece model located at *tokenizer_path*."""
        # Imported here rather than at module level, so importing this module
        # does not by itself require the sentencepiece package.
        import sentencepiece

        self.tokenizer = sentencepiece.SentencePieceProcessor(model_file=tokenizer_path)
        # Id reported by the model as end-of-sequence; appended to every
        # encoded input in __call__.
        self.end = self.tokenizer.eos_id()

    def get_vocab(self):
        """Return a dict mapping every piece in the model to its integer id."""
        sp = self.tokenizer
        return {sp.id_to_piece(idx): idx for idx in range(sp.get_piece_size())}

    def __call__(self, string):
        """Encode *string* and append the end-of-sequence id.

        Returns:
            ``{"input_ids": ids}`` where ``ids`` is the encoded token id list
            followed by ``self.end``.
        """
        token_ids = self.tokenizer.encode(string)
        # Build a new list instead of extending the one returned by encode().
        return {"input_ids": [*token_ids, self.end]}