mirror of
https://github.com/comfyanonymous/ComfyUI.git
synced 2025-08-02 23:14:49 +08:00
More flexible long clip support.
Add clip g long clip support. Text encoder refactor. Support llama models with different vocab sizes.
This commit is contained in:
@@ -1,30 +1,29 @@
|
||||
from comfy import sd1_clip
|
||||
import os
|
||||
|
||||
class LongClipTokenizer_(sd1_clip.SDTokenizer):
|
||||
def __init__(self, embedding_directory=None, tokenizer_data={}):
|
||||
super().__init__(max_length=248, embedding_directory=embedding_directory, tokenizer_data=tokenizer_data)
|
||||
|
||||
class LongClipModel_(sd1_clip.SDClipModel):
|
||||
def __init__(self, *args, **kwargs):
|
||||
textmodel_json_config = os.path.join(os.path.dirname(os.path.realpath(__file__)), "long_clipl.json")
|
||||
super().__init__(*args, textmodel_json_config=textmodel_json_config, **kwargs)
|
||||
|
||||
class LongClipTokenizer(sd1_clip.SD1Tokenizer):
|
||||
def __init__(self, embedding_directory=None, tokenizer_data={}):
|
||||
super().__init__(embedding_directory=embedding_directory, tokenizer_data=tokenizer_data, tokenizer=LongClipTokenizer_)
|
||||
|
||||
class LongClipModel(sd1_clip.SD1ClipModel):
|
||||
def __init__(self, device="cpu", dtype=None, model_options={}, **kwargs):
|
||||
super().__init__(device=device, dtype=dtype, model_options=model_options, clip_model=LongClipModel_, **kwargs)
|
||||
|
||||
def model_options_long_clip(sd, tokenizer_data, model_options):
|
||||
w = sd.get("clip_l.text_model.embeddings.position_embedding.weight", None)
|
||||
if w is None:
|
||||
w = sd.get("clip_g.text_model.embeddings.position_embedding.weight", None)
|
||||
else:
|
||||
model_name = "clip_g"
|
||||
|
||||
if w is None:
|
||||
w = sd.get("text_model.embeddings.position_embedding.weight", None)
|
||||
if w is not None and w.shape[0] == 248:
|
||||
if w is not None:
|
||||
if "text_model.encoder.layers.30.mlp.fc1.weight" in sd:
|
||||
model_name = "clip_g"
|
||||
elif "text_model.encoder.layers.1.mlp.fc1.weight" in sd:
|
||||
model_name = "clip_l"
|
||||
else:
|
||||
model_name = "clip_l"
|
||||
|
||||
if w is not None:
|
||||
tokenizer_data = tokenizer_data.copy()
|
||||
model_options = model_options.copy()
|
||||
tokenizer_data["clip_l_tokenizer_class"] = LongClipTokenizer_
|
||||
model_options["clip_l_class"] = LongClipModel_
|
||||
model_config = model_options.get("model_config", {})
|
||||
model_config["max_position_embeddings"] = w.shape[0]
|
||||
model_options["{}_model_config".format(model_name)] = model_config
|
||||
tokenizer_data["{}_max_length".format(model_name)] = w.shape[0]
|
||||
return tokenizer_data, model_options
|
||||
|
Reference in New Issue
Block a user