1
mirror of https://github.com/comfyanonymous/ComfyUI.git synced 2025-08-02 15:04:50 +08:00

Basic Genmo Mochi video model support.

To use:
"Load CLIP" node with t5xxl + type mochi
"Load Diffusion Model" node with the mochi dit file.
"Load VAE" node with the mochi vae file.

EmptyMochiLatentVideo node for the latent.
euler sampler + linear_quadratic scheduler in the KSampler node.
This commit is contained in:
comfyanonymous
2024-10-26 06:54:00 -04:00
parent c3ffbae067
commit 5cbb01bc2f
18 changed files with 1677 additions and 24 deletions

View File

@@ -145,6 +145,34 @@ def detect_unet_config(state_dict, key_prefix):
dit_config["guidance_embed"] = "{}guidance_in.in_layer.weight".format(key_prefix) in state_dict_keys
return dit_config
if '{}t5_yproj.weight'.format(key_prefix) in state_dict_keys: #Genmo mochi preview
dit_config = {}
dit_config["image_model"] = "mochi_preview"
dit_config["depth"] = 48
dit_config["patch_size"] = 2
dit_config["num_heads"] = 24
dit_config["hidden_size_x"] = 3072
dit_config["hidden_size_y"] = 1536
dit_config["mlp_ratio_x"] = 4.0
dit_config["mlp_ratio_y"] = 4.0
dit_config["learn_sigma"] = False
dit_config["in_channels"] = 12
dit_config["qk_norm"] = True
dit_config["qkv_bias"] = False
dit_config["out_bias"] = True
dit_config["attn_drop"] = 0.0
dit_config["patch_embed_bias"] = True
dit_config["posenc_preserve_area"] = True
dit_config["timestep_mlp_bias"] = True
dit_config["attend_to_padding"] = False
dit_config["timestep_scale"] = 1000.0
dit_config["use_t5"] = True
dit_config["t5_feat_dim"] = 4096
dit_config["t5_token_length"] = 256
dit_config["rope_theta"] = 10000.0
return dit_config
if '{}input_blocks.0.0.weight'.format(key_prefix) not in state_dict_keys:
return None