1
mirror of https://github.com/comfyanonymous/ComfyUI.git synced 2025-08-02 23:14:49 +08:00

Implement Cosmos Image/Video to World (Video) diffusion models.

Use CosmosImageToVideoLatent to set the input image/video.
This commit is contained in:
comfyanonymous
2025-01-14 05:14:10 -05:00
parent 1f1c7b7b56
commit 3aaabb12d4
6 changed files with 84 additions and 12 deletions

View File

@@ -534,7 +534,7 @@ class VAE:
def encode(self, pixel_samples):
pixel_samples = self.vae_encode_crop_pixels(pixel_samples)
pixel_samples = pixel_samples.movedim(-1, 1)
- if self.latent_dim == 3:
+ if self.latent_dim == 3 and pixel_samples.ndim < 5:
pixel_samples = pixel_samples.movedim(1, 0).unsqueeze(0)
try:
memory_used = self.memory_used_encode(pixel_samples.shape, self.vae_dtype)