Mirror of https://github.com/comfyanonymous/ComfyUI.git (synced 2025-08-02 15:04:50 +08:00)

Compare commits: 5d5024296d...v0.3.47

4 commits:

- 2f74e17975
- dca6bdd4fa
- 7d593baf91
- c60dc4177c
comfy/ldm/wan/model.py:

```diff
@@ -146,6 +146,15 @@ WAN_CROSSATTENTION_CLASSES = {
 }
 
 
+def repeat_e(e, x):
+    repeats = 1
+    if e.shape[1] > 1:
+        repeats = x.shape[1] // e.shape[1]
+    if repeats == 1:
+        return e
+    return torch.repeat_interleave(e, repeats, dim=1)
+
+
 class WanAttentionBlock(nn.Module):
 
     def __init__(self,
```
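The new `repeat_e` helper lets the modulation tensor `e` carry either a single vector (broadcast as before) or one vector per latent frame, expanding it along dim 1 to match the token count of `x`. A minimal sketch of the behavior; the shapes below are illustrative, not the model's real dimensions:

```python
import torch

def repeat_e(e, x):
    # Same logic as the function added above: expand e along dim 1 so each
    # modulation vector lines up with the block of tokens it modulates.
    repeats = 1
    if e.shape[1] > 1:
        repeats = x.shape[1] // e.shape[1]
    if repeats == 1:
        return e
    return torch.repeat_interleave(e, repeats, dim=1)

x = torch.zeros(1, 12, 64)            # 12 tokens, e.g. 4 latent frames x 3 tokens
e_global = torch.zeros(1, 1, 64)      # one modulation vector for everything
e_per_frame = torch.zeros(1, 4, 64)   # one modulation vector per frame

print(repeat_e(e_global, x).shape)     # torch.Size([1, 1, 64]); broadcasts as before
print(repeat_e(e_per_frame, x).shape)  # torch.Size([1, 12, 64]); each vector repeated 3x
```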
```diff
@@ -201,6 +210,7 @@ class WanAttentionBlock(nn.Module):
             freqs(Tensor): Rope freqs, shape [1024, C / num_heads / 2]
         """
         # assert e.dtype == torch.float32
+
         if e.ndim < 4:
             e = (comfy.model_management.cast_to(self.modulation, dtype=x.dtype, device=x.device) + e).chunk(6, dim=1)
         else:
```
```diff
@@ -209,15 +219,15 @@ class WanAttentionBlock(nn.Module):
 
         # self-attention
         y = self.self_attn(
-            self.norm1(x) * (1 + e[1]) + e[0],
+            self.norm1(x) * (1 + repeat_e(e[1], x)) + repeat_e(e[0], x),
             freqs)
 
-        x = x + y * e[2]
+        x = x + y * repeat_e(e[2], x)
 
         # cross-attention & ffn
         x = x + self.cross_attn(self.norm3(x), context, context_img_len=context_img_len)
-        y = self.ffn(self.norm2(x) * (1 + e[4]) + e[3])
-        x = x + y * e[5]
+        y = self.ffn(self.norm2(x) * (1 + repeat_e(e[4], x)) + repeat_e(e[3], x))
+        x = x + y * repeat_e(e[5], x)
         return x
```
```diff
@@ -331,7 +341,8 @@ class Head(nn.Module):
             e = (comfy.model_management.cast_to(self.modulation, dtype=x.dtype, device=x.device) + e.unsqueeze(1)).chunk(2, dim=1)
         else:
             e = (comfy.model_management.cast_to(self.modulation, dtype=x.dtype, device=x.device).unsqueeze(0) + e.unsqueeze(2)).unbind(2)
-        x = (self.head(self.norm(x) * (1 + e[1]) + e[0]))
+
+        x = (self.head(self.norm(x) * (1 + repeat_e(e[1], x)) + repeat_e(e[0], x)))
         return x
```
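With `repeat_e` in place, the attention block and the output `Head` keep the usual adaLN-style shift/scale/gate modulation but now accept per-frame conditioning. A hedged sketch of the pattern; the LayerNorm and sizes are illustrative stand-ins for the model's own layers:

```python
import torch
import torch.nn as nn

def repeat_e(e, x):  # same helper as above
    repeats = x.shape[1] // e.shape[1] if e.shape[1] > 1 else 1
    return e if repeats == 1 else torch.repeat_interleave(e, repeats, dim=1)

norm1 = nn.LayerNorm(64, elementwise_affine=False)
x = torch.randn(1, 12, 64)                                # tokens
shift, scale, gate = torch.randn(3, 1, 4, 64).unbind(0)   # per-frame e[0], e[1], e[2]

# Modulated input, then a gated residual, mirroring the self-attention path:
y = norm1(x) * (1 + repeat_e(scale, x)) + repeat_e(shift, x)
x = x + y * repeat_e(gate, x)
print(x.shape)  # torch.Size([1, 12, 64])
```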
comfy/ldm/wan/vae2_2.py:

```diff
@@ -136,7 +136,7 @@ class ResidualBlock(nn.Module):
                           if in_dim != out_dim else nn.Identity())
 
     def forward(self, x, feat_cache=None, feat_idx=[0]):
-        h = self.shortcut(x)
+        old_x = x
         for layer in self.residual:
             if isinstance(layer, CausalConv3d) and feat_cache is not None:
                 idx = feat_idx[0]
```
```diff
@@ -156,7 +156,7 @@ class ResidualBlock(nn.Module):
                 feat_idx[0] += 1
             else:
                 x = layer(x)
-        return x + h
+        return x + self.shortcut(old_x)
 
 
 def patchify(x, patch_size):
```
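These two `ResidualBlock` hunks defer the shortcut projection until after the residual branch has run, so its output tensor no longer stays alive for the whole loop; the result is numerically identical and only peak memory changes. A sketch of the pattern, with illustrative conv shapes:

```python
import torch
import torch.nn as nn

residual = nn.Sequential(nn.Conv3d(32, 64, 1), nn.SiLU(), nn.Conv3d(64, 64, 1))
shortcut = nn.Conv3d(32, 64, 1)
x = torch.randn(1, 32, 4, 16, 16)

# Before: the shortcut output is allocated up front and held across the branch.
h = shortcut(x)
out_before = residual(x) + h

# After: keep only a reference to the input and project it at the very end.
old_x = x
out_after = residual(x) + shortcut(old_x)

assert torch.allclose(out_before, out_after)
```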
```diff
@@ -327,7 +327,7 @@ class Down_ResidualBlock(nn.Module):
         self.downsamples = nn.Sequential(*downsamples)
 
     def forward(self, x, feat_cache=None, feat_idx=[0]):
-        x_copy = x.clone()
+        x_copy = x
         for module in self.downsamples:
             x = module(x, feat_cache, feat_idx)
 
```
```diff
@@ -369,7 +369,7 @@ class Up_ResidualBlock(nn.Module):
         self.upsamples = nn.Sequential(*upsamples)
 
     def forward(self, x, feat_cache=None, feat_idx=[0], first_chunk=False):
-        x_main = x.clone()
+        x_main = x
         for module in self.upsamples:
             x_main = module(x_main, feat_cache, feat_idx)
         if self.avg_shortcut is not None:
```
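Similarly, the `Down_ResidualBlock` and `Up_ResidualBlock` hunks drop a `.clone()` whose only effect was a full extra copy: the loop rebinds the local name instead of mutating the tensor, so a plain reference suffices. A small sketch, assuming the submodules are out-of-place like ordinary `nn.Module`s:

```python
import torch
import torch.nn as nn

downsamples = nn.Sequential(nn.Linear(64, 64), nn.Linear(64, 64))
x = torch.randn(1, 12, 64)
original = x

x_copy = x  # reference only; no extra allocation
for module in downsamples:
    x = module(x)  # rebinds the name x, never writes into the original tensor

assert x_copy is original  # the saved input is intact, same as with .clone()
```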
comfy/model_base.py:

```diff
@@ -1202,7 +1202,7 @@ class WAN22(BaseModel):
     def process_timestep(self, timestep, x, denoise_mask=None, **kwargs):
         if denoise_mask is None:
             return timestep
-        temp_ts = (torch.mean(denoise_mask[:, :, :, ::2, ::2], dim=1, keepdim=True) * timestep.view([timestep.shape[0]] + [1] * (denoise_mask.ndim - 1))).reshape(timestep.shape[0], -1)
+        temp_ts = (torch.mean(denoise_mask[:, :, :, :, :], dim=(1, 3, 4), keepdim=True) * timestep.view([timestep.shape[0]] + [1] * (denoise_mask.ndim - 1))).reshape(timestep.shape[0], -1)
         return temp_ts
 
     def scale_latent_inpaint(self, sigma, noise, latent_image, **kwargs):
```
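The `WAN22.process_timestep` change averages the denoise mask over the channel and both spatial dims (1, 3, 4) rather than over channels alone at a 2x-strided spatial subsample, collapsing the per-element timesteps to one value per latent frame. A shape-only recomputation with an illustrative mask size:

```python
import torch

timestep = torch.tensor([999.0])              # shape [B], B = 1
denoise_mask = torch.rand(1, 16, 21, 30, 52)  # illustrative [B, C, T, H, W] mask

ts = timestep.view([timestep.shape[0]] + [1] * (denoise_mask.ndim - 1))  # [1, 1, 1, 1, 1]

old = (torch.mean(denoise_mask[:, :, :, ::2, ::2], dim=1, keepdim=True) * ts).reshape(1, -1)
new = (torch.mean(denoise_mask, dim=(1, 3, 4), keepdim=True) * ts).reshape(1, -1)

print(old.shape)  # torch.Size([1, 8190]): one timestep per subsampled pixel (21*15*26)
print(new.shape)  # torch.Size([1, 21]):   one timestep per latent frame
```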
comfy/model_management.py:

```diff
@@ -529,6 +529,8 @@ WINDOWS = any(platform.win32_ver())
 
 EXTRA_RESERVED_VRAM = 400 * 1024 * 1024
 if WINDOWS:
     EXTRA_RESERVED_VRAM = 600 * 1024 * 1024 #Windows is higher because of the shared vram issue
+    if total_vram > (15 * 1024): # more extra reserved vram on 16GB+ cards
+        EXTRA_RESERVED_VRAM += 100 * 1024 * 1024
 
 if args.reserve_vram is not None:
     EXTRA_RESERVED_VRAM = args.reserve_vram * 1024 * 1024 * 1024
```
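Net effect of the reservation change (`total_vram` is in MB in this module, so `15 * 1024` is the 15GB threshold). A standalone recomputation of the constants, not the module's actual code:

```python
MB = 1024 * 1024

def extra_reserved_vram(windows: bool, total_vram_mb: float) -> int:
    # Baseline 400MB; 600MB on Windows because of the shared-vram issue;
    # a further 100MB on Windows cards with more than 15GB of vram.
    reserved = 400 * MB
    if windows:
        reserved = 600 * MB
        if total_vram_mb > 15 * 1024:
            reserved += 100 * MB
    return reserved

print(extra_reserved_vram(False, 24 * 1024) // MB)  # 400: non-Windows, any size
print(extra_reserved_vram(True, 8 * 1024) // MB)    # 600: Windows, 8GB card
print(extra_reserved_vram(True, 16 * 1024) // MB)   # 700: Windows, 16GB card
```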
comfyui_version.py:

```diff
@@ -1,3 +1,3 @@
 # This file is automatically generated by the build process when version is
 # updated in pyproject.toml.
-__version__ = "0.3.46"
+__version__ = "0.3.47"
```
pyproject.toml:

```diff
@@ -1,6 +1,6 @@
 [project]
 name = "ComfyUI"
-version = "0.3.46"
+version = "0.3.47"
 readme = "README.md"
 license = { file = "LICENSE" }
 requires-python = ">=3.9"
```