mirror of https://github.com/comfyanonymous/ComfyUI.git synced 2025-08-02 23:14:49 +08:00

Compare commits


13 Commits

Author SHA1 Message Date
comfyanonymous
b8ffb2937f Memory tweaks. 2024-08-12 15:07:11 -04:00
Vladimir Semyonov
ce37c11164 add DS_Store to gitignore (#4324) 2024-08-12 12:32:34 -04:00
Alex "mcmonkey" Goodwin
b5c3906b38 Automatically link the Comfy CI page on PRs (#4326)
also use_prior_commit so it doesn't get a janked merge commit instead of the real one
2024-08-12 12:32:16 -04:00
comfyanonymous
5d43e75e5b Fix some issues with the model sometimes not getting patched. 2024-08-12 12:27:54 -04:00
comfyanonymous
517f4a94e4 Fix some lora loading slowdowns. 2024-08-12 11:50:32 -04:00
comfyanonymous
52a471c5c7 Change name of log. 2024-08-12 10:35:06 -04:00
comfyanonymous
ad76574cb8 Fix some potential issues with the previous commits. 2024-08-12 00:23:29 -04:00
comfyanonymous
9acfe4df41 Support loading directly to vram with CLIPLoader node. 2024-08-12 00:06:01 -04:00
comfyanonymous
9829b013ea Fix mistake in last commit. 2024-08-12 00:00:17 -04:00
comfyanonymous
5c69cde037 Load TE model straight to vram if certain conditions are met. 2024-08-11 23:52:43 -04:00
comfyanonymous
e9589d6d92 Add a way to set model dtype and ops from load_checkpoint_guess_config. 2024-08-11 08:50:34 -04:00
comfyanonymous
0d82a798a5 Remove the ckpt_path from load_state_dict_guess_config. 2024-08-11 08:37:35 -04:00
ljleb
925fff26fd alternative to load_checkpoint_guess_config that accepts a loaded state dict (#4249)
* make alternative fn

* add back ckpt path as 2nd argument?
2024-08-11 08:36:52 -04:00
7 changed files with 120 additions and 24 deletions

.github/workflows/pullrequest-ci-run.yml

@@ -35,3 +35,19 @@ jobs:
           torch_version: ${{ matrix.torch_version }}
           google_credentials: ${{ secrets.GCS_SERVICE_ACCOUNT_JSON }}
           comfyui_flags: ${{ matrix.flags }}
+          use_prior_commit: 'true'
+  comment:
+    if: ${{ github.event.label.name == 'Run-CI-Test' }}
+    runs-on: ubuntu-latest
+    permissions:
+      pull-requests: write
+    steps:
+      - uses: actions/github-script@v6
+        with:
+          script: |
+            github.rest.issues.createComment({
+              issue_number: context.issue.number,
+              owner: context.repo.owner,
+              repo: context.repo.repo,
+              body: '(Automated Bot Message) CI Tests are running, you can view the results at https://ci.comfy.org/?branch=${{ github.event.pull_request.number }}%2Fmerge'
+            })

.gitignore (vendored)

@@ -18,4 +18,5 @@ venv/
 /tests-ui/data/object_info.json
 /user/
 *.log
-web_custom_versions/
+web_custom_versions/
+.DS_Store

comfy/model_base.py

@@ -1,3 +1,21 @@
+"""
+    This file is part of ComfyUI.
+    Copyright (C) 2024 Comfy
+
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program. If not, see <https://www.gnu.org/licenses/>.
+"""
+
 import torch
 import logging
 from comfy.ldm.modules.diffusionmodules.openaimodel import UNetModel, Timestep
@@ -77,10 +95,13 @@ class BaseModel(torch.nn.Module):
         self.device = device

         if not unet_config.get("disable_unet_model_creation", False):
-            if self.manual_cast_dtype is not None:
-                operations = comfy.ops.manual_cast
+            if model_config.custom_operations is None:
+                if self.manual_cast_dtype is not None:
+                    operations = comfy.ops.manual_cast
+                else:
+                    operations = comfy.ops.disable_weight_init
             else:
-                operations = comfy.ops.disable_weight_init
+                operations = model_config.custom_operations
             self.diffusion_model = unet_model(**unet_config, device=device, operations=operations)
         if comfy.model_management.force_channels_last():
             self.diffusion_model.to(memory_format=torch.channels_last)
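
In plain terms, the hunk above lets a caller-supplied operations class (model_config.custom_operations) take priority, and only falls back to the previous manual_cast / disable_weight_init choice when none is given. A minimal sketch of that selection, assuming comfy.ops is importable; pick_operations is a hypothetical helper, not part of ComfyUI:

import comfy.ops

def pick_operations(custom_operations, manual_cast_dtype):
    # A custom operations class supplied by the caller wins outright.
    if custom_operations is not None:
        return custom_operations
    # Otherwise keep the old behaviour: cast weights on the fly when a manual cast dtype is set.
    if manual_cast_dtype is not None:
        return comfy.ops.manual_cast
    return comfy.ops.disable_weight_init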

comfy/model_management.py

@@ -438,11 +438,11 @@ def load_models_gpu(models, memory_required=0, force_patch_weights=False, minimu
     global vram_state

     inference_memory = minimum_inference_memory()
-    extra_mem = max(inference_memory, memory_required) + 100 * 1024 * 1024
+    extra_mem = max(inference_memory, memory_required + 300 * 1024 * 1024)
     if minimum_memory_required is None:
         minimum_memory_required = extra_mem
     else:
-        minimum_memory_required = max(inference_memory, minimum_memory_required) + 100 * 1024 * 1024
+        minimum_memory_required = max(inference_memory, minimum_memory_required + 300 * 1024 * 1024)

     models = set(models)
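
The net effect of the change above is that the extra VRAM buffer grows from a flat 100 MiB added after the max() to 300 MiB added to the caller's request before it. A worked example with hypothetical numbers:

MiB = 1024 * 1024
inference_memory = 1024 * MiB   # stand-in for minimum_inference_memory()
memory_required = 2048 * MiB    # stand-in for the caller's request

old_extra_mem = max(inference_memory, memory_required) + 100 * MiB   # 2148 MiB reserved before
new_extra_mem = max(inference_memory, memory_required + 300 * MiB)   # 2348 MiB reserved now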
@@ -684,6 +684,20 @@ def text_encoder_device():
     else:
         return torch.device("cpu")

+def text_encoder_initial_device(load_device, offload_device, model_size=0):
+    if load_device == offload_device or model_size <= 1024 * 1024 * 1024:
+        return offload_device
+
+    if is_device_mps(load_device):
+        return offload_device
+
+    mem_l = get_free_memory(load_device)
+    mem_o = get_free_memory(offload_device)
+    if mem_l > (mem_o * 0.5) and model_size * 1.2 < mem_l:
+        return load_device
+    else:
+        return offload_device
+
 def text_encoder_dtype(device=None):
     if args.fp8_e4m3fn_text_enc:
         return torch.float8_e4m3fn
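
The new text_encoder_initial_device() decides whether a text encoder should be created directly on the load device (usually the GPU) instead of on the offload device and moved later: models under 1 GiB and MPS devices always start offloaded, otherwise the GPU is used only when it has more than half as much free memory as the offload device and fits the model with a 20% margin. A restatement of that comparison with hypothetical numbers:

GiB = 1024 ** 3
model_size = 2 * GiB    # hypothetical fp16 text encoder
mem_l = 10 * GiB        # free memory on the load device (GPU)
mem_o = 16 * GiB        # free memory on the offload device (CPU)

use_load_device = mem_l > (mem_o * 0.5) and model_size * 1.2 < mem_l
# 10 GiB > 8 GiB and 2.4 GiB < 10 GiB, so the encoder is created on the GPU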

comfy/model_patcher.py

@@ -355,13 +355,14 @@ class ModelPatcher:
         return self.model

-    def lowvram_load(self, device_to=None, lowvram_model_memory=0, force_patch_weights=False):
+    def lowvram_load(self, device_to=None, lowvram_model_memory=0, force_patch_weights=False, full_load=False):
         mem_counter = 0
         patch_counter = 0
         lowvram_counter = 0
         for n, m in self.model.named_modules():
             lowvram_weight = False

-            if hasattr(m, "comfy_cast_weights"):
+            if not full_load and hasattr(m, "comfy_cast_weights"):
                 module_mem = comfy.model_management.module_size(m)
                 if mem_counter + module_mem >= lowvram_model_memory:
                     lowvram_weight = True
@@ -401,13 +402,16 @@
                     if weight.device == device_to:
                         continue

-                    self.patch_weight_to_device(weight_key) #TODO: speed this up without OOM
-                    self.patch_weight_to_device(bias_key)
+                    weight_to = None
+                    if full_load:#TODO
+                        weight_to = device_to
+                    self.patch_weight_to_device(weight_key, device_to=weight_to) #TODO: speed this up without OOM
+                    self.patch_weight_to_device(bias_key, device_to=weight_to)
                     m.to(device_to)
                     logging.debug("lowvram: loaded module regularly {} {}".format(n, m))

         if lowvram_counter > 0:
-            logging.info("loaded in lowvram mode {}".format(lowvram_model_memory / (1024 * 1024)))
+            logging.info("loaded partially {} {}".format(lowvram_model_memory / (1024 * 1024), patch_counter))
             self.model.model_lowvram = True
         else:
             logging.info("loaded completely {} {}".format(lowvram_model_memory / (1024 * 1024), mem_counter / (1024 * 1024)))
@@ -665,12 +669,15 @@
         return memory_freed

     def partially_load(self, device_to, extra_memory=0):
+        self.unpatch_model(unpatch_weights=False)
+        self.patch_model(patch_weights=False)
+        full_load = False
         if self.model.model_lowvram == False:
             return 0
         if self.model.model_loaded_weight_memory + extra_memory > self.model_size():
-            pass #TODO: Full load
+            full_load = True
         current_used = self.model.model_loaded_weight_memory
-        self.lowvram_load(device_to, lowvram_model_memory=current_used + extra_memory)
+        self.lowvram_load(device_to, lowvram_model_memory=current_used + extra_memory, full_load=full_load)
         return self.model.model_loaded_weight_memory - current_used

     def current_loaded_device(self):
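
partially_load() now notices when the granted budget would cover the remaining weights and asks lowvram_load() for a full load instead of another partial one. The trigger condition, restated with hypothetical sizes:

GiB = 1024 ** 3
model_size = 6 * GiB        # total size of the patched model
already_loaded = 4 * GiB    # model_loaded_weight_memory currently on the GPU
extra_memory = 3 * GiB      # additional budget granted for this call

full_load = already_loaded + extra_memory > model_size   # True, so everything is loaded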

comfy/sd.py

@@ -62,7 +62,7 @@ def load_lora_for_models(model, clip, lora, strength_model, strength_clip):

 class CLIP:
-    def __init__(self, target=None, embedding_directory=None, no_init=False, tokenizer_data={}):
+    def __init__(self, target=None, embedding_directory=None, no_init=False, tokenizer_data={}, parameters=0):
         if no_init:
             return
         params = target.params.copy()
@@ -71,20 +71,24 @@ class CLIP:
         load_device = model_management.text_encoder_device()
         offload_device = model_management.text_encoder_offload_device()
-        params['device'] = offload_device
         dtype = model_management.text_encoder_dtype(load_device)
         params['dtype'] = dtype
+        params['device'] = model_management.text_encoder_initial_device(load_device, offload_device, parameters * model_management.dtype_size(dtype))

         self.cond_stage_model = clip(**(params))

         for dt in self.cond_stage_model.dtypes:
             if not model_management.supports_cast(load_device, dt):
                 load_device = offload_device
+                if params['device'] != offload_device:
+                    self.cond_stage_model.to(offload_device)
+                    logging.warning("Had to shift TE back.")

         self.tokenizer = tokenizer(embedding_directory=embedding_directory, tokenizer_data=tokenizer_data)
         self.patcher = comfy.model_patcher.ModelPatcher(self.cond_stage_model, load_device=load_device, offload_device=offload_device)
+        if params['device'] == load_device:
+            model_management.load_model_gpu(self.patcher)
         self.layer_idx = None
-        logging.debug("CLIP model load device: {}, offload device: {}".format(load_device, offload_device))
+        logging.debug("CLIP model load device: {}, offload device: {}, current: {}".format(load_device, offload_device, params['device']))

     def clone(self):
         n = CLIP(no_init=True)
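
The new parameters argument exists so the CLIP wrapper can estimate the text encoder's weight size (parameter count times bytes per element of the chosen dtype) and hand it to text_encoder_initial_device(). A rough, hypothetical example of that estimate:

parameters = 4_700_000_000                   # e.g. a large T5-style text encoder
bytes_per_weight = 2                         # fp16
model_size = parameters * bytes_per_weight   # ~9.4 GB fed into the device decision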
@@ -456,7 +460,11 @@ def load_clip(ckpt_paths, embedding_directory=None, clip_type=CLIPType.STABLE_DI
         clip_target.clip = comfy.text_encoders.sd3_clip.SD3ClipModel
         clip_target.tokenizer = comfy.text_encoders.sd3_clip.SD3Tokenizer

-    clip = CLIP(clip_target, embedding_directory=embedding_directory)
+    parameters = 0
+    for c in clip_data:
+        parameters += comfy.utils.calculate_parameters(c)
+
+    clip = CLIP(clip_target, embedding_directory=embedding_directory, parameters=parameters)
     for c in clip_data:
         m, u = clip.load_sd(c)
         if len(m) > 0:
@@ -498,15 +506,19 @@ def load_checkpoint(config_path=None, ckpt_path=None, output_vae=True, output_cl
     return (model, clip, vae)

-def load_checkpoint_guess_config(ckpt_path, output_vae=True, output_clip=True, output_clipvision=False, embedding_directory=None, output_model=True):
+def load_checkpoint_guess_config(ckpt_path, output_vae=True, output_clip=True, output_clipvision=False, embedding_directory=None, output_model=True, model_options={}):
     sd = comfy.utils.load_torch_file(ckpt_path)
-    sd_keys = sd.keys()
+    out = load_state_dict_guess_config(sd, output_vae, output_clip, output_clipvision, embedding_directory, output_model, model_options)
+    if out is None:
+        raise RuntimeError("ERROR: Could not detect model type of: {}".format(ckpt_path))
+    return out
+
+def load_state_dict_guess_config(sd, output_vae=True, output_clip=True, output_clipvision=False, embedding_directory=None, output_model=True, model_options={}):
     clip = None
     clipvision = None
     vae = None
     model = None
     model_patcher = None
     clip_target = None

     diffusion_model_prefix = model_detection.unet_prefix_from_state_dict(sd)
     parameters = comfy.utils.calculate_parameters(sd, diffusion_model_prefix)
@@ -515,13 +527,18 @@ def load_checkpoint_guess_config(ckpt_path, output_vae=True, output_clip=True, o
     model_config = model_detection.model_config_from_unet(sd, diffusion_model_prefix)
     if model_config is None:
-        raise RuntimeError("ERROR: Could not detect model type of: {}".format(ckpt_path))
+        return None

     unet_weight_dtype = list(model_config.supported_inference_dtypes)
     if weight_dtype is not None:
         unet_weight_dtype.append(weight_dtype)

-    unet_dtype = model_management.unet_dtype(model_params=parameters, supported_dtypes=unet_weight_dtype)
+    model_config.custom_operations = model_options.get("custom_operations", None)
+    unet_dtype = model_options.get("weight_dtype", None)
+    if unet_dtype is None:
+        unet_dtype = model_management.unet_dtype(model_params=parameters, supported_dtypes=unet_weight_dtype)
+
     manual_cast_dtype = model_management.unet_manual_cast(unet_dtype, load_device, model_config.supported_inference_dtypes)
     model_config.set_inference_dtype(unet_dtype, manual_cast_dtype)
@@ -545,7 +562,8 @@ def load_checkpoint_guess_config(ckpt_path, output_vae=True, output_clip=True, o
         if clip_target is not None:
             clip_sd = model_config.process_clip_state_dict(sd)
             if len(clip_sd) > 0:
-                clip = CLIP(clip_target, embedding_directory=embedding_directory, tokenizer_data=clip_sd)
+                parameters = comfy.utils.calculate_parameters(clip_sd)
+                clip = CLIP(clip_target, embedding_directory=embedding_directory, tokenizer_data=clip_sd, parameters=parameters)
                 m, u = clip.load_sd(clip_sd, full_model=True)
                 if len(m) > 0:
                     m_filter = list(filter(lambda a: ".logit_scale" not in a and ".transformer.text_projection.weight" not in a, m))
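
Taken together, the sd.py changes expose a loader that accepts an already-loaded state dict plus a model_options dict with optional 'weight_dtype' and 'custom_operations' overrides. A minimal usage sketch, assuming a ComfyUI environment on the import path and a placeholder checkpoint path; the 4-tuple return order mirrors load_checkpoint_guess_config:

import torch
import comfy.utils
import comfy.sd

sd = comfy.utils.load_torch_file("/path/to/checkpoint.safetensors")  # placeholder path
out = comfy.sd.load_state_dict_guess_config(
    sd,
    output_vae=True,
    output_clip=True,
    embedding_directory=None,
    model_options={"weight_dtype": torch.float16},  # optional override added in this compare
)
if out is None:
    raise RuntimeError("could not detect model type")
model_patcher, clip, vae, clipvision = out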

comfy/supported_models_base.py

@@ -1,3 +1,21 @@
+"""
+    This file is part of ComfyUI.
+    Copyright (C) 2024 Comfy
+
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program. If not, see <https://www.gnu.org/licenses/>.
+"""
+
 import torch
 from . import model_base
 from . import utils
@@ -30,6 +48,7 @@ class BASE:
     memory_usage_factor = 2.0

     manual_cast_dtype = None
+    custom_operations = None

     @classmethod
     def matches(s, unet_config, state_dict=None):