Add a weight_dtype fp8_e4m3fn_fast to the Diffusion Model Loader node.

This is used to load weights in fp8 and use fp8 matrix multiplication.
2025-08-02 23:14:49 +08:00 · 2024-10-09 19:43:17 -04:00
parent 203942c8b2
commit e38c94228b
6 changed files with 27 additions and 5 deletions
--- a/comfy/model_management.py
+++ b/comfy/model_management.py
@@ -145,7 +145,7 @@ total_ram = psutil.virtual_memory().total / (1024 * 1024)
 logging.info("Total VRAM {:0.0f} MB, total RAM {:0.0f} MB".format(total_vram, total_ram))

 try:
-    logging.info("pytorch version: {}".format(torch.version.__version__))
+    logging.info("pytorch version: {}".format(torch_version))
 except:
    pass

@@ -1065,6 +1065,9 @@ def should_use_bf16(device=None, model_params=0, prioritize_performance=True, ma
    return False

 def supports_fp8_compute(device=None):
+    if not is_nvidia():
+        return False
+
    props = torch.cuda.get_device_properties(device)
    if props.major >= 9:
        return True
@@ -1072,6 +1075,14 @@ def supports_fp8_compute(device=None):
        return False
    if props.minor < 9:
        return False
+
+    if int(torch_version[0]) < 2 or (int(torch_version[0]) == 2 and int(torch_version[2]) < 3):
+        return False
+
+    if WINDOWS:
+        if (int(torch_version[0]) == 2 and int(torch_version[2]) < 4):
+            return False
+
    return True

 def soft_empty_cache(force=False):