Add an fp8_e4m3fn_fast weight_dtype option to the Diffusion Model Loader node.

This option loads the weights in fp8 and uses fp8 matrix multiplication.
2025-08-02 23:14:49 +08:00 · 2024-10-09 19:43:17 -04:00
parent 203942c8b2
commit e38c94228b
6 changed files with 27 additions and 5 deletions
--- a/comfy/ops.py
+++ b/comfy/ops.py
@@ -299,7 +299,11 @@ class fp8_ops(manual_cast):
            return torch.nn.functional.linear(input, weight, bias)


-def pick_operations(weight_dtype, compute_dtype, load_device=None, disable_fast_fp8=False):
+def pick_operations(weight_dtype, compute_dtype, load_device=None, disable_fast_fp8=False, fp8_optimizations=False):
+    if comfy.model_management.supports_fp8_compute(load_device):
+        if (fp8_optimizations or args.fast) and not disable_fast_fp8:
+            return fp8_ops
+
    if compute_dtype is None or weight_dtype == compute_dtype:
        return disable_weight_init
    if args.fast and not disable_fast_fp8: