Mixed precision diffusion models with scaled fp8.

This change adds support for diffusion models where all the linears are scaled fp8 while the other weights remain in their original precision.
2025-08-02 15:04:50 +08:00 · 2024-10-21 18:12:51 -04:00
parent 83ca891118
commit 0075c6d096
5 changed files with 14 additions and 11 deletions
--- a/comfy/ops.py
+++ b/comfy/ops.py
@@ -334,10 +334,10 @@ def scaled_fp8_ops(fp8_matrix_mult=False, scale_input=False, override_dtype=None

    return scaled_fp8_op

-def pick_operations(weight_dtype, compute_dtype, load_device=None, disable_fast_fp8=False, fp8_optimizations=False, scaled_fp8=False):
+def pick_operations(weight_dtype, compute_dtype, load_device=None, disable_fast_fp8=False, fp8_optimizations=False, scaled_fp8=None):
    fp8_compute = comfy.model_management.supports_fp8_compute(load_device)
-    if scaled_fp8:
-        return scaled_fp8_ops(fp8_matrix_mult=fp8_compute, scale_input=True)
+    if scaled_fp8 is not None:
+        return scaled_fp8_ops(fp8_matrix_mult=fp8_compute, scale_input=True, override_dtype=scaled_fp8)

    if fp8_compute and (fp8_optimizations or args.fast) and not disable_fast_fp8:
        return fp8_ops