mirror of
https://github.com/comfyanonymous/ComfyUI.git
synced 2025-08-02 23:14:49 +08:00
Add --fast argument to enable experimental optimizations.
Optimizations that might break things or lower quality will be put behind this flag first, and may be enabled by default in the future. Currently the only optimization is float8_e4m3fn matrix multiplication on 4000/Ada-series Nvidia cards or later. If you have one of these cards, you will see a speed boost when using an fp8_e4m3fn model (for example, Flux).
This commit is contained in:
@@ -1048,6 +1048,16 @@ def should_use_bf16(device=None, model_params=0, prioritize_performance=True, ma
|
||||
|
||||
return False
|
||||
|
||||
def supports_fp8_compute(device=None):
    """Return True if the given CUDA device supports fp8 compute in hardware.

    fp8 (e.g. float8_e4m3fn) matrix multiplication requires CUDA compute
    capability 8.9 (Ada / RTX 4000 series) or newer; every 9.x-and-above
    architecture (Hopper onwards) also qualifies.
    """
    props = torch.cuda.get_device_properties(device)
    # Lexicographic tuple compare is exactly "capability >= 8.9":
    # major > 8 passes, major < 8 fails, major == 8 needs minor >= 9.
    return (props.major, props.minor) >= (8, 9)
|
||||
|
||||
def soft_empty_cache(force=False):
|
||||
global cpu_state
|
||||
if cpu_state == CPUState.MPS:
|
||||
|
Reference in New Issue
Block a user