
Add --use-flash-attention flag. (#7223)

* Add --use-flash-attention flag.
This is useful on AMD systems, as FlashAttention (FA) builds are still about 10% faster there than PyTorch cross-attention.
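The new backend is opt-in: since the flag is a plain store_true argument, FlashAttention is only used when ComfyUI is launched with it, e.g. python main.py --use-flash-attention.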
Author: FeepingCreature
Date: 2025-03-14 08:22:41 +01:00
Committed by: GitHub
Parent: 35504e2f93
Commit: 7aceb9f91c
3 changed files, 64 insertions(+), 0 deletions(-)

comfy/cli_args.py

@@ -106,6 +106,7 @@ attn_group.add_argument("--use-split-cross-attention", action="store_true", help
 attn_group.add_argument("--use-quad-cross-attention", action="store_true", help="Use the sub-quadratic cross attention optimization . Ignored when xformers is used.")
 attn_group.add_argument("--use-pytorch-cross-attention", action="store_true", help="Use the new pytorch 2.0 cross attention function.")
 attn_group.add_argument("--use-sage-attention", action="store_true", help="Use sage attention.")
+attn_group.add_argument("--use-flash-attention", action="store_true", help="Use FlashAttention.")
 parser.add_argument("--disable-xformers", action="store_true", help="Disable xformers.")