Mirror of https://github.com/comfyanonymous/ComfyUI.git (synced 2025-08-03 07:26:31 +08:00)
Add support for API Nodes in ComfyUI. (#7726)
* Add Ideogram generate node.
* Add staging api.
* COMFY_API_NODE_NAME node property
* switch to boolean flag and use original node name for id
* add optional to type
* Add API_NODE and common error for missing auth token (#5)
* Add Minimax Video Generation + Async Task queue polling example (#6)
* [Minimax] Show video preview and embed workflow in output (#7)
* [API Nodes] Send empty request body instead of empty dictionary. (#8)
* Fixed: removed function from rebase.
* Add pydantic.
* Remove uv.lock
* Remove polling operations.
* Update stubs workflow.
* Remove polling comments.
* Update stubs.
* Use pydantic v2.
* Add basic OpenAITextToImage node
* Add.
* convert image to tensor.
* Improve types.
* Ruff.
* Push tests.
* Handle multi-form data.
  - Don't set content-type for multi-part/form
  - Use data field instead of JSON
* Change to api.comfy.org
* Handle error code 409.
* separate out nodes per openai model
* Update error message.
* fix wrong output type
* re-categorize nodes, remove ideogram (for now)
* oops, fix mappings
* fix ruff
* Update frontend to 1.17.9
* embargo lift rename nodes
* remove unused autogenerated model code
* fix API type error and add b64 support for 4o
* fix ruff
* oops forgot mask scaling code
* Remove unused types.

---------

Co-authored-by: bymyself <cbyrne@comfy.org>
Co-authored-by: Yoland Y <4950057+yoland68@users.noreply.github.com>
Co-authored-by: thot-experiment <thot@thiic.cc>
comfy_api_nodes/nodes_api.py (new file, 425 lines)
@@ -0,0 +1,425 @@
import io
from inspect import cleandoc

from comfy.utils import common_upscale
from comfy.comfy_types.node_typing import IO, ComfyNodeABC, InputTypeDict
from comfy_api_nodes.apis import (
    OpenAIImageGenerationRequest,
    OpenAIImageEditRequest,
    OpenAIImageGenerationResponse
)
from comfy_api_nodes.apis.client import ApiEndpoint, HttpMethod, SynchronousOperation

import numpy as np
from PIL import Image
import requests
import torch
import math
import base64

def downscale_input(image):
    samples = image.movedim(-1, 1)
    # downscaling input images to roughly the same size as the outputs
    total = int(1536 * 1024)
    scale_by = math.sqrt(total / (samples.shape[3] * samples.shape[2]))
    if scale_by >= 1:
        return image
    width = round(samples.shape[3] * scale_by)
    height = round(samples.shape[2] * scale_by)

    s = common_upscale(samples, width, height, "lanczos", "disabled")
    s = s.movedim(1, -1)
    return s
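
# Worked example of the scaling above (editorial note, not part of the original file):
# a 3072x2048 input (width x height) holds 6,291,456 pixels, four times the
# 1536*1024 = 1,572,864 pixel target, so scale_by = sqrt(1/4) = 0.5 and the image is
# resized to 1536x1024; inputs already at or below the target area pass through unchanged.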

def validate_and_cast_response(response):
    # validate raw JSON response
    data = response.data
    if not data or len(data) == 0:
        raise Exception("No images returned from API endpoint")

    # Get base64 image data
    image_url = data[0].url
    b64_data = data[0].b64_json
    if not image_url and not b64_data:
        raise Exception("No image was generated in the response")

    if b64_data:
        img_data = base64.b64decode(b64_data)
        img = Image.open(io.BytesIO(img_data))

    elif image_url:
        img_response = requests.get(image_url)
        if img_response.status_code != 200:
            raise Exception("Failed to download the image")
        img = Image.open(io.BytesIO(img_response.content))

    img = img.convert("RGB")  # Ensure RGB format

    # Convert to numpy array, normalize to float32 between 0 and 1
    img_array = np.array(img).astype(np.float32) / 255.0

    # Convert to torch tensor and add batch dimension
    return torch.from_numpy(img_array)[None,]
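
# Editorial note: only the first entry of response.data is decoded, even when several
# images were requested; the returned tensor is float32, shaped [1, H, W, 3] with
# values in [0, 1], which is ComfyUI's IMAGE layout.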

class OpenAIDalle2(ComfyNodeABC):
    """
    Generates images synchronously via OpenAI's DALL·E 2 endpoint.

    Uses the proxy at /proxy/openai/images/generations. Returned URLs are short-lived,
    so download or cache results if you need to keep them.
    """
    def __init__(self):
        pass

    @classmethod
    def INPUT_TYPES(cls) -> InputTypeDict:
        return {
            "required": {
                "prompt": (IO.STRING, {
                    "multiline": True,
                    "default": "",
                    "tooltip": "Text prompt for DALL·E",
                }),
            },
            "optional": {
                "seed": (IO.INT, {
                    "default": 0,
                    "min": 0,
                    "max": 2**31-1,
                    "step": 1,
                    "display": "number",
                    "tooltip": "not implemented yet in backend",
                }),
                "size": (IO.COMBO, {
                    "options": ["256x256", "512x512", "1024x1024"],
                    "default": "1024x1024",
                    "tooltip": "Image size",
                }),
                "n": (IO.INT, {
                    "default": 1,
                    "min": 1,
                    "max": 8,
                    "step": 1,
                    "display": "number",
                    "tooltip": "How many images to generate",
                }),
                "image": (IO.IMAGE, {
                    "default": None,
                    "tooltip": "Optional reference image for image editing.",
                }),
                "mask": (IO.MASK, {
                    "default": None,
                    "tooltip": "Optional mask for inpainting (white areas will be replaced)",
                }),
            },
            "hidden": {
                "auth_token": "AUTH_TOKEN_COMFY_ORG"
            }
        }

    RETURN_TYPES = (IO.IMAGE,)
    FUNCTION = "api_call"
    CATEGORY = "api node"
    DESCRIPTION = cleandoc(__doc__ or "")
    API_NODE = True

    def api_call(self, prompt, seed=0, image=None, mask=None, n=1, size="1024x1024", auth_token=None):
        model = "dall-e-2"
        path = "/proxy/openai/images/generations"
        request_class = OpenAIImageGenerationRequest
        img_binary = None

        if image is not None and mask is not None:
            path = "/proxy/openai/images/edits"
            request_class = OpenAIImageEditRequest

            input_tensor = image.squeeze().cpu()
            height, width, channels = input_tensor.shape
            rgba_tensor = torch.ones(height, width, 4, device="cpu")
            rgba_tensor[:, :, :channels] = input_tensor

            if mask.shape[1:] != image.shape[1:-1]:
                raise Exception("Mask and Image must be the same size")
            rgba_tensor[:, :, 3] = (1 - mask.squeeze().cpu())

            rgba_tensor = downscale_input(rgba_tensor.unsqueeze(0)).squeeze()

            image_np = (rgba_tensor.numpy() * 255).astype(np.uint8)
            img = Image.fromarray(image_np)
            img_byte_arr = io.BytesIO()
            img.save(img_byte_arr, format='PNG')
            img_byte_arr.seek(0)
            img_binary = img_byte_arr
            img_binary.name = "image.png"
        elif image is not None or mask is not None:
            raise Exception("Dall-E 2 image editing requires an image AND a mask")

        # Build the operation
        operation = SynchronousOperation(
            endpoint=ApiEndpoint(
                path=path,
                method=HttpMethod.POST,
                request_model=request_class,
                response_model=OpenAIImageGenerationResponse
            ),
            request=request_class(
                model=model,
                prompt=prompt,
                n=n,
                size=size,
                seed=seed,
            ),
            files={
                "image": img_binary,
            } if img_binary else None,
            auth_token=auth_token
        )

        response = operation.execute()

        img_tensor = validate_and_cast_response(response)
        return (img_tensor,)
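
# Hedged usage sketch (editor's addition, not part of this commit): the node can be
# driven directly from Python for a quick smoke test outside of a graph. The value
# passed as auth_token is assumed to be a valid Comfy.org API token.
def _example_dalle2_text_to_image(auth_token):
    node = OpenAIDalle2()
    (images,) = node.api_call(
        prompt="a watercolor fox in a snowy forest",
        size="512x512",
        n=1,
        auth_token=auth_token,
    )
    # images is a [1, H, W, 3] float tensor and can feed any IMAGE input
    return images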

class OpenAIDalle3(ComfyNodeABC):
    """
    Generates images synchronously via OpenAI's DALL·E 3 endpoint.

    Uses the proxy at /proxy/openai/images/generations. Returned URLs are short-lived,
    so download or cache results if you need to keep them.
    """
    def __init__(self):
        pass

    @classmethod
    def INPUT_TYPES(cls) -> InputTypeDict:
        return {
            "required": {
                "prompt": (IO.STRING, {
                    "multiline": True,
                    "default": "",
                    "tooltip": "Text prompt for DALL·E",
                }),
            },
            "optional": {
                "seed": (IO.INT, {
                    "default": 0,
                    "min": 0,
                    "max": 2**31-1,
                    "step": 1,
                    "display": "number",
                    "tooltip": "not implemented yet in backend",
                }),
                "quality": (IO.COMBO, {
                    "options": ["standard", "hd"],
                    "default": "standard",
                    "tooltip": "Image quality",
                }),
                "style": (IO.COMBO, {
                    "options": ["natural", "vivid"],
                    "default": "natural",
                    "tooltip": "Vivid causes the model to lean towards generating hyper-real and dramatic images. Natural causes the model to produce more natural, less hyper-real looking images.",
                }),
                "size": (IO.COMBO, {
                    "options": ["1024x1024", "1024x1792", "1792x1024"],
                    "default": "1024x1024",
                    "tooltip": "Image size",
                }),
            },
            "hidden": {
                "auth_token": "AUTH_TOKEN_COMFY_ORG"
            }
        }

    RETURN_TYPES = (IO.IMAGE,)
    FUNCTION = "api_call"
    CATEGORY = "api node"
    DESCRIPTION = cleandoc(__doc__ or "")
    API_NODE = True

    def api_call(self, prompt, seed=0, style="natural", quality="standard", size="1024x1024", auth_token=None):
        model = "dall-e-3"

        # build the operation
        operation = SynchronousOperation(
            endpoint=ApiEndpoint(
                path="/proxy/openai/images/generations",
                method=HttpMethod.POST,
                request_model=OpenAIImageGenerationRequest,
                response_model=OpenAIImageGenerationResponse
            ),
            request=OpenAIImageGenerationRequest(
                model=model,
                prompt=prompt,
                quality=quality,
                size=size,
                style=style,
                seed=seed,
            ),
            auth_token=auth_token
        )

        response = operation.execute()

        img_tensor = validate_and_cast_response(response)
        return (img_tensor,)
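
# Editorial note: unlike the DALL·E 2 node above, this node exposes no image, mask,
# or batch-size inputs; every call goes to the generations endpoint and returns a
# single image, with "quality" and "style" as the DALL·E 3-specific controls.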

class OpenAIGPTImage1(ComfyNodeABC):
    """
    Generates images synchronously via OpenAI's GPT Image 1 endpoint.

    Uses the proxy at /proxy/openai/images/generations. Returned URLs are short-lived,
    so download or cache results if you need to keep them.
    """
    def __init__(self):
        pass

    @classmethod
    def INPUT_TYPES(cls) -> InputTypeDict:
        return {
            "required": {
                "prompt": (IO.STRING, {
                    "multiline": True,
                    "default": "",
                    "tooltip": "Text prompt for GPT Image 1",
                }),
            },
            "optional": {
                "seed": (IO.INT, {
                    "default": 0,
                    "min": 0,
                    "max": 2**31-1,
                    "step": 1,
                    "display": "number",
                    "tooltip": "not implemented yet in backend",
                }),
                "quality": (IO.COMBO, {
                    "options": ["low", "medium", "high"],
                    "default": "low",
                    "tooltip": "Image quality, affects cost and generation time.",
                }),
                "background": (IO.COMBO, {
                    "options": ["opaque", "transparent"],
                    "default": "opaque",
                    "tooltip": "Return image with or without background",
                }),
                "size": (IO.COMBO, {
                    "options": ["auto", "1024x1024", "1024x1536", "1536x1024"],
                    "default": "auto",
                    "tooltip": "Image size",
                }),
                "n": (IO.INT, {
                    "default": 1,
                    "min": 1,
                    "max": 8,
                    "step": 1,
                    "display": "number",
                    "tooltip": "How many images to generate",
                }),
                "image": (IO.IMAGE, {
                    "default": None,
                    "tooltip": "Optional reference image for image editing.",
                }),
                "mask": (IO.MASK, {
                    "default": None,
                    "tooltip": "Optional mask for inpainting (white areas will be replaced)",
                }),
            },
            "hidden": {
                "auth_token": "AUTH_TOKEN_COMFY_ORG"
            }
        }

    RETURN_TYPES = (IO.IMAGE,)
    FUNCTION = "api_call"
    CATEGORY = "api node"
    DESCRIPTION = cleandoc(__doc__ or "")
    API_NODE = True

    def api_call(self, prompt, seed=0, quality="low", background="opaque", image=None, mask=None, n=1, size="1024x1024", auth_token=None):
        model = "gpt-image-1"
        path = "/proxy/openai/images/generations"
        request_class = OpenAIImageGenerationRequest
        img_binary = None
        mask_binary = None

        if image is not None:
            path = "/proxy/openai/images/edits"
            request_class = OpenAIImageEditRequest

            scaled_image = downscale_input(image).squeeze()

            image_np = (scaled_image.numpy() * 255).astype(np.uint8)
            img = Image.fromarray(image_np)
            img_byte_arr = io.BytesIO()
            img.save(img_byte_arr, format='PNG')
            img_byte_arr.seek(0)
            img_binary = img_byte_arr
            img_binary.name = "image.png"

        if mask is not None:
            if image is None:
                raise Exception("Cannot use a mask without an input image")
            if mask.shape[1:] != image.shape[1:-1]:
                raise Exception("Mask and Image must be the same size")
            batch, height, width = mask.shape
            rgba_mask = torch.zeros(height, width, 4, device="cpu")
            rgba_mask[:, :, 3] = (1 - mask.squeeze().cpu())

            scaled_mask = downscale_input(rgba_mask.unsqueeze(0)).squeeze()

            mask_np = (scaled_mask.numpy() * 255).astype(np.uint8)
            mask_img = Image.fromarray(mask_np)
            mask_img_byte_arr = io.BytesIO()
            mask_img.save(mask_img_byte_arr, format='PNG')
            mask_img_byte_arr.seek(0)
            mask_binary = mask_img_byte_arr
            mask_binary.name = "mask.png"

        files = {}
        if img_binary:
            files["image"] = img_binary
        if mask_binary:
            files["mask"] = mask_binary

        # Build the operation
        operation = SynchronousOperation(
            endpoint=ApiEndpoint(
                path=path,
                method=HttpMethod.POST,
                request_model=request_class,
                response_model=OpenAIImageGenerationResponse
            ),
            request=request_class(
                model=model,
                prompt=prompt,
                quality=quality,
                background=background,
                n=n,
                seed=seed,
                size=size,
            ),
            files=files if files else None,
            auth_token=auth_token
        )

        response = operation.execute()

        img_tensor = validate_and_cast_response(response)
        return (img_tensor,)
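
# Editorial note: supplying an image switches this node to the edits endpoint and
# uploads the downscaled PNG as a multipart file; an optional mask becomes the alpha
# channel of a second PNG (inverted, so white mask pixels turn transparent and mark
# the regions to repaint), matching the "white areas will be replaced" tooltip.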


# A dictionary that contains all nodes you want to export with their names
# NOTE: names should be globally unique
NODE_CLASS_MAPPINGS = {
    "OpenAIDalle2": OpenAIDalle2,
    "OpenAIDalle3": OpenAIDalle3,
    "OpenAIGPTImage1": OpenAIGPTImage1,
}

# A dictionary that contains the friendly/human-readable titles for the nodes
NODE_DISPLAY_NAME_MAPPINGS = {
    "OpenAIDalle2": "OpenAI DALL·E 2",
    "OpenAIDalle3": "OpenAI DALL·E 3",
    "OpenAIGPTImage1": "OpenAI GPT Image 1",
}
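
# Editorial note: ComfyUI registers these nodes by importing the module's
# NODE_CLASS_MAPPINGS and NODE_DISPLAY_NAME_MAPPINGS. A hedged sketch of how a
# package-level __init__.py (assumed here, not shown in this diff) could re-export them:
#
#     from comfy_api_nodes.nodes_api import (
#         NODE_CLASS_MAPPINGS,
#         NODE_DISPLAY_NAME_MAPPINGS,
#     )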