
Add support for Comfy API keys (#8041)

* Handle Comfy API key based authorization (#167)

Co-authored-by: Jedrzej Kosinski <kosinkadink1@gmail.com>

* Bump frontend version to include API key features (#170)

* Bump templates version

---------

Co-authored-by: Jedrzej Kosinski <kosinkadink1@gmail.com>

Author: Christian Byrne
Date: 2025-05-10 19:10:58 -07:00
Committed by: GitHub
Parent: 235d3901fc
Commit: 3535909eb8

15 changed files with 319 additions and 224 deletions

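The hunks below apply one pattern across every Kling node: the node's hidden inputs gain a comfy_api_key entry alongside auth_token, the node entry points stop declaring auth_token explicitly and forward **kwargs instead, and the request helpers accept the whole credential dict as auth_kwargs. A minimal sketch of the resulting shape (sketch only, not code from this commit: ExampleKlingNode is hypothetical, the ... placeholders stand in for real request/response models, and the import path is an assumption; KlingNodeBase, PATH_TEXT_TO_VIDEO, and the operation classes are names that appear in the hunks below):

# Sketch, not code from this commit; import path assumed to match
# comfy_api_nodes/apis/client.py in this repo.
from comfy_api_nodes.apis.client import ApiEndpoint, HttpMethod, SynchronousOperation


class ExampleKlingNode(KlingNodeBase):  # hypothetical node, for illustration
    @classmethod
    def INPUT_TYPES(s):
        return {
            "required": {"prompt": ("STRING", {})},
            # The executor injects whichever credentials the user has:
            # a login-session token, a Comfy API key, or both.
            "hidden": {
                "auth_token": "AUTH_TOKEN_COMFY_ORG",
                "comfy_api_key": "API_KEY_COMFY_ORG",
            },
        }

    RETURN_TYPES = ("VIDEO",)
    FUNCTION = "api_call"

    def api_call(self, prompt: str, **kwargs):
        # kwargs now holds the injected credentials, e.g.
        # {"auth_token": ..., "comfy_api_key": ...}; operations accept the
        # dict wholesale via auth_kwargs instead of a single bearer token.
        operation = SynchronousOperation(
            endpoint=ApiEndpoint(
                path=PATH_TEXT_TO_VIDEO,
                method=HttpMethod.POST,
                request_model=...,   # placeholder: real nodes pass pydantic models
                response_model=...,
            ),
            request=...,             # placeholder request body
            auth_kwargs=kwargs,
        )
        return (operation.execute(),)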

@@ -95,7 +95,7 @@ class KlingApiError(Exception):
pass
-def poll_until_finished(auth_token: str, api_endpoint: ApiEndpoint[Any, R]) -> R:
+def poll_until_finished(auth_kwargs: dict[str,str], api_endpoint: ApiEndpoint[Any, R]) -> R:
"""Polls the Kling API endpoint until the task reaches a terminal state, then returns the response."""
return PollingOperation(
poll_endpoint=api_endpoint,
@@ -108,7 +108,7 @@ def poll_until_finished(auth_token: str, api_endpoint: ApiEndpoint[Any, R]) -> R
if response.data and response.data.task_status
else None
),
-        auth_token=auth_token,
+        auth_kwargs=auth_kwargs,
).execute()
@@ -418,16 +418,19 @@ class KlingTextToVideoNode(KlingNodeBase):
},
),
},
"hidden": {"auth_token": "AUTH_TOKEN_COMFY_ORG"},
"hidden": {
"auth_token": "AUTH_TOKEN_COMFY_ORG",
"comfy_api_key": "API_KEY_COMFY_ORG",
},
}
RETURN_TYPES = ("VIDEO", "STRING", "STRING")
RETURN_NAMES = ("VIDEO", "video_id", "duration")
DESCRIPTION = "Kling Text to Video Node"
-    def get_response(self, task_id: str, auth_token: str) -> KlingText2VideoResponse:
+    def get_response(self, task_id: str, auth_kwargs: dict[str,str]) -> KlingText2VideoResponse:
return poll_until_finished(
-            auth_token,
+            auth_kwargs,
ApiEndpoint(
path=f"{PATH_TEXT_TO_VIDEO}/{task_id}",
method=HttpMethod.GET,
@@ -446,7 +449,7 @@ class KlingTextToVideoNode(KlingNodeBase):
camera_control: Optional[KlingCameraControl] = None,
model_name: Optional[str] = None,
duration: Optional[str] = None,
-        auth_token: Optional[str] = None,
+        **kwargs,
) -> tuple[VideoFromFile, str, str]:
validate_prompts(prompt, negative_prompt, MAX_PROMPT_LENGTH_T2V)
if model_name is None:
@@ -468,14 +471,14 @@ class KlingTextToVideoNode(KlingNodeBase):
aspect_ratio=KlingVideoGenAspectRatio(aspect_ratio),
camera_control=camera_control,
),
-            auth_token=auth_token,
+            auth_kwargs=kwargs,
)
task_creation_response = initial_operation.execute()
validate_task_creation_response(task_creation_response)
task_id = task_creation_response.data.task_id
-        final_response = self.get_response(task_id, auth_token)
+        final_response = self.get_response(task_id, auth_kwargs=kwargs)
validate_video_result_response(final_response)
video = get_video_from_response(final_response)
@@ -522,7 +525,10 @@ class KlingCameraControlT2VNode(KlingTextToVideoNode):
},
),
},
"hidden": {"auth_token": "AUTH_TOKEN_COMFY_ORG"},
"hidden": {
"auth_token": "AUTH_TOKEN_COMFY_ORG",
"comfy_api_key": "API_KEY_COMFY_ORG",
},
}
DESCRIPTION = "Transform text into cinematic videos with professional camera movements that simulate real-world cinematography. Control virtual camera actions including zoom, rotation, pan, tilt, and first-person view, while maintaining focus on your original text."
@@ -534,7 +540,7 @@ class KlingCameraControlT2VNode(KlingTextToVideoNode):
cfg_scale: float,
aspect_ratio: str,
camera_control: Optional[KlingCameraControl] = None,
-        auth_token: Optional[str] = None,
+        **kwargs,
):
return super().api_call(
model_name=KlingVideoGenModelName.kling_v1,
@@ -545,7 +551,7 @@ class KlingCameraControlT2VNode(KlingTextToVideoNode):
prompt=prompt,
negative_prompt=negative_prompt,
camera_control=camera_control,
-            auth_token=auth_token,
+            **kwargs,
)
@@ -604,16 +610,19 @@ class KlingImage2VideoNode(KlingNodeBase):
enum_type=KlingVideoGenDuration,
),
},
"hidden": {"auth_token": "AUTH_TOKEN_COMFY_ORG"},
"hidden": {
"auth_token": "AUTH_TOKEN_COMFY_ORG",
"comfy_api_key": "API_KEY_COMFY_ORG",
},
}
RETURN_TYPES = ("VIDEO", "STRING", "STRING")
RETURN_NAMES = ("VIDEO", "video_id", "duration")
DESCRIPTION = "Kling Image to Video Node"
-    def get_response(self, task_id: str, auth_token: str) -> KlingImage2VideoResponse:
+    def get_response(self, task_id: str, auth_kwargs: dict[str,str]) -> KlingImage2VideoResponse:
return poll_until_finished(
-            auth_token,
+            auth_kwargs,
ApiEndpoint(
path=f"{PATH_IMAGE_TO_VIDEO}/{task_id}",
method=HttpMethod.GET,
@@ -634,7 +643,7 @@ class KlingImage2VideoNode(KlingNodeBase):
duration: str,
camera_control: Optional[KlingCameraControl] = None,
end_frame: Optional[torch.Tensor] = None,
-        auth_token: Optional[str] = None,
+        **kwargs,
) -> tuple[VideoFromFile]:
validate_prompts(prompt, negative_prompt, MAX_PROMPT_LENGTH_I2V)
validate_input_image(start_frame)
@@ -666,14 +675,14 @@ class KlingImage2VideoNode(KlingNodeBase):
duration=KlingVideoGenDuration(duration),
camera_control=camera_control,
),
-            auth_token=auth_token,
+            auth_kwargs=kwargs,
)
task_creation_response = initial_operation.execute()
validate_task_creation_response(task_creation_response)
task_id = task_creation_response.data.task_id
-        final_response = self.get_response(task_id, auth_token)
+        final_response = self.get_response(task_id, auth_kwargs=kwargs)
validate_video_result_response(final_response)
video = get_video_from_response(final_response)
@@ -723,7 +732,10 @@ class KlingCameraControlI2VNode(KlingImage2VideoNode):
},
),
},
"hidden": {"auth_token": "AUTH_TOKEN_COMFY_ORG"},
"hidden": {
"auth_token": "AUTH_TOKEN_COMFY_ORG",
"comfy_api_key": "API_KEY_COMFY_ORG",
},
}
DESCRIPTION = "Transform still images into cinematic videos with professional camera movements that simulate real-world cinematography. Control virtual camera actions including zoom, rotation, pan, tilt, and first-person view, while maintaining focus on your original image."
@@ -736,7 +748,7 @@ class KlingCameraControlI2VNode(KlingImage2VideoNode):
cfg_scale: float,
aspect_ratio: str,
camera_control: KlingCameraControl,
-        auth_token: Optional[str] = None,
+        **kwargs,
):
return super().api_call(
model_name=KlingVideoGenModelName.kling_v1_5,
@@ -748,7 +760,7 @@ class KlingCameraControlI2VNode(KlingImage2VideoNode):
prompt=prompt,
negative_prompt=negative_prompt,
camera_control=camera_control,
-            auth_token=auth_token,
+            **kwargs,
)
@@ -816,7 +828,10 @@ class KlingStartEndFrameNode(KlingImage2VideoNode):
},
),
},
"hidden": {"auth_token": "AUTH_TOKEN_COMFY_ORG"},
"hidden": {
"auth_token": "AUTH_TOKEN_COMFY_ORG",
"comfy_api_key": "API_KEY_COMFY_ORG",
},
}
DESCRIPTION = "Generate a video sequence that transitions between your provided start and end images. The node creates all frames in between, producing a smooth transformation from the first frame to the last."
@@ -830,7 +845,7 @@ class KlingStartEndFrameNode(KlingImage2VideoNode):
cfg_scale: float,
aspect_ratio: str,
mode: str,
-        auth_token: Optional[str] = None,
+        **kwargs,
):
mode, duration, model_name = KlingStartEndFrameNode.get_mode_string_mapping()[
mode
@@ -845,7 +860,7 @@ class KlingStartEndFrameNode(KlingImage2VideoNode):
aspect_ratio=aspect_ratio,
duration=duration,
end_frame=end_frame,
-            auth_token=auth_token,
+            **kwargs,
)
@@ -875,16 +890,19 @@ class KlingVideoExtendNode(KlingNodeBase):
IO.STRING, KlingVideoExtendRequest, "video_id", forceInput=True
),
},
"hidden": {"auth_token": "AUTH_TOKEN_COMFY_ORG"},
"hidden": {
"auth_token": "AUTH_TOKEN_COMFY_ORG",
"comfy_api_key": "API_KEY_COMFY_ORG",
},
}
RETURN_TYPES = ("VIDEO", "STRING", "STRING")
RETURN_NAMES = ("VIDEO", "video_id", "duration")
DESCRIPTION = "Kling Video Extend Node. Extend videos made by other Kling nodes. The video_id is created by using other Kling Nodes."
-    def get_response(self, task_id: str, auth_token: str) -> KlingVideoExtendResponse:
+    def get_response(self, task_id: str, auth_kwargs: dict[str,str]) -> KlingVideoExtendResponse:
return poll_until_finished(
-            auth_token,
+            auth_kwargs,
ApiEndpoint(
path=f"{PATH_VIDEO_EXTEND}/{task_id}",
method=HttpMethod.GET,
@@ -899,7 +917,7 @@ class KlingVideoExtendNode(KlingNodeBase):
negative_prompt: str,
cfg_scale: float,
video_id: str,
-        auth_token: Optional[str] = None,
+        **kwargs,
) -> tuple[VideoFromFile, str, str]:
validate_prompts(prompt, negative_prompt, MAX_PROMPT_LENGTH_T2V)
initial_operation = SynchronousOperation(
@@ -915,14 +933,14 @@ class KlingVideoExtendNode(KlingNodeBase):
cfg_scale=cfg_scale,
video_id=video_id,
),
-            auth_token=auth_token,
+            auth_kwargs=kwargs,
)
task_creation_response = initial_operation.execute()
validate_task_creation_response(task_creation_response)
task_id = task_creation_response.data.task_id
-        final_response = self.get_response(task_id, auth_token)
+        final_response = self.get_response(task_id, auth_kwargs=kwargs)
validate_video_result_response(final_response)
video = get_video_from_response(final_response)
@@ -935,9 +953,9 @@ class KlingVideoEffectsBase(KlingNodeBase):
RETURN_TYPES = ("VIDEO", "STRING", "STRING")
RETURN_NAMES = ("VIDEO", "video_id", "duration")
-    def get_response(self, task_id: str, auth_token: str) -> KlingVideoEffectsResponse:
+    def get_response(self, task_id: str, auth_kwargs: dict[str,str]) -> KlingVideoEffectsResponse:
return poll_until_finished(
-            auth_token,
+            auth_kwargs,
ApiEndpoint(
path=f"{PATH_VIDEO_EFFECTS}/{task_id}",
method=HttpMethod.GET,
@@ -955,7 +973,7 @@ class KlingVideoEffectsBase(KlingNodeBase):
image_1: torch.Tensor,
image_2: Optional[torch.Tensor] = None,
mode: Optional[KlingVideoGenMode] = None,
-        auth_token: Optional[str] = None,
+        **kwargs,
):
if dual_character:
request_input_field = KlingDualCharacterEffectInput(
@@ -985,14 +1003,14 @@ class KlingVideoEffectsBase(KlingNodeBase):
effect_scene=effect_scene,
input=request_input_field,
),
-            auth_token=auth_token,
+            auth_kwargs=kwargs,
)
task_creation_response = initial_operation.execute()
validate_task_creation_response(task_creation_response)
task_id = task_creation_response.data.task_id
-        final_response = self.get_response(task_id, auth_token)
+        final_response = self.get_response(task_id, auth_kwargs=kwargs)
validate_video_result_response(final_response)
video = get_video_from_response(final_response)
@@ -1033,7 +1051,10 @@ class KlingDualCharacterVideoEffectNode(KlingVideoEffectsBase):
enum_type=KlingVideoGenDuration,
),
},
"hidden": {"auth_token": "AUTH_TOKEN_COMFY_ORG"},
"hidden": {
"auth_token": "AUTH_TOKEN_COMFY_ORG",
"comfy_api_key": "API_KEY_COMFY_ORG",
},
}
DESCRIPTION = "Achieve different special effects when generating a video based on the effect_scene. First image will be positioned on left side, second on right side of the composite."
@@ -1048,7 +1069,7 @@ class KlingDualCharacterVideoEffectNode(KlingVideoEffectsBase):
model_name: KlingCharacterEffectModelName,
mode: KlingVideoGenMode,
duration: KlingVideoGenDuration,
-        auth_token: Optional[str] = None,
+        **kwargs,
):
video, _, duration = super().api_call(
dual_character=True,
@@ -1058,7 +1079,7 @@ class KlingDualCharacterVideoEffectNode(KlingVideoEffectsBase):
duration=duration,
image_1=image_left,
image_2=image_right,
-            auth_token=auth_token,
+            **kwargs,
)
return video, duration
@@ -1094,7 +1115,10 @@ class KlingSingleImageVideoEffectNode(KlingVideoEffectsBase):
enum_type=KlingVideoGenDuration,
),
},
"hidden": {"auth_token": "AUTH_TOKEN_COMFY_ORG"},
"hidden": {
"auth_token": "AUTH_TOKEN_COMFY_ORG",
"comfy_api_key": "API_KEY_COMFY_ORG",
},
}
DESCRIPTION = "Achieve different special effects when generating a video based on the effect_scene."
@@ -1105,7 +1129,7 @@ class KlingSingleImageVideoEffectNode(KlingVideoEffectsBase):
effect_scene: KlingSingleImageEffectsScene,
model_name: KlingSingleImageEffectModelName,
duration: KlingVideoGenDuration,
-        auth_token: Optional[str] = None,
+        **kwargs,
):
return super().api_call(
dual_character=False,
@@ -1113,7 +1137,7 @@ class KlingSingleImageVideoEffectNode(KlingVideoEffectsBase):
model_name=model_name,
duration=duration,
image_1=image,
-            auth_token=auth_token,
+            **kwargs,
)
@@ -1131,10 +1155,10 @@ class KlingLipSyncBase(KlingNodeBase):
f"Text is too long. Maximum length is {MAX_PROMPT_LENGTH_LIP_SYNC} characters."
)
-    def get_response(self, task_id: str, auth_token: str) -> KlingLipSyncResponse:
+    def get_response(self, task_id: str, auth_kwargs: dict[str,str]) -> KlingLipSyncResponse:
"""Polls the Kling API endpoint until the task reaches a terminal state."""
return poll_until_finished(
-            auth_token,
+            auth_kwargs,
ApiEndpoint(
path=f"{PATH_LIP_SYNC}/{task_id}",
method=HttpMethod.GET,
@@ -1152,18 +1176,18 @@ class KlingLipSyncBase(KlingNodeBase):
text: Optional[str] = None,
voice_speed: Optional[float] = None,
voice_id: Optional[str] = None,
-        auth_token: Optional[str] = None,
+        **kwargs,
) -> tuple[VideoFromFile, str, str]:
if text:
self.validate_text(text)
# Upload video to Comfy API and get download URL
-        video_url = upload_video_to_comfyapi(video, auth_token)
+        video_url = upload_video_to_comfyapi(video, auth_kwargs=kwargs)
logging.info("Uploaded video to Comfy API. URL: %s", video_url)
# Upload the audio file to Comfy API and get download URL
if audio:
-            audio_url = upload_audio_to_comfyapi(audio, auth_token)
+            audio_url = upload_audio_to_comfyapi(audio, auth_kwargs=kwargs)
logging.info("Uploaded audio to Comfy API. URL: %s", audio_url)
else:
audio_url = None
@@ -1187,14 +1211,14 @@ class KlingLipSyncBase(KlingNodeBase):
voice_id=voice_id,
),
),
-            auth_token=auth_token,
+            auth_kwargs=kwargs,
)
task_creation_response = initial_operation.execute()
validate_task_creation_response(task_creation_response)
task_id = task_creation_response.data.task_id
-        final_response = self.get_response(task_id, auth_token)
+        final_response = self.get_response(task_id, auth_kwargs=kwargs)
validate_video_result_response(final_response)
video = get_video_from_response(final_response)
@@ -1217,7 +1241,10 @@ class KlingLipSyncAudioToVideoNode(KlingLipSyncBase):
enum_type=KlingLipSyncVoiceLanguage,
),
},
"hidden": {"auth_token": "AUTH_TOKEN_COMFY_ORG"},
"hidden": {
"auth_token": "AUTH_TOKEN_COMFY_ORG",
"comfy_api_key": "API_KEY_COMFY_ORG",
},
}
DESCRIPTION = "Kling Lip Sync Audio to Video Node. Syncs mouth movements in a video file to the audio content of an audio file."
@@ -1227,14 +1254,14 @@ class KlingLipSyncAudioToVideoNode(KlingLipSyncBase):
video: VideoInput,
audio: AudioInput,
voice_language: str,
-        auth_token: Optional[str] = None,
+        **kwargs,
):
return super().api_call(
video=video,
audio=audio,
voice_language=voice_language,
mode="audio2video",
-            auth_token=auth_token,
+            **kwargs,
)
@@ -1323,7 +1350,10 @@ class KlingLipSyncTextToVideoNode(KlingLipSyncBase):
IO.FLOAT, KlingLipSyncInputObject, "voice_speed", slider=True
),
},
"hidden": {"auth_token": "AUTH_TOKEN_COMFY_ORG"},
"hidden": {
"auth_token": "AUTH_TOKEN_COMFY_ORG",
"comfy_api_key": "API_KEY_COMFY_ORG",
},
}
DESCRIPTION = "Kling Lip Sync Text to Video Node. Syncs mouth movements in a video file to a text prompt."
@@ -1334,7 +1364,7 @@ class KlingLipSyncTextToVideoNode(KlingLipSyncBase):
text: str,
voice: str,
voice_speed: float,
-        auth_token: Optional[str] = None,
+        **kwargs,
):
voice_id, voice_language = KlingLipSyncTextToVideoNode.get_voice_config()[voice]
return super().api_call(
@@ -1344,7 +1374,7 @@ class KlingLipSyncTextToVideoNode(KlingLipSyncBase):
voice_id=voice_id,
voice_speed=voice_speed,
mode="text2video",
-            auth_token=auth_token,
+            **kwargs,
)
@@ -1381,16 +1411,19 @@ class KlingVirtualTryOnNode(KlingImageGenerationBase):
enum_type=KlingVirtualTryOnModelName,
),
},
"hidden": {"auth_token": "AUTH_TOKEN_COMFY_ORG"},
"hidden": {
"auth_token": "AUTH_TOKEN_COMFY_ORG",
"comfy_api_key": "API_KEY_COMFY_ORG",
},
}
DESCRIPTION = "Kling Virtual Try On Node. Input a human image and a cloth image to try on the cloth on the human."
def get_response(
-        self, task_id: str, auth_token: Optional[str] = None
+        self, task_id: str, auth_kwargs: Optional[dict[str,str]] = None
) -> KlingVirtualTryOnResponse:
return poll_until_finished(
-            auth_token,
+            auth_kwargs,
ApiEndpoint(
path=f"{PATH_VIRTUAL_TRY_ON}/{task_id}",
method=HttpMethod.GET,
@@ -1404,7 +1437,7 @@ class KlingVirtualTryOnNode(KlingImageGenerationBase):
human_image: torch.Tensor,
cloth_image: torch.Tensor,
model_name: KlingVirtualTryOnModelName,
-        auth_token: Optional[str] = None,
+        **kwargs,
):
initial_operation = SynchronousOperation(
endpoint=ApiEndpoint(
@@ -1418,14 +1451,14 @@ class KlingVirtualTryOnNode(KlingImageGenerationBase):
cloth_image=tensor_to_base64_string(cloth_image),
model_name=model_name,
),
-            auth_token=auth_token,
+            auth_kwargs=kwargs,
)
task_creation_response = initial_operation.execute()
validate_task_creation_response(task_creation_response)
task_id = task_creation_response.data.task_id
-        final_response = self.get_response(task_id, auth_token)
+        final_response = self.get_response(task_id, auth_kwargs=kwargs)
validate_image_result_response(final_response)
images = get_images_from_response(final_response)
@@ -1493,16 +1526,19 @@ class KlingImageGenerationNode(KlingImageGenerationBase):
"optional": {
"image": (IO.IMAGE, {}),
},
"hidden": {"auth_token": "AUTH_TOKEN_COMFY_ORG"},
"hidden": {
"auth_token": "AUTH_TOKEN_COMFY_ORG",
"comfy_api_key": "API_KEY_COMFY_ORG",
},
}
DESCRIPTION = "Kling Image Generation Node. Generate an image from a text prompt with an optional reference image."
def get_response(
-        self, task_id: str, auth_token: Optional[str] = None
+        self, task_id: str, auth_kwargs: Optional[dict[str,str]] = None
) -> KlingImageGenerationsResponse:
return poll_until_finished(
-            auth_token,
+            auth_kwargs,
ApiEndpoint(
path=f"{PATH_IMAGE_GENERATIONS}/{task_id}",
method=HttpMethod.GET,
@@ -1522,7 +1558,7 @@ class KlingImageGenerationNode(KlingImageGenerationBase):
n: int,
aspect_ratio: KlingImageGenAspectRatio,
image: Optional[torch.Tensor] = None,
-        auth_token: Optional[str] = None,
+        **kwargs,
):
self.validate_prompt(prompt, negative_prompt)
@@ -1547,14 +1583,14 @@ class KlingImageGenerationNode(KlingImageGenerationBase):
n=n,
aspect_ratio=aspect_ratio,
),
-            auth_token=auth_token,
+            auth_kwargs=kwargs,
)
task_creation_response = initial_operation.execute()
validate_task_creation_response(task_creation_response)
task_id = task_creation_response.data.task_id
-        final_response = self.get_response(task_id, auth_token)
+        final_response = self.get_response(task_id, auth_kwargs=kwargs)
validate_image_result_response(final_response)
images = get_images_from_response(final_response)
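
Every hunk above is the same mechanical migration, presumably chosen over adding a second explicit Optional parameter so that future credential types only need a new hidden-input entry rather than another pass over every node signature. Abbreviated before/after (a sketch with bodies and arguments elided, not literal code from this commit):

# Before: a single session token threaded explicitly through each call.
def api_call(self, prompt: str, auth_token: Optional[str] = None):
    operation = SynchronousOperation(..., auth_token=auth_token)
    ...
    final_response = self.get_response(task_id, auth_token)

# After: hidden inputs (auth_token and/or comfy_api_key) arrive as keyword
# arguments and are forwarded untouched as one dict.
def api_call(self, prompt: str, **kwargs):
    operation = SynchronousOperation(..., auth_kwargs=kwargs)
    ...
    final_response = self.get_response(task_id, auth_kwargs=kwargs)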