mirror of
https://github.com/comfyanonymous/ComfyUI.git
synced 2025-08-02 15:04:50 +08:00
Add Moonvalley Marey V2V node with updated input validation (#9069)
* [moonvalley] Update V2V node to match API specification
  - Add exact resolution validation for supported resolutions (1920x1080, 1080x1920, 1152x1152, 1536x1152, 1152x1536)
  - Change frame count validation from divisible by 32 to divisible by 16
  - Add MP4 container format validation
  - Remove internal parameters (steps, guidance_scale) from V2V inference params
  - Update video duration handling to support only 5 seconds (auto-trim if longer)
  - Add motion_intensity parameter (0-100) for Motion Transfer control type
  - Add get_container_format() method to VideoInput classes
* update negative prompt
This commit is contained in:
@@ -2,6 +2,7 @@ from __future__ import annotations
|
|||||||
from abc import ABC, abstractmethod
|
from abc import ABC, abstractmethod
|
||||||
from typing import Optional, Union
|
from typing import Optional, Union
|
||||||
import io
|
import io
|
||||||
|
import av
|
||||||
from comfy_api.util import VideoContainer, VideoCodec, VideoComponents
|
from comfy_api.util import VideoContainer, VideoCodec, VideoComponents
|
||||||
|
|
||||||
class VideoInput(ABC):
|
class VideoInput(ABC):
|
||||||
@@ -70,3 +71,15 @@ class VideoInput(ABC):
|
|||||||
components = self.get_components()
|
components = self.get_components()
|
||||||
frame_count = components.images.shape[0]
|
frame_count = components.images.shape[0]
|
||||||
return float(frame_count / components.frame_rate)
|
return float(frame_count / components.frame_rate)
|
||||||
|
|
||||||
|
def get_container_format(self) -> str:
|
||||||
|
"""
|
||||||
|
Returns the container format of the video (e.g., 'mp4', 'mov', 'avi').
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
Container format as string
|
||||||
|
"""
|
||||||
|
# Default implementation - subclasses should override for better performance
|
||||||
|
source = self.get_stream_source()
|
||||||
|
with av.open(source, mode="r") as container:
|
||||||
|
return container.format.name
|
||||||
|
@@ -121,6 +121,18 @@ class VideoFromFile(VideoInput):
|
|||||||
|
|
||||||
raise ValueError(f"Could not determine duration for file '{self.__file}'")
|
raise ValueError(f"Could not determine duration for file '{self.__file}'")
|
||||||
|
|
||||||
|
def get_container_format(self) -> str:
|
||||||
|
"""
|
||||||
|
Returns the container format of the video (e.g., 'mp4', 'mov', 'avi').
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
Container format as string
|
||||||
|
"""
|
||||||
|
if isinstance(self.__file, io.BytesIO):
|
||||||
|
self.__file.seek(0)
|
||||||
|
with av.open(self.__file, mode='r') as container:
|
||||||
|
return container.format.name
|
||||||
|
|
||||||
def get_components_internal(self, container: InputContainer) -> VideoComponents:
|
def get_components_internal(self, container: InputContainer) -> VideoComponents:
|
||||||
# Get video frames
|
# Get video frames
|
||||||
frames = []
|
frames = []
|
||||||
|
@@ -5,7 +5,6 @@ import torch
|
|||||||
from comfy_api_nodes.util.validation_utils import (
|
from comfy_api_nodes.util.validation_utils import (
|
||||||
get_image_dimensions,
|
get_image_dimensions,
|
||||||
validate_image_dimensions,
|
validate_image_dimensions,
|
||||||
validate_video_dimensions,
|
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@@ -176,54 +175,76 @@ def validate_input_image(
|
|||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
def validate_input_video(
|
def validate_video_to_video_input(video: VideoInput) -> VideoInput:
|
||||||
video: VideoInput, num_frames_out: int, with_frame_conditioning: bool = False
|
"""
|
||||||
):
|
Validates and processes video input for Moonvalley Video-to-Video generation.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
video: Input video to validate
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
Validated and potentially trimmed video
|
||||||
|
|
||||||
|
Raises:
|
||||||
|
ValueError: If video doesn't meet requirements
|
||||||
|
MoonvalleyApiError: If video duration is too short
|
||||||
|
"""
|
||||||
|
width, height = _get_video_dimensions(video)
|
||||||
|
_validate_video_dimensions(width, height)
|
||||||
|
_validate_container_format(video)
|
||||||
|
|
||||||
|
return _validate_and_trim_duration(video)
|
||||||
|
|
||||||
|
|
||||||
|
def _get_video_dimensions(video: VideoInput) -> tuple[int, int]:
|
||||||
|
"""Extracts video dimensions with error handling."""
|
||||||
try:
|
try:
|
||||||
width, height = video.get_dimensions()
|
return video.get_dimensions()
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logging.error("Error getting dimensions of video: %s", e)
|
logging.error("Error getting dimensions of video: %s", e)
|
||||||
raise ValueError(f"Cannot get video dimensions: {e}") from e
|
raise ValueError(f"Cannot get video dimensions: {e}") from e
|
||||||
|
|
||||||
validate_input_media(width, height, with_frame_conditioning)
|
|
||||||
validate_video_dimensions(
|
|
||||||
video,
|
|
||||||
min_width=MIN_VID_WIDTH,
|
|
||||||
min_height=MIN_VID_HEIGHT,
|
|
||||||
max_width=MAX_VID_WIDTH,
|
|
||||||
max_height=MAX_VID_HEIGHT,
|
|
||||||
)
|
|
||||||
|
|
||||||
trimmed_video = validate_input_video_length(video, num_frames_out)
|
def _validate_video_dimensions(width: int, height: int) -> None:
|
||||||
return trimmed_video
|
"""Validates video dimensions meet Moonvalley V2V requirements."""
|
||||||
|
supported_resolutions = {
|
||||||
|
(1920, 1080), (1080, 1920), (1152, 1152),
|
||||||
|
(1536, 1152), (1152, 1536)
|
||||||
|
}
|
||||||
|
|
||||||
|
if (width, height) not in supported_resolutions:
|
||||||
|
supported_list = ', '.join([f'{w}x{h}' for w, h in sorted(supported_resolutions)])
|
||||||
|
raise ValueError(f"Resolution {width}x{height} not supported. Supported: {supported_list}")
|
||||||
|
|
||||||
|
|
||||||
def validate_input_video_length(video: VideoInput, num_frames: int):
|
def _validate_container_format(video: VideoInput) -> None:
|
||||||
|
"""Validates video container format is MP4."""
|
||||||
|
container_format = video.get_container_format()
|
||||||
|
if container_format not in ['mp4', 'mov,mp4,m4a,3gp,3g2,mj2']:
|
||||||
|
raise ValueError(f"Only MP4 container format supported. Got: {container_format}")
|
||||||
|
|
||||||
if video.get_duration() > 60:
|
|
||||||
raise MoonvalleyApiError(
|
|
||||||
"Input Video lenth should be less than 1min. Please trim."
|
|
||||||
)
|
|
||||||
|
|
||||||
if num_frames == 128:
|
def _validate_and_trim_duration(video: VideoInput) -> VideoInput:
|
||||||
if video.get_duration() < 5:
|
"""Validates video duration and trims to 5 seconds if needed."""
|
||||||
raise MoonvalleyApiError(
|
duration = video.get_duration()
|
||||||
"Input Video length is less than 5s. Please use a video longer than or equal to 5s."
|
_validate_minimum_duration(duration)
|
||||||
)
|
return _trim_if_too_long(video, duration)
|
||||||
if video.get_duration() > 5:
|
|
||||||
# trim video to 5s
|
|
||||||
video = trim_video(video, 5)
|
def _validate_minimum_duration(duration: float) -> None:
|
||||||
if num_frames == 256:
|
"""Ensures video is at least 5 seconds long."""
|
||||||
if video.get_duration() < 10:
|
if duration < 5:
|
||||||
raise MoonvalleyApiError(
|
raise MoonvalleyApiError("Input video must be at least 5 seconds long.")
|
||||||
"Input Video length is less than 10s. Please use a video longer than or equal to 10s."
|
|
||||||
)
|
|
||||||
if video.get_duration() > 10:
|
def _trim_if_too_long(video: VideoInput, duration: float) -> VideoInput:
|
||||||
# trim video to 10s
|
"""Trims video to 5 seconds if longer."""
|
||||||
video = trim_video(video, 10)
|
if duration > 5:
|
||||||
|
return trim_video(video, 5)
|
||||||
return video
|
return video
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
def trim_video(video: VideoInput, duration_sec: float) -> VideoInput:
|
def trim_video(video: VideoInput, duration_sec: float) -> VideoInput:
|
||||||
"""
|
"""
|
||||||
Returns a new VideoInput object trimmed from the beginning to the specified duration,
|
Returns a new VideoInput object trimmed from the beginning to the specified duration,
|
||||||
@@ -278,15 +299,13 @@ def trim_video(video: VideoInput, duration_sec: float) -> VideoInput:
|
|||||||
f"Added audio stream: {stream.sample_rate}Hz, {stream.channels} channels"
|
f"Added audio stream: {stream.sample_rate}Hz, {stream.channels} channels"
|
||||||
)
|
)
|
||||||
|
|
||||||
# Calculate target frame count that's divisible by 32
|
# Calculate target frame count that's divisible by 16
|
||||||
fps = input_container.streams.video[0].average_rate
|
fps = input_container.streams.video[0].average_rate
|
||||||
estimated_frames = int(duration_sec * fps)
|
estimated_frames = int(duration_sec * fps)
|
||||||
target_frames = (
|
target_frames = (estimated_frames // 16) * 16 # Round down to nearest multiple of 16
|
||||||
estimated_frames // 32
|
|
||||||
) * 32 # Round down to nearest multiple of 32
|
|
||||||
|
|
||||||
if target_frames == 0:
|
if target_frames == 0:
|
||||||
raise ValueError("Video too short: need at least 32 frames for Moonvalley")
|
raise ValueError("Video too short: need at least 16 frames for Moonvalley")
|
||||||
|
|
||||||
frame_count = 0
|
frame_count = 0
|
||||||
audio_frame_count = 0
|
audio_frame_count = 0
|
||||||
@@ -353,8 +372,8 @@ class BaseMoonvalleyVideoNode:
|
|||||||
"16:9 (1920 x 1080)": {"width": 1920, "height": 1080},
|
"16:9 (1920 x 1080)": {"width": 1920, "height": 1080},
|
||||||
"9:16 (1080 x 1920)": {"width": 1080, "height": 1920},
|
"9:16 (1080 x 1920)": {"width": 1080, "height": 1920},
|
||||||
"1:1 (1152 x 1152)": {"width": 1152, "height": 1152},
|
"1:1 (1152 x 1152)": {"width": 1152, "height": 1152},
|
||||||
"4:3 (1440 x 1080)": {"width": 1440, "height": 1080},
|
"4:3 (1536 x 1152)": {"width": 1536, "height": 1152},
|
||||||
"3:4 (1080 x 1440)": {"width": 1080, "height": 1440},
|
"3:4 (1152 x 1536)": {"width": 1152, "height": 1536},
|
||||||
"21:9 (2560 x 1080)": {"width": 2560, "height": 1080},
|
"21:9 (2560 x 1080)": {"width": 2560, "height": 1080},
|
||||||
}
|
}
|
||||||
if resolution in res_map:
|
if resolution in res_map:
|
||||||
@@ -494,7 +513,6 @@ class MoonvalleyImg2VideoNode(BaseMoonvalleyVideoNode):
|
|||||||
image = kwargs.get("image", None)
|
image = kwargs.get("image", None)
|
||||||
if image is None:
|
if image is None:
|
||||||
raise MoonvalleyApiError("image is required")
|
raise MoonvalleyApiError("image is required")
|
||||||
total_frames = get_total_frames_from_length()
|
|
||||||
|
|
||||||
validate_input_image(image, True)
|
validate_input_image(image, True)
|
||||||
validate_prompts(prompt, negative_prompt, MOONVALLEY_MAREY_MAX_PROMPT_LENGTH)
|
validate_prompts(prompt, negative_prompt, MOONVALLEY_MAREY_MAX_PROMPT_LENGTH)
|
||||||
@@ -505,7 +523,7 @@ class MoonvalleyImg2VideoNode(BaseMoonvalleyVideoNode):
|
|||||||
steps=kwargs.get("steps"),
|
steps=kwargs.get("steps"),
|
||||||
seed=kwargs.get("seed"),
|
seed=kwargs.get("seed"),
|
||||||
guidance_scale=kwargs.get("prompt_adherence"),
|
guidance_scale=kwargs.get("prompt_adherence"),
|
||||||
num_frames=total_frames,
|
num_frames=128,
|
||||||
width=width_height.get("width"),
|
width=width_height.get("width"),
|
||||||
height=width_height.get("height"),
|
height=width_height.get("height"),
|
||||||
use_negative_prompts=True,
|
use_negative_prompts=True,
|
||||||
@@ -549,39 +567,45 @@ class MoonvalleyVideo2VideoNode(BaseMoonvalleyVideoNode):
|
|||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def INPUT_TYPES(cls):
|
def INPUT_TYPES(cls):
|
||||||
input_types = super().INPUT_TYPES()
|
return {
|
||||||
for param in ["resolution", "image"]:
|
"required": {
|
||||||
if param in input_types["required"]:
|
"prompt": model_field_to_node_input(
|
||||||
del input_types["required"][param]
|
IO.STRING, MoonvalleyVideoToVideoRequest, "prompt_text",
|
||||||
if param in input_types["optional"]:
|
multiline=True
|
||||||
del input_types["optional"][param]
|
),
|
||||||
input_types["optional"] = {
|
"negative_prompt": model_field_to_node_input(
|
||||||
"video": (
|
IO.STRING,
|
||||||
IO.VIDEO,
|
MoonvalleyVideoToVideoInferenceParams,
|
||||||
{
|
"negative_prompt",
|
||||||
"default": "",
|
multiline=True,
|
||||||
"multiline": False,
|
default="low-poly, flat shader, bad rigging, stiff animation, uncanny eyes, low-quality textures, looping glitch, cheap effect, overbloom, bloom spam, default lighting, game asset, stiff face, ugly specular, AI artifacts"
|
||||||
"tooltip": "The reference video used to generate the output video. Input a 5s video for 128 frames and a 10s video for 256 frames. Longer videos will be trimmed automatically.",
|
),
|
||||||
},
|
"seed": model_field_to_node_input(IO.INT,MoonvalleyVideoToVideoInferenceParams, "seed", default=random.randint(0, 2**32 - 1), min=0, max=4294967295, step=1, display="number", tooltip="Random seed value", control_after_generate=True),
|
||||||
),
|
},
|
||||||
"control_type": (
|
"hidden": {
|
||||||
["Motion Transfer", "Pose Transfer"],
|
"auth_token": "AUTH_TOKEN_COMFY_ORG",
|
||||||
{"default": "Motion Transfer"},
|
"comfy_api_key": "API_KEY_COMFY_ORG",
|
||||||
),
|
"unique_id": "UNIQUE_ID",
|
||||||
"motion_intensity": (
|
},
|
||||||
"INT",
|
"optional": {
|
||||||
{
|
"video": (IO.VIDEO, {"default": "", "multiline": False, "tooltip": "The reference video used to generate the output video. Must be at least 5 seconds long. Videos longer than 5s will be automatically trimmed. Only MP4 format supported."}),
|
||||||
"default": 100,
|
"control_type": (
|
||||||
"step": 1,
|
["Motion Transfer", "Pose Transfer"],
|
||||||
"min": 0,
|
{"default": "Motion Transfer"},
|
||||||
"max": 100,
|
),
|
||||||
"tooltip": "Only used if control_type is 'Motion Transfer'",
|
"motion_intensity": (
|
||||||
},
|
"INT",
|
||||||
),
|
{
|
||||||
|
"default": 100,
|
||||||
|
"step": 1,
|
||||||
|
"min": 0,
|
||||||
|
"max": 100,
|
||||||
|
"tooltip": "Only used if control_type is 'Motion Transfer'",
|
||||||
|
},
|
||||||
|
)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return input_types
|
|
||||||
|
|
||||||
RETURN_TYPES = ("VIDEO",)
|
RETURN_TYPES = ("VIDEO",)
|
||||||
RETURN_NAMES = ("video",)
|
RETURN_NAMES = ("video",)
|
||||||
|
|
||||||
@@ -589,15 +613,13 @@ class MoonvalleyVideo2VideoNode(BaseMoonvalleyVideoNode):
|
|||||||
self, prompt, negative_prompt, unique_id: Optional[str] = None, **kwargs
|
self, prompt, negative_prompt, unique_id: Optional[str] = None, **kwargs
|
||||||
):
|
):
|
||||||
video = kwargs.get("video")
|
video = kwargs.get("video")
|
||||||
num_frames = get_total_frames_from_length()
|
|
||||||
|
|
||||||
if not video:
|
if not video:
|
||||||
raise MoonvalleyApiError("video is required")
|
raise MoonvalleyApiError("video is required")
|
||||||
|
|
||||||
"""Validate video input"""
|
|
||||||
video_url = ""
|
video_url = ""
|
||||||
if video:
|
if video:
|
||||||
validated_video = validate_input_video(video, num_frames, False)
|
validated_video = validate_video_to_video_input(video)
|
||||||
video_url = upload_video_to_comfyapi(validated_video, auth_kwargs=kwargs)
|
video_url = upload_video_to_comfyapi(validated_video, auth_kwargs=kwargs)
|
||||||
|
|
||||||
control_type = kwargs.get("control_type")
|
control_type = kwargs.get("control_type")
|
||||||
@@ -605,12 +627,16 @@ class MoonvalleyVideo2VideoNode(BaseMoonvalleyVideoNode):
|
|||||||
|
|
||||||
"""Validate prompts and inference input"""
|
"""Validate prompts and inference input"""
|
||||||
validate_prompts(prompt, negative_prompt)
|
validate_prompts(prompt, negative_prompt)
|
||||||
inference_params = MoonvalleyVideoToVideoInferenceParams(
|
|
||||||
|
# Only include motion_intensity for Motion Transfer
|
||||||
|
control_params = {}
|
||||||
|
if control_type == "Motion Transfer" and motion_intensity is not None:
|
||||||
|
control_params['motion_intensity'] = motion_intensity
|
||||||
|
|
||||||
|
inference_params=MoonvalleyVideoToVideoInferenceParams(
|
||||||
negative_prompt=negative_prompt,
|
negative_prompt=negative_prompt,
|
||||||
steps=kwargs.get("steps"),
|
|
||||||
seed=kwargs.get("seed"),
|
seed=kwargs.get("seed"),
|
||||||
guidance_scale=kwargs.get("prompt_adherence"),
|
control_params=control_params
|
||||||
control_params={"motion_intensity": motion_intensity},
|
|
||||||
)
|
)
|
||||||
|
|
||||||
control = self.parseControlParameter(control_type)
|
control = self.parseControlParameter(control_type)
|
||||||
@@ -667,17 +693,16 @@ class MoonvalleyTxt2VideoNode(BaseMoonvalleyVideoNode):
|
|||||||
):
|
):
|
||||||
validate_prompts(prompt, negative_prompt, MOONVALLEY_MAREY_MAX_PROMPT_LENGTH)
|
validate_prompts(prompt, negative_prompt, MOONVALLEY_MAREY_MAX_PROMPT_LENGTH)
|
||||||
width_height = self.parseWidthHeightFromRes(kwargs.get("resolution"))
|
width_height = self.parseWidthHeightFromRes(kwargs.get("resolution"))
|
||||||
num_frames = get_total_frames_from_length()
|
|
||||||
|
|
||||||
inference_params = MoonvalleyTextToVideoInferenceParams(
|
inference_params=MoonvalleyTextToVideoInferenceParams(
|
||||||
negative_prompt=negative_prompt,
|
negative_prompt=negative_prompt,
|
||||||
steps=kwargs.get("steps"),
|
steps=kwargs.get("steps"),
|
||||||
seed=kwargs.get("seed"),
|
seed=kwargs.get("seed"),
|
||||||
guidance_scale=kwargs.get("prompt_adherence"),
|
guidance_scale=kwargs.get("prompt_adherence"),
|
||||||
num_frames=num_frames,
|
num_frames=128,
|
||||||
width=width_height.get("width"),
|
width=width_height.get("width"),
|
||||||
height=width_height.get("height"),
|
height=width_height.get("height"),
|
||||||
)
|
)
|
||||||
request = MoonvalleyTextToVideoRequest(
|
request = MoonvalleyTextToVideoRequest(
|
||||||
prompt_text=prompt, inference_params=inference_params
|
prompt_text=prompt, inference_params=inference_params
|
||||||
)
|
)
|
||||||
@@ -707,22 +732,12 @@ class MoonvalleyTxt2VideoNode(BaseMoonvalleyVideoNode):
|
|||||||
NODE_CLASS_MAPPINGS = {
|
NODE_CLASS_MAPPINGS = {
|
||||||
"MoonvalleyImg2VideoNode": MoonvalleyImg2VideoNode,
|
"MoonvalleyImg2VideoNode": MoonvalleyImg2VideoNode,
|
||||||
"MoonvalleyTxt2VideoNode": MoonvalleyTxt2VideoNode,
|
"MoonvalleyTxt2VideoNode": MoonvalleyTxt2VideoNode,
|
||||||
# "MoonvalleyVideo2VideoNode": MoonvalleyVideo2VideoNode,
|
"MoonvalleyVideo2VideoNode": MoonvalleyVideo2VideoNode,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
NODE_DISPLAY_NAME_MAPPINGS = {
|
NODE_DISPLAY_NAME_MAPPINGS = {
|
||||||
"MoonvalleyImg2VideoNode": "Moonvalley Marey Image to Video",
|
"MoonvalleyImg2VideoNode": "Moonvalley Marey Image to Video",
|
||||||
"MoonvalleyTxt2VideoNode": "Moonvalley Marey Text to Video",
|
"MoonvalleyTxt2VideoNode": "Moonvalley Marey Text to Video",
|
||||||
# "MoonvalleyVideo2VideoNode": "Moonvalley Marey Video to Video",
|
"MoonvalleyVideo2VideoNode": "Moonvalley Marey Video to Video",
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
def get_total_frames_from_length(length="5s"):
|
|
||||||
# if length == '5s':
|
|
||||||
# return 128
|
|
||||||
# elif length == '10s':
|
|
||||||
# return 256
|
|
||||||
return 128
|
|
||||||
# else:
|
|
||||||
# raise MoonvalleyApiError("length is required")
|
|
||||||
|
Reference in New Issue
Block a user