diff --git a/.github/workflows/update-api-stubs.yml b/.github/workflows/update-api-stubs.yml
new file mode 100644
index 00000000000..2ae99b67378
--- /dev/null
+++ b/.github/workflows/update-api-stubs.yml
@@ -0,0 +1,47 @@
+name: Generate Pydantic Stubs from api.comfy.org
+
+on:
+  schedule:
+    - cron: '0 0 * * 1'
+  workflow_dispatch:
+
+jobs:
+  generate-models:
+    runs-on: ubuntu-latest
+
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@v4
+
+      - name: Set up Python
+        uses: actions/setup-python@v4
+        with:
+          python-version: '3.10'
+
+      - name: Install dependencies
+        run: |
+          python -m pip install --upgrade pip
+          pip install 'datamodel-code-generator[http]'
+
+      - name: Generate API models
+        run: |
+          datamodel-codegen --use-subclass-enum --url https://api.comfy.org/openapi --output comfy_api_nodes/apis --output-model-type pydantic_v2.BaseModel
+
+      - name: Check for changes
+        id: git-check
+        run: |
+          git diff --exit-code comfy_api_nodes/apis || echo "changes=true" >> $GITHUB_OUTPUT
+
+      - name: Create Pull Request
+        if: steps.git-check.outputs.changes == 'true'
+        uses: peter-evans/create-pull-request@v5
+        with:
+          commit-message: 'chore: update API models from OpenAPI spec'
+          title: 'Update API models from api.comfy.org'
+          body: |
+            This PR updates the API models based on the latest api.comfy.org OpenAPI specification.
+
+            Generated automatically by a GitHub workflow.
+          branch: update-api-stubs
+          delete-branch: true
+          base: main
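For contributors who want to refresh the stubs without waiting for the weekly cron, the generation step can be reproduced locally. A minimal sketch, assuming `datamodel-code-generator[http]` is already installed, mirroring the exact flags used in the workflow above:

```python
# Hypothetical local reproduction of the workflow's "Generate API models" step.
import subprocess

subprocess.run(
    [
        "datamodel-codegen",
        "--use-subclass-enum",
        "--url", "https://api.comfy.org/openapi",
        "--output", "comfy_api_nodes/apis",
        "--output-model-type", "pydantic_v2.BaseModel",
    ],
    check=True,  # raise if generation fails, as the CI step would
)
```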
diff --git a/comfy_api_nodes/apis/PixverseController.py b/comfy_api_nodes/apis/PixverseController.py
new file mode 100644
index 00000000000..29a3ab33bf9
--- /dev/null
+++ b/comfy_api_nodes/apis/PixverseController.py
@@ -0,0 +1,17 @@
+# generated by datamodel-codegen:
+#   filename:  https://api.comfy.org/openapi
+#   timestamp: 2025-04-23T15:56:33+00:00
+
+from __future__ import annotations
+
+from typing import Optional
+
+from pydantic import BaseModel
+
+from . import PixverseDto
+
+
+class ResponseData(BaseModel):
+    ErrCode: Optional[int] = None
+    ErrMsg: Optional[str] = None
+    Resp: Optional[PixverseDto.V2OpenAPII2VResp] = None
diff --git a/comfy_api_nodes/apis/PixverseDto.py b/comfy_api_nodes/apis/PixverseDto.py
new file mode 100644
index 00000000000..39951221440
--- /dev/null
+++ b/comfy_api_nodes/apis/PixverseDto.py
@@ -0,0 +1,57 @@
+# generated by datamodel-codegen:
+#   filename:  https://api.comfy.org/openapi
+#   timestamp: 2025-04-23T15:56:33+00:00
+
+from __future__ import annotations
+
+from typing import Optional
+
+from pydantic import BaseModel, Field, constr
+
+
+class V2OpenAPII2VResp(BaseModel):
+    video_id: Optional[int] = Field(None, description='Video_id')
+
+
+class V2OpenAPIT2VReq(BaseModel):
+    aspect_ratio: str = Field(
+        ..., description='Aspect ratio (16:9, 4:3, 1:1, 3:4, 9:16)', examples=['16:9']
+    )
+    duration: int = Field(
+        ...,
+        description='Video duration (5, 8 seconds, --model=v3.5 only allows 5,8; --quality=1080p does not support 8s)',
+        examples=[5],
+    )
+    model: str = Field(
+        ..., description='Model version (only supports v3.5)', examples=['v3.5']
+    )
+    motion_mode: Optional[str] = Field(
+        'normal',
+        description='Motion mode (normal, fast, --fast only available when duration=5; --quality=1080p does not support fast)',
+        examples=['normal'],
+    )
+    negative_prompt: Optional[constr(max_length=2048)] = Field(
+        None, description='Negative prompt\n'
+    )
+    prompt: constr(max_length=2048) = Field(..., description='Prompt')
+    quality: str = Field(
+        ...,
+        description='Video quality ("360p"(Turbo model), "540p", "720p", "1080p")',
+        examples=['540p'],
+    )
+    seed: Optional[int] = Field(None, description='Random seed, range: 0 - 2147483647')
+    style: Optional[str] = Field(
+        None,
+        description='Style (effective when model=v3.5, "anime", "3d_animation", "clay", "comic", "cyberpunk") Do not include style parameter unless needed',
+        examples=['anime'],
+    )
+    template_id: Optional[int] = Field(
+        None,
+        description='Template ID (template_id must be activated before use)',
+        examples=[302325299692608],
+    )
+    water_mark: Optional[bool] = Field(
+        False,
+        description='Watermark (true: add watermark, false: no watermark)',
+        examples=[False],
+    )
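A minimal sketch of how these generated models behave at runtime, assuming pydantic v2 and the package layout added in this PR; the `constr(max_length=2048)` on `prompt` rejects oversized input before any network call is made:

```python
from pydantic import ValidationError

from comfy_api_nodes.apis.PixverseDto import V2OpenAPIT2VReq

# Required fields only; optional fields may be omitted and default to None.
req = V2OpenAPIT2VReq(
    aspect_ratio="16:9",
    duration=5,
    model="v3.5",
    prompt="A rocket launch at dawn",
    quality="540p",
)
print(req.model_dump(exclude_none=True))

try:
    V2OpenAPIT2VReq(
        aspect_ratio="16:9", duration=5, model="v3.5",
        prompt="x" * 3000, quality="540p",  # exceeds max_length=2048
    )
except ValidationError as exc:
    print(exc.errors()[0]["type"])  # "string_too_long"
```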
diff --git a/comfy_api_nodes/apis/__init__.py b/comfy_api_nodes/apis/__init__.py
new file mode 100644
index 00000000000..e7ea9b33283
--- /dev/null
+++ b/comfy_api_nodes/apis/__init__.py
@@ -0,0 +1,422 @@
+# generated by datamodel-codegen:
+#   filename:  https://api.comfy.org/openapi
+#   timestamp: 2025-04-23T15:56:33+00:00
+
+from __future__ import annotations
+
+from datetime import datetime
+from enum import Enum
+from typing import Any, Dict, List, Optional
+
+from pydantic import AnyUrl, BaseModel, Field, confloat, conint
+
+
+class Customer(BaseModel):
+    createdAt: Optional[datetime] = Field(
+        None, description='The date and time the user was created'
+    )
+    email: Optional[str] = Field(None, description='The email address for this user')
+    id: str = Field(..., description='The firebase UID of the user')
+    name: Optional[str] = Field(None, description='The name for this user')
+    updatedAt: Optional[datetime] = Field(
+        None, description='The date and time the user was last updated'
+    )
+
+
+class Error(BaseModel):
+    details: Optional[List[str]] = Field(
+        None,
+        description='Optional detailed information about the error or hints for resolving it.',
+    )
+    message: Optional[str] = Field(
+        None, description='A clear and concise description of the error.'
+    )
+
+
+class ErrorResponse(BaseModel):
+    error: str
+    message: str
+
+
+class ImageRequest(BaseModel):
+    aspect_ratio: Optional[str] = Field(
+        None,
+        description="Optional. The aspect ratio (e.g., 'ASPECT_16_9', 'ASPECT_1_1'). Cannot be used with resolution. Defaults to 'ASPECT_1_1' if unspecified.",
+    )
+    color_palette: Optional[Dict[str, Any]] = Field(
+        None, description='Optional. Color palette object. Only for V_2, V_2_TURBO.'
+    )
+    magic_prompt_option: Optional[str] = Field(
+        None, description="Optional. MagicPrompt usage ('AUTO', 'ON', 'OFF')."
+    )
+    model: str = Field(..., description="The model used (e.g., 'V_2', 'V_2A_TURBO')")
+    negative_prompt: Optional[str] = Field(
+        None,
+        description='Optional. Description of what to exclude. Only for V_1, V_1_TURBO, V_2, V_2_TURBO.',
+    )
+    num_images: Optional[conint(ge=1, le=8)] = Field(
+        1, description='Optional. Number of images to generate (1-8). Defaults to 1.'
+    )
+    prompt: str = Field(
+        ..., description='Required. The prompt to use to generate the image.'
+    )
+    resolution: Optional[str] = Field(
+        None,
+        description="Optional. Resolution (e.g., 'RESOLUTION_1024_1024'). Only for model V_2. Cannot be used with aspect_ratio.",
+    )
+    seed: Optional[conint(ge=0, le=2147483647)] = Field(
+        None, description='Optional. A number between 0 and 2147483647.'
+    )
+    style_type: Optional[str] = Field(
+        None,
+        description="Optional. Style type ('AUTO', 'GENERAL', 'REALISTIC', 'DESIGN', 'RENDER_3D', 'ANIME'). Only for models V_2 and above.",
+    )
+
+
+class Datum(BaseModel):
+    is_image_safe: Optional[bool] = Field(
+        None, description='Indicates whether the image is considered safe.'
+    )
+    prompt: Optional[str] = Field(
+        None, description='The prompt used to generate this image.'
+    )
+    resolution: Optional[str] = Field(
+        None, description="The resolution of the generated image (e.g., '1024x1024')."
+    )
+    seed: Optional[int] = Field(
+        None, description='The seed value used for this generation.'
+    )
+    style_type: Optional[str] = Field(
+        None,
+        description="The style type used for generation (e.g., 'REALISTIC', 'ANIME').",
+    )
+    url: Optional[str] = Field(None, description='URL to the generated image.')
+
+
+class Code(Enum):
+    int_1100 = 1100
+    int_1101 = 1101
+    int_1102 = 1102
+    int_1103 = 1103
+
+
+class Code1(Enum):
+    int_1000 = 1000
+    int_1001 = 1001
+    int_1002 = 1002
+    int_1003 = 1003
+    int_1004 = 1004
+
+
+class AspectRatio(str, Enum):
+    field_16_9 = '16:9'
+    field_9_16 = '9:16'
+    field_1_1 = '1:1'
+
+
+class Config(BaseModel):
+    horizontal: Optional[confloat(ge=-10.0, le=10.0)] = None
+    pan: Optional[confloat(ge=-10.0, le=10.0)] = None
+    roll: Optional[confloat(ge=-10.0, le=10.0)] = None
+    tilt: Optional[confloat(ge=-10.0, le=10.0)] = None
+    vertical: Optional[confloat(ge=-10.0, le=10.0)] = None
+    zoom: Optional[confloat(ge=-10.0, le=10.0)] = None
+
+
+class Type(str, Enum):
+    simple = 'simple'
+    down_back = 'down_back'
+    forward_up = 'forward_up'
+    right_turn_forward = 'right_turn_forward'
+    left_turn_forward = 'left_turn_forward'
+
+
+class CameraControl(BaseModel):
+    config: Optional[Config] = None
+    type: Optional[Type] = Field(None, description='Predefined camera movements type')
+
+
+class Duration(str, Enum):
+    field_5 = '5'
+    field_10 = '10'
+
+
+class Mode(str, Enum):
+    std = 'std'
+    pro = 'pro'
+
+
+class TaskInfo(BaseModel):
+    external_task_id: Optional[str] = None
+
+
+class Video(BaseModel):
+    duration: Optional[str] = Field(None, description='Total video duration')
+    id: Optional[str] = Field(None, description='Generated video ID')
+    url: Optional[AnyUrl] = Field(None, description='URL for generated video')
+
+
+class TaskResult(BaseModel):
+    videos: Optional[List[Video]] = None
+
+
+class TaskStatus(str, Enum):
+    submitted = 'submitted'
+    processing = 'processing'
+    succeed = 'succeed'
+    failed = 'failed'
+
+
+class Data(BaseModel):
+    created_at: Optional[int] = Field(None, description='Task creation time')
+    task_id: Optional[str] = Field(None, description='Task ID')
+    task_info: Optional[TaskInfo] = None
+    task_result: Optional[TaskResult] = None
+    task_status: Optional[TaskStatus] = None
+    updated_at: Optional[int] = Field(None, description='Task update time')
+
+
+class AspectRatio1(str, Enum):
+    field_16_9 = '16:9'
+    field_9_16 = '9:16'
+    field_1_1 = '1:1'
+    field_4_3 = '4:3'
+    field_3_4 = '3:4'
+    field_3_2 = '3:2'
+    field_2_3 = '2:3'
+    field_21_9 = '21:9'
+
+
+class ImageReference(str, Enum):
+    subject = 'subject'
+    face = 'face'
+
+
+class Image(BaseModel):
+    index: Optional[int] = Field(None, description='Image Number (0-9)')
+    url: Optional[AnyUrl] = Field(None, description='URL for generated image')
+
+
+class TaskResult1(BaseModel):
+    images: Optional[List[Image]] = None
+
+
+class Data1(BaseModel):
+    created_at: Optional[int] = Field(None, description='Task creation time')
+    task_id: Optional[str] = Field(None, description='Task ID')
+    task_result: Optional[TaskResult1] = None
+    task_status: Optional[TaskStatus] = None
+    task_status_msg: Optional[str] = Field(None, description='Task status information')
+    updated_at: Optional[int] = Field(None, description='Task update time')
+
+
+class AspectRatio2(str, Enum):
+    field_16_9 = '16:9'
+    field_9_16 = '9:16'
+    field_1_1 = '1:1'
+
+
+class CameraControl1(BaseModel):
+    config: Optional[Config] = None
+    type: Optional[Type] = Field(None, description='Predefined camera movements type')
+
+
+class ModelName2(str, Enum):
+    kling_v1 = 'kling-v1'
+    kling_v1_6 = 'kling-v1-6'
+
+
+class TaskResult2(BaseModel):
+    videos: Optional[List[Video]] = None
+
+
+class Data2(BaseModel):
+    created_at: Optional[int] = Field(None, description='Task creation time')
+    task_id: Optional[str] = Field(None, description='Task ID')
+    task_info: Optional[TaskInfo] = None
+    task_result: Optional[TaskResult2] = None
+    task_status: Optional[TaskStatus] = None
+    updated_at: Optional[int] = Field(None, description='Task update time')
+
+
+class Code2(Enum):
+    int_1200 = 1200
+    int_1201 = 1201
+    int_1202 = 1202
+    int_1203 = 1203
+
+
+class ResourcePackType(str, Enum):
+    decreasing_total = 'decreasing_total'
+    constant_period = 'constant_period'
+
+
+class Status(str, Enum):
+    toBeOnline = 'toBeOnline'
+    online = 'online'
+    expired = 'expired'
+    runOut = 'runOut'
+
+
+class ResourcePackSubscribeInfo(BaseModel):
+    effective_time: Optional[int] = Field(
+        None, description='Effective time, Unix timestamp in ms'
+    )
+    invalid_time: Optional[int] = Field(
+        None, description='Expiration time, Unix timestamp in ms'
+    )
+    purchase_time: Optional[int] = Field(
+        None, description='Purchase time, Unix timestamp in ms'
+    )
+    remaining_quantity: Optional[float] = Field(
+        None, description='Remaining quantity (updated with a 12-hour delay)'
+    )
+    resource_pack_id: Optional[str] = Field(None, description='Resource package ID')
+    resource_pack_name: Optional[str] = Field(None, description='Resource package name')
+    resource_pack_type: Optional[ResourcePackType] = Field(
+        None,
+        description='Resource package type (decreasing_total=decreasing total, constant_period=constant periodicity)',
+    )
+    status: Optional[Status] = Field(None, description='Resource Package Status')
+    total_quantity: Optional[float] = Field(None, description='Total quantity')
+
+
+class Background(str, Enum):
+    transparent = 'transparent'
+    opaque = 'opaque'
+
+
+class Moderation(str, Enum):
+    low = 'low'
+    auto = 'auto'
+
+
+class OutputFormat(str, Enum):
+    png = 'png'
+    webp = 'webp'
+    jpeg = 'jpeg'
+
+
+class Quality(str, Enum):
+    low = 'low'
+    medium = 'medium'
+    high = 'high'
+
+
+class OpenAIImageEditRequest(BaseModel):
+    background: Optional[str] = Field(
+        None, description='Background transparency', examples=['opaque']
+    )
+    model: str = Field(
+        ..., description='The model to use for image editing', examples=['gpt-image-1']
+    )
+    moderation: Optional[Moderation] = Field(
+        None, description='Content moderation setting', examples=['auto']
+    )
+    n: Optional[int] = Field(
+        None, description='The number of images to generate', examples=[1]
+    )
+    output_compression: Optional[int] = Field(
+        None, description='Compression level for JPEG or WebP (0-100)', examples=[100]
+    )
+    output_format: Optional[OutputFormat] = Field(
+        None, description='Format of the output image', examples=['png']
+    )
+    prompt: str = Field(
+        ...,
+        description='A text description of the desired edit',
+        examples=['Give the rocketship rainbow coloring'],
+    )
+    quality: Optional[str] = Field(
+        None, description='The quality of the edited image', examples=['low']
+    )
+    size: Optional[str] = Field(
+        None, description='Size of the output image', examples=['1024x1024']
+    )
+    user: Optional[str] = Field(
+        None,
+        description='A unique identifier for end-user monitoring',
+        examples=['user-1234'],
+    )
+
+
+class Quality1(str, Enum):
+    low = 'low'
+    medium = 'medium'
+    high = 'high'
+    standard = 'standard'
+    hd = 'hd'
+
+
+class ResponseFormat(str, Enum):
+    url = 'url'
+    b64_json = 'b64_json'
+
+
+class Style(str, Enum):
+    vivid = 'vivid'
+    natural = 'natural'
+
+
+class OpenAIImageGenerationRequest(BaseModel):
+    background: Optional[Background] = Field(
+        None, description='Background transparency', examples=['opaque']
+    )
+    model: Optional[str] = Field(
+        None, description='The model to use for image generation', examples=['dall-e-3']
+    )
+    moderation: Optional[Moderation] = Field(
+        None, description='Content moderation setting', examples=['auto']
+    )
+    n: Optional[int] = Field(
+        None,
+        description='The number of images to generate (1-10). Only 1 supported for dall-e-3.',
+        examples=[1],
+    )
+    output_compression: Optional[int] = Field(
+        None, description='Compression level for JPEG or WebP (0-100)', examples=[100]
+    )
+    output_format: Optional[OutputFormat] = Field(
+        None, description='Format of the output image', examples=['png']
+    )
+    prompt: str = Field(
+        ...,
+        description='A text description of the desired image',
+        examples=['Draw a rocket in front of a blackhole in deep space'],
+    )
+    quality: Optional[Quality1] = Field(
+        None, description='The quality of the generated image', examples=['high']
+    )
+    response_format: Optional[ResponseFormat] = Field(
+        None, description='Response format of image data', examples=['b64_json']
+    )
+    size: Optional[str] = Field(
+        None,
+        description='Size of the image (e.g., 1024x1024, 1536x1024, auto)',
+        examples=['1024x1536'],
+    )
+    style: Optional[Style] = Field(
+        None, description='Style of the image (only for dall-e-3)', examples=['vivid']
+    )
+    user: Optional[str] = Field(
+        None,
+        description='A unique identifier for end-user monitoring',
+        examples=['user-1234'],
+    )
+
+
+class Datum1(BaseModel):
+    b64_json: Optional[str] = Field(None, description='Base64 encoded image data')
+    revised_prompt: Optional[str] = Field(None, description='Revised prompt')
+    url: Optional[str] = Field(None, description='URL of the image')
+
+
+class OpenAIImageGenerationResponse(BaseModel):
+    data: Optional[List[Datum1]] = None
+
+
+class User(BaseModel):
+    email: Optional[str] = Field(None, description='The email address for this user.')
+    id: Optional[str] = Field(None, description='The unique id for this user.')
+    isAdmin: Optional[bool] = Field(
+        None, description='Indicates if the user has admin privileges.'
+    )
+    isApproved: Optional[bool] = Field(
+        None, description='Indicates if the user is approved.'
+    )
+    name: Optional[str] = Field(None, description='The name for this user.')
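Because these are plain pydantic v2 models, request bodies can be serialized and round-tripped without hand-written JSON. A small sketch (field values are illustrative only; string inputs are coerced to the generated enums):

```python
from comfy_api_nodes.apis import OpenAIImageGenerationRequest

payload = OpenAIImageGenerationRequest(
    model="dall-e-3",
    prompt="Draw a rocket in front of a blackhole in deep space",
    quality="hd",    # coerced to the Quality1 enum
    style="vivid",   # coerced to the Style enum
    size="1024x1024",
)
body = payload.model_dump_json(exclude_none=True)  # wire format for the proxy
assert OpenAIImageGenerationRequest.model_validate_json(body) == payload
```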
diff --git a/comfy_api_nodes/apis/client.py b/comfy_api_nodes/apis/client.py
index cd81d5a1d35..9bc3d76d51d 100644
--- a/comfy_api_nodes/apis/client.py
+++ b/comfy_api_nodes/apis/client.py
@@ -226,7 +226,7 @@ def request(
     def check_auth_token(self, auth_token):
         """Verify that an auth token is present."""
         if auth_token is None:
-            raise Exception("Please login first to use this node.")
+            raise Exception("Unauthorized: Please login first to use this node.")
         return auth_token
diff --git a/comfy_api_nodes/nodes_api.py b/comfy_api_nodes/nodes_api.py
new file mode 100644
index 00000000000..92f4a0c87b9
--- /dev/null
+++ b/comfy_api_nodes/nodes_api.py
@@ -0,0 +1,425 @@
+import base64
+import io
+import math
+from inspect import cleandoc
+
+import numpy as np
+import requests
+import torch
+from PIL import Image
+
+from comfy.utils import common_upscale
+from comfy.comfy_types.node_typing import IO, ComfyNodeABC, InputTypeDict
+from comfy_api_nodes.apis import (
+    OpenAIImageGenerationRequest,
+    OpenAIImageEditRequest,
+    OpenAIImageGenerationResponse,
+)
+from comfy_api_nodes.apis.client import ApiEndpoint, HttpMethod, SynchronousOperation
+
+
+def downscale_input(image):
+    samples = image.movedim(-1, 1)
+    # Downscale input images to roughly the same size as the outputs.
+    total = int(1536 * 1024)
+    scale_by = math.sqrt(total / (samples.shape[3] * samples.shape[2]))
+    if scale_by >= 1:
+        return image
+    width = round(samples.shape[3] * scale_by)
+    height = round(samples.shape[2] * scale_by)
+
+    s = common_upscale(samples, width, height, "lanczos", "disabled")
+    s = s.movedim(1, -1)
+    return s
+
+
+def validate_and_cast_response(response):
+    # Validate the parsed API response
+    data = response.data
+    if not data or len(data) == 0:
+        raise Exception("No images returned from API endpoint")
+
+    # Get base64 or URL image data
+    image_url = data[0].url
+    b64_data = data[0].b64_json
+    if not image_url and not b64_data:
+        raise Exception("No image was generated in the response")
+
+    if b64_data:
+        img_data = base64.b64decode(b64_data)
+        img = Image.open(io.BytesIO(img_data))
+    elif image_url:
+        img_response = requests.get(image_url)
+        if img_response.status_code != 200:
+            raise Exception("Failed to download the image")
+        img = Image.open(io.BytesIO(img_response.content))
+
+    img = img.convert("RGB")  # Ensure RGB format
+
+    # Convert to numpy array, normalize to float32 between 0 and 1
+    img_array = np.array(img).astype(np.float32) / 255.0
+
+    # Convert to torch tensor and add batch dimension
+    return torch.from_numpy(img_array)[None,]
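A quick smoke test for `downscale_input`, assuming a ComfyUI environment where `comfy.utils` is importable. Inputs use the ComfyUI IMAGE layout `[B, H, W, C]`; anything whose pixel count already fits within 1536x1024 passes through untouched:

```python
import torch

from comfy_api_nodes.nodes_api import downscale_input

big = torch.rand(1, 2048, 2048, 3)
print(downscale_input(big).shape)   # torch.Size([1, 1254, 1254, 3]): sqrt(1536*1024 / 2048^2) ~ 0.61
small = torch.rand(1, 512, 512, 3)
assert downscale_input(small).shape == small.shape  # scale_by >= 1, returned unchanged
```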
+
+
+class OpenAIDalle2(ComfyNodeABC):
+    """
+    Generates images synchronously via OpenAI's DALL·E 2 endpoint.
+
+    Uses the proxy at /proxy/openai/images/generations. Returned URLs are short-lived,
+    so download or cache results if you need to keep them.
+    """
+
+    def __init__(self):
+        pass
+
+    @classmethod
+    def INPUT_TYPES(cls) -> InputTypeDict:
+        return {
+            "required": {
+                "prompt": (IO.STRING, {
+                    "multiline": True,
+                    "default": "",
+                    "tooltip": "Text prompt for DALL·E",
+                }),
+            },
+            "optional": {
+                "seed": (IO.INT, {
+                    "default": 0,
+                    "min": 0,
+                    "max": 2**31 - 1,
+                    "step": 1,
+                    "display": "number",
+                    "tooltip": "not implemented yet in backend",
+                }),
+                "size": (IO.COMBO, {
+                    "options": ["256x256", "512x512", "1024x1024"],
+                    "default": "1024x1024",
+                    "tooltip": "Image size",
+                }),
+                "n": (IO.INT, {
+                    "default": 1,
+                    "min": 1,
+                    "max": 8,
+                    "step": 1,
+                    "display": "number",
+                    "tooltip": "How many images to generate",
+                }),
+                "image": (IO.IMAGE, {
+                    "default": None,
+                    "tooltip": "Optional reference image for image editing.",
+                }),
+                "mask": (IO.MASK, {
+                    "default": None,
+                    "tooltip": "Optional mask for inpainting (white areas will be replaced)",
+                }),
+            },
+            "hidden": {
+                "auth_token": "AUTH_TOKEN_COMFY_ORG",
+            },
+        }
+
+    RETURN_TYPES = (IO.IMAGE,)
+    FUNCTION = "api_call"
+    CATEGORY = "api node"
+    DESCRIPTION = cleandoc(__doc__ or "")
+    API_NODE = True
+
+    def api_call(self, prompt, seed=0, image=None, mask=None, n=1, size="1024x1024", auth_token=None):
+        model = "dall-e-2"
+        path = "/proxy/openai/images/generations"
+        request_class = OpenAIImageGenerationRequest
+        img_binary = None
+
+        if image is not None and mask is not None:
+            path = "/proxy/openai/images/edits"
+            request_class = OpenAIImageEditRequest
+
+            input_tensor = image.squeeze().cpu()
+            height, width, channels = input_tensor.shape
+            rgba_tensor = torch.ones(height, width, 4, device="cpu")
+            rgba_tensor[:, :, :channels] = input_tensor
+
+            if mask.shape[1:] != image.shape[1:-1]:
+                raise Exception("Mask and Image must be the same size")
+            rgba_tensor[:, :, 3] = 1 - mask.squeeze().cpu()
+
+            rgba_tensor = downscale_input(rgba_tensor.unsqueeze(0)).squeeze()
+
+            image_np = (rgba_tensor.numpy() * 255).astype(np.uint8)
+            img = Image.fromarray(image_np)
+            img_byte_arr = io.BytesIO()
+            img.save(img_byte_arr, format='PNG')
+            img_byte_arr.seek(0)
+            img_binary = img_byte_arr
+            img_binary.name = "image.png"
+        elif image is not None or mask is not None:
+            raise Exception("Dall-E 2 image editing requires an image AND a mask")
+
+        # Build the operation
+        operation = SynchronousOperation(
+            endpoint=ApiEndpoint(
+                path=path,
+                method=HttpMethod.POST,
+                request_model=request_class,
+                response_model=OpenAIImageGenerationResponse,
+            ),
+            request=request_class(
+                model=model,
+                prompt=prompt,
+                n=n,
+                size=size,
+                seed=seed,
+            ),
+            files={
+                "image": img_binary,
+            } if img_binary else None,
+            auth_token=auth_token,
+        )
+
+        response = operation.execute()
+
+        img_tensor = validate_and_cast_response(response)
+        return (img_tensor,)
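Note the alpha convention in the editing branch above: ComfyUI masks use 1.0 for pixels to repaint, while the OpenAI edits endpoint treats transparent pixels as the editable region, hence the `1 - mask` inversion. A toy illustration:

```python
import torch

mask = torch.tensor([[0.0, 1.0]])  # right-hand pixel marked for inpainting
alpha = 1 - mask                   # becomes the PNG alpha channel
print(alpha)                       # tensor([[1., 0.]]): repaint region is transparent
```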
+
+
+class OpenAIDalle3(ComfyNodeABC):
+    """
+    Generates images synchronously via OpenAI's DALL·E 3 endpoint.
+
+    Uses the proxy at /proxy/openai/images/generations. Returned URLs are short-lived,
+    so download or cache results if you need to keep them.
+    """
+
+    def __init__(self):
+        pass
+
+    @classmethod
+    def INPUT_TYPES(cls) -> InputTypeDict:
+        return {
+            "required": {
+                "prompt": (IO.STRING, {
+                    "multiline": True,
+                    "default": "",
+                    "tooltip": "Text prompt for DALL·E",
+                }),
+            },
+            "optional": {
+                "seed": (IO.INT, {
+                    "default": 0,
+                    "min": 0,
+                    "max": 2**31 - 1,
+                    "step": 1,
+                    "display": "number",
+                    "tooltip": "not implemented yet in backend",
+                }),
+                "quality": (IO.COMBO, {
+                    "options": ["standard", "hd"],
+                    "default": "standard",
+                    "tooltip": "Image quality",
+                }),
+                "style": (IO.COMBO, {
+                    "options": ["natural", "vivid"],
+                    "default": "natural",
+                    "tooltip": "Vivid causes the model to lean towards generating hyper-real and dramatic images. Natural causes the model to produce more natural, less hyper-real looking images.",
+                }),
+                "size": (IO.COMBO, {
+                    "options": ["1024x1024", "1024x1792", "1792x1024"],
+                    "default": "1024x1024",
+                    "tooltip": "Image size",
+                }),
+            },
+            "hidden": {
+                "auth_token": "AUTH_TOKEN_COMFY_ORG",
+            },
+        }
+
+    RETURN_TYPES = (IO.IMAGE,)
+    FUNCTION = "api_call"
+    CATEGORY = "api node"
+    DESCRIPTION = cleandoc(__doc__ or "")
+    API_NODE = True
+
+    def api_call(self, prompt, seed=0, style="natural", quality="standard", size="1024x1024", auth_token=None):
+        model = "dall-e-3"
+
+        # Build the operation
+        operation = SynchronousOperation(
+            endpoint=ApiEndpoint(
+                path="/proxy/openai/images/generations",
+                method=HttpMethod.POST,
+                request_model=OpenAIImageGenerationRequest,
+                response_model=OpenAIImageGenerationResponse,
+            ),
+            request=OpenAIImageGenerationRequest(
+                model=model,
+                prompt=prompt,
+                quality=quality,
+                size=size,
+                style=style,
+                seed=seed,
+            ),
+            auth_token=auth_token,
+        )
+
+        response = operation.execute()
+
+        img_tensor = validate_and_cast_response(response)
+        return (img_tensor,)
+
+
+class OpenAIGPTImage1(ComfyNodeABC):
+    """
+    Generates images synchronously via OpenAI's GPT Image 1 endpoint.
+
+    Uses the proxy at /proxy/openai/images/generations. Returned URLs are short-lived,
+    so download or cache results if you need to keep them.
+    """
+
+    def __init__(self):
+        pass
+
+    @classmethod
+    def INPUT_TYPES(cls) -> InputTypeDict:
+        return {
+            "required": {
+                "prompt": (IO.STRING, {
+                    "multiline": True,
+                    "default": "",
+                    "tooltip": "Text prompt for GPT Image 1",
+                }),
+            },
+            "optional": {
+                "seed": (IO.INT, {
+                    "default": 0,
+                    "min": 0,
+                    "max": 2**31 - 1,
+                    "step": 1,
+                    "display": "number",
+                    "tooltip": "not implemented yet in backend",
+                }),
+                "quality": (IO.COMBO, {
+                    "options": ["low", "medium", "high"],
+                    "default": "low",
+                    "tooltip": "Image quality, affects cost and generation time.",
+                }),
+                "background": (IO.COMBO, {
+                    "options": ["opaque", "transparent"],
+                    "default": "opaque",
+                    "tooltip": "Return image with or without background",
+                }),
+                "size": (IO.COMBO, {
+                    "options": ["auto", "1024x1024", "1024x1536", "1536x1024"],
+                    "default": "auto",
+                    "tooltip": "Image size",
+                }),
+                "n": (IO.INT, {
+                    "default": 1,
+                    "min": 1,
+                    "max": 8,
+                    "step": 1,
+                    "display": "number",
+                    "tooltip": "How many images to generate",
+                }),
+                "image": (IO.IMAGE, {
+                    "default": None,
+                    "tooltip": "Optional reference image for image editing.",
+                }),
+                "mask": (IO.MASK, {
+                    "default": None,
+                    "tooltip": "Optional mask for inpainting (white areas will be replaced)",
+                }),
+            },
+            "hidden": {
+                "auth_token": "AUTH_TOKEN_COMFY_ORG",
+            },
+        }
+
+    RETURN_TYPES = (IO.IMAGE,)
+    FUNCTION = "api_call"
+    CATEGORY = "api node"
+    DESCRIPTION = cleandoc(__doc__ or "")
+    API_NODE = True
+
+    def api_call(self, prompt, seed=0, quality="low", background="opaque", image=None, mask=None, n=1, size="1024x1024", auth_token=None):
+        model = "gpt-image-1"
+        path = "/proxy/openai/images/generations"
+        request_class = OpenAIImageGenerationRequest
+        img_binary = None
+        mask_binary = None
+
+        if image is not None:
+            path = "/proxy/openai/images/edits"
+            request_class = OpenAIImageEditRequest
+
+            scaled_image = downscale_input(image).squeeze()
+
+            image_np = (scaled_image.numpy() * 255).astype(np.uint8)
+            img = Image.fromarray(image_np)
+            img_byte_arr = io.BytesIO()
+            img.save(img_byte_arr, format='PNG')
+            img_byte_arr.seek(0)
+            img_binary = img_byte_arr
+            img_binary.name = "image.png"
+
+        if mask is not None:
+            if image is None:
+                raise Exception("Cannot use a mask without an input image")
+            if mask.shape[1:] != image.shape[1:-1]:
+                raise Exception("Mask and Image must be the same size")
+            batch, height, width = mask.shape
+            rgba_mask = torch.zeros(height, width, 4, device="cpu")
+            rgba_mask[:, :, 3] = 1 - mask.squeeze().cpu()
+
+            scaled_mask = downscale_input(rgba_mask.unsqueeze(0)).squeeze()
+
+            mask_np = (scaled_mask.numpy() * 255).astype(np.uint8)
+            mask_img = Image.fromarray(mask_np)
+            mask_img_byte_arr = io.BytesIO()
+            mask_img.save(mask_img_byte_arr, format='PNG')
+            mask_img_byte_arr.seek(0)
+            mask_binary = mask_img_byte_arr
+            mask_binary.name = "mask.png"
+
+        files = {}
+        if img_binary:
+            files["image"] = img_binary
+        if mask_binary:
+            files["mask"] = mask_binary
+
+        # Build the operation
+        operation = SynchronousOperation(
+            endpoint=ApiEndpoint(
+                path=path,
+                method=HttpMethod.POST,
+                request_model=request_class,
+                response_model=OpenAIImageGenerationResponse,
+            ),
+            request=request_class(
+                model=model,
+                prompt=prompt,
+                quality=quality,
+                background=background,
+                n=n,
+                seed=seed,
+                size=size,
+            ),
+            files=files if files else None,
+            auth_token=auth_token,
+        )
+
+        response = operation.execute()
+
+        img_tensor = validate_and_cast_response(response)
+        return (img_tensor,)
+
+
+# A dictionary that contains all nodes you want to export with their names
+# NOTE: names should be globally unique
+NODE_CLASS_MAPPINGS = {
+    "OpenAIDalle2": OpenAIDalle2,
+    "OpenAIDalle3": OpenAIDalle3,
+    "OpenAIGPTImage1": OpenAIGPTImage1,
+}
+
+# A dictionary that contains the friendly/human-readable titles for the nodes
+NODE_DISPLAY_NAME_MAPPINGS = {
+    "OpenAIDalle2": "OpenAI DALL·E 2",
+    "OpenAIDalle3": "OpenAI DALL·E 3",
+    "OpenAIGPTImage1": "OpenAI GPT Image 1",
+}
diff --git a/nodes.py b/nodes.py
index b1ab62aade5..73a62d93035 100644
--- a/nodes.py
+++ b/nodes.py
@@ -2260,11 +2260,20 @@ def init_builtin_extra_nodes():
         "nodes_fresca.py",
     ]
 
+    api_nodes_dir = os.path.join(os.path.dirname(os.path.realpath(__file__)), "comfy_api_nodes")
+    api_nodes_files = [
+        "nodes_api.py",
+    ]
+
     import_failed = []
     for node_file in extras_files:
         if not load_custom_node(os.path.join(extras_dir, node_file), module_parent="comfy_extras"):
             import_failed.append(node_file)
 
+    for node_file in api_nodes_files:
+        if not load_custom_node(os.path.join(api_nodes_dir, node_file), module_parent="comfy_api_nodes"):
+            import_failed.append(node_file)
+
     return import_failed
diff --git a/requirements.txt b/requirements.txt
index f8ad908ca93..2ac2412617c 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,4 +1,4 @@
-comfyui-frontend-package==1.16.9
+comfyui-frontend-package==1.17.9
 comfyui-workflow-templates==0.1.3
 torch
 torchsde
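A hypothetical post-install check, assuming a full ComfyUI checkout where `nodes.py` and its dependencies import cleanly, and assuming `load_custom_node` merges each module's mappings into the global registry; the three new nodes should then appear after `init_builtin_extra_nodes()` runs:

```python
import nodes

failed = nodes.init_builtin_extra_nodes()
assert "nodes_api.py" not in failed, "API nodes failed to import"
for name in ("OpenAIDalle2", "OpenAIDalle3", "OpenAIGPTImage1"):
    assert name in nodes.NODE_CLASS_MAPPINGS, f"{name} was not registered"
```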